diff --git a/rtl/udp_checksum_gen.v b/rtl/udp_checksum_gen.v
index e6e474cac..adc8decb9 100644
--- a/rtl/udp_checksum_gen.v
+++ b/rtl/udp_checksum_gen.v
@@ -146,7 +146,8 @@ localparam [2:0]
     STATE_SUM_HEADER_2 = 3'd2,
     STATE_SUM_HEADER_3 = 3'd3,
     STATE_SUM_PAYLOAD = 3'd4,
-    STATE_FINISH_SUM = 3'd5;
+    STATE_FINISH_SUM = 3'd5,
+    STATE_COMMIT_HDR = 3'd6; // entered right after hdr_valid_next is asserted; always returns to STATE_IDLE
 
 reg [2:0] state_reg = STATE_IDLE, state_next;
 
@@ -507,6 +508,9 @@ always @* begin
             checksum_part = checksum_reg[15:0] + checksum_reg[31:16];
             checksum_next = ~(checksum_part[15:0] + checksum_part[16]);
             hdr_valid_next = 1;
+            state_next = STATE_COMMIT_HDR; // detour through the commit state rather than going straight to idle
+        end
+        STATE_COMMIT_HDR: begin // NOTE(review): presumably a one-cycle gap so the header FIFO status settles before idle - confirm
             state_next = STATE_IDLE;
         end
     endcase
diff --git a/rtl/udp_checksum_gen_64.v b/rtl/udp_checksum_gen_64.v
index 9cc7b94c5..2e0f633d4 100644
--- a/rtl/udp_checksum_gen_64.v
+++ b/rtl/udp_checksum_gen_64.v
@@ -147,7 +147,8 @@ localparam [2:0]
     STATE_SUM_HEADER = 3'd1,
     STATE_SUM_PAYLOAD = 3'd2,
     STATE_FINISH_SUM_1 = 3'd3,
-    STATE_FINISH_SUM_2 = 3'd4;
+    STATE_FINISH_SUM_2 = 3'd4,
+    STATE_COMMIT_HDR = 3'd5; // entered right after hdr_valid_next is asserted; always returns to STATE_IDLE
 
 reg [2:0] state_reg = STATE_IDLE, state_next;
 
@@ -541,6 +542,9 @@ always @* begin
             checksum_part = checksum_reg[15:0] + checksum_reg[31:16];
             checksum_next = ~(checksum_part[15:0] + checksum_part[16]);
             hdr_valid_next = 1;
+            state_next = STATE_COMMIT_HDR; // detour through the commit state rather than going straight to idle
+        end
+        STATE_COMMIT_HDR: begin // NOTE(review): presumably a one-cycle gap so the header FIFO status settles before idle - confirm
             state_next = STATE_IDLE;
         end
     endcase
diff --git a/tb/test_udp_checksum_gen.py b/tb/test_udp_checksum_gen.py
index 38bb28228..c17bf12e1 100755
--- a/tb/test_udp_checksum_gen.py
+++ b/tb/test_udp_checksum_gen.py
@@ -484,6 +484,54 @@ def check():
 
                 yield delay(100)
 
+        yield clk.posedge
+        payload_len = 8
+        print("test 4: header fifo overflow with back-to-back packets, length %d" % payload_len)
+        current_test.next = 4
+
+        test_frame1 = udp_ep.UDPFrame()
+        test_frame1.eth_dest_mac = 0xDAD1D2D3D4D5
+        test_frame1.eth_src_mac = 0x5A5152535455
+        test_frame1.eth_type = 0x0800
+        test_frame1.ip_version = 4
+        test_frame1.ip_ihl = 5
+        test_frame1.ip_length = None
+        test_frame1.ip_identification = 0
+        test_frame1.ip_flags = 2
+        test_frame1.ip_fragment_offset = 0
+        test_frame1.ip_ttl = 64
+        test_frame1.ip_protocol = 0x11
+        test_frame1.ip_header_checksum = None
+        test_frame1.ip_source_ip = 0xc0a80164
+        test_frame1.ip_dest_ip = 0xc0a80165
+        test_frame1.udp_source_port = 1
+        test_frame1.udp_dest_port = 2
+        test_frame1.udp_length = None
+        test_frame1.udp_checksum = None
+        test_frame1.payload = bytearray(range(payload_len))
+        test_frame1.build()
+
+        cnt = 32  # number of identical frames queued back to back
+        sink_pause.next = True  # stall the sink before any frame is sent
+        yield clk.posedge
+
+        for _ in range(cnt):
+            source.send(test_frame1)
+
+        for _ in range(8*cnt):  # keep the sink stalled for 8 clock edges per queued frame
+            yield clk.posedge
+
+        sink_pause.next = False
+        yield clk.posedge
+
+        for _ in range(cnt):  # each received frame must match the built reference frame
+            yield sink.wait()
+            rx_frame = sink.recv()
+            assert rx_frame == test_frame1
+
+        assert sink.empty()  # exactly cnt frames, nothing extra
+        yield delay(100)
+
         raise StopSimulation
 
     return instances()
diff --git a/tb/test_udp_checksum_gen_64.py b/tb/test_udp_checksum_gen_64.py
index c9a70cf36..b92b749b0 100755
--- a/tb/test_udp_checksum_gen_64.py
+++ b/tb/test_udp_checksum_gen_64.py
@@ -489,6 +489,55 @@ def check():
                 assert sink.empty()
 
                 yield delay(100)
+
+        yield clk.posedge
+        payload_len = 8
+        print("test 4: header fifo overflow with back-to-back packets, length %d" % payload_len)
+        current_test.next = 4
+
+        test_frame1 = udp_ep.UDPFrame()
+        test_frame1.eth_dest_mac = 0xDAD1D2D3D4D5
+        test_frame1.eth_src_mac = 0x5A5152535455
+        test_frame1.eth_type = 0x0800
+        test_frame1.ip_version = 4
+        test_frame1.ip_ihl = 5
+        test_frame1.ip_length = None
+        test_frame1.ip_identification = 0
+        test_frame1.ip_flags = 2
+        test_frame1.ip_fragment_offset = 0
+        test_frame1.ip_ttl = 64
+        test_frame1.ip_protocol = 0x11
+        test_frame1.ip_header_checksum = None
+        test_frame1.ip_source_ip = 0xc0a80164
+        test_frame1.ip_dest_ip = 0xc0a80165
+        test_frame1.udp_source_port = 1
+        test_frame1.udp_dest_port = 2
+        test_frame1.udp_length = None
+        test_frame1.udp_checksum = None
+        test_frame1.payload = bytearray(range(payload_len))
+        test_frame1.build()
+
+        cnt = 16  # number of identical frames queued back to back
+        sink_pause.next = True  # stall the sink before any frame is sent
+        yield clk.posedge
+
+        for _ in range(cnt):
+            source.send(test_frame1)
+
+        for _ in range(8*cnt):  # keep the sink stalled for 8 clock edges per queued frame
+            yield clk.posedge
+
+        sink_pause.next = False
+        yield clk.posedge
+
+        for _ in range(cnt):  # each received frame must match the built reference frame
+            yield sink.wait()
+            rx_frame = sink.recv()
+            assert rx_frame == test_frame1
+
+        assert sink.empty()  # exactly cnt frames, nothing extra
+
+        yield delay(100)
 
         raise StopSimulation