Python Replicas Stage (#NA2) ACK from tester is not sent immediately

HI

I’m stuck on Python Redis Replication Stage #NA2 (Replication - WAIT with multiple commands)

ACK from tester is not sent immediately, so it will respond :0\r\n when the time has run out

Here is my code

about WAIT

     elif 'wait' in commands[0]:
        num_replicas = int(commands[1])
        timeout = int(commands[2]) / 1000  # Convert to seconds
        start_time = time.time()

        # Check how many replicas are up-to-date
        all_synced_replicas = [replica for replica, replica_offset in replicas_offsets.items() if offset == replica_offset]

        print(f"{time.strftime('%Y-%m-%d %H:%M:%S')} - {len(all_synced_replicas)} replicas are up-to-date.")
        if len(all_synced_replicas) >= num_replicas:
            response = redis_protocol_encoder('int', len(replicas)).encode()
            return client_socket.send(f":{len(all_synced_replicas)}\r\n".encode())
            # return respond(client_socket, response)

        # If not enough, send GETACK to check all replicas
        updated_to_date_replicas_number = 0
        for index, replica in enumerate(replicas):
            try:
                command = ["REPLCONF", "GETACK", "*"]
                replica_message = redis_protocol_encoder('array', command).encode()
                respond(replica, replica_message)
            except Exception as e:
                print(f"Failed to send REPLCONF GETACK to replica: {e}")

        while time.time() - start_time <= timeout:
            if updated_to_date_replicas_number >= num_replicas:
                break

        response = redis_protocol_encoder('int', updated_to_date_replicas_number).encode()
        print(f"{time.strftime('%Y-%m-%d %H:%M:%S')} - response: {response}")
        client_socket.send(f":{updated_to_date_replicas_number}\r\n".encode())
        # After sending ["REPLCONF", "GETACK", "*"] to all replicas, updated the offset
        offset += len(replica_message)

about ACK (when ACK is return)

    # Connect by Replica
    elif 'replconf' in commands[0]:

        if role == 'master' and 'ack' in commands[1]:
            replica_offset = int(commands[2])
            print(f"offset: {offset}, replica_offset: {replica_offset}")
            replicas_offsets[client_socket] = replica_offset
            if replica_offset == offset: # if offset 
                updated_to_date_replicas_number += 1
            return 

Here are my logs:

remote: [tester::#NA2] client: $ redis-cli WAIT 1 500
remote: [tester::#NA2] client: Sent bytes: "*3\r\n$4\r\nWAIT\r\n$1\r\n1\r\n$3\r\n500\r\n"
remote: [tester::#NA2] Testing Replica : 1
remote: [tester::#NA2] replica-1: Expecting "SET foo 123" to be propagated
remote: [tester::#NA2] replica-1: Received bytes: "*3\r\n$3\r\nSET\r\n$3\r\nfoo\r\n$3\r\n123\r\n"
remote: [tester::#NA2] replica-1: Received RESP array: ["SET", "foo", "123"]
remote: [tester::#NA2] Received ["SET", "foo", "123"]
remote: [tester::#NA2] replica-1: Expecting "REPLCONF GETACK *" from Master
remote: [your_program] 2024-06-07 16:59:58 - Sync data to slave - 2 / total 3
remote: [your_program] 2024-06-07 16:59:58 - Redis server sent command: b'*3\r\n$3\r\nSET\r\n$3\r\nfoo\r\n$3\r\n123\r\n'
remote: [your_program] 2024-06-07 16:59:58 - Sync data to slave - 3 / total 3
remote: [your_program] 2024-06-07 16:59:58 - Redis server sent command: b'*3\r\n$3\r\nSET\r\n$3\r\nfoo\r\n$3\r\n123\r\n'
remote: [your_program] 2024-06-07 16:59:58 - Redis server sent command: b'+OK\r\n'
remote: [your_program] 2024-06-07 16:59:58 - Redis server get a request: b'*3\r\n$4\r\nWAIT\r\n$1\r\n1\r\n$3\r\n500\r\n'
remote: [your_program] 2024-06-07 16:59:58 - Redis server get commands: [['WAIT', '1', '500']]
remote: [your_program] 2024-06-07 16:59:58 - Command is running: ['wait', '1', '500']
remote: [your_program] 2024-06-07 16:59:58 - Redis server sent command: b'*3\r\n$8\r\nREPLCONF\r\n$6\r\nGETACK\r\n$1\r\n*\r\n'
remote: [your_program] 2024-06-07 16:59:58 - Redis server sent command: b'*3\r\n$8\r\nREPLCONF\r\n$6\r\nGETACK\r\n$1\r\n*\r\n'
remote: [your_program] 2024-06-07 16:59:58 - Redis server sent command: b'*3\r\n$8\r\nREPLCONF\r\n$6\r\nGETACK\r\n$1\r\n*\r\n'
remote: [tester::#NA2] replica-1: Received bytes: "*3\r\n$8\r\nREPLCONF\r\n$6\r\nGETACK\r\n$1\r\n*\r\n"
remote: [tester::#NA2] replica-1: Received RESP array: ["REPLCONF", "GETACK", "*"]
remote: [tester::#NA2] Received ["REPLCONF", "GETACK", "*"]
remote: [tester::#NA2] replica-1: Sending ACK to Master
remote: [tester::#NA2] replica-1: $ redis-cli REPLCONF ACK 31
remote: [tester::#NA2] replica-1: Sent bytes: "*3\r\n$8\r\nREPLCONF\r\n$3\r\nACK\r\n$2\r\n31\r\n"
remote: [tester::#NA2] Testing Replica : 2
remote: [tester::#NA2] replica-2: Expecting "SET foo 123" to be propagated
remote: [tester::#NA2] replica-2: Received bytes: "*3\r\n$3\r\nSET\r\n$3\r\nfoo\r\n$3\r\n123\r\n"
remote: [tester::#NA2] replica-2: Received RESP array: ["SET", "foo", "123"]
remote: [tester::#NA2] Received ["SET", "foo", "123"]
remote: [tester::#NA2] replica-2: Expecting "REPLCONF GETACK *" from Master
remote: [tester::#NA2] replica-2: Received bytes: "*3\r\n$8\r\nREPLCONF\r\n$6\r\nGETACK\r\n$1\r\n*\r\n"
remote: [tester::#NA2] replica-2: Received RESP array: ["REPLCONF", "GETACK", "*"]
remote: [tester::#NA2] Received ["REPLCONF", "GETACK", "*"]
remote: [tester::#NA2] replica-2: Not sending ACK to Master
remote: [tester::#NA2] Testing Replica : 3
remote: [tester::#NA2] replica-3: Expecting "SET foo 123" to be propagated
remote: [tester::#NA2] replica-3: Received bytes: "*3\r\n$3\r\nSET\r\n$3\r\nfoo\r\n$3\r\n123\r\n"
remote: [tester::#NA2] replica-3: Received RESP array: ["SET", "foo", "123"]
remote: [tester::#NA2] Received ["SET", "foo", "123"]
remote: [tester::#NA2] replica-3: Expecting "REPLCONF GETACK *" from Master
remote: [tester::#NA2] replica-3: Received bytes: "*3\r\n$8\r\nREPLCONF\r\n$6\r\nGETACK\r\n$1\r\n*\r\n"
remote: [tester::#NA2] replica-3: Received RESP array: ["REPLCONF", "GETACK", "*"]
remote: [tester::#NA2] Received ["REPLCONF", "GETACK", "*"]
remote: [tester::#NA2] replica-3: Not sending ACK to Master
remote: [your_program] 2024-06-07 16:59:58 - response: b':0\r\n'
remote: [your_program] 2024-06-07 16:59:58 - Redis server sent command: b':0\r\n'
remote: [tester::#NA2] client: Received bytes: ":0\r\n"
remote: [tester::#NA2] client: Received RESP integer: 0
remote: [tester::#NA2] Expected 1, got 0
remote: [tester::#NA2] Test failed
remote: [tester::#NA2] Terminating program
remote: [your_program] 2024-06-07 16:59:58 - Redis server get a request: b''
remote: [tester::#NA2] Program terminated successfully

however, if I add time.sleep(1) in wait command, program can get ACK from tester eventually.

    elif 'wait' in commands[0]:
        updated_replicas_number = 0
        num_replicas = int(commands[1])
        timeout = int(commands[2]) / 1000  # Convert to seconds
        start_time = time.time()

        for index, replica in enumerate(replicas):
            try:
                command = ["REPLCONF", "GETACK", "*"]
                replica_message = redis_protocol_encoder('array', command).encode()
                respond(replica, replica_message)
            except Exception as e:
                print(f"Failed to send REPLCONF GETACK to replica: {e}")

  
        while time.time() - start_time <= timeout:
            if updated_replicas_number >= num_replicas:
                break

       time.sleep(1) # waiting for 'ack' coming back

        response = redis_protocol_encoder('int', updated_replicas_number).encode()
        print(f"{time.strftime('%Y-%m-%d %H:%M:%S')} - response: {response}")
        respond(client_socket,response)
        updated_replicas_number = 0

but it will make
Expected WAIT to return exactly after 1000 ms timeout elapsed.

remote: [tester::#NA2] client: $ redis-cli WAIT 3 2000
remote: [tester::#NA2] client: Sent bytes: "*3\r\n$4\r\nWAIT\r\n$1\r\n3\r\n$4\r\n2000\r\n"
remote: [tester::#NA2] Testing Replica : 1
remote: [tester::#NA2] replica-1: Expecting "SET baz 789" to be propagated
remote: [tester::#NA2] replica-1: Received bytes: "*3\r\n$3\r\nSET\r\n$3\r\nbaz\r\n$3\r\n789\r\n"
remote: [tester::#NA2] replica-1: Received RESP array: ["SET", "baz", "789"]
remote: [tester::#NA2] Received ["SET", "baz", "789"]
remote: [tester::#NA2] replica-1: Expecting "REPLCONF GETACK *" from Master
remote: [your_program] 2024-06-07 17:10:22 - Redis server get a request: b'*3\r\n$4\r\nWAIT\r\n$1\r\n3\r\n$4\r\n2000\r\n'
remote: [your_program] 2024-06-07 17:10:22 - Redis server get commands: [['WAIT', '3', '2000']]
remote: [your_program] 2024-06-07 17:10:22 - Command is running: ['wait', '3', '2000']
remote: [your_program] 2024-06-07 17:10:22 - Redis server sent command: b'*3\r\n$8\r\nREPLCONF\r\n$6\r\nGETACK\r\n$1\r\n*\r\n'
remote: [your_program] 2024-06-07 17:10:22 - Redis server sent command: b'*3\r\n$8\r\nREPLCONF\r\n$6\r\nGETACK\r\n$1\r\n*\r\n'
remote: [your_program] 2024-06-07 17:10:22 - Redis server sent command: b'*3\r\n$8\r\nREPLCONF\r\n$6\r\nGETACK\r\n$1\r\n*\r\n'
remote: [your_program] 2024-06-07 17:10:22 - Redis server sent command: b'*3\r\n$8\r\nREPLCONF\r\n$6\r\nGETACK\r\n$1\r\n*\r\n'
remote: [tester::#NA2] replica-1: Received bytes: "*3\r\n$8\r\nREPLCONF\r\n$6\r\nGETACK\r\n$1\r\n*\r\n"
remote: [tester::#NA2] replica-1: Received RESP array: ["REPLCONF", "GETACK", "*"]
remote: [tester::#NA2] Received ["REPLCONF", "GETACK", "*"]
remote: [tester::#NA2] replica-1: Sending ACK to Master
remote: [tester::#NA2] replica-1: $ redis-cli REPLCONF ACK 99
remote: [tester::#NA2] replica-1: Sent bytes: "*3\r\n$8\r\nREPLCONF\r\n$3\r\nACK\r\n$2\r\n99\r\n"
remote: [tester::#NA2] Testing Replica : 2
remote: [tester::#NA2] replica-2: Expecting "SET baz 789" to be propagated
remote: [tester::#NA2] replica-2: Received bytes: "*3\r\n$3\r\nSET\r\n$3\r\nbaz\r\n$3\r\n789\r\n"
remote: [tester::#NA2] replica-2: Received RESP array: ["SET", "baz", "789"]
remote: [tester::#NA2] Received ["SET", "baz", "789"]
remote: [tester::#NA2] replica-2: Expecting "REPLCONF GETACK *" from Master
remote: [tester::#NA2] replica-2: Received bytes: "*3\r\n$8\r\nREPLCONF\r\n$6\r\nGETACK\r\n$1\r\n*\r\n"
remote: [tester::#NA2] replica-2: Received RESP array: ["REPLCONF", "GETACK", "*"]
remote: [tester::#NA2] Received ["REPLCONF", "GETACK", "*"]
remote: [tester::#NA2] replica-2: Sending ACK to Master
remote: [tester::#NA2] replica-2: $ redis-cli REPLCONF ACK 99
remote: [tester::#NA2] replica-2: Sent bytes: "*3\r\n$8\r\nREPLCONF\r\n$3\r\nACK\r\n$2\r\n99\r\n"
remote: [tester::#NA2] Testing Replica : 3
remote: [tester::#NA2] replica-3: Expecting "SET baz 789" to be propagated
remote: [tester::#NA2] replica-3: Received bytes: "*3\r\n$3\r\nSET\r\n$3\r\nbaz\r\n$3\r\n789\r\n"
remote: [tester::#NA2] replica-3: Received RESP array: ["SET", "baz", "789"]
remote: [tester::#NA2] Received ["SET", "baz", "789"]
remote: [tester::#NA2] replica-3: Expecting "REPLCONF GETACK *" from Master
remote: [tester::#NA2] replica-3: Received bytes: "*3\r\n$8\r\nREPLCONF\r\n$6\r\nGETACK\r\n$1\r\n*\r\n"
remote: [tester::#NA2] replica-3: Received RESP array: ["REPLCONF", "GETACK", "*"]
remote: [tester::#NA2] Received ["REPLCONF", "GETACK", "*"]
remote: [tester::#NA2] replica-3: Not sending ACK to Master
remote: [tester::#NA2] Testing Replica : 4
remote: [tester::#NA2] replica-4: Expecting "SET baz 789" to be propagated
remote: [tester::#NA2] replica-4: Received bytes: "*3\r\n$3\r\nSET\r\n$3\r\nbaz\r\n$3\r\n789\r\n"
remote: [tester::#NA2] replica-4: Received RESP array: ["SET", "baz", "789"]
remote: [tester::#NA2] Received ["SET", "baz", "789"]
remote: [tester::#NA2] replica-4: Expecting "REPLCONF GETACK *" from Master
remote: [your_program] 2024-06-07 17:10:22 - Redis server get a request: b'*3\r\n$8\r\nREPLCONF\r\n$3\r\nACK\r\n$2\r\n99\r\n'
remote: [your_program] 2024-06-07 17:10:22 - Redis server get commands: [['REPLCONF', 'ACK', '99']]
remote: [your_program] 2024-06-07 17:10:22 - Command is running: ['replconf', 'ack', '99']
remote: [your_program] Master Get ACK  updated_replicas_number: 1
remote: [your_program] 2024-06-07 17:10:22 - Redis server get a request: b'*3\r\n$8\r\nREPLCONF\r\n$3\r\nACK\r\n$2\r\n99\r\n'
remote: [your_program] 2024-06-07 17:10:22 - Redis server get commands: [['REPLCONF', 'ACK', '99']]
remote: [your_program] 2024-06-07 17:10:22 - Command is running: ['replconf', 'ack', '99']
remote: [your_program] Master Get ACK  updated_replicas_number: 2
remote: [tester::#NA2] replica-4: Received bytes: "*3\r\n$8\r\nREPLCONF\r\n$6\r\nGETACK\r\n$1\r\n*\r\n"
remote: [tester::#NA2] replica-4: Received RESP array: ["REPLCONF", "GETACK", "*"]
remote: [tester::#NA2] Received ["REPLCONF", "GETACK", "*"]
remote: [tester::#NA2] replica-4: Not sending ACK to Master
remote: [your_program] 2024-06-07 17:10:25 - response: b':2\r\n'
remote: [your_program] 2024-06-07 17:10:25 - Redis server sent command: b':2\r\n'
remote: [tester::#NA2] client: Received bytes: ":2\r\n"
remote: [tester::#NA2] client: Received RESP integer: 2
remote: [tester::#NA2] WAIT command returned after 3004 ms
remote: [tester::#NA2] Expected WAIT to return exactly after 1000 ms timeout elapsed.
remote: [tester::#NA2] Test failed
remote: [tester::#NA2] Terminating program
remote: [your_program] 2024-06-07 17:10:25 - Redis server get a request: b''
remote: [tester::#NA2] Program terminated successfully

I’m curious:

  • Is it possible which is because the ACK from the tester is too slow, causing the test to fail?
  • or does it make sense to send :0\r\n if ACK is not return immediately

@nissenyeh according to the logs it looks like an ACK is sent immediately (relevant part highlighted above). Can you try logging the exact bytes you receive from each replica before parsing and running logic? That might help narrow this down.

@rohitpaulk

Thanks, I did log exact bytes I receive

if I try to wait for more time(by useing Python time.sleep()), my program did accept ACK.
But it will cause timeout elapsed due to deliberately extend waiting time

remote: [tester::#NA2] client: $ redis-cli WAIT 3 2000
...
remote: [tester::#NA2] replica-1: Sending ACK to Master
remote: [tester::#NA2] replica-1: $ redis-cli REPLCONF ACK 99
remote: [tester::#NA2] replica-1: Sent bytes: "*3\r\n$8\r\nREPLCONF\r\n$3\r\nACK\r\n$2\r\n99\r\n"
...... (waiting for a while)
remote: [your_program] 2024-06-07 19:43:36 - Redis server get a request: b'*3\r\n$8\r\nREPLCONF\r\n$3\r\nACK\r\n$2\r\n99\r\n'
.....
remote: [tester::#NA2] client: Received bytes: ":2\r\n"
remote: [tester::#NA2] client: Received RESP integer: 2
remote: [tester::#NA2] WAIT command returned after 3000 ms
remote: [tester::#NA2] Expected WAIT to return exactly after 1000 ms timeout elapsed.

Could it be caused because the network connection latency between instances?

@rohitpaulk

here is my repo link and partial code. I don’t if it’s useful

https://git.codecrafters.io/599a5638cec9d65e

  • WAIT command
    elif 'wait' in commands[0]:
        num_replicas = int(commands[1])
        timeout = int(commands[2]) / 1000  # Convert to seconds
        start_time = time.time()

        # Check how many replicas are up-to-date
        all_synced_replicas = [replica for replica, replica_offset in replicas_offsets.items() if offset == replica_offset]

        print(f"{time.strftime('%Y-%m-%d %H:%M:%S')} - {len(all_synced_replicas)} replicas are up-to-date.")
        if len(all_synced_replicas) >= num_replicas:
            response = redis_protocol_encoder('int', len(replicas)).encode()
            return client_socket.send(f":{len(all_synced_replicas)}\r\n".encode())


        # If not enough, send GETACK to check all replicas
        updated_to_date_replicas_number = 0
        for index, replica in enumerate(replicas):
            try:
                command = ["REPLCONF", "GETACK", "*"]
                replica_message = redis_protocol_encoder('array', command).encode()
                respond(replica, replica_message)
            except Exception as e:
                print(f"Failed to send REPLCONF GETACK to replica: {e}")


        while time.time() - start_time <= timeout:
            if updated_to_date_replicas_number >= num_replicas:
                break
        # time.sleep(1)

        response = redis_protocol_encoder('int', updated_to_date_replicas_number).encode()
        print(f"{time.strftime('%Y-%m-%d %H:%M:%S')} - response: {response}")
        client_socket.send(f":{updated_to_date_replicas_number}\r\n".encode())
        offset += len(replica_message)
  • ACK command
    elif 'replconf' in commands[0]:

        if role == 'master' and 'ack' in commands[1]:
            replica_offset = int(commands[2])
            print(f"offset: {offset}, replica_offset: {replica_offset}")
            replicas_offsets[client_socket] = replica_offset
            if replica_offset == offset: # if offset 
                updated_to_date_replicas_number += 1
            return 
        
        if role == 'master':
            response = f'+OK\r\n'.encode()
            return respond(client_socket, response)
        
        if role == 'slave' and 'getack' in commands[1]:

            remove_offset = redis_protocol_encoder('array', ["REPLCONF", "GETACK", "*"]).encode()
            res = ["REPLCONF", "ACK", str(offset - len(remove_offset))]
            response = redis_protocol_encoder('array',res).encode()
            # start_record_offset = True
            return respond(client_socket, response)

@rohitpaulk
Is any idea or suggestion? thank