Failed to decode gzip: Failed to decompress data: gzip: invalid header

Getting the error in the last stage of gzip compression.

Below is the error logs:

[tester::#CR8] Running tests for Stage #CR8 (HTTP Compression - Gzip compression)
[tester::#CR8] Running program
[tester::#CR8] $ ./your_server.sh
[your_program] Logs from your program will appear here!
[tester::#CR8] Connected to localhost port 4221
[tester::#CR8] $ curl -v http://localhost:4221/echo/pineapple -H "Accept-Encoding: gzip"
[tester::#CR8] > GET /echo/pineapple HTTP/1.1
[tester::#CR8] > Host: localhost:4221
[tester::#CR8] > Accept-Encoding: gzip
[tester::#CR8] >
[tester::#CR8] Sent bytes: "GET /echo/pineapple HTTP/1.1\r\nHost: localhost:4221\r\nAccept-Encoding: gzip\r\n\r\n"
[your_program] accepted connection from the ('127.0.0.1', 35680)
[your_program] data is GET /echo/pineapple HTTP/1.1
[your_program] Host: localhost:4221
[your_program] Accept-Encoding: gzip
[your_program]
[your_program]
[your_program] http_verb is:  GET
[your_program] accept encoding ['Accept-Encoding:', 'gzip']
[tester::#CR8] Received bytes: "HTTP/1.1 200 OK\r\nContent-Encoding: gzip\r\nContent-Type: text/plain\r\nContent-Length: 58\r\n\r\n1f8b08009ac3566602ff2bc8cc4b4d2c28c8490500f7e2b23509000000"
[tester::#CR8] < HTTP/1.1 200 OK
[tester::#CR8] < Content-Encoding: gzip
[tester::#CR8] < Content-Type: text/plain
[tester::#CR8] < Content-Length: 58
[tester::#CR8] <
[tester::#CR8] < 1f8b08009ac3566602ff2bc8cc4b4d2c28c8490500f7e2b23509000000
[tester::#CR8] <
[tester::#CR8] Received response with 200 status code
[your_program] gzip
[your_program] compressed data  1f8b08009ac3566602ff2bc8cc4b4d2c28c8490500f7e2b23509000000
[tester::#CR8] ✓ Content-Encoding header is present
[your_program] HTTP/1.1 200 OK
[tester::#CR8] ✓ Content-Length header is present
[your_program] Content-Encoding: gzip
[tester::#CR8] Failed to decode gzip: Failed to decompress data: gzip: invalid header
[tester::#CR8] Test failed
[tester::#CR8] Terminating program
[your_program] Content-Type: text/plain
[your_program] Content-Length: 58
[your_program]
[your_program] 1f8b08009ac3566602ff2bc8cc4b4d2c28c8490500f7e2b23509000000
[your_program]
[your_program] response is  HTTP/1.1 200 OK
[your_program] Content-Encoding: gzip
[your_program] Content-Type: text/plain
[your_program] Content-Length: 58
[your_program]
[your_program] 1f8b08009ac3566602ff2bc8cc4b4d2c28c8490500f7e2b23509000000
[your_program]
[your_program] /echo/pineapple
[your_program] HTTP/1.1 200 OK
[your_program] Content-Encoding: gzip
[your_program] Content-Type: text/plain
[your_program] Content-Length: 58
[your_program]
[your_program] 1f8b08009ac3566602ff2bc8cc4b4d2c28c8490500f7e2b23509000000
[your_program]
[tester::#CR8] Program terminated successfully

below is my code, can help to correct what is wrong that the last stage is failing with the error

import socket
import threading
import sys
import gzip


def create_server_codecrafter(host, port):
    with socket.create_server((host, port)) as socket_server:
        while True:
            connection, address = socket_server.accept()
            print(f"accepted connection from the {address}")
            # client_connection(connection)
            connection_thread = threading.Thread(
                target=client_connection, args=(connection,)
            )
            connection_thread.start()


def read_directory_data(filtered_data):
    directory = sys.argv[2]
    file_path = f"{directory}{filtered_data}"
    # read the file contents from the directory
    try:
        with open(file_path, "r") as f:
            data = f.read()
    except FileNotFoundError:
        data = None
    return data


def write_to_file(file_name, contents):
    # the idea is to read the data from the post request and write it to a file.
    directory = sys.argv[2]
    file_path = f"{directory}{file_name}"
    print(file_path)
    try:
        with open(file_path, "w") as f:
            f.write(contents)
    except FileNotFoundError as e:
        raise IndexError


def validate_encoding(filtered_data, request_data):
    accept_encoding = request_data[2].split(" ")
    print("accept encoding", accept_encoding)
    # print(type(accept_encoding))
    if "gzip" in accept_encoding:
        print("gzip ")
        compress_filtered_data = gzip.compress(filtered_data.encode("utf-8"))
        hex_data = (
            compress_filtered_data.hex()
        )  # codecs.encode(compress_filtered_data,'hex_codec')
        response = f"HTTP/1.1 200 OK\r\nContent-Encoding: gzip\r\nContent-Type: text/plain\r\nContent-Length: {len(hex_data)}\r\n\r\n{hex_data}\r\n"
        print("compressed data ",hex_data)
        # uncompressed_data = bytes.fromhex(hex_data)
        # decompressed_data = gzip.decompress(uncompressed_data)
        # print("decompressed data ",decompressed_data.decode())
    else:
        response = f"HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\nContent-Length: {len(filtered_data)}\r\n\r\n{filtered_data}"
    print(response)
    return response


def get_http_process(filtered_data, request_data):
    content_type = "text/plain"
    if filtered_data == "/user-agent":
        filtered_data = request_data[2].split(" ")[1]
    elif filtered_data.startswith("/files"):
        filtered_data = filtered_data.split("/files/")[1]
        filtered_data = read_directory_data(filtered_data)
        content_type = "application/octet-stream"
        if filtered_data is None:
            raise IndexError
    elif filtered_data.startswith("/echo"):
        filtered_data = filtered_data.split("/echo/")[1]
        # fetch the accept_encoding value
        response = validate_encoding(filtered_data, request_data)
        print("response is ", response)
        return response
    else:
        raise IndexError

    response = f"HTTP/1.1 200 OK\r\nContent-Type: {content_type}\r\nContent-Length: {len(filtered_data)}\r\n\r\n{filtered_data}"
    return response


def client_connection(conn):
    data = conn.recv(
        1024
    ).decode()  # TODO: how can i convert this data to a hashmap? is it even possible?
    print("data is", data)
    try:
        request_data = data.split("\r\n")
        filtered_data = request_data[0].split(" ")[1]
        http_verb = request_data[0].split(" ")[0]
        print("http_verb is: ", http_verb)
        if http_verb == "GET":
            content_type = "text/plain"
            if filtered_data != "/":
                response = get_http_process(filtered_data, request_data)
            else:
                response = f"HTTP/1.1 200 OK\r\nContent-Type: {content_type}\r\nContent-Length: {len(filtered_data)}\r\n\r\n{filtered_data}"
        elif http_verb == "POST":
            # read the data from the server post request
            if filtered_data.startswith("/files"):
                filename = filtered_data.split("/files/")[1]
                print(filename)
                body = data.split("\r\n")[-1]
                print("body is:", body)
                # how to get the content body from the curl request
                write_to_file(filename, body)
            response = f"HTTP/1.1 201 Created\r\n\r\n"
        print(filtered_data)
        # TODO: what is a better way to read the header from the text?
    except IndexError:
        response = "HTTP/1.1 404 Not Found\r\n\r\n"
    print(response)
    conn.sendall(response.encode())
    conn.close()


def main():
    print("Logs from your program will appear here!")
    HOST = "localhost"
    PORT = 4221
    create_server_codecrafter(HOST, PORT)


if __name__ == "__main__":
    main()

Hey @mridulpant2010,

I think the issue is here is that the expected content isn’t the hex-encoded version of gzip-compressed data, rather gzip-compressed data itself.

Here’s one such usage I found in code examples:

if "gzip" in request.headers.get("accept-encoding"):
  response_body = gzip.compress(path_resources[2].encode())
  client_socket.sendall(f"HTTP/1.1 200 OK\r\nContent-Encoding: gzip\r\nContent-Type: text/plain\r\nContent-Length: {len(response_body)}\r\n\r\n".encode() + response_body)

1 Like

Hey @rohitpaulk , thanks for the solution. It worked for me.

Cheers

1 Like

This topic was automatically closed 5 days after the last reply. New replies are no longer allowed.