SQLite retrieving data using index times out when testing

I’m stuck on Stage #nz8.

I’ve tried testing the test queries manually using the 1GB database, everything seems to be fast and not exceed 1 second (I know testing environment is different), but I keep getting 20s timeout when running codecrafters test

Here are my logs:

include relevant logs here (please make sure to keep the backticks around this!)

And here’s a snippet of my code for scanning the index pages and reading each rowId from table pages:


void BTreeNode::readRowById(std::ifstream &file, Metadata& metadata, unsigned long pageNumber, Command& command, unsigned long id) {
    std::vector<unsigned short> offsets;
    unsigned long pageOffset = (pageNumber - 1) * metadata.pageSize;

    for (int i = 0; i < numberOfCells; i++)
    {
        offsets.push_back(Util::readShort(file));
    }

    for (auto offset : offsets)
    {
        file.seekg(pageOffset + offset);
        
        if (type == LEAF_NODE) {
            // payload size
            Util::readVarInt(file);
            
            // row id
            unsigned long rowId = Util::readVarInt(file);

            if (rowId != id)
                continue ;

            
            std::vector<unsigned long> serialTypes = parseRecordHeader(file);

            int i = 0;
            bool conditionFailed = false;
            std::map<std::string, std::string> values;

            for (auto serialType : serialTypes)
            {
                unsigned long columnLength;
                std::string columnType;

                if (serialType >= 13 && serialType % 2 == 1)
                {
                    columnLength = (serialType - 13) / 2;
                    columnType = "string";
                }
                else if (serialType >= 12 && serialType % 2 == 0)
                {
                    columnLength = (serialType - 12) / 2;
                    columnType = "blob";
                }
                else
                {
                    columnLength = serialType;
                    columnType = "int";

                    if (serialType == 5)
                        columnLength = 6;
                    if (serialType == 6)
                        columnLength = 8;
                }

                std::string value;
                if (columnType == "int")
                {
                    unsigned long val;

                    if (columnLength == 1)
                        val = Util::readByte(file);
                    else if (columnLength == 2)
                        val = Util::readShort(file);
                    else if (columnLength == 3)
                        val = Util::readMidInt(file);
                    else if (columnLength == 4)
                        val = Util::readInt(file);
                    else if (columnLength == 6)
                        val = Util::readMidLong(file);
                    else if (columnLength == 8)
                        val = Util::readLong(file);
                    else
                        val = 0;
                    value = std::to_string(val);
                }
                else
                {
                    value = Util::readString(file, columnLength);
                }

                values[metadata.columns[command.table][i]] = value;
                i++;
            }
            
            bool isFirst = true;

            values["id"] = std::to_string(rowId);

            if (!conditionFailed) {
                // display values
                for (auto column: command.columns) {
                    if (!isFirst)
                        std::cout << "|";
                    else
                        isFirst = false;
                    std::cout << values[column];
                }
                std::cout << std::endl;
            }
            return ;
        }
        else if (type == INTERIOR_NODE) {
            unsigned long leftChildPageNumber = Util::readInt(file);
            unsigned long key = Util::readVarInt(file);

            if (id < key) {
                BTreeNode childPage;
    
                file.seekg((leftChildPageNumber - 1) * metadata.pageSize);
                childPage.parseHeader(file, metadata);
                childPage.readRowById(file, metadata, leftChildPageNumber, command, id);
                return ;
            }
        }
    }

    if (type == INTERIOR_NODE) {
            BTreeNode childPage;

            file.seekg((rightMostPointer - 1) * metadata.pageSize);
            childPage.parseHeader(file, metadata);
            childPage.readRowById(file, metadata, rightMostPointer, command, id);
    }
}


std::vector<unsigned long> BTreeNode::scanIndex(std::ifstream &file, Metadata& metadata, unsigned long pageNumber, Command& command) {
    std::vector<unsigned short> offsets;
    std::vector<unsigned long> validIndexes;
    unsigned long pageOffset = (pageNumber - 1) * metadata.pageSize;

    for (int i = 0; i < numberOfCells; i++)
    {
        offsets.push_back(Util::readShort(file));
    }
    
    if (type == LEAF_INDEX_NODE) {
        for (auto offset : offsets)
        {
            file.seekg(pageOffset + offset);

            // payload size
            Util::readVarInt(file);
    
            std::vector<unsigned long> serialTypes = parseRecordHeader(file);
    
            std::vector<std::string> values = parseValues(file, serialTypes);
    
            if (values[0] == command.condition.second)
                validIndexes.push_back(std::stoul(values[1]));
        }
    }
    else if (type == INTERIOR_INDEX_NODE) {
        // use signed integers to avoid unsigned underflow
        int start = 0;
        int end = numberOfCells - 1;
        int mid;
        unsigned long targetChildPage = 0;
        
        // binary search for page to visit
        while (start <= end) {
            mid = (start + end) / 2;
            
            file.seekg(pageOffset + offsets[mid]);

            unsigned long childPageNumber = Util::readInt(file);
    
            Util::readVarInt(file);
    
            std::vector<unsigned long> serialTypes = parseRecordHeader(file);
    
            std::vector<std::string> values = parseValues(file, serialTypes);
            
            if (values[0] == command.condition.second) {
                validIndexes.push_back(std::stoul(values[1]));
                targetChildPage = childPageNumber;
                break;
            }
            else if (values[0] > command.condition.second) {
                targetChildPage = childPageNumber;
                end = mid - 1;
            } else {
                start = mid + 1;
            }
        }

        // no exact match found, check the rightmost pointer
        if (targetChildPage == 0 && rightMostPointer > 0) {
            targetChildPage = rightMostPointer;
        }

        if (targetChildPage > 0) {
            BTreeNode childPage;
    
            file.seekg((targetChildPage - 1) * metadata.pageSize);
            childPage.parseHeader(file, metadata);

            std::vector<unsigned long> childIndexes = childPage.scanIndex(file, metadata, targetChildPage, command);

            validIndexes.insert(validIndexes.end(), childIndexes.begin(), childIndexes.end());
        }
    }
    
    return validIndexes;
}

Hey @Qirall79, I tried running your code against the previous stages, but it’s actually no longer passing a previous stage #RF3 (Filter data with a WHERE clause).

Suggestions:

  1. Use our CLI to test against previous stages by running:
codecrafters test --previous
  1. Focus on fixing the early stages first, as later stages depend on them.

Looks like there is something wrong with our tester as well. Looking into it.

Hello, I just noticed I didn’t push my latest version. It passes the previoud stages now, but the timeout is still an issue for the last stage.

Yes, we’ve confirmed the timeout issue is on our end, and we’re working on a fix. We’ll update you as soon as it’s resolved.

2 Likes

Thank you !

@Qirall79 The timeout issue should now be resolved. Please let us know if you’re still running into it.