I’m stuck on Stage #nz8.
I’ve tried testing the test queries manually using the 1GB database, everything seems to be fast and not exceed 1 second (I know testing environment is different), but I keep getting 20s timeout when running codecrafters test
Here are my logs:
include relevant logs here (please make sure to keep the backticks around this!)
And here’s a snippet of my code for scanning the index pages and reading each rowId from table pages:
void BTreeNode::readRowById(std::ifstream &file, Metadata& metadata, unsigned long pageNumber, Command& command, unsigned long id) {
std::vector<unsigned short> offsets;
unsigned long pageOffset = (pageNumber - 1) * metadata.pageSize;
for (int i = 0; i < numberOfCells; i++)
{
offsets.push_back(Util::readShort(file));
}
for (auto offset : offsets)
{
file.seekg(pageOffset + offset);
if (type == LEAF_NODE) {
// payload size
Util::readVarInt(file);
// row id
unsigned long rowId = Util::readVarInt(file);
if (rowId != id)
continue ;
std::vector<unsigned long> serialTypes = parseRecordHeader(file);
int i = 0;
bool conditionFailed = false;
std::map<std::string, std::string> values;
for (auto serialType : serialTypes)
{
unsigned long columnLength;
std::string columnType;
if (serialType >= 13 && serialType % 2 == 1)
{
columnLength = (serialType - 13) / 2;
columnType = "string";
}
else if (serialType >= 12 && serialType % 2 == 0)
{
columnLength = (serialType - 12) / 2;
columnType = "blob";
}
else
{
columnLength = serialType;
columnType = "int";
if (serialType == 5)
columnLength = 6;
if (serialType == 6)
columnLength = 8;
}
std::string value;
if (columnType == "int")
{
unsigned long val;
if (columnLength == 1)
val = Util::readByte(file);
else if (columnLength == 2)
val = Util::readShort(file);
else if (columnLength == 3)
val = Util::readMidInt(file);
else if (columnLength == 4)
val = Util::readInt(file);
else if (columnLength == 6)
val = Util::readMidLong(file);
else if (columnLength == 8)
val = Util::readLong(file);
else
val = 0;
value = std::to_string(val);
}
else
{
value = Util::readString(file, columnLength);
}
values[metadata.columns[command.table][i]] = value;
i++;
}
bool isFirst = true;
values["id"] = std::to_string(rowId);
if (!conditionFailed) {
// display values
for (auto column: command.columns) {
if (!isFirst)
std::cout << "|";
else
isFirst = false;
std::cout << values[column];
}
std::cout << std::endl;
}
return ;
}
else if (type == INTERIOR_NODE) {
unsigned long leftChildPageNumber = Util::readInt(file);
unsigned long key = Util::readVarInt(file);
if (id < key) {
BTreeNode childPage;
file.seekg((leftChildPageNumber - 1) * metadata.pageSize);
childPage.parseHeader(file, metadata);
childPage.readRowById(file, metadata, leftChildPageNumber, command, id);
return ;
}
}
}
if (type == INTERIOR_NODE) {
BTreeNode childPage;
file.seekg((rightMostPointer - 1) * metadata.pageSize);
childPage.parseHeader(file, metadata);
childPage.readRowById(file, metadata, rightMostPointer, command, id);
}
}
std::vector<unsigned long> BTreeNode::scanIndex(std::ifstream &file, Metadata& metadata, unsigned long pageNumber, Command& command) {
std::vector<unsigned short> offsets;
std::vector<unsigned long> validIndexes;
unsigned long pageOffset = (pageNumber - 1) * metadata.pageSize;
for (int i = 0; i < numberOfCells; i++)
{
offsets.push_back(Util::readShort(file));
}
if (type == LEAF_INDEX_NODE) {
for (auto offset : offsets)
{
file.seekg(pageOffset + offset);
// payload size
Util::readVarInt(file);
std::vector<unsigned long> serialTypes = parseRecordHeader(file);
std::vector<std::string> values = parseValues(file, serialTypes);
if (values[0] == command.condition.second)
validIndexes.push_back(std::stoul(values[1]));
}
}
else if (type == INTERIOR_INDEX_NODE) {
// use signed integers to avoid unsigned underflow
int start = 0;
int end = numberOfCells - 1;
int mid;
unsigned long targetChildPage = 0;
// binary search for page to visit
while (start <= end) {
mid = (start + end) / 2;
file.seekg(pageOffset + offsets[mid]);
unsigned long childPageNumber = Util::readInt(file);
Util::readVarInt(file);
std::vector<unsigned long> serialTypes = parseRecordHeader(file);
std::vector<std::string> values = parseValues(file, serialTypes);
if (values[0] == command.condition.second) {
validIndexes.push_back(std::stoul(values[1]));
targetChildPage = childPageNumber;
break;
}
else if (values[0] > command.condition.second) {
targetChildPage = childPageNumber;
end = mid - 1;
} else {
start = mid + 1;
}
}
// no exact match found, check the rightmost pointer
if (targetChildPage == 0 && rightMostPointer > 0) {
targetChildPage = rightMostPointer;
}
if (targetChildPage > 0) {
BTreeNode childPage;
file.seekg((targetChildPage - 1) * metadata.pageSize);
childPage.parseHeader(file, metadata);
std::vector<unsigned long> childIndexes = childPage.scanIndex(file, metadata, targetChildPage, command);
validIndexes.insert(validIndexes.end(), childIndexes.begin(), childIndexes.end());
}
}
return validIndexes;
}