关于数据库:实现一个简单的Database12译文

GreatSQL社区原创内容未经受权不得随便应用，转载请分割小编并注明起源。
GreatSQL是MySQL的国产分支版本，应用上与MySQL统一。
作者：花家舍
文章起源：GreatSQL社区原创

前文回顾

实现一个简略的Database系列

译注：cstack在github保护了一个简略的、相似sqlite的数据库实现，通过这个简略的我的项目，能够很好的了解数据库是如何运行的。本文是第十二篇，次要是实现扫描多级B-Tree

## Part 12 扫描多级B-Tree

咱们当初反对构建一个多级B-Tree，然而咱们在程序中毁坏了 select 语句。上面是一个插入15行数据并打印进去的测例。

+  it 'prints all rows in a multi-level tree' do+    script = []+    (1..15).each do |i|+      script << "insert #{i} user#{i} person#{i}@example.com"+    end+    script << "select"+    script << ".exit"+    result = run_script(script)++    expect(result[15...result.length]).to match_array([+      "db > (1, user1, person1@example.com)",+      "(2, user2, person2@example.com)",+      "(3, user3, person3@example.com)",+      "(4, user4, person4@example.com)",+      "(5, user5, person5@example.com)",+      "(6, user6, person6@example.com)",+      "(7, user7, person7@example.com)",+      "(8, user8, person8@example.com)",+      "(9, user9, person9@example.com)",+      "(10, user10, person10@example.com)",+      "(11, user11, person11@example.com)",+      "(12, user12, person12@example.com)",+      "(13, user13, person13@example.com)",+      "(14, user14, person14@example.com)",+      "(15, user15, person15@example.com)",+      "Executed.", "db > ",+    ])+  end

然而当初当咱们运行这个测例的时候，它实际上产生的是：

db > select(2, user1, person1@example.com)Executed.

这很奇怪。它只打印了一行，而且数据看起来产生了损坏（留神输入的 id 和 username 不匹配）。

这个奇怪之处是因为 execute_select() 函数在表的开始地位执行，咱们当初的实现 table_start() 函数返回根节点的第0个单元格。然而当初树的跟节点是一个外部节点，它并不蕴含任何数据行。打印出的数据是根节点当初作为叶子节点时遗留下的。execute_select() 函数应该返回的是最左侧的叶子节点的第0个单元格。

所以当初摈弃旧的实现：

-Cursor* table_start(Table* table) {-  Cursor* cursor = malloc(sizeof(Cursor));-  cursor->table = table;-  cursor->page_num = table->root_page_num;-  cursor->cell_num = 0;--  void* root_node = get_page(table->pager, table->root_page_num);-  uint32_t num_cells = *leaf_node_num_cells(root_node);-  cursor->end_of_table = (num_cells == 0);--  return cursor;-}

并且增加一个新的实现来搜寻 key 0 (可能存在的最小的 key)。即便在表中 key 0不存在，这个办法也会返回最小id的地位（最左叶子节点的开始地位）。

+Cursor* table_start(Table* table) {+  Cursor* cursor =  table_find(table, 0);++  void* node = get_page(table->pager, cursor->page_num);+  uint32_t num_cells = *leaf_node_num_cells(node);+  cursor->end_of_table = (num_cells == 0);++  return cursor;+}

有了这些批改，测例依然只打印出一个节点的行数：

db > select(1, user1, person1@example.com)(2, user2, person2@example.com)(3, user3, person3@example.com)(4, user4, person4@example.com)(5, user5, person5@example.com)(6, user6, person6@example.com)(7, user7, person7@example.com)Executed.db >

有15个条目，B-tree蕴含了一个外部节点和两个叶子节点，看起来就是上面的样子：

structure of our btree

为了遍历整个表，咱们须要在遍历到第一个节点的结尾时跳到第二个叶子节点（持续遍历）。为了可能这样，咱们须要在叶子节点的头部保留一个叫做“next_leaf”的字段，这个字段将保留左边兄弟叶子节点的 page number。而最右侧的叶子节点中next_leaf字段将保留 0 值，来示意它没有兄弟节点（无论如何，page 0 是为表的根节点保留的）。

更新叶子节点的头部格局来蕴含新的字段：

const uint32_t LEAF_NODE_NUM_CELLS_SIZE = sizeof(uint32_t);const uint32_t LEAF_NODE_NUM_CELLS_OFFSET = COMMON_NODE_HEADER_SIZE;-const uint32_t LEAF_NODE_HEADER_SIZE =-    COMMON_NODE_HEADER_SIZE + LEAF_NODE_NUM_CELLS_SIZE;+const uint32_t LEAF_NODE_NEXT_LEAF_SIZE = sizeof(uint32_t);+const uint32_t LEAF_NODE_NEXT_LEAF_OFFSET =+    LEAF_NODE_NUM_CELLS_OFFSET + LEAF_NODE_NUM_CELLS_SIZE;+const uint32_t LEAF_NODE_HEADER_SIZE = COMMON_NODE_HEADER_SIZE ++                                       LEAF_NODE_NUM_CELLS_SIZE ++                                       LEAF_NODE_NEXT_LEAF_SIZE;

增加一个办法来拜访这个字段：

+uint32_t* leaf_node_next_leaf(void* node) {+  return node + LEAF_NODE_NEXT_LEAF_OFFSET;+}

在初始化一个新的叶子节点的时候，设置 next_leaf 字段的默认值值为0：

@@ -322,6 +330,7 @@ void initialize_leaf_node(void* node) {   set_node_type(node, NODE_LEAF);   set_node_root(node, false);   *leaf_node_num_cells(node) = 0;+  *leaf_node_next_leaf(node) = 0;  // 0 represents no sibling }

每当决裂叶子节点时，更新同级兄弟节点的指针。老叶子节点的兄弟变成了新叶子节点，而新叶子节点的兄弟变成了旧叶子节点的兄弟。

@@ -659,6 +671,8 @@ void leaf_node_split_and_insert(Cursor* cursor, uint32_t key, Row* value) {   uint32_t new_page_num = get_unused_page_num(cursor->table->pager);   void* new_node = get_page(cursor->table->pager, new_page_num);   initialize_leaf_node(new_node);+  *leaf_node_next_leaf(new_node) = *leaf_node_next_leaf(old_node);+  *leaf_node_next_leaf(old_node) = new_page_num;

减少几个新的字段，扭转几个变量：

it 'prints constants' do  script = [    ".constants",@@ -199,9 +228,9 @@ describe 'database' do    "db > Constants:",    "ROW_SIZE: 293",    "COMMON_NODE_HEADER_SIZE: 6",-      "LEAF_NODE_HEADER_SIZE: 10",+      "LEAF_NODE_HEADER_SIZE: 14",    "LEAF_NODE_CELL_SIZE: 297",-      "LEAF_NODE_SPACE_FOR_CELLS: 4086",+      "LEAF_NODE_SPACE_FOR_CELLS: 4082",    "LEAF_NODE_MAX_CELLS: 13",    "db > ",  ])

当初，每当咱们想将游标推动到叶子节点的开端时，就能够查看叶子节点是否有兄弟节点。如果有，跳到兄弟节点。否则，咱们就完结在表的开端。

@@ -428,7 +432,15 @@ void cursor_advance(Cursor* cursor) {   cursor->cell_num += 1;   if (cursor->cell_num >= (*leaf_node_num_cells(node))) {-    cursor->end_of_table = true;+    /* Advance to next leaf node */+    uint32_t next_page_num = *leaf_node_next_leaf(node);+    if (next_page_num == 0) {+      /* This was rightmost leaf */+      cursor->end_of_table = true;+    } else {+      cursor->page_num = next_page_num;+      cursor->cell_num = 0;+    }   } }

有了这些更改之后，咱们实际上就能够打印 15 行了...

db > select(1, user1, person1@example.com)(2, user2, person2@example.com)(3, user3, person3@example.com)(4, user4, person4@example.com)(5, user5, person5@example.com)(6, user6, person6@example.com)(7, user7, person7@example.com)(8, user8, person8@example.com)(9, user9, person9@example.com)(10, user10, person10@example.com)(11, user11, person11@example.com)(12, user12, person12@example.com)(13, user13, person13@example.com)(1919251317, 14, on14@example.com)(15, user15, person15@example.com)Executed.db >

...然而有一行数据如同损坏了。

(1919251317, 14, on14@example.com)

在做几次调试之后，我发现问题是因为在决裂叶子节点时的一个bug导致的：

@@ -676,7 +690,9 @@ void leaf_node_split_and_insert(Cursor* cursor, uint32_t key, Row* value) {     void* destination = leaf_node_cell(destination_node, index_within_node);     if (i == cursor->cell_num) {-      serialize_row(value, destination);+      serialize_row(value,+                    leaf_node_value(destination_node, index_within_node));+      *leaf_node_key(destination_node, index_within_node) = key;     } else if (i > cursor->cell_num) {       memcpy(destination, leaf_node_cell(old_node, i - 1), LEAF_NODE_CELL_SIZE);     } else {

请记住，叶节点中的每个单元格首先蕴含一个键，而后蕴含一个值：

Original leaf node format

咱们将新行（值）写入单元格的结尾，键应该放在那里。这意味着用户名(username)的一部分进入了id局部(因为疯狂的大id)。在批改bug之后，咱们终于按预期打印出了整个表格。

db > select(1, user1, person1@example.com)(2, user2, person2@example.com)(3, user3, person3@example.com)(4, user4, person4@example.com)(5, user5, person5@example.com)(6, user6, person6@example.com)(7, user7, person7@example.com)(8, user8, person8@example.com)(9, user9, person9@example.com)(10, user10, person10@example.com)(11, user11, person11@example.com)(12, user12, person12@example.com)(13, user13, person13@example.com)(14, user14, person14@example.com)(15, user15, person15@example.com)Executed.db >

哇哦！一个又一个BUG，但咱们正在获得停顿。直到下次。

Enjoy GreatSQL :)

## 对于 GreatSQL

GreatSQL是由万里数据库保护的MySQL分支，专一于晋升MGR可靠性及性能，反对InnoDB并行查问个性，是实用于金融级利用的MySQL分支版本。

相干链接： GreatSQL社区 Gitee GitHub Bilibili

GreatSQL社区：

社区博客有奖征稿详情：https://greatsql.cn/thread-100-1-1.html

技术交换群：

微信：扫码增加GreatSQL社区助手微信好友，发送验证信息加群。