Commit fdafe0ba authored by Andreas Schmidt's avatar Andreas Schmidt
Browse files

Add BPTree.

parent 757273c9
......@@ -17,4 +17,15 @@
#define N_START 4
#define N_P_START 1
// Uncomment the line below if you are compiling on Windows.
// #define WINDOWS
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#ifdef WINDOWS
#define bool char
#define false 0
#define true 1
#endif
#endif //PRRT_DEFINES_H
add_library(UTIL ../defines.h common.c common.h list.c list.h dbg.h)
\ No newline at end of file
add_library(UTIL ../defines.h common.c common.h list.c list.h dbg.h bptree.c bptree.h)
\ No newline at end of file
/*
*
* bpt: B+ Tree Implementation
* Copyright (C) 2010 Amittai Aviram http://www.amittai.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*
* Author: Amittai Aviram
* http://www.amittai.com
* amittai.aviram@gmail.edu or afa13@columbia.edu
* Senior Software Engineer
* MathWorks, Inc.
* 3 Apple Hill Drive
* Natick, MA 01760
* Original Date: 26 June 2010
* Last modified: 15 April 2014
*
* This implementation is based on the original code by Amittai Aviram. It has been significantly modified for use in a
* production environment.
*
* Must be compiled with a C99-compliant C compiler such as the latest GCC.
*
*/
#include "bptree.h"
// Default order is 4.
#define DEFAULT_ORDER 4
// TYPES.
/* Record stored behind a key: a thin wrapper around the caller-supplied
 * value pointer. Leaves hold pointers to these records rather than to the
 * values themselves.
 */
typedef struct {
    void *value;    /* opaque payload handed in by the caller */
} BPTreeRecord;
// GLOBALS.
/* The order determines the maximum and minimum number of entries (keys and pointers) in any node. Every node has at
 * most order - 1 keys and at least (roughly speaking) half that number. Every leaf has as many pointers to data as
 * keys, and every internal node has one more pointer to a subtree than the number of keys. This global variable is
 * initialized to the default value.
 *
 * make_node() sizes each node's arrays from this value (order - 1 keys, order pointers), and leaves use
 * pointers[order - 1] as the link to the next leaf.
 * NOTE(review): changing it after nodes have been allocated would presumably invalidate them - confirm no caller does.
 */
int order = DEFAULT_ORDER;
/* Head of the singly linked list (chained through BPTreeNode::next) that enqueue() and dequeue() operate on.
 * BPTree_print() resets it to NULL and uses it to visit the tree in level order, starting from the root and
 * finishing with the leaves.
 */
BPTreeNode * queue = NULL;
// FUNCTION PROTOTYPES.
/* Forward declarations for the helpers defined in this file, so that they can call each other regardless of
 * definition order.
 */
// Output and utility.
/* enqueue/dequeue operate on the global print queue; path_to_root counts edges from a node up to the root. */
void enqueue( BPTreeNode * new_node );
BPTreeNode * dequeue( void );
int path_to_root( BPTreeNode * root, BPTreeNode * child );
/* Lookup helpers: range scan over the leaf chain, descent to a leaf, and exact-key lookup. */
int find_range(BPTreeNode *root, int key_start, int key_end, int returned_keys[], void *returned_pointers[]);
BPTreeNode *find_leaf(BPTreeNode *root, int key);
BPTreeRecord *find(BPTreeNode *root, int key);
/* Split position for a sequence of the given length (length / 2, rounded up). */
int cut( int length );
// Insertion.
/* Allocation helpers; they terminate the process via exit(EXIT_FAILURE) when malloc fails. */
BPTreeRecord * make_record(void *value);
BPTreeNode * make_node( void );
BPTreeNode * make_leaf( void );
/* Index of `left` within parent->pointers. */
int get_left_index(BPTreeNode * parent, BPTreeNode * left);
/* Leaf-level insertion, without and with a split. */
BPTreeNode * insert_into_leaf( BPTreeNode * leaf, int key, BPTreeRecord * pointer );
BPTreeNode * insert_into_leaf_after_splitting(BPTreeNode * root, BPTreeNode * leaf, int key, BPTreeRecord * pointer);
/* Internal-node insertion, without and with a split. The definitions name the second parameter `n` and
 * `old_node` respectively.
 */
BPTreeNode * insert_into_node(BPTreeNode * root, BPTreeNode * parent,
int left_index, int key, BPTreeNode * right);
BPTreeNode * insert_into_node_after_splitting(BPTreeNode * root, BPTreeNode * parent, int left_index,
int key, BPTreeNode * right);
/* Propagation of a split upwards, creation of a new root, and creation of the very first leaf. */
BPTreeNode * insert_into_parent(BPTreeNode * root, BPTreeNode * left, int key, BPTreeNode * right);
BPTreeNode * insert_into_new_root(BPTreeNode * left, int key, BPTreeNode * right);
BPTreeNode * start_new_tree(int key, BPTreeRecord * pointer);
// Deletion.
/* Helpers for the deletion path; their definitions are not part of this section of the file. */
int get_neighbor_index( BPTreeNode * n );
BPTreeNode * adjust_root(BPTreeNode * root);
BPTreeNode * coalesce_nodes(BPTreeNode * root, BPTreeNode * n, BPTreeNode * neighbor, int neighbor_index, int k_prime);
BPTreeNode * redistribute_nodes(BPTreeNode * root, BPTreeNode * n, BPTreeNode * neighbor, int neighbor_index,
int k_prime_index, int k_prime);
BPTreeNode * delete_entry( BPTreeNode * root, BPTreeNode * n, int key, void * pointer );
// FUNCTION DEFINITIONS.
/* Appends new_node to the tail of the global print queue (see BPTree_print).
 * The queue is chained through the nodes' `next` fields; the appended node
 * always ends up with next == NULL.
 */
void enqueue( BPTreeNode * new_node ) {
    BPTreeNode * tail = queue;

    if (tail == NULL) {
        /* Empty queue: the new node becomes the head. */
        queue = new_node;
    } else {
        /* Walk to the last element and hook the new node behind it. */
        for ( ; tail->next != NULL; tail = tail->next)
            ;
        tail->next = new_node;
    }
    new_node->next = NULL;
}
/* Removes and returns the head of the global print queue (see BPTree_print).
 *
 * The returned node has its `next` field reset to NULL so that it no longer
 * references the remaining queue.
 *
 * Returns NULL when the queue is empty instead of dereferencing a NULL head.
 */
BPTreeNode * dequeue( void ) {
    BPTreeNode * n = queue;
    if (n == NULL) {
        return NULL;
    }
    queue = n->next;
    n->next = NULL;
    return n;
}
/* Returns the height of the tree, i.e. the number of edges on the path from
 * the root down to a leaf, measured along the leftmost branch.
 *
 * root: root of the tree; may be NULL for an empty tree.
 *
 * Returns 0 for an empty tree and for a tree whose root is a leaf.
 */
int BPTree_height(BPTreeNode *root) {
    int h = 0;
    BPTreeNode * c = root;
    /* An empty tree has no edges; the other entry points (BPTree_print,
     * find_leaf) accept a NULL root as well. */
    if (c == NULL) {
        return 0;
    }
    while (!c->is_leaf) {
        c = c->pointers[0];
        h++;
    }
    return h;
}
/* Counts the edges between `child` and `root` by following parent links
 * upwards until `root` is reached. Returns 0 when child == root.
 */
int path_to_root( BPTreeNode * root, BPTreeNode * child ) {
    int edges = 0;
    for (BPTreeNode * cur = child; cur != root; cur = cur->parent)
        edges++;
    return edges;
}
/* Writes the tree to stdout in level (rank) order. Each node is printed as
 * its keys followed by "| "; a newline is emitted whenever the depth of the
 * node being printed differs from the depth of the previous line. An empty
 * tree prints "Empty tree.".
 *
 * Uses (and resets) the global queue.
 */
void BPTree_print(BPTreeNode *root) {
    if (root == NULL) {
        printf("Empty tree.\n");
        return;
    }

    int current_rank = 0;
    queue = NULL;
    enqueue(root);

    while (queue != NULL) {
        BPTreeNode * node = dequeue();

        /* Only a node that is the first child of its parent can open a new
         * level, so the depth is recomputed for those nodes only. */
        if (node->parent != NULL && node == node->parent->pointers[0]) {
            int depth = path_to_root( root, node );
            if (depth != current_rank) {
                current_rank = depth;
                printf("\n");
            }
        }

        for (int k = 0; k < node->num_keys; k++) {
            printf("%d ", node->keys[k]);
        }

        /* Internal nodes have num_keys + 1 children to visit later. */
        if (!node->is_leaf) {
            for (int k = 0; k <= node->num_keys; k++) {
                enqueue(node->pointers[k]);
            }
        }
        printf("| ");
    }
    printf("\n");
}
/* Collects every key in the inclusive range [key_start, key_end] together
 * with the pointer stored next to it in its leaf.
 *
 * root:              root of the tree; NULL yields 0 results.
 * key_start/key_end: inclusive bounds of the range.
 * returned_keys:     receives the matching keys in ascending order.
 * returned_pointers: receives the leaf pointer entries belonging to those
 *                    keys (insert_into_leaf stores BPTreeRecord pointers
 *                    there).
 *
 * Returns the number of entries written. No capacity is passed in, so the
 * caller must supply arrays large enough for every match.
 *
 * The scan starts in the leaf that find_leaf() selects for key_start and
 * follows the leaf chain via pointers[order - 1]. If every key of that first
 * leaf is smaller than key_start, the scan continues with the next leaf
 * instead of giving up, because the range may begin there. The scan stops as
 * soon as a key greater than key_end is seen.
 */
int find_range(BPTreeNode *root, int key_start, int key_end, int returned_keys[], void *returned_pointers[]) {
    int i;
    int num_found = 0;
    BPTreeNode * n = find_leaf(root, key_start);
    if (n == NULL) return 0;

    /* Skip the entries of the first leaf that lie below the range. */
    for (i = 0; i < n->num_keys && n->keys[i] < key_start; i++) ;

    while (n != NULL) {
        for ( ; i < n->num_keys && n->keys[i] <= key_end; i++) {
            returned_keys[num_found] = n->keys[i];
            returned_pointers[num_found] = n->pointers[i];
            num_found++;
        }
        /* Stopped before the end of the leaf: the current key exceeds
         * key_end, and so does every key in the following leaves. */
        if (i < n->num_keys) break;
        n = n->pointers[order - 1];
        i = 0;
    }
    return num_found;
}
/* Descends from the root to the leaf in which `key` would be stored.
 * At each internal node the child after the last separator key that is
 * <= key is chosen. The key itself need not be present in the returned leaf.
 * Returns NULL for an empty tree (root == NULL).
 */
BPTreeNode *find_leaf(BPTreeNode *root, int key) {
    BPTreeNode * node = root;
    if (node == NULL)
        return NULL;

    while (!node->is_leaf) {
        int slot = 0;
        while (slot < node->num_keys && key >= node->keys[slot])
            slot++;
        node = (BPTreeNode *)node->pointers[slot];
    }
    return node;
}
/* Looks up `key` and returns the record stored for it, or NULL when the
 * tree is empty or the key is not present.
 */
BPTreeRecord *find(BPTreeNode *root, int key) {
    BPTreeNode * leaf = find_leaf(root, key);
    if (leaf == NULL)
        return NULL;

    /* Linear scan of the candidate leaf for an exact match. */
    for (int pos = 0; pos < leaf->num_keys; pos++) {
        if (leaf->keys[pos] == key)
            return (BPTreeRecord *)leaf->pointers[pos];
    }
    return NULL;
}
/* Returns the position at which a sequence of `length` entries is split in
 * two: length / 2 for even lengths, length / 2 + 1 for odd lengths.
 */
int cut( int length ) {
    int half = length / 2;
    /* The comparison contributes 1 exactly when length is odd. */
    return half + (length % 2 != 0);
}
// INSERTION
/* Allocates a BPTreeRecord wrapping `value` and returns it.
 * On allocation failure the error is reported with perror() and the process
 * exits with EXIT_FAILURE, so the result is never NULL.
 */
BPTreeRecord * make_record(void *value) {
    BPTreeRecord * rec = malloc(sizeof *rec);
    if (rec == NULL) {
        perror("Record creation.");
        exit(EXIT_FAILURE);
    }
    rec->value = value;
    return rec;
}
/* Allocates an empty node with room for order - 1 keys and order pointers.
 * The node starts out as a non-leaf with no keys, no parent and no queue
 * link; the contents of the key and pointer arrays are left uninitialized.
 * Any allocation failure is reported with perror() and terminates the
 * process with EXIT_FAILURE.
 */
BPTreeNode * make_node( void ) {
    BPTreeNode * node = malloc(sizeof(BPTreeNode));
    if (node == NULL) {
        perror("Node creation.");
        exit(EXIT_FAILURE);
    }

    node->is_leaf = false;
    node->num_keys = 0;
    node->parent = NULL;
    node->next = NULL;

    node->keys = malloc( (order - 1) * sizeof(int) );
    if (node->keys == NULL) {
        perror("New node keys array.");
        exit(EXIT_FAILURE);
    }

    node->pointers = malloc( order * sizeof(void *) );
    if (node->pointers == NULL) {
        perror("New node pointers array.");
        exit(EXIT_FAILURE);
    }

    return node;
}
/* Allocates a node via make_node() and flags it as a leaf. */
BPTreeNode * make_leaf( void ) {
    BPTreeNode * node = make_node();
    node->is_leaf = true;
    return node;
}
/* Returns the index in parent->pointers of the entry that refers to `left`.
 * Used by insert_into_parent to locate the node to the left of the key being
 * inserted. If no entry matches, the result is parent->num_keys + 1.
 */
int get_left_index(BPTreeNode * parent, BPTreeNode * left) {
    int idx;
    for (idx = 0; idx <= parent->num_keys; idx++) {
        if (parent->pointers[idx] == left)
            break;
    }
    return idx;
}
/* Inserts `key` and its record pointer into a leaf that still has room,
 * keeping the keys in ascending order. The new entry is placed before the
 * first existing key that is not smaller than `key`.
 * Returns the same leaf.
 */
BPTreeNode * insert_into_leaf( BPTreeNode * leaf, int key, BPTreeRecord * pointer ) {
    int slot = 0;
    while (slot < leaf->num_keys && leaf->keys[slot] < key)
        slot++;

    /* Open a gap at `slot` by moving the tail one position to the right,
     * starting from the last entry. */
    for (int src = leaf->num_keys - 1; src >= slot; src--) {
        leaf->keys[src + 1] = leaf->keys[src];
        leaf->pointers[src + 1] = leaf->pointers[src];
    }

    leaf->keys[slot] = key;
    leaf->pointers[slot] = pointer;
    leaf->num_keys++;
    return leaf;
}
/* Inserts `key` and its record pointer into a leaf that has no room left.
 * The existing entries and the new one are merged into temporary arrays,
 * the first cut(order - 1) entries stay in `leaf`, the remainder moves to a
 * freshly allocated leaf, and the new leaf's first key is handed to
 * insert_into_parent(). Returns the root of the tree after insertion.
 * Allocation failures terminate the process with EXIT_FAILURE.
 */
BPTreeNode * insert_into_leaf_after_splitting(BPTreeNode * root, BPTreeNode * leaf, int key, BPTreeRecord * pointer) {
    BPTreeNode * sibling = make_leaf();

    int * merged_keys = malloc( order * sizeof(int) );
    if (merged_keys == NULL) {
        perror("Temporary keys array.");
        exit(EXIT_FAILURE);
    }
    void ** merged_pointers = malloc( order * sizeof(void *) );
    if (merged_pointers == NULL) {
        perror("Temporary pointers array.");
        exit(EXIT_FAILURE);
    }

    /* Position the new key takes in the merged, sorted sequence. */
    int slot = 0;
    while (slot < order - 1 && leaf->keys[slot] < key)
        slot++;

    /* Copy the existing entries, shifting those at or after `slot` one
     * position to the right so that `slot` itself stays free. */
    for (int src = 0; src < leaf->num_keys; src++) {
        int dst = (src < slot) ? src : src + 1;
        merged_keys[dst] = leaf->keys[src];
        merged_pointers[dst] = leaf->pointers[src];
    }
    merged_keys[slot] = key;
    merged_pointers[slot] = pointer;

    /* Lower part stays in the old leaf ... */
    int split = cut(order - 1);
    leaf->num_keys = 0;
    for (int k = 0; k < split; k++) {
        leaf->pointers[k] = merged_pointers[k];
        leaf->keys[k] = merged_keys[k];
        leaf->num_keys++;
    }
    /* ... upper part goes to the new one. */
    for (int k = split; k < order; k++) {
        int dst = k - split;
        sibling->pointers[dst] = merged_pointers[k];
        sibling->keys[dst] = merged_keys[k];
        sibling->num_keys++;
    }

    free(merged_pointers);
    free(merged_keys);

    /* Splice the new leaf into the leaf chain right after the old one; the
     * last pointer slot of a leaf is its link to the next leaf. */
    sibling->pointers[order - 1] = leaf->pointers[order - 1];
    leaf->pointers[order - 1] = sibling;

    /* Clear the record slots that are no longer in use. */
    for (int k = leaf->num_keys; k < order - 1; k++)
        leaf->pointers[k] = NULL;
    for (int k = sibling->num_keys; k < order - 1; k++)
        sibling->pointers[k] = NULL;

    sibling->parent = leaf->parent;
    int separator = sibling->keys[0];
    return insert_into_parent(root, leaf, separator, sibling);
}
/* Inserts `key` and the child pointer `right` into internal node `n`, which
 * must still have room. The key goes to index left_index and the pointer to
 * index left_index + 1, i.e. immediately to the right of the existing child
 * at left_index. Returns `root` unchanged.
 */
BPTreeNode * insert_into_node(BPTreeNode * root, BPTreeNode * n,
                              int left_index, int key, BPTreeNode * right) {
    /* Shift keys from left_index onwards, and the pointers to their right,
     * one slot to the right, working from the end. */
    for (int k = n->num_keys - 1; k >= left_index; k--) {
        n->pointers[k + 2] = n->pointers[k + 1];
        n->keys[k + 1] = n->keys[k];
    }
    n->keys[left_index] = key;
    n->pointers[left_index + 1] = right;
    n->num_keys++;
    return root;
}
/* Inserts a new key and pointer to a node
 * into a node, causing the node's size to exceed
 * the order, and causing the node to split into two.
 *
 * root:       current root of the tree.
 * old_node:   the full internal node that receives the entry and is split.
 * left_index: index in old_node->pointers of the child to the left of `key`.
 * key, right: the separator key and the child pointer to insert to its right.
 *
 * Returns whatever insert_into_parent() returns for the key pushed up by the split.
 * Allocation failures are reported with perror() and terminate the process.
 */
BPTreeNode * insert_into_node_after_splitting(BPTreeNode * root, BPTreeNode * old_node, int left_index,
int key, BPTreeNode * right) {
int i, j, split, k_prime;
BPTreeNode * new_node, * child;
int * temp_keys;
BPTreeNode ** temp_pointers;
/* First create a temporary set of keys and pointers
 * to hold everything in order, including
 * the new key and pointer, inserted in their
 * correct places.
 * Then create a new node and copy half of the
 * keys and pointers to the old node and
 * the other half to the new.
 */
/* The temporaries hold one entry more than a node can: order + 1 pointers and order keys. */
temp_pointers = malloc( (order + 1) * sizeof(BPTreeNode *) );
if (temp_pointers == NULL) {
perror("Temporary pointers array for splitting nodes.");
exit(EXIT_FAILURE);
}
temp_keys = malloc( order * sizeof(int) );
if (temp_keys == NULL) {
perror("Temporary keys array for splitting nodes.");
exit(EXIT_FAILURE);
}
/* Copy the existing pointers, skipping slot left_index + 1 which is reserved for `right`. */
for (i = 0, j = 0; i < old_node->num_keys + 1; i++, j++) {
if (j == left_index + 1) j++;
temp_pointers[j] = old_node->pointers[i];
}
/* Copy the existing keys, skipping slot left_index which is reserved for `key`. */
for (i = 0, j = 0; i < old_node->num_keys; i++, j++) {
if (j == left_index) j++;
temp_keys[j] = old_node->keys[i];
}
temp_pointers[left_index + 1] = right;
temp_keys[left_index] = key;
/* Create the new node and copy
 * half the keys and pointers to the
 * old and half to the new.
 */
split = cut(order);
new_node = make_node();
old_node->num_keys = 0;
/* The old node keeps the first split - 1 keys ... */
for (i = 0; i < split - 1; i++) {
old_node->pointers[i] = temp_pointers[i];
old_node->keys[i] = temp_keys[i];
old_node->num_keys++;
}
/* ... plus one more pointer than keys (i == split - 1 here). */
old_node->pointers[i] = temp_pointers[i];
/* The key at split - 1 is stored in neither half; it is passed up to the parent below. */
k_prime = temp_keys[split - 1];
/* ++i advances to split: everything from there on goes to the new node. */
for (++i, j = 0; i < order; i++, j++) {
new_node->pointers[j] = temp_pointers[i];
new_node->keys[j] = temp_keys[i];
new_node->num_keys++;
}
/* Trailing pointer of the new node (i == order here). */
new_node->pointers[j] = temp_pointers[i];
free(temp_pointers);
free(temp_keys);
new_node->parent = old_node->parent;
/* The children that moved must point back at their new parent. */
for (i = 0; i <= new_node->num_keys; i++) {
child = new_node->pointers[i];
child->parent = new_node;
}
/* Insert a new key into the parent of the two
 * nodes resulting from the split, with
 * the old node to the left and the new to the right.
 */
return insert_into_parent(root, old_node, k_prime, new_node);
}
/* Registers `right` (a leaf or internal node produced by a split) in the
 * parent of `left`, using `key` as the separator between the two.
 * Three cases are handled: `left` has no parent (a new root is created),
 * the parent has room (plain insertion), or the parent is full (it is split
 * in turn). Returns the root of the tree after insertion.
 */
BPTreeNode * insert_into_parent(BPTreeNode * root, BPTreeNode * left, int key, BPTreeNode * right) {
    BPTreeNode * parent = left->parent;

    /* `left` was the root: grow the tree by one level. */
    if (parent == NULL)
        return insert_into_new_root(left, key, right);

    /* Position of `left` among the parent's children. */
    int slot = get_left_index(parent, left);

    /* A node holds at most order - 1 keys; a full parent has to be split. */
    if (parent->num_keys >= order - 1)
        return insert_into_node_after_splitting(root, parent, slot, key, right);

    return insert_into_node(root, parent, slot, key, right);
}
/* Builds a new internal root with the single separator `key` and the two
 * children `left` and `right`, re-parents both children to it, and returns
 * the new root.
 */
BPTreeNode * insert_into_new_root(BPTreeNode * left, int key, BPTreeNode * right) {
    BPTreeNode * top = make_node();

    top->parent = NULL;
    top->keys[0] = key;
    top->num_keys++;

    top->pointers[0] = left;
    left->parent = top;

    top->pointers[1] = right;
    right->parent = top;

    return top;
}
/* First insertion:
* start a new tree.
*/
BPTreeNode * start_new_tree(int key, BPTreeRecord * pointer) {
BPTreeNode * root = make_leaf();
root->keys[0] = key;
root->pointers[0] = pointer;
root->pointers[order - 1] = NULL;
root->parent = NULL;