Add backing store, memory management

master
Chris Mason 2007-02-02 09:18:22 -05:00 committed by David Woodhouse
parent 5622872d31
commit 215ad20a9b
8 changed files with 1446 additions and 162 deletions

View File

@ -1,7 +1,12 @@
ctree: ctree.o
gcc -g -O2 -Wall -o ctree ctree.c
CFLAGS= -g -Wall
.c.o:
$(CC) $(CFLAGS) -c $<
ctree: ctree.o disk-io.h ctree.h disk-io.o radix-tree.o radix-tree.h
gcc $(CFLAGS) -o ctree ctree.o disk-io.o radix-tree.o
clean:
rm ctree ctree.o
rm ctree *.o

430
ctree.c
View File

@ -1,68 +1,25 @@
#include <stdio.h>
#include <stdlib.h>
#include "kerncompat.h"
#define BLOCKSIZE 4096
struct key {
u64 objectid;
u32 flags;
u64 offset;
} __attribute__ ((__packed__));
struct header {
u64 fsid[2]; /* FS specific uuid */
u64 blocknum;
u64 parentid;
u32 csum;
u32 ham;
u16 nritems;
u16 flags;
} __attribute__ ((__packed__));
#define NODEPTRS_PER_BLOCK ((BLOCKSIZE - sizeof(struct header)) / \
(sizeof(struct key) + sizeof(u64)))
#define LEVEL_BITS 3
#define MAX_LEVEL (1 << LEVEL_BITS)
#define node_level(f) ((f) & (MAX_LEVEL-1))
#define is_leaf(f) (node_level(f) == 0)
struct ctree_root {
struct node *node;
};
struct item {
struct key key;
u16 offset;
u16 size;
} __attribute__ ((__packed__));
#define LEAF_DATA_SIZE (BLOCKSIZE - sizeof(struct header))
struct leaf {
struct header header;
union {
struct item items[LEAF_DATA_SIZE/sizeof(struct item)];
u8 data[BLOCKSIZE-sizeof(struct header)];
};
} __attribute__ ((__packed__));
struct node {
struct header header;
struct key keys[NODEPTRS_PER_BLOCK];
u64 blockptrs[NODEPTRS_PER_BLOCK];
} __attribute__ ((__packed__));
struct ctree_path {
struct node *nodes[MAX_LEVEL];
int slots[MAX_LEVEL];
};
#include "radix-tree.h"
#include "ctree.h"
#include "disk-io.h"
static inline void init_path(struct ctree_path *p)
{
memset(p, 0, sizeof(*p));
}
static void release_path(struct ctree_root *root, struct ctree_path *p)
{
int i;
for (i = 0; i < MAX_LEVEL; i++) {
if (!p->nodes[i])
break;
tree_block_release(root, p->nodes[i]);
}
}
static inline unsigned int leaf_data_end(struct leaf *leaf)
{
unsigned int nr = leaf->header.nritems;
@ -135,26 +92,25 @@ int bin_search(struct node *c, struct key *key, int *slot)
return -1;
}
void *read_block(u64 blocknum)
{
return (void *)blocknum;
}
int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p)
{
struct node *c = root->node;
struct tree_buffer *b = root->node;
struct node *c;
int slot;
int ret;
int level;
while (c) {
b->count++;
while (b) {
c = &b->node;
level = node_level(c->header.flags);
p->nodes[level] = c;
p->nodes[level] = b;
ret = bin_search(c, key, &slot);
if (!is_leaf(c->header.flags)) {
if (ret && slot > 0)
slot -= 1;
p->slots[level] = slot;
c = read_block(c->blockptrs[slot]);
b = read_tree_block(root, c->blockptrs[slot]);
continue;
} else {
p->slots[level] = slot;
@ -164,17 +120,20 @@ int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p)
return -1;
}
static void fixup_low_keys(struct ctree_path *path, struct key *key,
int level)
static void fixup_low_keys(struct ctree_root *root,
struct ctree_path *path, struct key *key,
int level)
{
int i;
/* adjust the pointers going up the tree */
for (i = level; i < MAX_LEVEL; i++) {
struct node *t = path->nodes[i];
struct node *t;
int tslot = path->slots[i];
if (!t)
if (!path->nodes[i])
break;
t = &path->nodes[i]->node;
memcpy(t->keys + tslot, key, sizeof(*key));
write_tree_block(root, path->nodes[i]);
if (tslot != 0)
break;
}
@ -190,27 +149,34 @@ int __insert_ptr(struct ctree_root *root,
int nritems;
/* need a new root */
if (!path->nodes[level]) {
c = malloc(sizeof(struct node));
struct tree_buffer *t;
t = alloc_free_block(root);
c = &t->node;
memset(c, 0, sizeof(c));
c->header.nritems = 2;
c->header.flags = node_level(level);
lower = path->nodes[level-1];
c->header.blocknr = t->blocknr;
lower = &path->nodes[level-1]->node;
if (is_leaf(lower->header.flags))
lower_key = &((struct leaf *)lower)->items[0].key;
else
lower_key = lower->keys;
memcpy(c->keys, lower_key, sizeof(struct key));
memcpy(c->keys + 1, key, sizeof(struct key));
c->blockptrs[0] = (u64)lower;
c->blockptrs[0] = path->nodes[level-1]->blocknr;
c->blockptrs[1] = blocknr;
root->node = c;
path->nodes[level] = c;
/* the path has an extra ref to root->node */
tree_block_release(root, root->node);
root->node = t;
t->count++;
write_tree_block(root, t);
path->nodes[level] = t;
path->slots[level] = 0;
if (c->keys[1].objectid == 0)
BUG();
return 0;
}
lower = path->nodes[level];
lower = &path->nodes[level]->node;
nritems = lower->header.nritems;
if (slot > nritems)
BUG();
@ -227,6 +193,7 @@ int __insert_ptr(struct ctree_root *root,
lower->header.nritems++;
if (lower->keys[1].objectid == 0)
BUG();
write_tree_block(root, path->nodes[level]);
return 0;
}
@ -238,6 +205,8 @@ int push_node_left(struct ctree_root *root, struct ctree_path *path, int level)
int push_items = 0;
int left_nritems;
int right_nritems;
struct tree_buffer *t;
struct tree_buffer *right_buf;
if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0)
return 1;
@ -245,13 +214,18 @@ int push_node_left(struct ctree_root *root, struct ctree_path *path, int level)
if (slot == 0)
return 1;
left = read_block(path->nodes[level + 1]->blockptrs[slot - 1]);
right = path->nodes[level];
t = read_tree_block(root,
path->nodes[level + 1]->node.blockptrs[slot - 1]);
left = &t->node;
right_buf = path->nodes[level];
right = &right_buf->node;
left_nritems = left->header.nritems;
right_nritems = right->header.nritems;
push_items = NODEPTRS_PER_BLOCK - (left_nritems + 1);
if (push_items <= 0)
if (push_items <= 0) {
tree_block_release(root, t);
return 1;
}
if (right_nritems < push_items)
push_items = right_nritems;
@ -267,15 +241,20 @@ int push_node_left(struct ctree_root *root, struct ctree_path *path, int level)
left->header.nritems += push_items;
/* adjust the pointers going up the tree */
fixup_low_keys(path, right->keys, level + 1);
fixup_low_keys(root, path, right->keys, level + 1);
write_tree_block(root, t);
write_tree_block(root, right_buf);
/* then fixup the leaf pointer in the path */
if (path->slots[level] < push_items) {
path->slots[level] += left_nritems;
path->nodes[level] = (struct node*)left;
tree_block_release(root, path->nodes[level]);
path->nodes[level] = t;
path->slots[level + 1] -= 1;
} else {
path->slots[level] -= push_items;
tree_block_release(root, t);
}
return 0;
}
@ -283,6 +262,8 @@ int push_node_left(struct ctree_root *root, struct ctree_path *path, int level)
int push_node_right(struct ctree_root *root, struct ctree_path *path, int level)
{
int slot;
struct tree_buffer *t;
struct tree_buffer *src_buffer;
struct node *dst;
struct node *src;
int push_items = 0;
@ -295,16 +276,21 @@ int push_node_right(struct ctree_root *root, struct ctree_path *path, int level)
if (slot == NODEPTRS_PER_BLOCK - 1)
return 1;
if (slot >= path->nodes[level + 1]->header.nritems -1)
if (slot >= path->nodes[level + 1]->node.header.nritems -1)
return 1;
dst = read_block(path->nodes[level + 1]->blockptrs[slot + 1]);
src = path->nodes[level];
t = read_tree_block(root,
path->nodes[level + 1]->node.blockptrs[slot + 1]);
dst = &t->node;
src_buffer = path->nodes[level];
src = &src_buffer->node;
dst_nritems = dst->header.nritems;
src_nritems = src->header.nritems;
push_items = NODEPTRS_PER_BLOCK - (dst_nritems + 1);
if (push_items <= 0)
if (push_items <= 0) {
tree_block_release(root, t);
return 1;
}
if (src_nritems < push_items)
push_items = src_nritems;
@ -322,13 +308,21 @@ int push_node_right(struct ctree_root *root, struct ctree_path *path, int level)
dst->header.nritems += push_items;
/* adjust the pointers going up the tree */
memcpy(path->nodes[level + 1]->keys + path->slots[level + 1] + 1,
memcpy(path->nodes[level + 1]->node.keys + path->slots[level + 1] + 1,
dst->keys, sizeof(struct key));
write_tree_block(root, path->nodes[level + 1]);
write_tree_block(root, t);
write_tree_block(root, src_buffer);
/* then fixup the leaf pointer in the path */
if (path->slots[level] >= src->header.nritems) {
path->slots[level] -= src->header.nritems;
path->nodes[level] = (struct node*)dst;
tree_block_release(root, path->nodes[level]);
path->nodes[level] = t;
path->slots[level + 1] += 1;
} else {
tree_block_release(root, t);
}
return 0;
}
@ -337,15 +331,18 @@ int insert_ptr(struct ctree_root *root,
struct ctree_path *path, struct key *key,
u64 blocknr, int level)
{
struct node *c = path->nodes[level];
struct tree_buffer *t = path->nodes[level];
struct node *c = &path->nodes[level]->node;
struct node *b;
struct node *bal[MAX_LEVEL];
struct tree_buffer *b_buffer;
struct tree_buffer *bal[MAX_LEVEL];
int bal_level = level;
int mid;
int bal_start = -1;
memset(bal, 0, ARRAY_SIZE(bal));
while(c && c->header.nritems == NODEPTRS_PER_BLOCK) {
while(t && t->node.header.nritems == NODEPTRS_PER_BLOCK) {
c = &t->node;
if (push_node_left(root, path,
node_level(c->header.flags)) == 0)
break;
@ -355,8 +352,10 @@ int insert_ptr(struct ctree_root *root,
bal_start = bal_level;
if (bal_level == MAX_LEVEL - 1)
BUG();
b = malloc(sizeof(struct node));
b_buffer = alloc_free_block(root);
b = &b_buffer->node;
b->header.flags = c->header.flags;
b->header.blocknr = b_buffer->blocknr;
mid = (c->header.nritems + 1) / 2;
memcpy(b->keys, c->keys + mid,
(c->header.nritems - mid) * sizeof(struct key));
@ -364,21 +363,28 @@ int insert_ptr(struct ctree_root *root,
(c->header.nritems - mid) * sizeof(u64));
b->header.nritems = c->header.nritems - mid;
c->header.nritems = mid;
bal[bal_level] = b;
write_tree_block(root, t);
write_tree_block(root, b_buffer);
bal[bal_level] = b_buffer;
if (bal_level == MAX_LEVEL - 1)
break;
bal_level += 1;
c = path->nodes[bal_level];
t = path->nodes[bal_level];
}
while(bal_start > 0) {
b = bal[bal_start];
c = path->nodes[bal_start];
__insert_ptr(root, path, b->keys, (u64)b,
b_buffer = bal[bal_start];
c = &path->nodes[bal_start]->node;
__insert_ptr(root, path, b_buffer->node.keys, b_buffer->blocknr,
path->slots[bal_start + 1] + 1, bal_start + 1);
if (path->slots[bal_start] >= c->header.nritems) {
path->slots[bal_start] -= c->header.nritems;
path->nodes[bal_start] = b;
tree_block_release(root, path->nodes[bal_start]);
path->nodes[bal_start] = b_buffer;
path->slots[bal_start + 1] += 1;
} else {
tree_block_release(root, b_buffer);
}
bal_start--;
if (!bal[bal_start])
@ -404,7 +410,9 @@ int leaf_space_used(struct leaf *l, int start, int nr)
int push_leaf_left(struct ctree_root *root, struct ctree_path *path,
int data_size)
{
struct leaf *right = (struct leaf *)path->nodes[0];
struct tree_buffer *right_buf = path->nodes[0];
struct leaf *right = &right_buf->leaf;
struct tree_buffer *t;
struct leaf *left;
int slot;
int i;
@ -421,9 +429,11 @@ int push_leaf_left(struct ctree_root *root, struct ctree_path *path,
if (!path->nodes[1]) {
return 1;
}
left = read_block(path->nodes[1]->blockptrs[slot - 1]);
t = read_tree_block(root, path->nodes[1]->node.blockptrs[slot - 1]);
left = &t->leaf;
free_space = leaf_free_space(left);
if (free_space < data_size + sizeof(struct item)) {
tree_block_release(root, t);
return 1;
}
for (i = 0; i < right->header.nritems; i++) {
@ -436,6 +446,7 @@ int push_leaf_left(struct ctree_root *root, struct ctree_path *path,
push_space += item->size + sizeof(*item);
}
if (push_items == 0) {
tree_block_release(root, t);
return 1;
}
/* push data from right to left */
@ -446,6 +457,8 @@ int push_leaf_left(struct ctree_root *root, struct ctree_path *path,
right->data + right->items[push_items - 1].offset,
push_space);
old_left_nritems = left->header.nritems;
BUG_ON(old_left_nritems < 0);
for(i = old_left_nritems; i < old_left_nritems + push_items; i++) {
left->items[i].offset -= LEAF_DATA_SIZE -
left->items[old_left_nritems -1].offset;
@ -460,30 +473,40 @@ int push_leaf_left(struct ctree_root *root, struct ctree_path *path,
(right->header.nritems - push_items) * sizeof(struct item));
right->header.nritems -= push_items;
push_space = LEAF_DATA_SIZE;
for (i = 0; i < right->header.nritems; i++) {
right->items[i].offset = push_space - right->items[i].size;
push_space = right->items[i].offset;
}
fixup_low_keys(path, &right->items[0].key, 1);
write_tree_block(root, t);
write_tree_block(root, right_buf);
fixup_low_keys(root, path, &right->items[0].key, 1);
/* then fixup the leaf pointer in the path */
if (path->slots[0] < push_items) {
path->slots[0] += old_left_nritems;
path->nodes[0] = (struct node*)left;
tree_block_release(root, path->nodes[0]);
path->nodes[0] = t;
path->slots[1] -= 1;
} else {
tree_block_release(root, t);
path->slots[0] -= push_items;
}
BUG_ON(path->slots[0] < 0);
return 0;
}
int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size)
{
struct leaf *l = (struct leaf *)path->nodes[0];
int nritems = l->header.nritems;
int mid = (nritems + 1)/ 2;
int slot = path->slots[0];
struct tree_buffer *l_buf = path->nodes[0];
struct leaf *l = &l_buf->leaf;
int nritems;
int mid;
int slot;
struct leaf *right;
struct tree_buffer *right_buffer;
int space_needed = data_size + sizeof(struct item);
int data_copy_size;
int rt_data_off;
@ -491,9 +514,19 @@ int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size)
int ret;
if (push_leaf_left(root, path, data_size) == 0) {
return 0;
l_buf = path->nodes[0];
l = &l_buf->leaf;
if (leaf_free_space(l) >= sizeof(struct item) + data_size)
return 0;
}
right = malloc(sizeof(struct leaf));
slot = path->slots[0];
nritems = l->header.nritems;
mid = (nritems + 1)/ 2;
right_buffer = alloc_free_block(root);
BUG_ON(!right_buffer);
BUG_ON(mid == nritems);
right = &right_buffer->leaf;
memset(right, 0, sizeof(*right));
if (mid <= slot) {
if (leaf_space_used(l, mid, nritems - mid) + space_needed >
@ -505,6 +538,8 @@ int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size)
BUG();
}
right->header.nritems = nritems - mid;
right->header.blocknr = right_buffer->blocknr;
right->header.flags = node_level(0);
data_copy_size = l->items[mid].offset + l->items[mid].size -
leaf_data_end(l);
memcpy(right->items, l->items + mid,
@ -518,12 +553,20 @@ int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size)
}
l->header.nritems = mid;
ret = insert_ptr(root, path, &right->items[0].key,
(u64)right, 1);
right_buffer->blocknr, 1);
write_tree_block(root, right_buffer);
write_tree_block(root, l_buf);
BUG_ON(path->slots[0] != slot);
if (mid <= slot) {
path->nodes[0] = (struct node *)right;
tree_block_release(root, path->nodes[0]);
path->nodes[0] = right_buffer;
path->slots[0] -= mid;
path->slots[1] += 1;
}
} else
tree_block_release(root, right_buffer);
BUG_ON(path->slots[0] < 0);
return ret;
}
@ -532,28 +575,48 @@ int insert_item(struct ctree_root *root, struct key *key,
{
int ret;
int slot;
int slot_orig;
struct leaf *leaf;
struct tree_buffer *leaf_buf;
unsigned int nritems;
unsigned int data_end;
struct ctree_path path;
if (!root->node) {
struct tree_buffer *t;
t = alloc_free_block(root);
BUG_ON(!t);
t->node.header.nritems = 0;
t->node.header.flags = node_level(0);
t->node.header.blocknr = t->blocknr;
root->node = t;
write_tree_block(root, t);
}
init_path(&path);
ret = search_slot(root, key, &path);
if (ret == 0)
if (ret == 0) {
release_path(root, &path);
return -EEXIST;
}
leaf = (struct leaf *)path.nodes[0];
if (leaf_free_space(leaf) < sizeof(struct item) + data_size)
slot_orig = path.slots[0];
leaf_buf = path.nodes[0];
leaf = &leaf_buf->leaf;
if (leaf_free_space(leaf) < sizeof(struct item) + data_size) {
split_leaf(root, &path, data_size);
leaf = (struct leaf *)path.nodes[0];
leaf_buf = path.nodes[0];
leaf = &path.nodes[0]->leaf;
}
nritems = leaf->header.nritems;
data_end = leaf_data_end(leaf);
if (leaf_free_space(leaf) < sizeof(struct item) + data_size)
BUG();
slot = path.slots[0];
BUG_ON(slot < 0);
if (slot == 0)
fixup_low_keys(&path, key, 1);
fixup_low_keys(root, &path, key, 1);
if (slot != nritems) {
int i;
unsigned int old_data = leaf->items[slot].offset +
@ -580,21 +643,25 @@ int insert_item(struct ctree_root *root, struct key *key,
leaf->items[slot].size = data_size;
memcpy(leaf->data + data_end - data_size, data, data_size);
leaf->header.nritems += 1;
write_tree_block(root, leaf_buf);
if (leaf_free_space(leaf) < 0)
BUG();
release_path(root, &path);
return 0;
}
int del_ptr(struct ctree_root *root, struct ctree_path *path, int level)
{
int slot;
struct tree_buffer *t;
struct node *node;
int nritems;
while(1) {
node = path->nodes[level];
if (!node)
t = path->nodes[level];
if (!t)
break;
node = &t->node;
slot = path->slots[level];
nritems = node->header.nritems;
@ -606,28 +673,34 @@ int del_ptr(struct ctree_root *root, struct ctree_path *path, int level)
sizeof(u64) * (nritems - slot - 1));
}
node->header.nritems--;
write_tree_block(root, t);
if (node->header.nritems != 0) {
int tslot;
if (slot == 0)
fixup_low_keys(path, node->keys, level + 1);
fixup_low_keys(root, path, node->keys,
level + 1);
tslot = path->slots[level+1];
t->count++;
push_node_left(root, path, level);
if (node->header.nritems) {
push_node_right(root, path, level);
}
if (node->header.nritems)
if (node->header.nritems) {
tree_block_release(root, t);
break;
}
tree_block_release(root, t);
path->slots[level+1] = tslot;
}
if (node == root->node) {
printf("root is now null!\n");
root->node = NULL;
if (t == root->node) {
/* just turn the root into a leaf and break */
root->node->node.header.flags = node_level(0);
write_tree_block(root, t);
break;
}
level++;
if (!path->nodes[level])
BUG();
free(node);
}
return 0;
}
@ -636,10 +709,12 @@ int del_item(struct ctree_root *root, struct ctree_path *path)
{
int slot;
struct leaf *leaf;
struct tree_buffer *leaf_buf;
int doff;
int dsize;
leaf = (struct leaf *)path->nodes[0];
leaf_buf = path->nodes[0];
leaf = &leaf_buf->leaf;
slot = path->slots[0];
doff = leaf->items[slot].offset;
dsize = leaf->items[slot].size;
@ -658,14 +733,15 @@ int del_item(struct ctree_root *root, struct ctree_path *path)
}
leaf->header.nritems -= 1;
if (leaf->header.nritems == 0) {
if (leaf == (struct leaf *)root->node)
root->node = NULL;
else
if (leaf_buf == root->node) {
leaf->header.flags = node_level(0);
write_tree_block(root, leaf_buf);
} else
del_ptr(root, path, 1);
free(leaf);
} else {
if (slot == 0)
fixup_low_keys(path, &leaf->items[0].key, 1);
fixup_low_keys(root, path, &leaf->items[0].key, 1);
write_tree_block(root, leaf_buf);
if (leaf_space_used(leaf, 0, leaf->header.nritems) <
LEAF_DATA_SIZE / 4) {
/* push_leaf_left fixes the path.
@ -673,12 +749,13 @@ int del_item(struct ctree_root *root, struct ctree_path *path)
* for possible call to del_ptr below
*/
slot = path->slots[1];
leaf_buf->count++;
push_leaf_left(root, path, 1);
if (leaf->header.nritems == 0) {
free(leaf);
path->slots[1] = slot;
del_ptr(root, path, 1);
}
tree_block_release(root, leaf_buf);
}
}
return 0;
@ -689,7 +766,7 @@ void print_leaf(struct leaf *l)
int i;
int nr = l->header.nritems;
struct item *item;
printf("leaf %p total ptrs %d free space %d\n", l, nr,
printf("leaf %lu total ptrs %d free space %d\n", l->header.blocknr, nr,
leaf_free_space(l));
fflush(stdout);
for (i = 0 ; i < nr ; i++) {
@ -703,38 +780,45 @@ void print_leaf(struct leaf *l)
fflush(stdout);
}
}
void print_tree(struct node *c)
void print_tree(struct ctree_root *root, struct tree_buffer *t)
{
int i;
int nr;
struct node *c;
if (!c)
if (!t)
return;
c = &t->node;
nr = c->header.nritems;
if (c->header.blocknr != t->blocknr)
BUG();
if (is_leaf(c->header.flags)) {
print_leaf((struct leaf *)c);
return;
}
printf("node %p level %d total ptrs %d free spc %lu\n", c,
printf("node %lu level %d total ptrs %d free spc %lu\n", t->blocknr,
node_level(c->header.flags), c->header.nritems,
NODEPTRS_PER_BLOCK - c->header.nritems);
fflush(stdout);
for (i = 0; i < nr; i++) {
printf("\tkey %d (%lu %u %lu) block %lx\n",
printf("\tkey %d (%lu %u %lu) block %lu\n",
i,
c->keys[i].objectid, c->keys[i].flags, c->keys[i].offset,
c->blockptrs[i]);
fflush(stdout);
}
for (i = 0; i < nr; i++) {
struct node *next = read_block(c->blockptrs[i]);
struct tree_buffer *next_buf = read_tree_block(root,
c->blockptrs[i]);
struct node *next = &next_buf->node;
if (is_leaf(next->header.flags) &&
node_level(c->header.flags) != 1)
BUG();
if (node_level(next->header.flags) !=
node_level(c->header.flags) - 1)
BUG();
print_tree(next);
print_tree(root, next_buf);
tree_block_release(root, next_buf);
}
}
@ -746,23 +830,24 @@ int next_key(int i, int max_key) {
}
int main() {
struct leaf *first_node = malloc(sizeof(struct leaf));
struct ctree_root root;
struct ctree_root *root;
struct key ins;
struct key last = { (u64)-1, 0, 0};
char *buf;
int i;
int num;
int ret;
int run_size = 100000;
int run_size = 1000000;
int max_key = 100000000;
int tree_size = 0;
struct ctree_path path;
radix_tree_init();
root = open_ctree("dbfile");
srand(55);
root.node = (struct node *)first_node;
memset(first_node, 0, sizeof(*first_node));
for (i = 0; i < run_size; i++) {
buf = malloc(64);
num = next_key(i, max_key);
@ -772,39 +857,46 @@ int main() {
ins.objectid = num;
ins.offset = 0;
ins.flags = 0;
ret = insert_item(&root, &ins, buf, strlen(buf));
ret = insert_item(root, &ins, buf, strlen(buf));
if (!ret)
tree_size++;
}
close_ctree(root);
root = open_ctree("dbfile");
printf("starting search\n");
srand(55);
for (i = 0; i < run_size; i++) {
num = next_key(i, max_key);
ins.objectid = num;
init_path(&path);
ret = search_slot(&root, &ins, &path);
ret = search_slot(root, &ins, &path);
if (ret) {
print_tree(root.node);
print_tree(root, root->node);
printf("unable to find %d\n", num);
exit(1);
}
release_path(root, &path);
}
printf("node %p level %d total ptrs %d free spc %lu\n", root.node,
node_level(root.node->header.flags), root.node->header.nritems,
NODEPTRS_PER_BLOCK - root.node->header.nritems);
// print_tree(root.node);
printf("all searches good\n");
close_ctree(root);
root = open_ctree("dbfile");
printf("node %p level %d total ptrs %d free spc %lu\n", root->node,
node_level(root->node->node.header.flags),
root->node->node.header.nritems,
NODEPTRS_PER_BLOCK - root->node->node.header.nritems);
printf("all searches good, deleting some items\n");
i = 0;
srand(55);
for (i = 0 ; i < run_size/4; i++) {
num = next_key(i, max_key);
ins.objectid = num;
init_path(&path);
ret = search_slot(&root, &ins, &path);
ret = search_slot(root, &ins, &path);
if (ret)
continue;
ret = del_item(&root, &path);
ret = del_item(root, &path);
if (ret != 0)
BUG();
release_path(root, &path);
tree_size--;
}
srand(128);
@ -813,38 +905,58 @@ int main() {
num = next_key(i, max_key);
sprintf(buf, "string-%d", num);
ins.objectid = num;
ret = insert_item(&root, &ins, buf, strlen(buf));
ret = insert_item(root, &ins, buf, strlen(buf));
if (!ret)
tree_size++;
}
while(root.node) {
close_ctree(root);
root = open_ctree("dbfile");
printf("starting search2\n");
srand(128);
for (i = 0; i < run_size; i++) {
num = next_key(i, max_key);
ins.objectid = num;
init_path(&path);
ret = search_slot(root, &ins, &path);
if (ret) {
print_tree(root, root->node);
printf("unable to find %d\n", num);
exit(1);
}
release_path(root, &path);
}
printf("starting big long delete run\n");
while(root->node && root->node->node.header.nritems > 0) {
struct leaf *leaf;
int slot;
ins.objectid = (u64)-1;
init_path(&path);
ret = search_slot(&root, &ins, &path);
ret = search_slot(root, &ins, &path);
if (ret == 0)
BUG();
leaf = (struct leaf *)(path.nodes[0]);
leaf = &path.nodes[0]->leaf;
slot = path.slots[0];
if (slot != leaf->header.nritems)
BUG();
while(path.slots[0] > 0) {
path.slots[0] -= 1;
slot = path.slots[0];
leaf = (struct leaf *)(path.nodes[0]);
leaf = &path.nodes[0]->leaf;
if (comp_keys(&last, &leaf->items[slot].key) <= 0)
BUG();
memcpy(&last, &leaf->items[slot].key, sizeof(last));
ret = del_item(&root, &path);
if (ret != 0)
ret = del_item(root, &path);
if (ret != 0) {
printf("del_item returned %d\n", ret);
BUG();
}
tree_size--;
}
release_path(root, &path);
}
print_tree(root.node);
close_ctree(root);
printf("tree size is now %d\n", tree_size);
return 0;
}

62
ctree.h 100644
View File

@ -0,0 +1,62 @@
#ifndef __CTREE__
#define __CTREE__
#define CTREE_BLOCKSIZE 4096
struct key {
u64 objectid;
u32 flags;
u64 offset;
} __attribute__ ((__packed__));
struct header {
u64 fsid[2]; /* FS specific uuid */
u64 blocknr;
u64 parentid;
u32 csum;
u32 ham;
u16 nritems;
u16 flags;
} __attribute__ ((__packed__));
#define NODEPTRS_PER_BLOCK ((CTREE_BLOCKSIZE - sizeof(struct header)) / \
(sizeof(struct key) + sizeof(u64)))
#define LEVEL_BITS 3
#define MAX_LEVEL (1 << LEVEL_BITS)
#define node_level(f) ((f) & (MAX_LEVEL-1))
#define is_leaf(f) (node_level(f) == 0)
struct tree_buffer;
struct ctree_root {
struct tree_buffer *node;
int fp;
struct radix_tree_root cache_radix;
};
struct item {
struct key key;
u16 offset;
u16 size;
} __attribute__ ((__packed__));
#define LEAF_DATA_SIZE (CTREE_BLOCKSIZE - sizeof(struct header))
struct leaf {
struct header header;
union {
struct item items[LEAF_DATA_SIZE/sizeof(struct item)];
u8 data[CTREE_BLOCKSIZE-sizeof(struct header)];
};
} __attribute__ ((__packed__));
struct node {
struct header header;
struct key keys[NODEPTRS_PER_BLOCK];
u64 blockptrs[NODEPTRS_PER_BLOCK];
} __attribute__ ((__packed__));
struct ctree_path {
struct tree_buffer *nodes[MAX_LEVEL];
int slots[MAX_LEVEL];
};
#endif

174
disk-io.c 100644
View File

@ -0,0 +1,174 @@
#define _XOPEN_SOURCE 500
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include "kerncompat.h"
#include "radix-tree.h"
#include "ctree.h"
#include "disk-io.h"
static int allocated_blocks = 0;
struct ctree_header {
u64 root_block;
} __attribute__ ((__packed__));
static int get_free_block(struct ctree_root *root, u64 *block)
{
struct stat st;
int ret;
st.st_size = 0;
ret = fstat(root->fp, &st);
if (st.st_size > sizeof(struct ctree_header)) {
*block = (st.st_size -
sizeof(struct ctree_header)) / CTREE_BLOCKSIZE;
} else {
*block = 0;
}
ret = ftruncate(root->fp, sizeof(struct ctree_header) + (*block + 1) *
CTREE_BLOCKSIZE);
return ret;
}
struct tree_buffer *alloc_tree_block(struct ctree_root *root, u64 blocknr)
{
struct tree_buffer *buf;
int ret;
buf = malloc(sizeof(struct tree_buffer));
if (!buf)
return buf;
allocated_blocks++;
buf->blocknr = blocknr;
buf->count = 1;
radix_tree_preload(GFP_KERNEL);
ret = radix_tree_insert(&root->cache_radix, blocknr, buf);
radix_tree_preload_end();
if (ret) {
free(buf);
return NULL;
}
return buf;
}
struct tree_buffer *alloc_free_block(struct ctree_root *root)
{
u64 free_block;
int ret;
struct tree_buffer * buf;
ret = get_free_block(root, &free_block);
if (ret) {
BUG();
return NULL;
}
buf = alloc_tree_block(root, free_block);
if (!buf)
BUG();
return buf;
}
struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr)
{
loff_t offset = blocknr * CTREE_BLOCKSIZE + sizeof(struct ctree_header);
struct tree_buffer *buf;
int ret;
buf = radix_tree_lookup(&root->cache_radix, blocknr);
if (buf) {
buf->count++;
if (buf->blocknr != blocknr)
BUG();
if (buf->blocknr != buf->node.header.blocknr)
BUG();
return buf;
}
buf = alloc_tree_block(root, blocknr);
if (!buf)
return NULL;
ret = pread(root->fp, &buf->node, CTREE_BLOCKSIZE, offset);
if (ret != CTREE_BLOCKSIZE) {
free(buf);
return NULL;
}
if (buf->blocknr != buf->node.header.blocknr)
BUG();
return buf;
}
int write_tree_block(struct ctree_root *root, struct tree_buffer *buf)
{
u64 blocknr = buf->blocknr;
loff_t offset = blocknr * CTREE_BLOCKSIZE + sizeof(struct ctree_header);
int ret;
if (buf->blocknr != buf->node.header.blocknr)
BUG();
ret = pwrite(root->fp, &buf->node, CTREE_BLOCKSIZE, offset);
if (ret != CTREE_BLOCKSIZE)
return ret;
if (buf == root->node)
return update_root_block(root);
return 0;
}
struct ctree_root *open_ctree(char *filename)
{
struct ctree_root *root = malloc(sizeof(struct ctree_root));
int fp;
u64 root_block;
int ret;
fp = open(filename, O_CREAT | O_RDWR);
if (fp < 0) {
free(root);
return NULL;
}
root->fp = fp;
INIT_RADIX_TREE(&root->cache_radix, GFP_KERNEL);
ret = pread(fp, &root_block, sizeof(u64), 0);
if (ret == sizeof(u64)) {
printf("reading root node at block %lu\n", root_block);
root->node = read_tree_block(root, root_block);
} else
root->node = NULL;
return root;
}
int close_ctree(struct ctree_root *root)
{
close(root->fp);
if (root->node)
tree_block_release(root, root->node);
free(root);
printf("on close %d blocks are allocated\n", allocated_blocks);
return 0;
}
int update_root_block(struct ctree_root *root)
{
int ret;
u64 root_block = root->node->blocknr;
ret = pwrite(root->fp, &root_block, sizeof(u64), 0);
if (ret != sizeof(u64))
return ret;
return 0;
}
void tree_block_release(struct ctree_root *root, struct tree_buffer *buf)
{
buf->count--;
if (buf->count == 0) {
if (!radix_tree_lookup(&root->cache_radix, buf->blocknr))
BUG();
radix_tree_delete(&root->cache_radix, buf->blocknr);
memset(buf, 0, sizeof(*buf));
free(buf);
BUG_ON(allocated_blocks == 0);
allocated_blocks--;
}
}

21
disk-io.h 100644
View File

@ -0,0 +1,21 @@
#ifndef __DISKIO__
#define __DISKIO__
struct tree_buffer {
u64 blocknr;
int count;
union {
struct node node;
struct leaf leaf;
};
};
struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr);
int write_tree_block(struct ctree_root *root, struct tree_buffer *buf);
struct ctree_root *open_ctree(char *filename);
int close_ctree(struct ctree_root *root);
void tree_block_release(struct ctree_root *root, struct tree_buffer *buf);
struct tree_buffer *alloc_free_block(struct ctree_root *root);
int update_root_block(struct ctree_root *root);
#endif

View File

@ -6,6 +6,7 @@
#define BITS_PER_LONG 64
#define __GFP_BITS_SHIFT 20
#define __GFP_BITS_MASK ((int)((1 << __GFP_BITS_SHIFT) - 1))
#define GFP_KERNEL 0
#define __read_mostly
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#define __force

836
radix-tree.c 100644
View File

@ -0,0 +1,836 @@
/*
* Copyright (C) 2001 Momchil Velikov
* Portions Copyright (C) 2001 Christoph Hellwig
* Copyright (C) 2005 SGI, Christoph Lameter <clameter@sgi.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2, or (at
* your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include "kerncompat.h"
#include "radix-tree.h"
#ifdef __KERNEL__
#define RADIX_TREE_MAP_SHIFT (CONFIG_BASE_SMALL ? 4 : 6)
#else
#define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */
#endif
#define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT)
#define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1)
#define RADIX_TREE_TAG_LONGS \
((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG)
struct radix_tree_node {
unsigned int count;
void *slots[RADIX_TREE_MAP_SIZE];
unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
};
struct radix_tree_path {
struct radix_tree_node *node;
int offset;
};
#define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long))
#define RADIX_TREE_MAX_PATH (RADIX_TREE_INDEX_BITS/RADIX_TREE_MAP_SHIFT + 2)
static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH] __read_mostly;
/*
* Per-cpu pool of preloaded nodes
*/
struct radix_tree_preload {
int nr;
struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH];
};
struct radix_tree_preload radix_tree_preloads = { 0, };
static inline gfp_t root_gfp_mask(struct radix_tree_root *root)
{
return root->gfp_mask & __GFP_BITS_MASK;
}
static int internal_nodes = 0;
/*
* This assumes that the caller has performed appropriate preallocation, and
* that the caller has pinned this thread of control to the current CPU.
*/
static struct radix_tree_node *
radix_tree_node_alloc(struct radix_tree_root *root)
{
struct radix_tree_node *ret;
ret = malloc(sizeof(struct radix_tree_node));
if (ret) {
memset(ret, 0, sizeof(struct radix_tree_node));
internal_nodes++;
}
return ret;
}
static inline void
radix_tree_node_free(struct radix_tree_node *node)
{
internal_nodes--;
free(node);
}
/*
* Load up this CPU's radix_tree_node buffer with sufficient objects to
* ensure that the addition of a single element in the tree cannot fail. On
* success, return zero, with preemption disabled. On error, return -ENOMEM
* with preemption not disabled.
*/
int radix_tree_preload(gfp_t gfp_mask)
{
struct radix_tree_preload *rtp;
struct radix_tree_node *node;
int ret = -ENOMEM;
preempt_disable();
rtp = &__get_cpu_var(radix_tree_preloads);
while (rtp->nr < ARRAY_SIZE(rtp->nodes)) {
preempt_enable();
node = radix_tree_node_alloc(NULL);
if (node == NULL)
goto out;
preempt_disable();
rtp = &__get_cpu_var(radix_tree_preloads);
if (rtp->nr < ARRAY_SIZE(rtp->nodes))
rtp->nodes[rtp->nr++] = node;
else
radix_tree_node_free(node);
}
ret = 0;
out:
return ret;
}
static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
int offset)
{
__set_bit(offset, node->tags[tag]);
}
static inline void tag_clear(struct radix_tree_node *node, unsigned int tag,
int offset)
{
__clear_bit(offset, node->tags[tag]);
}
static inline int tag_get(struct radix_tree_node *node, unsigned int tag,
int offset)
{
return test_bit(offset, node->tags[tag]);
}
static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag)
{
root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT));
}
static inline void root_tag_clear(struct radix_tree_root *root, unsigned int tag)
{
root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT));
}
static inline void root_tag_clear_all(struct radix_tree_root *root)
{
root->gfp_mask &= __GFP_BITS_MASK;
}
static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag)
{
return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT));
}
/*
* Returns 1 if any slot in the node has this tag set.
* Otherwise returns 0.
*/
static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag)
{
int idx;
for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
if (node->tags[tag][idx])
return 1;
}
return 0;
}
/*
* Return the maximum key which can be store into a
* radix tree with height HEIGHT.
*/
static inline unsigned long radix_tree_maxindex(unsigned int height)
{
return height_to_maxindex[height];
}
/*
* Extend a radix tree so it can store key @index.
*/
static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
{
struct radix_tree_node *node;
unsigned int height;
int tag;
/* Figure out what the height should be. */
height = root->height + 1;
while (index > radix_tree_maxindex(height))
height++;
if (root->rnode == NULL) {
root->height = height;
goto out;
}
do {
if (!(node = radix_tree_node_alloc(root)))
return -ENOMEM;
/* Increase the height. */
node->slots[0] = root->rnode;
/* Propagate the aggregated tag info into the new root */
for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
if (root_tag_get(root, tag))
tag_set(node, tag, 0);
}
node->count = 1;
root->rnode = node;
root->height++;
} while (height > root->height);
out:
return 0;
}
/**
* radix_tree_insert - insert into a radix tree
* @root: radix tree root
* @index: index key
* @item: item to insert
*
* Insert an item into the radix tree at position @index.
*/
int radix_tree_insert(struct radix_tree_root *root,
unsigned long index, void *item)
{
struct radix_tree_node *node = NULL, *slot;
unsigned int height, shift;
int offset;
int error;
/* Make sure the tree is high enough. */
if (index > radix_tree_maxindex(root->height)) {
error = radix_tree_extend(root, index);
if (error)
return error;
}
slot = root->rnode;
height = root->height;
shift = (height-1) * RADIX_TREE_MAP_SHIFT;
offset = 0; /* uninitialised var warning */
while (height > 0) {
if (slot == NULL) {
/* Have to add a child node. */
if (!(slot = radix_tree_node_alloc(root)))
return -ENOMEM;
if (node) {
node->slots[offset] = slot;
node->count++;
} else
root->rnode = slot;
}
/* Go a level down */
offset = (index >> shift) & RADIX_TREE_MAP_MASK;
node = slot;
slot = node->slots[offset];
shift -= RADIX_TREE_MAP_SHIFT;
height--;
}
if (slot != NULL)
return -EEXIST;
if (node) {
node->count++;
node->slots[offset] = item;
BUG_ON(tag_get(node, 0, offset));
BUG_ON(tag_get(node, 1, offset));
} else {
root->rnode = item;
BUG_ON(root_tag_get(root, 0));
BUG_ON(root_tag_get(root, 1));
}
return 0;
}
static inline void **__lookup_slot(struct radix_tree_root *root,
unsigned long index)
{
unsigned int height, shift;
struct radix_tree_node **slot;
height = root->height;
if (index > radix_tree_maxindex(height))
return NULL;
if (height == 0 && root->rnode)
return (void **)&root->rnode;
shift = (height-1) * RADIX_TREE_MAP_SHIFT;
slot = &root->rnode;
while (height > 0) {
if (*slot == NULL)
return NULL;
slot = (struct radix_tree_node **)
((*slot)->slots +
((index >> shift) & RADIX_TREE_MAP_MASK));
shift -= RADIX_TREE_MAP_SHIFT;
height--;
}
return (void **)slot;
}
/**
* radix_tree_lookup_slot - lookup a slot in a radix tree
* @root: radix tree root
* @index: index key
*
* Lookup the slot corresponding to the position @index in the radix tree
* @root. This is useful for update-if-exists operations.
*/
void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index)
{
return __lookup_slot(root, index);
}
/**
* radix_tree_lookup - perform lookup operation on a radix tree
* @root: radix tree root
* @index: index key
*
* Lookup the item at the position @index in the radix tree @root.
*/
void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
{
void **slot;
slot = __lookup_slot(root, index);
return slot != NULL ? *slot : NULL;
}
/**
* radix_tree_tag_set - set a tag on a radix tree node
* @root: radix tree root
* @index: index key
* @tag: tag index
*
* Set the search tag (which must be < RADIX_TREE_MAX_TAGS)
* corresponding to @index in the radix tree. From
* the root all the way down to the leaf node.
*
* Returns the address of the tagged item. Setting a tag on a not-present
* item is a bug.
*/
void *radix_tree_tag_set(struct radix_tree_root *root,
unsigned long index, unsigned int tag)
{
unsigned int height, shift;
struct radix_tree_node *slot;
height = root->height;
BUG_ON(index > radix_tree_maxindex(height));
slot = root->rnode;
shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
while (height > 0) {
int offset;
offset = (index >> shift) & RADIX_TREE_MAP_MASK;
if (!tag_get(slot, tag, offset))
tag_set(slot, tag, offset);
slot = slot->slots[offset];
BUG_ON(slot == NULL);
shift -= RADIX_TREE_MAP_SHIFT;
height--;
}
/* set the root's tag bit */
if (slot && !root_tag_get(root, tag))
root_tag_set(root, tag);
return slot;
}
/**
* radix_tree_tag_clear - clear a tag on a radix tree node
* @root: radix tree root
* @index: index key
* @tag: tag index
*
* Clear the search tag (which must be < RADIX_TREE_MAX_TAGS)
* corresponding to @index in the radix tree. If
* this causes the leaf node to have no tags set then clear the tag in the
* next-to-leaf node, etc.
*
* Returns the address of the tagged item on success, else NULL. ie:
* has the same return value and semantics as radix_tree_lookup().
*/
void *radix_tree_tag_clear(struct radix_tree_root *root,
unsigned long index, unsigned int tag)
{
struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path;
struct radix_tree_node *slot = NULL;
unsigned int height, shift;
height = root->height;
if (index > radix_tree_maxindex(height))
goto out;
shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
pathp->node = NULL;
slot = root->rnode;
while (height > 0) {
int offset;
if (slot == NULL)
goto out;
offset = (index >> shift) & RADIX_TREE_MAP_MASK;
pathp[1].offset = offset;
pathp[1].node = slot;
slot = slot->slots[offset];
pathp++;
shift -= RADIX_TREE_MAP_SHIFT;
height--;
}
if (slot == NULL)
goto out;
while (pathp->node) {
if (!tag_get(pathp->node, tag, pathp->offset))
goto out;
tag_clear(pathp->node, tag, pathp->offset);
if (any_tag_set(pathp->node, tag))
goto out;
pathp--;
}
/* clear the root's tag bit */
if (root_tag_get(root, tag))
root_tag_clear(root, tag);
out:
return slot;
}
#ifndef __KERNEL__ /* Only the test harness uses this at present */
/**
* radix_tree_tag_get - get a tag on a radix tree node
* @root: radix tree root
* @index: index key
* @tag: tag index (< RADIX_TREE_MAX_TAGS)
*
* Return values:
*
* 0: tag not present or not set
* 1: tag set
*/
int radix_tree_tag_get(struct radix_tree_root *root,
unsigned long index, unsigned int tag)
{
unsigned int height, shift;
struct radix_tree_node *slot;
int saw_unset_tag = 0;
height = root->height;
if (index > radix_tree_maxindex(height))
return 0;
/* check the root's tag bit */
if (!root_tag_get(root, tag))
return 0;
if (height == 0)
return 1;
shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
slot = root->rnode;
for ( ; ; ) {
int offset;
if (slot == NULL)
return 0;
offset = (index >> shift) & RADIX_TREE_MAP_MASK;
/*
* This is just a debug check. Later, we can bale as soon as
* we see an unset tag.
*/
if (!tag_get(slot, tag, offset))
saw_unset_tag = 1;
if (height == 1) {
int ret = tag_get(slot, tag, offset);
BUG_ON(ret && saw_unset_tag);
return !!ret;
}
slot = slot->slots[offset];
shift -= RADIX_TREE_MAP_SHIFT;
height--;
}
}
#endif
static unsigned int
__lookup(struct radix_tree_root *root, void **results, unsigned long index,
unsigned int max_items, unsigned long *next_index)
{
unsigned int nr_found = 0;
unsigned int shift, height;
struct radix_tree_node *slot;
unsigned long i;
height = root->height;
if (height == 0) {
if (root->rnode && index == 0)
results[nr_found++] = root->rnode;
goto out;
}
shift = (height-1) * RADIX_TREE_MAP_SHIFT;
slot = root->rnode;
for ( ; height > 1; height--) {
for (i = (index >> shift) & RADIX_TREE_MAP_MASK ;
i < RADIX_TREE_MAP_SIZE; i++) {
if (slot->slots[i] != NULL)
break;
index &= ~((1UL << shift) - 1);
index += 1UL << shift;
if (index == 0)
goto out; /* 32-bit wraparound */
}
if (i == RADIX_TREE_MAP_SIZE)
goto out;
shift -= RADIX_TREE_MAP_SHIFT;
slot = slot->slots[i];
}
/* Bottom level: grab some items */
for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) {
index++;
if (slot->slots[i]) {
results[nr_found++] = slot->slots[i];
if (nr_found == max_items)
goto out;
}
}
out:
*next_index = index;
return nr_found;
}
/**
* radix_tree_gang_lookup - perform multiple lookup on a radix tree
* @root: radix tree root
* @results: where the results of the lookup are placed
* @first_index: start the lookup from this key
* @max_items: place up to this many items at *results
*
* Performs an index-ascending scan of the tree for present items. Places
* them at *@results and returns the number of items which were placed at
* *@results.
*
* The implementation is naive.
*/
unsigned int
radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
unsigned long first_index, unsigned int max_items)
{
const unsigned long max_index = radix_tree_maxindex(root->height);
unsigned long cur_index = first_index;
unsigned int ret = 0;
while (ret < max_items) {
unsigned int nr_found;
unsigned long next_index; /* Index of next search */
if (cur_index > max_index)
break;
nr_found = __lookup(root, results + ret, cur_index,
max_items - ret, &next_index);
ret += nr_found;
if (next_index == 0)
break;
cur_index = next_index;
}
return ret;
}
/*
* FIXME: the two tag_get()s here should use find_next_bit() instead of
* open-coding the search.
*/
static unsigned int
__lookup_tag(struct radix_tree_root *root, void **results, unsigned long index,
unsigned int max_items, unsigned long *next_index, unsigned int tag)
{
unsigned int nr_found = 0;
unsigned int shift;
unsigned int height = root->height;
struct radix_tree_node *slot;
if (height == 0) {
if (root->rnode && index == 0)
results[nr_found++] = root->rnode;
goto out;
}
shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
slot = root->rnode;
do {
unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK;
for ( ; i < RADIX_TREE_MAP_SIZE; i++) {
if (tag_get(slot, tag, i)) {
BUG_ON(slot->slots[i] == NULL);
break;
}
index &= ~((1UL << shift) - 1);
index += 1UL << shift;
if (index == 0)
goto out; /* 32-bit wraparound */
}
if (i == RADIX_TREE_MAP_SIZE)
goto out;
height--;
if (height == 0) { /* Bottom level: grab some items */
unsigned long j = index & RADIX_TREE_MAP_MASK;
for ( ; j < RADIX_TREE_MAP_SIZE; j++) {
index++;
if (tag_get(slot, tag, j)) {
BUG_ON(slot->slots[j] == NULL);
results[nr_found++] = slot->slots[j];
if (nr_found == max_items)
goto out;
}
}
}
shift -= RADIX_TREE_MAP_SHIFT;
slot = slot->slots[i];
} while (height > 0);
out:
*next_index = index;
return nr_found;
}
/**
* radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
* based on a tag
* @root: radix tree root
* @results: where the results of the lookup are placed
* @first_index: start the lookup from this key
* @max_items: place up to this many items at *results
* @tag: the tag index (< RADIX_TREE_MAX_TAGS)
*
* Performs an index-ascending scan of the tree for present items which
* have the tag indexed by @tag set. Places the items at *@results and
* returns the number of items which were placed at *@results.
*/
unsigned int
radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
unsigned long first_index, unsigned int max_items,
unsigned int tag)
{
const unsigned long max_index = radix_tree_maxindex(root->height);
unsigned long cur_index = first_index;
unsigned int ret = 0;
/* check the root's tag bit */
if (!root_tag_get(root, tag))
return 0;
while (ret < max_items) {
unsigned int nr_found;
unsigned long next_index; /* Index of next search */
if (cur_index > max_index)
break;
nr_found = __lookup_tag(root, results + ret, cur_index,
max_items - ret, &next_index, tag);
ret += nr_found;
if (next_index == 0)
break;
cur_index = next_index;
}
return ret;
}
/**
* radix_tree_shrink - shrink height of a radix tree to minimal
* @root radix tree root
*/
static inline void radix_tree_shrink(struct radix_tree_root *root)
{
/* try to shrink tree height */
while (root->height > 0 &&
root->rnode->count == 1 &&
root->rnode->slots[0]) {
struct radix_tree_node *to_free = root->rnode;
root->rnode = to_free->slots[0];
root->height--;
/* must only free zeroed nodes into the slab */
tag_clear(to_free, 0, 0);
tag_clear(to_free, 1, 0);
to_free->slots[0] = NULL;
to_free->count = 0;
radix_tree_node_free(to_free);
}
}
/**
* radix_tree_delete - delete an item from a radix tree
* @root: radix tree root
* @index: index key
*
* Remove the item at @index from the radix tree rooted at @root.
*
* Returns the address of the deleted item, or NULL if it was not present.
*/
void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
{
struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path;
struct radix_tree_node *slot = NULL;
unsigned int height, shift;
int tag;
int offset;
height = root->height;
if (index > radix_tree_maxindex(height))
goto out;
slot = root->rnode;
if (height == 0 && root->rnode) {
root_tag_clear_all(root);
root->rnode = NULL;
goto out;
}
shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
pathp->node = NULL;
do {
if (slot == NULL)
goto out;
pathp++;
offset = (index >> shift) & RADIX_TREE_MAP_MASK;
pathp->offset = offset;
pathp->node = slot;
slot = slot->slots[offset];
shift -= RADIX_TREE_MAP_SHIFT;
height--;
} while (height > 0);
if (slot == NULL)
goto out;
/*
* Clear all tags associated with the just-deleted item
*/
for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
if (tag_get(pathp->node, tag, pathp->offset))
radix_tree_tag_clear(root, index, tag);
}
/* Now free the nodes we do not need anymore */
while (pathp->node) {
pathp->node->slots[pathp->offset] = NULL;
pathp->node->count--;
if (pathp->node->count) {
if (pathp->node == root->rnode)
radix_tree_shrink(root);
goto out;
}
/* Node with zero slots in use so free it */
radix_tree_node_free(pathp->node);
pathp--;
}
root_tag_clear_all(root);
root->height = 0;
root->rnode = NULL;
out:
return slot;
}
/**
* radix_tree_tagged - test whether any items in the tree are tagged
* @root: radix tree root
* @tag: tag to test
*/
int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag)
{
return root_tag_get(root, tag);
}
static unsigned long __maxindex(unsigned int height)
{
unsigned int tmp = height * RADIX_TREE_MAP_SHIFT;
unsigned long index = (~0UL >> (RADIX_TREE_INDEX_BITS - tmp - 1)) >> 1;
if (tmp >= RADIX_TREE_INDEX_BITS)
index = ~0UL;
return index;
}
static void radix_tree_init_maxindex(void)
{
unsigned int i;
for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++)
height_to_maxindex[i] = __maxindex(i);
}
void radix_tree_init(void)
{
radix_tree_init_maxindex();
}

73
radix-tree.h 100644
View File

@ -0,0 +1,73 @@
/*
* Copyright (C) 2001 Momchil Velikov
* Portions Copyright (C) 2001 Christoph Hellwig
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2, or (at
* your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifndef _LINUX_RADIX_TREE_H
#define _LINUX_RADIX_TREE_H
#define RADIX_TREE_MAX_TAGS 2
/* root tags are stored in gfp_mask, shifted by __GFP_BITS_SHIFT */
struct radix_tree_root {
unsigned int height;
gfp_t gfp_mask;
struct radix_tree_node *rnode;
};
#define RADIX_TREE_INIT(mask) { \
.height = 0, \
.gfp_mask = (mask), \
.rnode = NULL, \
}
#define RADIX_TREE(name, mask) \
struct radix_tree_root name = RADIX_TREE_INIT(mask)
#define INIT_RADIX_TREE(root, mask) \
do { \
(root)->height = 0; \
(root)->gfp_mask = (mask); \
(root)->rnode = NULL; \
} while (0)
int radix_tree_insert(struct radix_tree_root *, unsigned long, void *);
void *radix_tree_lookup(struct radix_tree_root *, unsigned long);
void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long);
void *radix_tree_delete(struct radix_tree_root *, unsigned long);
unsigned int
radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
unsigned long first_index, unsigned int max_items);
int radix_tree_preload(gfp_t gfp_mask);
void radix_tree_init(void);
void *radix_tree_tag_set(struct radix_tree_root *root,
unsigned long index, unsigned int tag);
void *radix_tree_tag_clear(struct radix_tree_root *root,
unsigned long index, unsigned int tag);
int radix_tree_tag_get(struct radix_tree_root *root,
unsigned long index, unsigned int tag);
unsigned int
radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
unsigned long first_index, unsigned int max_items,
unsigned int tag);
int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag);
static inline void radix_tree_preload_end(void)
{
preempt_enable();
}
#endif /* _LINUX_RADIX_TREE_H */