From f32f004cddf86d63a9c42994bbce9f4e2f07c9fa Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 4 Jul 2018 15:42:46 -0400 Subject: [PATCH] ida: Convert to XArray Use the XA_TRACK_FREE ability to track which entries have a free bit, similarly to how it uses the radix tree's IDR_FREE tag. This eliminates the per-cpu ida_bitmap preload, and fixes the memory consumption regression I introduced when making the IDR able to store any pointer. Signed-off-by: Matthew Wilcox --- include/linux/idr.h | 18 +- lib/idr.c | 389 +++++++++++++++------------- lib/radix-tree.c | 71 ----- tools/testing/radix-tree/idr-test.c | 8 +- 4 files changed, 218 insertions(+), 268 deletions(-) diff --git a/include/linux/idr.h b/include/linux/idr.h index 3ec8628ce17f..60daf34b625d 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -214,8 +214,7 @@ static inline void idr_preload_end(void) ++id, (entry) = idr_get_next((idr), &(id))) /* - * IDA - IDR based id allocator, use when translation from id to - * pointer isn't necessary. + * IDA - ID Allocator, use when translation from id to pointer isn't necessary. */ #define IDA_CHUNK_SIZE 128 /* 128 bytes per chunk */ #define IDA_BITMAP_LONGS (IDA_CHUNK_SIZE / sizeof(long)) @@ -225,14 +224,14 @@ struct ida_bitmap { unsigned long bitmap[IDA_BITMAP_LONGS]; }; -DECLARE_PER_CPU(struct ida_bitmap *, ida_bitmap); - struct ida { - struct radix_tree_root ida_rt; + struct xarray xa; }; +#define IDA_INIT_FLAGS (XA_FLAGS_LOCK_IRQ | XA_FLAGS_ALLOC) + #define IDA_INIT(name) { \ - .ida_rt = RADIX_TREE_INIT(name, IDR_RT_MARKER | GFP_NOWAIT), \ + .xa = XARRAY_INIT(name, IDA_INIT_FLAGS) \ } #define DEFINE_IDA(name) struct ida name = IDA_INIT(name) @@ -292,7 +291,7 @@ static inline int ida_alloc_max(struct ida *ida, unsigned int max, gfp_t gfp) static inline void ida_init(struct ida *ida) { - INIT_RADIX_TREE(&ida->ida_rt, IDR_RT_MARKER | GFP_NOWAIT); + xa_init_flags(&ida->xa, IDA_INIT_FLAGS); } #define ida_simple_get(ida, start, end, gfp) \ @@ -301,9 +300,6 @@ static inline void ida_init(struct ida *ida) static inline bool ida_is_empty(const struct ida *ida) { - return radix_tree_empty(&ida->ida_rt); + return xa_empty(&ida->xa); } - -/* in lib/radix-tree.c */ -int ida_pre_get(struct ida *ida, gfp_t gfp_mask); #endif /* __IDR_H__ */ diff --git a/lib/idr.c b/lib/idr.c index 9a366b5be2c2..3c20ad9b0463 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -6,8 +6,6 @@ #include #include -DEFINE_PER_CPU(struct ida_bitmap *, ida_bitmap); - /** * idr_alloc_u32() - Allocate an ID. * @idr: IDR handle. @@ -320,6 +318,9 @@ EXPORT_SYMBOL(idr_replace); * free the individual IDs in it. You can use ida_is_empty() to find * out whether the IDA has any IDs currently allocated. * + * The IDA handles its own locking. It is safe to call any of the IDA + * functions without synchronisation in your code. + * * IDs are currently limited to the range [0-INT_MAX]. If this is an awkward * limitation, it should be quite straightforward to raise the maximum. */ @@ -327,180 +328,38 @@ EXPORT_SYMBOL(idr_replace); /* * Developer's notes: * - * The IDA uses the functionality provided by the IDR & radix tree to store - * bitmaps in each entry. The IDR_FREE tag means there is at least one bit - * free, unlike the IDR where it means at least one entry is free. + * The IDA uses the functionality provided by the XArray to store bitmaps in + * each entry. The XA_FREE_MARK is only cleared when all bits in the bitmap + * have been set. * - * I considered telling the radix tree that each slot is an order-10 node - * and storing the bit numbers in the radix tree, but the radix tree can't - * allow a single multiorder entry at index 0, which would significantly - * increase memory consumption for the IDA. So instead we divide the index - * by the number of bits in the leaf bitmap before doing a radix tree lookup. + * I considered telling the XArray that each slot is an order-10 node + * and indexing by bit number, but the XArray can't allow a single multi-index + * entry in the head, which would significantly increase memory consumption + * for the IDA. So instead we divide the index by the number of bits in the + * leaf bitmap before doing a radix tree lookup. * * As an optimisation, if there are only a few low bits set in any given * leaf, instead of allocating a 128-byte bitmap, we store the bits - * directly in the entry. + * as a value entry. Value entries never have the XA_FREE_MARK cleared + * because we can always convert them into a bitmap entry. * - * We allow the radix tree 'exceptional' count to get out of date. Nothing - * in the IDA nor the radix tree code checks it. If it becomes important - * to maintain an accurate exceptional count, switch the rcu_assign_pointer() - * calls to radix_tree_iter_replace() which will correct the exceptional - * count. + * It would be possible to optimise further; once we've run out of a + * single 128-byte bitmap, we currently switch to a 576-byte node, put + * the 128-byte bitmap in the first entry and then start allocating extra + * 128-byte entries. We could instead use the 512 bytes of the node's + * data as a bitmap before moving to that scheme. I do not believe this + * is a worthwhile optimisation; Rasmus Villemoes surveyed the current + * users of the IDA and almost none of them use more than 1024 entries. + * Those that do use more than the 8192 IDs that the 512 bytes would + * provide. * - * The IDA always requires a lock to alloc/free. If we add a 'test_bit' + * The IDA always uses a lock to alloc/free. If we add a 'test_bit' * equivalent, it will still need locking. Going to RCU lookup would require * using RCU to free bitmaps, and that's not trivial without embedding an * RCU head in the bitmap, which adds a 2-pointer overhead to each 128-byte * bitmap, which is excessive. */ -#define IDA_MAX (0x80000000U / IDA_BITMAP_BITS - 1) - -static int ida_get_new_above(struct ida *ida, int start) -{ - struct radix_tree_root *root = &ida->ida_rt; - void __rcu **slot; - struct radix_tree_iter iter; - struct ida_bitmap *bitmap; - unsigned long index; - unsigned bit; - int new; - - index = start / IDA_BITMAP_BITS; - bit = start % IDA_BITMAP_BITS; - - slot = radix_tree_iter_init(&iter, index); - for (;;) { - if (slot) - slot = radix_tree_next_slot(slot, &iter, - RADIX_TREE_ITER_TAGGED); - if (!slot) { - slot = idr_get_free(root, &iter, GFP_NOWAIT, IDA_MAX); - if (IS_ERR(slot)) { - if (slot == ERR_PTR(-ENOMEM)) - return -EAGAIN; - return PTR_ERR(slot); - } - } - if (iter.index > index) - bit = 0; - new = iter.index * IDA_BITMAP_BITS; - bitmap = rcu_dereference_raw(*slot); - if (xa_is_value(bitmap)) { - unsigned long tmp = xa_to_value(bitmap); - int vbit = find_next_zero_bit(&tmp, BITS_PER_XA_VALUE, - bit); - if (vbit < BITS_PER_XA_VALUE) { - tmp |= 1UL << vbit; - rcu_assign_pointer(*slot, xa_mk_value(tmp)); - return new + vbit; - } - bitmap = this_cpu_xchg(ida_bitmap, NULL); - if (!bitmap) - return -EAGAIN; - bitmap->bitmap[0] = tmp; - rcu_assign_pointer(*slot, bitmap); - } - - if (bitmap) { - bit = find_next_zero_bit(bitmap->bitmap, - IDA_BITMAP_BITS, bit); - new += bit; - if (new < 0) - return -ENOSPC; - if (bit == IDA_BITMAP_BITS) - continue; - - __set_bit(bit, bitmap->bitmap); - if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS)) - radix_tree_iter_tag_clear(root, &iter, - IDR_FREE); - } else { - new += bit; - if (new < 0) - return -ENOSPC; - if (bit < BITS_PER_XA_VALUE) { - bitmap = xa_mk_value(1UL << bit); - } else { - bitmap = this_cpu_xchg(ida_bitmap, NULL); - if (!bitmap) - return -EAGAIN; - __set_bit(bit, bitmap->bitmap); - } - radix_tree_iter_replace(root, &iter, slot, bitmap); - } - - return new; - } -} - -static void ida_remove(struct ida *ida, int id) -{ - unsigned long index = id / IDA_BITMAP_BITS; - unsigned offset = id % IDA_BITMAP_BITS; - struct ida_bitmap *bitmap; - unsigned long *btmp; - struct radix_tree_iter iter; - void __rcu **slot; - - slot = radix_tree_iter_lookup(&ida->ida_rt, &iter, index); - if (!slot) - goto err; - - bitmap = rcu_dereference_raw(*slot); - if (xa_is_value(bitmap)) { - btmp = (unsigned long *)slot; - offset += 1; /* Intimate knowledge of the value encoding */ - if (offset >= BITS_PER_LONG) - goto err; - } else { - btmp = bitmap->bitmap; - } - if (!test_bit(offset, btmp)) - goto err; - - __clear_bit(offset, btmp); - radix_tree_iter_tag_set(&ida->ida_rt, &iter, IDR_FREE); - if (xa_is_value(bitmap)) { - if (xa_to_value(rcu_dereference_raw(*slot)) == 0) - radix_tree_iter_delete(&ida->ida_rt, &iter, slot); - } else if (bitmap_empty(btmp, IDA_BITMAP_BITS)) { - kfree(bitmap); - radix_tree_iter_delete(&ida->ida_rt, &iter, slot); - } - return; - err: - WARN(1, "ida_free called for id=%d which is not allocated.\n", id); -} - -/** - * ida_destroy() - Free all IDs. - * @ida: IDA handle. - * - * Calling this function frees all IDs and releases all resources used - * by an IDA. When this call returns, the IDA is empty and can be reused - * or freed. If the IDA is already empty, there is no need to call this - * function. - * - * Context: Any context. - */ -void ida_destroy(struct ida *ida) -{ - unsigned long flags; - struct radix_tree_iter iter; - void __rcu **slot; - - xa_lock_irqsave(&ida->ida_rt, flags); - radix_tree_for_each_slot(slot, &ida->ida_rt, &iter, 0) { - struct ida_bitmap *bitmap = rcu_dereference_raw(*slot); - if (!xa_is_value(bitmap)) - kfree(bitmap); - radix_tree_iter_delete(&ida->ida_rt, &iter, slot); - } - xa_unlock_irqrestore(&ida->ida_rt, flags); -} -EXPORT_SYMBOL(ida_destroy); - /** * ida_alloc_range() - Allocate an unused ID. * @ida: IDA handle. @@ -518,8 +377,10 @@ EXPORT_SYMBOL(ida_destroy); int ida_alloc_range(struct ida *ida, unsigned int min, unsigned int max, gfp_t gfp) { - int id = 0; + XA_STATE(xas, &ida->xa, min / IDA_BITMAP_BITS); + unsigned bit = min % IDA_BITMAP_BITS; unsigned long flags; + struct ida_bitmap *bitmap, *alloc = NULL; if ((int)min < 0) return -ENOSPC; @@ -527,22 +388,87 @@ int ida_alloc_range(struct ida *ida, unsigned int min, unsigned int max, if ((int)max < 0) max = INT_MAX; -again: - xa_lock_irqsave(&ida->ida_rt, flags); - id = ida_get_new_above(ida, min); - if (id > (int)max) { - ida_remove(ida, id); - id = -ENOSPC; - } - xa_unlock_irqrestore(&ida->ida_rt, flags); +retry: + xas_lock_irqsave(&xas, flags); +next: + bitmap = xas_find_marked(&xas, max / IDA_BITMAP_BITS, XA_FREE_MARK); + if (xas.xa_index > min / IDA_BITMAP_BITS) + bit = 0; + if (xas.xa_index * IDA_BITMAP_BITS + bit > max) + goto nospc; - if (unlikely(id == -EAGAIN)) { - if (!ida_pre_get(ida, gfp)) - return -ENOMEM; - goto again; + if (xa_is_value(bitmap)) { + unsigned long tmp = xa_to_value(bitmap); + + if (bit < BITS_PER_XA_VALUE) { + bit = find_next_zero_bit(&tmp, BITS_PER_XA_VALUE, bit); + if (xas.xa_index * IDA_BITMAP_BITS + bit > max) + goto nospc; + if (bit < BITS_PER_XA_VALUE) { + tmp |= 1UL << bit; + xas_store(&xas, xa_mk_value(tmp)); + goto out; + } + } + bitmap = alloc; + if (!bitmap) + bitmap = kzalloc(sizeof(*bitmap), GFP_NOWAIT); + if (!bitmap) + goto alloc; + bitmap->bitmap[0] = tmp; + xas_store(&xas, bitmap); + if (xas_error(&xas)) { + bitmap->bitmap[0] = 0; + goto out; + } } - return id; + if (bitmap) { + bit = find_next_zero_bit(bitmap->bitmap, IDA_BITMAP_BITS, bit); + if (xas.xa_index * IDA_BITMAP_BITS + bit > max) + goto nospc; + if (bit == IDA_BITMAP_BITS) + goto next; + + __set_bit(bit, bitmap->bitmap); + if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS)) + xas_clear_mark(&xas, XA_FREE_MARK); + } else { + if (bit < BITS_PER_XA_VALUE) { + bitmap = xa_mk_value(1UL << bit); + } else { + bitmap = alloc; + if (!bitmap) + bitmap = kzalloc(sizeof(*bitmap), GFP_NOWAIT); + if (!bitmap) + goto alloc; + __set_bit(bit, bitmap->bitmap); + } + xas_store(&xas, bitmap); + } +out: + xas_unlock_irqrestore(&xas, flags); + if (xas_nomem(&xas, gfp)) { + xas.xa_index = min / IDA_BITMAP_BITS; + bit = min % IDA_BITMAP_BITS; + goto retry; + } + if (bitmap != alloc) + kfree(alloc); + if (xas_error(&xas)) + return xas_error(&xas); + return xas.xa_index * IDA_BITMAP_BITS + bit; +alloc: + xas_unlock_irqrestore(&xas, flags); + alloc = kzalloc(sizeof(*bitmap), gfp); + if (!alloc) + return -ENOMEM; + xas_set(&xas, min / IDA_BITMAP_BITS); + bit = min % IDA_BITMAP_BITS; + goto retry; +nospc: + xas_unlock_irqrestore(&xas, flags); + return -ENOSPC; } EXPORT_SYMBOL(ida_alloc_range); @@ -555,11 +481,112 @@ EXPORT_SYMBOL(ida_alloc_range); */ void ida_free(struct ida *ida, unsigned int id) { + XA_STATE(xas, &ida->xa, id / IDA_BITMAP_BITS); + unsigned bit = id % IDA_BITMAP_BITS; + struct ida_bitmap *bitmap; unsigned long flags; BUG_ON((int)id < 0); - xa_lock_irqsave(&ida->ida_rt, flags); - ida_remove(ida, id); - xa_unlock_irqrestore(&ida->ida_rt, flags); + + xas_lock_irqsave(&xas, flags); + bitmap = xas_load(&xas); + + if (xa_is_value(bitmap)) { + unsigned long v = xa_to_value(bitmap); + if (bit >= BITS_PER_XA_VALUE) + goto err; + if (!(v & (1UL << bit))) + goto err; + v &= ~(1UL << bit); + if (!v) + goto delete; + xas_store(&xas, xa_mk_value(v)); + } else { + if (!test_bit(bit, bitmap->bitmap)) + goto err; + __clear_bit(bit, bitmap->bitmap); + xas_set_mark(&xas, XA_FREE_MARK); + if (bitmap_empty(bitmap->bitmap, IDA_BITMAP_BITS)) { + kfree(bitmap); +delete: + xas_store(&xas, NULL); + } + } + xas_unlock_irqrestore(&xas, flags); + return; + err: + xas_unlock_irqrestore(&xas, flags); + WARN(1, "ida_free called for id=%d which is not allocated.\n", id); } EXPORT_SYMBOL(ida_free); + +/** + * ida_destroy() - Free all IDs. + * @ida: IDA handle. + * + * Calling this function frees all IDs and releases all resources used + * by an IDA. When this call returns, the IDA is empty and can be reused + * or freed. If the IDA is already empty, there is no need to call this + * function. + * + * Context: Any context. + */ +void ida_destroy(struct ida *ida) +{ + XA_STATE(xas, &ida->xa, 0); + struct ida_bitmap *bitmap; + unsigned long flags; + + xas_lock_irqsave(&xas, flags); + xas_for_each(&xas, bitmap, ULONG_MAX) { + if (!xa_is_value(bitmap)) + kfree(bitmap); + xas_store(&xas, NULL); + } + xas_unlock_irqrestore(&xas, flags); +} +EXPORT_SYMBOL(ida_destroy); + +#ifndef __KERNEL__ +extern void xa_dump_index(unsigned long index, unsigned int shift); +#define IDA_CHUNK_SHIFT ilog2(IDA_BITMAP_BITS) + +static void ida_dump_entry(void *entry, unsigned long index) +{ + unsigned long i; + + if (!entry) + return; + + if (xa_is_node(entry)) { + struct xa_node *node = xa_to_node(entry); + unsigned int shift = node->shift + IDA_CHUNK_SHIFT + + XA_CHUNK_SHIFT; + + xa_dump_index(index * IDA_BITMAP_BITS, shift); + xa_dump_node(node); + for (i = 0; i < XA_CHUNK_SIZE; i++) + ida_dump_entry(node->slots[i], + index | (i << node->shift)); + } else if (xa_is_value(entry)) { + xa_dump_index(index * IDA_BITMAP_BITS, ilog2(BITS_PER_LONG)); + pr_cont("value: data %lx [%px]\n", xa_to_value(entry), entry); + } else { + struct ida_bitmap *bitmap = entry; + + xa_dump_index(index * IDA_BITMAP_BITS, IDA_CHUNK_SHIFT); + pr_cont("bitmap: %p data", bitmap); + for (i = 0; i < IDA_BITMAP_LONGS; i++) + pr_cont(" %lx", bitmap->bitmap[i]); + pr_cont("\n"); + } +} + +static void ida_dump(struct ida *ida) +{ + struct xarray *xa = &ida->xa; + pr_debug("ida: %p node %p free %d\n", ida, xa->xa_head, + xa->xa_flags >> ROOT_TAG_SHIFT); + ida_dump_entry(xa->xa_head, 0); +} +#endif diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 3479d93c32b9..68702061464f 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -255,54 +255,6 @@ static unsigned long next_index(unsigned long index, return (index & ~node_maxindex(node)) + (offset << node->shift); } -#ifndef __KERNEL__ -static void dump_ida_node(void *entry, unsigned long index) -{ - unsigned long i; - - if (!entry) - return; - - if (radix_tree_is_internal_node(entry)) { - struct radix_tree_node *node = entry_to_node(entry); - - pr_debug("ida node: %p offset %d indices %lu-%lu parent %p free %lx shift %d count %d\n", - node, node->offset, index * IDA_BITMAP_BITS, - ((index | node_maxindex(node)) + 1) * - IDA_BITMAP_BITS - 1, - node->parent, node->tags[0][0], node->shift, - node->count); - for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) - dump_ida_node(node->slots[i], - index | (i << node->shift)); - } else if (xa_is_value(entry)) { - pr_debug("ida excp: %p offset %d indices %lu-%lu data %lx\n", - entry, (int)(index & RADIX_TREE_MAP_MASK), - index * IDA_BITMAP_BITS, - index * IDA_BITMAP_BITS + BITS_PER_XA_VALUE, - xa_to_value(entry)); - } else { - struct ida_bitmap *bitmap = entry; - - pr_debug("ida btmp: %p offset %d indices %lu-%lu data", bitmap, - (int)(index & RADIX_TREE_MAP_MASK), - index * IDA_BITMAP_BITS, - (index + 1) * IDA_BITMAP_BITS - 1); - for (i = 0; i < IDA_BITMAP_LONGS; i++) - pr_cont(" %lx", bitmap->bitmap[i]); - pr_cont("\n"); - } -} - -static void ida_dump(struct ida *ida) -{ - struct radix_tree_root *root = &ida->ida_rt; - pr_debug("ida: %p node %p free %d\n", ida, root->xa_head, - root->xa_flags >> ROOT_TAG_SHIFT); - dump_ida_node(root->xa_head, 0); -} -#endif - /* * This assumes that the caller has performed appropriate preallocation, and * that the caller has pinned this thread of control to the current CPU. @@ -2039,27 +1991,6 @@ void idr_preload(gfp_t gfp_mask) } EXPORT_SYMBOL(idr_preload); -int ida_pre_get(struct ida *ida, gfp_t gfp) -{ - /* - * The IDA API has no preload_end() equivalent. Instead, - * ida_get_new() can return -EAGAIN, prompting the caller - * to return to the ida_pre_get() step. - */ - if (!__radix_tree_preload(gfp, IDA_PRELOAD_SIZE)) - preempt_enable(); - - if (!this_cpu_read(ida_bitmap)) { - struct ida_bitmap *bitmap = kzalloc(sizeof(*bitmap), gfp); - if (!bitmap) - return 0; - if (this_cpu_cmpxchg(ida_bitmap, NULL, bitmap)) - kfree(bitmap); - } - - return 1; -} - void __rcu **idr_get_free(struct radix_tree_root *root, struct radix_tree_iter *iter, gfp_t gfp, unsigned long max) @@ -2201,8 +2132,6 @@ static int radix_tree_cpu_dead(unsigned int cpu) kmem_cache_free(radix_tree_node_cachep, node); rtp->nr--; } - kfree(per_cpu(ida_bitmap, cpu)); - per_cpu(ida_bitmap, cpu) = NULL; return 0; } diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index a5a4494922a6..1b63bdb7688f 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -402,16 +402,15 @@ void ida_check_nomem(void) */ void ida_check_conv_user(void) { -#if 0 DEFINE_IDA(ida); unsigned long i; - radix_tree_cpu_dead(1); for (i = 0; i < 1000000; i++) { int id = ida_alloc(&ida, GFP_NOWAIT); if (id == -ENOMEM) { - IDA_BUG_ON(&ida, (i % IDA_BITMAP_BITS) != - BITS_PER_XA_VALUE); + IDA_BUG_ON(&ida, ((i % IDA_BITMAP_BITS) != + BITS_PER_XA_VALUE) && + ((i % IDA_BITMAP_BITS) != 0)); id = ida_alloc(&ida, GFP_KERNEL); } else { IDA_BUG_ON(&ida, (i % IDA_BITMAP_BITS) == @@ -420,7 +419,6 @@ void ida_check_conv_user(void) IDA_BUG_ON(&ida, id != i); } ida_destroy(&ida); -#endif } void ida_check_random(void)