linux-brain/drivers/gpu/drm/i915/i915_utils.h
Mel Gorman 70b44595ea mm, compaction: use free lists to quickly locate a migration source
The migration scanner is a linear scan of a zone with a potentiall large
search space.  Furthermore, many pageblocks are unusable such as those
filled with reserved pages or partially filled with pages that cannot
migrate.  These still get scanned in the common case of allocating a THP
and the cost accumulates.

The patch uses a partial search of the free lists to locate a migration
source candidate that is marked as MOVABLE when allocating a THP.  It
prefers picking a block with a larger number of free pages already on
the basis that there are fewer pages to migrate to free the entire
block.  The lowest PFN found during searches is tracked as the basis of
the start for the linear search after the first search of the free list
fails.  After the search, the free list is shuffled so that the next
search will not encounter the same page.  If the search fails then the
subsequent searches will be shorter and the linear scanner is used.

If this search fails, or if the request is for a small or
unmovable/reclaimable allocation then the linear scanner is still used.
It is somewhat pointless to use the list search in those cases.  Small
free pages must be used for the search and there is no guarantee that
movable pages are located within that block that are contiguous.

                                     5.0.0-rc1              5.0.0-rc1
                                 noboost-v3r10          findmig-v3r15
Amean     fault-both-3      3771.41 (   0.00%)     3390.40 (  10.10%)
Amean     fault-both-5      5409.05 (   0.00%)     5082.28 (   6.04%)
Amean     fault-both-7      7040.74 (   0.00%)     7012.51 (   0.40%)
Amean     fault-both-12    11887.35 (   0.00%)    11346.63 (   4.55%)
Amean     fault-both-18    16718.19 (   0.00%)    15324.19 (   8.34%)
Amean     fault-both-24    21157.19 (   0.00%)    16088.50 *  23.96%*
Amean     fault-both-30    21175.92 (   0.00%)    18723.42 *  11.58%*
Amean     fault-both-32    21339.03 (   0.00%)    18612.01 *  12.78%*

                                5.0.0-rc1              5.0.0-rc1
                            noboost-v3r10          findmig-v3r15
Percentage huge-3        86.50 (   0.00%)       89.83 (   3.85%)
Percentage huge-5        92.52 (   0.00%)       91.96 (  -0.61%)
Percentage huge-7        92.44 (   0.00%)       92.85 (   0.44%)
Percentage huge-12       92.98 (   0.00%)       92.74 (  -0.25%)
Percentage huge-18       91.70 (   0.00%)       91.71 (   0.02%)
Percentage huge-24       91.59 (   0.00%)       92.13 (   0.60%)
Percentage huge-30       90.14 (   0.00%)       93.79 (   4.04%)
Percentage huge-32       90.03 (   0.00%)       91.27 (   1.37%)

This shows an improvement in allocation latencies with similar
allocation success rates.  While not presented, there was a 31%
reduction in migration scanning and a 8% reduction on system CPU usage.
A 2-socket machine showed similar benefits.

[mgorman@techsingularity.net: several fixes]
  Link: http://lkml.kernel.org/r/20190204120111.GL9565@techsingularity.net
[vbabka@suse.cz: migrate block that was found-fast, some optimisations]
Link: http://lkml.kernel.org/r/20190118175136.31341-10-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <Vbabka@suse.cz>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Cc: David Rientjes <rientjes@google.com>
Cc: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-03-05 21:07:16 -08:00

162 lines
4.5 KiB
C

/*
* Copyright © 2016 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
*/
#ifndef __I915_UTILS_H
#define __I915_UTILS_H
#undef WARN_ON
/* Many gcc seem to no see through this and fall over :( */
#if 0
#define WARN_ON(x) ({ \
bool __i915_warn_cond = (x); \
if (__builtin_constant_p(__i915_warn_cond)) \
BUILD_BUG_ON(__i915_warn_cond); \
WARN(__i915_warn_cond, "WARN_ON(" #x ")"); })
#else
#define WARN_ON(x) WARN((x), "%s", "WARN_ON(" __stringify(x) ")")
#endif
#undef WARN_ON_ONCE
#define WARN_ON_ONCE(x) WARN_ONCE((x), "%s", "WARN_ON_ONCE(" __stringify(x) ")")
#define MISSING_CASE(x) WARN(1, "Missing case (%s == %ld)\n", \
__stringify(x), (long)(x))
#if defined(GCC_VERSION) && GCC_VERSION >= 70000
#define add_overflows_t(T, A, B) \
__builtin_add_overflow_p((A), (B), (T)0)
#else
#define add_overflows_t(T, A, B) ({ \
typeof(A) a = (A); \
typeof(B) b = (B); \
(T)(a + b) < a; \
})
#endif
#define add_overflows(A, B) \
add_overflows_t(typeof((A) + (B)), (A), (B))
#define range_overflows(start, size, max) ({ \
typeof(start) start__ = (start); \
typeof(size) size__ = (size); \
typeof(max) max__ = (max); \
(void)(&start__ == &size__); \
(void)(&start__ == &max__); \
start__ > max__ || size__ > max__ - start__; \
})
#define range_overflows_t(type, start, size, max) \
range_overflows((type)(start), (type)(size), (type)(max))
/* Note we don't consider signbits :| */
#define overflows_type(x, T) \
(sizeof(x) > sizeof(T) && (x) >> BITS_PER_TYPE(T))
#define ptr_mask_bits(ptr, n) ({ \
unsigned long __v = (unsigned long)(ptr); \
(typeof(ptr))(__v & -BIT(n)); \
})
#define ptr_unmask_bits(ptr, n) ((unsigned long)(ptr) & (BIT(n) - 1))
#define ptr_unpack_bits(ptr, bits, n) ({ \
unsigned long __v = (unsigned long)(ptr); \
*(bits) = __v & (BIT(n) - 1); \
(typeof(ptr))(__v & -BIT(n)); \
})
#define ptr_pack_bits(ptr, bits, n) ({ \
unsigned long __bits = (bits); \
GEM_BUG_ON(__bits & -BIT(n)); \
((typeof(ptr))((unsigned long)(ptr) | __bits)); \
})
#define page_mask_bits(ptr) ptr_mask_bits(ptr, PAGE_SHIFT)
#define page_unmask_bits(ptr) ptr_unmask_bits(ptr, PAGE_SHIFT)
#define page_pack_bits(ptr, bits) ptr_pack_bits(ptr, bits, PAGE_SHIFT)
#define page_unpack_bits(ptr, bits) ptr_unpack_bits(ptr, bits, PAGE_SHIFT)
#define ptr_offset(ptr, member) offsetof(typeof(*(ptr)), member)
#define fetch_and_zero(ptr) ({ \
typeof(*ptr) __T = *(ptr); \
*(ptr) = (typeof(*ptr))0; \
__T; \
})
static inline u64 ptr_to_u64(const void *ptr)
{
return (uintptr_t)ptr;
}
#define u64_to_ptr(T, x) ({ \
typecheck(u64, x); \
(T *)(uintptr_t)(x); \
})
#define __mask_next_bit(mask) ({ \
int __idx = ffs(mask) - 1; \
mask &= ~BIT(__idx); \
__idx; \
})
#include <linux/list.h>
static inline void __list_del_many(struct list_head *head,
struct list_head *first)
{
first->prev = head;
WRITE_ONCE(head->next, first);
}
/*
* Wait until the work is finally complete, even if it tries to postpone
* by requeueing itself. Note, that if the worker never cancels itself,
* we will spin forever.
*/
static inline void drain_delayed_work(struct delayed_work *dw)
{
do {
while (flush_delayed_work(dw))
;
} while (delayed_work_pending(dw));
}
static inline const char *yesno(bool v)
{
return v ? "yes" : "no";
}
static inline const char *onoff(bool v)
{
return v ? "on" : "off";
}
static inline const char *enableddisabled(bool v)
{
return v ? "enabled" : "disabled";
}
#endif /* !__I915_UTILS_H */