linux-brain/include/linux/frontswap.h
Vineeth Remanan Pillai b56a2d8af9 mm: rid swapoff of quadratic complexity
This patch was initially posted by Kelley Nielsen.  Reposting the patch
with all review comments addressed and with minor modifications and
optimizations.  Also, folding in the fixes offered by Hugh Dickins and
Huang Ying.  Tests were rerun and commit message updated with new
results.

try_to_unuse() is of quadratic complexity, with a lot of wasted effort.
It unuses swap entries one by one, potentially iterating over all the
page tables for all the processes in the system for each one.

This newly proposed implementation of try_to_unuse() reduces its
complexity to linear.  It iterates over the system's mms once, unusing
all the affected entries as it walks each set of page tables.  It also
makes similar changes to shmem_unuse().

Improvement

swapoff was called on a swap partition containing about 6G of data, in a
VM (8 CPUs, 16G RAM), and calls to unuse_pte_range() were counted.

Present implementation....about 1200M calls (8min, avg 80% cpu util).
Prototype.................about  9.0K calls (3min, avg 5% cpu util).

Details

In shmem_unuse(), iterate over the shmem_swaplist and, for each
shmem_inode_info that contains a swap entry, pass it to
shmem_unuse_inode(), along with the swap type.  In shmem_unuse_inode(),
iterate over its associated xarray, and store the index and value of
each swap entry in an array for passing to shmem_swapin_page() outside
of the RCU critical section.

In try_to_unuse(), instead of iterating over the entries in the type and
unusing them one by one, perhaps walking all the page tables for all the
processes for each one, iterate over the mmlist, making one pass.  Pass
each mm to unuse_mm() to begin its page table walk, and during the walk,
unuse all the ptes that have backing store in the swap type received by
try_to_unuse().  After the walk, check the type for orphaned swap
entries with find_next_to_unuse(), and remove them from the swap cache.
If find_next_to_unuse() starts over at the beginning of the type, repeat
the check of the shmem_swaplist and the walk a maximum of three times.

Change unuse_mm() and the intervening walk functions down to
unuse_pte_range() to take the type as a parameter, and to iterate over
their entire range, calling the next function down on every iteration.
In unuse_pte_range(), make a swap entry from each pte in the range using
the passed in type.  If it has backing store in the type, call
swapin_readahead() to retrieve the page and pass it to unuse_pte().

Pass the count of pages_to_unuse down the page table walks in
try_to_unuse(), and return from the walk when the desired number of
pages has been swapped back in.

Link: http://lkml.kernel.org/r/20190114153129.4852-2-vpillai@digitalocean.com
Signed-off-by: Vineeth Remanan Pillai <vpillai@digitalocean.com>
Signed-off-by: Kelley Nielsen <kelleynnn@gmail.com>
Signed-off-by: Huang Ying <ying.huang@intel.com>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: Rik van Riel <riel@surriel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-03-05 21:07:18 -08:00

123 lines
3.0 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_FRONTSWAP_H
#define _LINUX_FRONTSWAP_H
#include <linux/swap.h>
#include <linux/mm.h>
#include <linux/bitops.h>
#include <linux/jump_label.h>
/*
 * Return code to denote that the requested number of
 * frontswap pages are unused (moved to page cache).
 * Used in shmem_unuse and try_to_unuse.
 */
#define FRONTSWAP_PAGES_UNUSED 2
/*
 * Table of callbacks describing one set of frontswap operations; a pointer
 * to such a table is what frontswap_register_ops() accepts.
 */
struct frontswap_ops {
	/* this swap type was just swapon'ed */
	void (*init)(unsigned type);

	/* store a page */
	int (*store)(unsigned type, pgoff_t offset, struct page *page);

	/* load a page */
	int (*load)(unsigned type, pgoff_t offset, struct page *page);

	/* page no longer needed */
	void (*invalidate_page)(unsigned type, pgoff_t offset);

	/* swap type just swapoff'ed */
	void (*invalidate_area)(unsigned type);

	/* private pointer to next ops */
	struct frontswap_ops *next;
};
/*
 * Functions declared here are defined outside this header.  Function
 * prototypes have external linkage by default, so no storage-class
 * specifier is spelled out.
 */
void frontswap_register_ops(struct frontswap_ops *ops);
void frontswap_shrink(unsigned long);
unsigned long frontswap_curr_pages(void);
void frontswap_writethrough(bool);

/* Lets other code detect at compile time that the call below is declared. */
#define FRONTSWAP_HAS_EXCLUSIVE_GETS
void frontswap_tmem_exclusive_gets(bool);

/*
 * Double-underscore entry points.  The static inline wrappers in this
 * header forward to these; parameter names mirror those wrappers.
 */
bool __frontswap_test(struct swap_info_struct *sis, pgoff_t offset);
void __frontswap_init(unsigned type, unsigned long *map);
int __frontswap_store(struct page *page);
int __frontswap_load(struct page *page);
void __frontswap_invalidate_page(unsigned type, pgoff_t offset);
void __frontswap_invalidate_area(unsigned type);
#ifdef CONFIG_FRONTSWAP
/* Static key tested by frontswap_enabled(); defined outside this header. */
extern struct static_key_false frontswap_enabled_key;

/*
 * frontswap_enabled - test the frontswap static key.
 *
 * Returns the result of static_branch_unlikely() applied to
 * frontswap_enabled_key.
 */
static inline bool frontswap_enabled(void)
{
	return static_branch_unlikely(&frontswap_enabled_key);
}
/*
 * frontswap_test - thin inline wrapper around __frontswap_test().
 * @sis:    swap_info_struct to query
 * @offset: offset passed through unchanged
 *
 * Returns whatever __frontswap_test() returns for (@sis, @offset).
 */
static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset)
{
	return __frontswap_test(sis, offset);
}
/*
 * frontswap_map_set - store @map in @p->frontswap_map.
 * @p:   swap_info_struct whose frontswap_map field is written
 * @map: pointer value to store
 */
static inline void frontswap_map_set(struct swap_info_struct *p,
				     unsigned long *map)
{
	p->frontswap_map = map;
}
/*
 * frontswap_map_get - read back @p->frontswap_map.
 * @p: swap_info_struct whose frontswap_map field is read
 *
 * Returns the pointer currently held in @p->frontswap_map.
 */
static inline unsigned long *frontswap_map_get(struct swap_info_struct *p)
{
	return p->frontswap_map;
}
#else
/* all inline routines become no-ops and all externs are ignored */
/*
 * frontswap_enabled - stub used when CONFIG_FRONTSWAP is not defined.
 *
 * Always returns false.
 */
static inline bool frontswap_enabled(void)
{
	return false;
}
/*
 * frontswap_test - stub used when CONFIG_FRONTSWAP is not defined.
 * @sis:    ignored
 * @offset: ignored
 *
 * Always returns false.
 */
static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset)
{
	return false;
}
/*
 * frontswap_map_set - stub used when CONFIG_FRONTSWAP is not defined.
 * @p:   ignored
 * @map: ignored
 *
 * Does nothing; neither argument is read or written.
 */
static inline void frontswap_map_set(struct swap_info_struct *p,
				     unsigned long *map)
{
	/* intentionally empty */
}
/*
 * frontswap_map_get - stub used when CONFIG_FRONTSWAP is not defined.
 * @p: ignored
 *
 * Always returns NULL.
 */
static inline unsigned long *frontswap_map_get(struct swap_info_struct *p)
{
	return NULL;
}
#endif
/*
 * frontswap_store - forward @page to __frontswap_store() if enabled.
 * @page: page handed through to __frontswap_store()
 *
 * Returns -1 without calling anything when frontswap_enabled() is false;
 * otherwise returns the result of __frontswap_store(@page).
 */
static inline int frontswap_store(struct page *page)
{
	if (!frontswap_enabled())
		return -1;

	return __frontswap_store(page);
}
/*
 * frontswap_load - forward @page to __frontswap_load() if enabled.
 * @page: page handed through to __frontswap_load()
 *
 * Returns -1 without calling anything when frontswap_enabled() is false;
 * otherwise returns the result of __frontswap_load(@page).
 */
static inline int frontswap_load(struct page *page)
{
	if (!frontswap_enabled())
		return -1;

	return __frontswap_load(page);
}
/*
 * frontswap_invalidate_page - call __frontswap_invalidate_page() if enabled.
 * @type:   passed through unchanged
 * @offset: passed through unchanged
 *
 * Does nothing when frontswap_enabled() is false.
 */
static inline void frontswap_invalidate_page(unsigned type, pgoff_t offset)
{
	if (!frontswap_enabled())
		return;

	__frontswap_invalidate_page(type, offset);
}
/*
 * frontswap_invalidate_area - call __frontswap_invalidate_area() if enabled.
 * @type: passed through unchanged
 *
 * Does nothing when frontswap_enabled() is false.
 */
static inline void frontswap_invalidate_area(unsigned type)
{
	if (!frontswap_enabled())
		return;

	__frontswap_invalidate_area(type);
}
/*
 * frontswap_init - forward to __frontswap_init() on CONFIG_FRONTSWAP builds.
 * @type: passed through unchanged
 * @map:  passed through unchanged
 *
 * The call is selected at compile time by CONFIG_FRONTSWAP alone; unlike
 * the other wrappers in this header, frontswap_enabled() is not consulted.
 * Without CONFIG_FRONTSWAP the body is empty.
 */
static inline void frontswap_init(unsigned type, unsigned long *map)
{
#if defined(CONFIG_FRONTSWAP)
	__frontswap_init(type, map);
#endif
}
#endif /* _LINUX_FRONTSWAP_H */