blkcg: add tools/cgroup/iocost_coef_gen.py

Add a script which can be used to generate device-specific iocost
linear model coefficients.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Tejun Heo 2019-08-28 15:06:00 -07:00 committed by Jens Axboe
parent 6954ff185e
commit 8504dea783
3 changed files with 184 additions and 0 deletions

View File

@ -1529,6 +1529,9 @@ IO Interface Files
The IO cost model isn't expected to be accurate in absolute
sense and is scaled to the device behavior dynamically.
If needed, tools/cgroup/iocost_coef_gen.py can be used to
generate device-specific coefficients.
io.weight
A read-write flat-keyed file which exists on non-root cgroups.
The default is "default 100".

View File

@ -46,6 +46,9 @@
* If needed, tools/cgroup/iocost_coef_gen.py can be used to generate
* device-specific coefficients.
*
* If needed, tools/cgroup/iocost_coef_gen.py can be used to generate
* device-specific coefficients.
*
* 2. Control Strategy
*
* The device virtual time (vtime) is used as the primary control metric.

View File

@ -0,0 +1,178 @@
#!/usr/bin/env python3
#
# Copyright (C) 2019 Tejun Heo <tj@kernel.org>
# Copyright (C) 2019 Andy Newell <newella@fb.com>
# Copyright (C) 2019 Facebook
desc = """
Generate linear IO cost model coefficients used by the blk-iocost
controller. If the target raw testdev is specified, destructive tests
are performed against the whole device; otherwise, on
./iocost-coef-fio.testfile. The result can be written directly to
/sys/fs/cgroup/io.cost.model.
On high performance devices, --numjobs > 1 is needed to achieve
saturation.
See Documentation/admin-guide/cgroup-v2.rst and block/blk-iocost.c
for more details.
"""
import argparse
import re
import json
import glob
import os
import sys
import atexit
import shutil
import tempfile
import subprocess
parser = argparse.ArgumentParser(description=desc,
formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument('--testdev', metavar='DEV',
help='Raw block device to use for testing, ignores --testfile-size')
parser.add_argument('--testfile-size-gb', type=float, metavar='GIGABYTES', default=16,
help='Testfile size in gigabytes (default: %(default)s)')
parser.add_argument('--duration', type=int, metavar='SECONDS', default=120,
help='Individual test run duration in seconds (default: %(default)s)')
parser.add_argument('--seqio-block-mb', metavar='MEGABYTES', type=int, default=128,
help='Sequential test block size in megabytes (default: %(default)s)')
parser.add_argument('--seq-depth', type=int, metavar='DEPTH', default=64,
help='Sequential test queue depth (default: %(default)s)')
parser.add_argument('--rand-depth', type=int, metavar='DEPTH', default=64,
help='Random test queue depth (default: %(default)s)')
parser.add_argument('--numjobs', type=int, metavar='JOBS', default=1,
help='Number of parallel fio jobs to run (default: %(default)s)')
parser.add_argument('--quiet', action='store_true')
parser.add_argument('--verbose', action='store_true')
def info(msg):
if not args.quiet:
print(msg)
def dbg(msg):
if args.verbose and not args.quiet:
print(msg)
# determine ('DEVNAME', 'MAJ:MIN') for @path
def dir_to_dev(path):
# find the block device the current directory is on
devname = subprocess.run(f'findmnt -nvo SOURCE -T{path}',
stdout=subprocess.PIPE, shell=True).stdout
devname = os.path.basename(devname).decode('utf-8').strip()
# partition -> whole device
parents = glob.glob('/sys/block/*/' + devname)
if len(parents):
devname = os.path.basename(os.path.dirname(parents[0]))
rdev = os.stat(f'/dev/{devname}').st_rdev
return (devname, f'{os.major(rdev)}:{os.minor(rdev)}')
def create_testfile(path, size):
global args
if os.path.isfile(path) and os.stat(path).st_size == size:
return
info(f'Creating testfile {path}')
subprocess.check_call(f'rm -f {path}', shell=True)
subprocess.check_call(f'touch {path}', shell=True)
subprocess.call(f'chattr +C {path}', shell=True)
subprocess.check_call(
f'pv -s {size} -pr /dev/urandom {"-q" if args.quiet else ""} | '
f'dd of={path} count={size} '
f'iflag=count_bytes,fullblock oflag=direct bs=16M status=none',
shell=True)
def run_fio(testfile, duration, iotype, iodepth, blocksize, jobs):
global args
eta = 'never' if args.quiet else 'always'
outfile = tempfile.NamedTemporaryFile()
cmd = (f'fio --direct=1 --ioengine=libaio --name=coef '
f'--filename={testfile} --runtime={round(duration)} '
f'--readwrite={iotype} --iodepth={iodepth} --blocksize={blocksize} '
f'--eta={eta} --output-format json --output={outfile.name} '
f'--time_based --numjobs={jobs}')
if args.verbose:
dbg(f'Running {cmd}')
subprocess.check_call(cmd, shell=True)
with open(outfile.name, 'r') as f:
d = json.loads(f.read())
return sum(j['read']['bw_bytes'] + j['write']['bw_bytes'] for j in d['jobs'])
def restore_elevator_nomerges():
global elevator_path, nomerges_path, elevator, nomerges
info(f'Restoring elevator to {elevator} and nomerges to {nomerges}')
with open(elevator_path, 'w') as f:
f.write(elevator)
with open(nomerges_path, 'w') as f:
f.write(nomerges)
args = parser.parse_args()
missing = False
for cmd in [ 'findmnt', 'pv', 'dd', 'fio' ]:
if not shutil.which(cmd):
print(f'Required command "{cmd}" is missing', file=sys.stderr)
missing = True
if missing:
sys.exit(1)
if args.testdev:
devname = os.path.basename(args.testdev)
rdev = os.stat(f'/dev/{devname}').st_rdev
devno = f'{os.major(rdev)}:{os.minor(rdev)}'
testfile = f'/dev/{devname}'
info(f'Test target: {devname}({devno})')
else:
devname, devno = dir_to_dev('.')
testfile = 'iocost-coef-fio.testfile'
testfile_size = int(args.testfile_size_gb * 2 ** 30)
create_testfile(testfile, testfile_size)
info(f'Test target: {testfile} on {devname}({devno})')
elevator_path = f'/sys/block/{devname}/queue/scheduler'
nomerges_path = f'/sys/block/{devname}/queue/nomerges'
with open(elevator_path, 'r') as f:
elevator = re.sub(r'.*\[(.*)\].*', r'\1', f.read().strip())
with open(nomerges_path, 'r') as f:
nomerges = f.read().strip()
info(f'Temporarily disabling elevator and merges')
atexit.register(restore_elevator_nomerges)
with open(elevator_path, 'w') as f:
f.write('none')
with open(nomerges_path, 'w') as f:
f.write('1')
info('Determining rbps...')
rbps = run_fio(testfile, args.duration, 'read',
1, args.seqio_block_mb * (2 ** 20), args.numjobs)
info(f'\nrbps={rbps}, determining rseqiops...')
rseqiops = round(run_fio(testfile, args.duration, 'read',
args.seq_depth, 4096, args.numjobs) / 4096)
info(f'\nrseqiops={rseqiops}, determining rrandiops...')
rrandiops = round(run_fio(testfile, args.duration, 'randread',
args.rand_depth, 4096, args.numjobs) / 4096)
info(f'\nrrandiops={rrandiops}, determining wbps...')
wbps = run_fio(testfile, args.duration, 'write',
1, args.seqio_block_mb * (2 ** 20), args.numjobs)
info(f'\nwbps={wbps}, determining wseqiops...')
wseqiops = round(run_fio(testfile, args.duration, 'write',
args.seq_depth, 4096, args.numjobs) / 4096)
info(f'\nwseqiops={wseqiops}, determining wrandiops...')
wrandiops = round(run_fio(testfile, args.duration, 'randwrite',
args.rand_depth, 4096, args.numjobs) / 4096)
info(f'\nwrandiops={wrandiops}')
restore_elevator_nomerges()
atexit.unregister(restore_elevator_nomerges)
info('')
print(f'{devno} rbps={rbps} rseqiops={rseqiops} rrandiops={rrandiops} '
f'wbps={wbps} wseqiops={wseqiops} wrandiops={wrandiops}')