""" Qemu task """ import contextlib import logging import os import yaml import time from packaging.version import Version from tasks import rbd from tasks.util.workunit import get_refspec_after_overrides from teuthology import contextutil from teuthology import misc as teuthology from teuthology.config import config as teuth_config from teuthology.orchestra import run from teuthology.packaging import install_package, remove_package log = logging.getLogger(__name__) DEFAULT_NUM_DISKS = 2 DEFAULT_IMAGE_URL = 'http://download.ceph.com/qa/ubuntu-12.04.qcow2' DEFAULT_IMAGE_SIZE = 10240 # in megabytes ENCRYPTION_HEADER_SIZE = 16 # in megabytes DEFAULT_CPUS = 1 DEFAULT_MEM = 4096 # in megabytes def normalize_disks(config): # normalize the 'disks' parameter into a list of dictionaries for role, client_config in config.items(): _, typ, id_ = teuthology.split_role(role) clone = client_config.get('clone', False) image_url = client_config.get('image_url', DEFAULT_IMAGE_URL) device_type = client_config.get('type', 'filesystem') encryption_format = client_config.get('encryption_format', 'none') parent_encryption_format = client_config.get( 'parent_encryption_format', 'none') disks = client_config.get('disks', DEFAULT_NUM_DISKS) if not isinstance(disks, list): disks = [{'image_name': '{typ}.{id_}.{num}'.format(typ=typ, id_=id_, num=i)} for i in range(int(disks))] client_config['disks'] = disks for i, disk in enumerate(disks): if 'action' not in disk: disk['action'] = 'create' assert disk['action'] in ['none', 'create', 'clone'], 'invalid disk action' assert disk['action'] != 'clone' or 'parent_name' in disk, 'parent_name required for clone' if 'image_size' not in disk: disk['image_size'] = DEFAULT_IMAGE_SIZE disk['image_size'] = int(disk['image_size']) if 'image_url' not in disk and i == 0: disk['image_url'] = image_url if 'device_type' not in disk: disk['device_type'] = device_type disk['device_letter'] = chr(ord('a') + i) if 'encryption_format' not in disk: if clone: disk['encryption_format'] = parent_encryption_format else: disk['encryption_format'] = encryption_format assert disk['encryption_format'] in ['none', 'luks1', 'luks2'], 'invalid encryption format' assert disks, 'at least one rbd device must be used' if clone: for disk in disks: if disk['action'] != 'create': continue clone = dict(disk) clone['action'] = 'clone' clone['parent_name'] = clone['image_name'] clone['image_name'] += '-clone' del disk['device_letter'] clone['encryption_format'] = encryption_format assert clone['encryption_format'] in ['none', 'luks1', 'luks2'], 'invalid encryption format' clone['parent_encryption_format'] = parent_encryption_format assert clone['parent_encryption_format'] in ['none', 'luks1', 'luks2'], 'invalid encryption format' disks.append(clone) def create_images(ctx, config, managers): for role, client_config in config.items(): disks = client_config['disks'] for disk in disks: if disk.get('action') != 'create' or ( 'image_url' in disk and disk['encryption_format'] == 'none'): continue image_size = disk['image_size'] if disk['encryption_format'] != 'none': image_size += ENCRYPTION_HEADER_SIZE create_config = { role: { 'image_name': disk['image_name'], 'image_format': 2, 'image_size': image_size, 'encryption_format': disk['encryption_format'], } } managers.append( lambda create_config=create_config: rbd.create_image(ctx=ctx, config=create_config) ) def create_clones(ctx, config, managers): for role, client_config in config.items(): disks = client_config['disks'] for disk in disks: if disk['action'] != 'clone': continue create_config = { role: { 'image_name': disk['image_name'], 'parent_name': disk['parent_name'], 'encryption_format': disk['encryption_format'], } } managers.append( lambda create_config=create_config: rbd.clone_image(ctx=ctx, config=create_config) ) def create_encrypted_devices(ctx, config, managers): for role, client_config in config.items(): disks = client_config['disks'] for disk in disks: if (disk['encryption_format'] == 'none' and disk.get('parent_encryption_format', 'none') == 'none') or \ 'device_letter' not in disk: continue dev_config = {role: disk} managers.append( lambda dev_config=dev_config: rbd.dev_create(ctx=ctx, config=dev_config) ) @contextlib.contextmanager def create_dirs(ctx, config): """ Handle directory creation and cleanup """ testdir = teuthology.get_testdir(ctx) for role, client_config in config.items(): assert 'test' in client_config, 'You must specify a test to run' (remote,) = ctx.cluster.only(role).remotes.keys() remote.run( args=[ 'install', '-d', '-m0755', '--', '{tdir}/qemu'.format(tdir=testdir), '{tdir}/archive/qemu'.format(tdir=testdir), ] ) try: yield finally: for role, client_config in config.items(): assert 'test' in client_config, 'You must specify a test to run' (remote,) = ctx.cluster.only(role).remotes.keys() remote.run( args=[ 'rmdir', '{tdir}/qemu'.format(tdir=testdir), run.Raw('||'), 'true', ] ) @contextlib.contextmanager def install_block_rbd_driver(ctx, config): """ Make sure qemu rbd block driver (block-rbd.so) is installed """ packages = {} for role, _ in config.items(): (remote,) = ctx.cluster.only(role).remotes.keys() if remote.os.package_type == 'rpm': packages[role] = ['qemu-kvm-block-rbd'] else: packages[role] = ['qemu-block-extra', 'qemu-utils'] for pkg in packages[role]: install_package(pkg, remote) try: yield finally: for role, _ in config.items(): (remote,) = ctx.cluster.only(role).remotes.keys() for pkg in packages[role]: remove_package(pkg, remote) @contextlib.contextmanager def generate_iso(ctx, config): """Execute system commands to generate iso""" log.info('generating iso...') testdir = teuthology.get_testdir(ctx) # use ctx.config instead of config, because config has been # through teuthology.replace_all_with_clients() refspec = get_refspec_after_overrides(ctx.config, {}) git_url = teuth_config.get_ceph_qa_suite_git_url() log.info('Pulling tests from %s ref %s', git_url, refspec) for role, client_config in config.items(): assert 'test' in client_config, 'You must specify a test to run' test = client_config['test'] (remote,) = ctx.cluster.only(role).remotes.keys() clone_dir = '{tdir}/qemu_clone.{role}'.format(tdir=testdir, role=role) remote.run(args=refspec.clone(git_url, clone_dir)) src_dir = os.path.dirname(__file__) userdata_path = os.path.join(testdir, 'qemu', 'userdata.' + role) metadata_path = os.path.join(testdir, 'qemu', 'metadata.' + role) with open(os.path.join(src_dir, 'userdata_setup.yaml')) as f: test_setup = ''.join(f.readlines()) # configuring the commands to setup the nfs mount mnt_dir = "/export/{role}".format(role=role) test_setup = test_setup.format( mnt_dir=mnt_dir ) with open(os.path.join(src_dir, 'userdata_teardown.yaml')) as f: test_teardown = ''.join(f.readlines()) user_data = test_setup disks = client_config['disks'] for disk in disks: if disk['device_type'] != 'filesystem' or \ 'device_letter' not in disk or \ 'image_url' in disk: continue if disk['encryption_format'] == 'none' and \ disk.get('parent_encryption_format', 'none') == 'none': dev_name = 'vd' + disk['device_letter'] else: # encrypted disks use if=ide interface, instead of if=virtio dev_name = 'sd' + disk['device_letter'] user_data += """ - | #!/bin/bash mkdir /mnt/test_{dev_name} mkfs -t xfs /dev/{dev_name} mount -t xfs /dev/{dev_name} /mnt/test_{dev_name} """.format(dev_name=dev_name) user_data += """ - | #!/bin/bash test -d /etc/ceph || mkdir /etc/ceph cp /mnt/cdrom/ceph.* /etc/ceph/ """ cloud_config_archive = client_config.get('cloud_config_archive', []) if cloud_config_archive: user_data += yaml.safe_dump(cloud_config_archive, default_style='|', default_flow_style=False) # this may change later to pass the directories as args to the # script or something. xfstests needs that. user_data += """ - | #!/bin/bash test -d /mnt/test_b && cd /mnt/test_b /mnt/cdrom/test.sh > /mnt/log/test.log 2>&1 && touch /mnt/log/success """ + test_teardown user_data = user_data.format( ceph_branch=ctx.config.get('branch'), ceph_sha1=ctx.config.get('sha1')) remote.write_file(userdata_path, user_data) with open(os.path.join(src_dir, 'metadata.yaml'), 'rb') as f: remote.write_file(metadata_path, f) test_file = '{tdir}/qemu/{role}.test.sh'.format(tdir=testdir, role=role) cluster, _, _ = teuthology.split_role(role) log.info('fetching test %s for %s', test, role) remote.run( args=[ 'cp', '--', os.path.join(clone_dir, test), test_file, run.Raw('&&'), 'chmod', '755', test_file, ], ) remote.run( args=[ 'genisoimage', '-quiet', '-input-charset', 'utf-8', '-volid', 'cidata', '-joliet', '-rock', '-o', '{tdir}/qemu/{role}.iso'.format(tdir=testdir, role=role), '-graft-points', 'user-data={userdata}'.format(userdata=userdata_path), 'meta-data={metadata}'.format(metadata=metadata_path), 'ceph.conf=/etc/ceph/{cluster}.conf'.format(cluster=cluster), 'ceph.keyring=/etc/ceph/{cluster}.keyring'.format(cluster=cluster), 'test.sh={file}'.format(file=test_file), ], ) try: yield finally: for role in config.keys(): (remote,) = ctx.cluster.only(role).remotes.keys() remote.run( args=[ 'rm', '-rf', '{tdir}/qemu/{role}.iso'.format(tdir=testdir, role=role), os.path.join(testdir, 'qemu', 'userdata.' + role), os.path.join(testdir, 'qemu', 'metadata.' + role), '{tdir}/qemu/{role}.test.sh'.format(tdir=testdir, role=role), '{tdir}/qemu_clone.{role}'.format(tdir=testdir, role=role), ], ) @contextlib.contextmanager def download_image(ctx, config): """Downland base image, remove image file when done""" log.info('downloading base image') testdir = teuthology.get_testdir(ctx) client_base_files = {} for role, client_config in config.items(): (remote,) = ctx.cluster.only(role).remotes.keys() cluster, _, _ = teuthology.split_role(role) client_base_files[role] = [] disks = client_config['disks'] for disk in disks: if disk['action'] != 'create' or 'image_url' not in disk: continue base_file = '{tdir}/qemu/base.{name}.qcow2'.format(tdir=testdir, name=disk['image_name']) client_base_files[role].append(base_file) remote.run( args=[ 'wget', '-nv', '-O', base_file, disk['image_url'], ] ) if disk['encryption_format'] == 'none': remote.run( args=[ 'qemu-img', 'convert', '-f', 'qcow2', '-O', 'raw', base_file,'rbd:rbd/{image_name}:conf=/etc/ceph/{cluster}.conf'.format( image_name=disk['image_name'], cluster=cluster) ] ) else: dev_config = {role: {'image_name': disk['image_name'], 'encryption_format': disk['encryption_format']}} raw_file = '{tdir}/qemu/base.{name}.raw'.format( tdir=testdir, name=disk['image_name']) client_base_files[role].append(raw_file) remote.run( args=[ 'qemu-img', 'convert', '-f', 'qcow2', '-O', 'raw', base_file, raw_file ] ) with rbd.dev_create(ctx, dev_config): remote.run( args=[ 'dd', 'if={name}'.format(name=raw_file), 'of={name}'.format(name=dev_config[role]['device_path']), 'bs=4M', 'conv=fdatasync' ] ) cluster, _, _ = teuthology.split_role(role) for disk in disks: if disk['action'] == 'clone' or \ disk['encryption_format'] != 'none' or \ (disk['action'] == 'create' and 'image_url' not in disk): continue remote.run( args=[ 'rbd', '--cluster', cluster, 'resize', '--size={image_size}M'.format(image_size=disk['image_size']), disk['image_name'], run.Raw('||'), 'true' ] ) try: yield finally: log.debug('cleaning up base image files') for role, base_files in client_base_files.items(): (remote,) = ctx.cluster.only(role).remotes.keys() for base_file in base_files: remote.run( args=[ 'rm', '-f', base_file, ], ) def _setup_nfs_mount(remote, role, service_name, mount_dir): """ Sets up an nfs mount on the remote that the guest can use to store logs. This nfs mount is also used to touch a file at the end of the test to indicate if the test was successful or not. """ export_dir = "/export/{role}".format(role=role) log.info("Creating the nfs export directory...") remote.run(args=[ 'sudo', 'mkdir', '-p', export_dir, ]) log.info("Mounting the test directory...") remote.run(args=[ 'sudo', 'mount', '--bind', mount_dir, export_dir, ]) log.info("Adding mount to /etc/exports...") export = "{dir} *(rw,no_root_squash,no_subtree_check,insecure)".format( dir=export_dir ) log.info("Deleting export from /etc/exports...") remote.run(args=[ 'sudo', 'sed', '-i', "\|{export_dir}|d".format(export_dir=export_dir), '/etc/exports' ]) remote.run(args=[ 'echo', export, run.Raw("|"), 'sudo', 'tee', '-a', "/etc/exports", ]) log.info("Restarting NFS...") if remote.os.package_type == "deb": remote.run(args=['sudo', 'service', 'nfs-kernel-server', 'restart']) else: remote.run(args=['sudo', 'systemctl', 'restart', service_name]) def _teardown_nfs_mount(remote, role, service_name): """ Tears down the nfs mount on the remote used for logging and reporting the status of the tests being ran in the guest. """ log.info("Tearing down the nfs mount for {remote}".format(remote=remote)) export_dir = "/export/{role}".format(role=role) log.info("Stopping NFS...") if remote.os.package_type == "deb": remote.run(args=[ 'sudo', 'service', 'nfs-kernel-server', 'stop' ]) else: remote.run(args=[ 'sudo', 'systemctl', 'stop', service_name ]) log.info("Unmounting exported directory...") remote.run(args=[ 'sudo', 'umount', export_dir ]) log.info("Deleting export from /etc/exports...") remote.run(args=[ 'sudo', 'sed', '-i', "\|{export_dir}|d".format(export_dir=export_dir), '/etc/exports' ]) log.info("Starting NFS...") if remote.os.package_type == "deb": remote.run(args=[ 'sudo', 'service', 'nfs-kernel-server', 'start' ]) else: remote.run(args=[ 'sudo', 'systemctl', 'start', service_name ]) @contextlib.contextmanager def run_qemu(ctx, config): """Setup kvm environment and start qemu""" procs = [] testdir = teuthology.get_testdir(ctx) for role, client_config in config.items(): (remote,) = ctx.cluster.only(role).remotes.keys() log_dir = '{tdir}/archive/qemu/{role}'.format(tdir=testdir, role=role) remote.run( args=[ 'mkdir', log_dir, run.Raw('&&'), 'sudo', 'modprobe', 'kvm', ] ) nfs_service_name = 'nfs' if ( remote.os.name in ['rhel', 'centos'] and Version(remote.os.version.lower().removesuffix(".stream")) >= Version("8") ): nfs_service_name = 'nfs-server' # make an nfs mount to use for logging and to # allow to test to tell teuthology the tests outcome _setup_nfs_mount(remote, role, nfs_service_name, log_dir) # Hack to make sure /dev/kvm permissions are set correctly # See http://tracker.ceph.com/issues/17977 and # https://bugzilla.redhat.com/show_bug.cgi?id=1333159 remote.run(args='sudo udevadm control --reload') remote.run(args='sudo udevadm trigger /dev/kvm') remote.run(args='ls -l /dev/kvm') qemu_cmd = 'qemu-system-x86_64' if remote.os.package_type == "rpm": qemu_cmd = "/usr/libexec/qemu-kvm" args=[ 'adjust-ulimits', 'ceph-coverage', '{tdir}/archive/coverage'.format(tdir=testdir), 'daemon-helper', 'term', qemu_cmd, '-enable-kvm', '-nographic', '-cpu', 'host', '-smp', str(client_config.get('cpus', DEFAULT_CPUS)), '-m', str(client_config.get('memory', DEFAULT_MEM)), # cd holding metadata for cloud-init '-cdrom', '{tdir}/qemu/{role}.iso'.format(tdir=testdir, role=role), ] cachemode = 'none' cluster, _, id_ = teuthology.split_role(role) ceph_config = ctx.ceph[cluster].conf.get('global', {}) ceph_config.update(ctx.ceph[cluster].conf.get('client', {})) if ceph_config.get('rbd cache', True): if ceph_config.get('rbd cache max dirty', 1) > 0: cachemode = 'writeback' else: cachemode = 'writethrough' disks = client_config['disks'] for disk in disks: if 'device_letter' not in disk: continue if disk['encryption_format'] == 'none' and \ disk.get('parent_encryption_format', 'none') == 'none': interface = 'virtio' disk_spec = 'rbd:rbd/{img}:conf=/etc/ceph/{cluster}.conf:id={id}'.format( img=disk['image_name'], cluster=cluster, id=id_) else: # encrypted disks use ide as a temporary workaround for # a bug in qemu when using virtio over nbd # TODO: use librbd encryption directly via qemu (not via nbd) interface = 'ide' disk_spec = disk['device_path'] args.extend([ '-drive', 'file={disk_spec},format=raw,if={interface},cache={cachemode}'.format( disk_spec=disk_spec, interface=interface, cachemode=cachemode, ), ]) time_wait = client_config.get('time_wait', 0) log.info('starting qemu...') procs.append( remote.run( args=args, logger=log.getChild(role), stdin=run.PIPE, wait=False, ) ) try: yield finally: log.info('waiting for qemu tests to finish...') run.wait(procs) if time_wait > 0: log.debug('waiting {time_wait} sec for workloads detect finish...'.format( time_wait=time_wait)); time.sleep(time_wait) log.debug('checking that qemu tests succeeded...') for role in config.keys(): (remote,) = ctx.cluster.only(role).remotes.keys() # ensure we have permissions to all the logs log_dir = '{tdir}/archive/qemu/{role}'.format(tdir=testdir, role=role) remote.run( args=[ 'sudo', 'chmod', 'a+rw', '-R', log_dir ] ) # teardown nfs mount _teardown_nfs_mount(remote, role, nfs_service_name) # check for test status remote.run( args=[ 'test', '-f', '{tdir}/archive/qemu/{role}/success'.format( tdir=testdir, role=role ), ], ) log.info("Deleting exported directory...") for role in config.keys(): (remote,) = ctx.cluster.only(role).remotes.keys() remote.run(args=[ 'sudo', 'rm', '-r', '/export' ]) @contextlib.contextmanager def task(ctx, config): """ Run a test inside of QEMU on top of rbd. Only one test is supported per client. For example, you can specify which clients to run on:: tasks: - ceph: - qemu: client.0: test: http://download.ceph.com/qa/test.sh client.1: test: http://download.ceph.com/qa/test2.sh Or use the same settings on all clients: tasks: - ceph: - qemu: all: test: http://download.ceph.com/qa/test.sh For tests that want to explicitly describe the RBD images to connect: tasks: - ceph: - qemu: client.0: test: http://download.ceph.com/qa/test.sh clone: True/False (optionally clone all created disks), image_url: (optional default image URL) type: filesystem / block (optional default device type) disks: [ { action: create / clone / none (optional, defaults to create) image_name: (optional) parent_name: (if action == clone), type: filesystem / block (optional, defaults to filesystem) image_url: (optional), image_size: (optional) encryption_format: luks1 / luks2 / none (optional, defaults to none) }, ... ] You can set the amount of CPUs and memory the VM has (default is 1 CPU and 4096 MB):: tasks: - ceph: - qemu: client.0: test: http://download.ceph.com/qa/test.sh cpus: 4 memory: 512 # megabytes If you need to configure additional cloud-config options, set cloud_config to the required data set:: tasks: - ceph - qemu: client.0: test: http://ceph.com/qa/test.sh cloud_config_archive: - | #/bin/bash touch foo1 - content: | test data type: text/plain filename: /tmp/data This task supports roles that include a ceph cluster, e.g.:: tasks: - ceph: - qemu: backup.client.0: [foo] client.1: [bar] # cluster is implicitly 'ceph' """ assert isinstance(config, dict), \ "task qemu only supports a dictionary for configuration" config = teuthology.replace_all_with_clients(ctx.cluster, config) normalize_disks(config) managers = [] create_images(ctx=ctx, config=config, managers=managers) managers.extend([ lambda: create_dirs(ctx=ctx, config=config), lambda: install_block_rbd_driver(ctx=ctx, config=config), lambda: generate_iso(ctx=ctx, config=config), lambda: download_image(ctx=ctx, config=config), ]) create_clones(ctx=ctx, config=config, managers=managers) create_encrypted_devices(ctx=ctx, config=config, managers=managers) managers.append( lambda: run_qemu(ctx=ctx, config=config), ) with contextutil.nested(*managers): yield