Code Analysis: Saving a Custom Image from a Nova Instance

2023-11-03 19:36


Recently, saving a custom image from a Nova instance failed in our environment, so this post traces the whole flow through the code.

1. Saving a custom image from an instance

# nova image-create {{INSTANCE_UUID}} {{NEW_IMAGE_NAME}}
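The same operation can also be driven from Python with python-novaclient. A minimal sketch for orientation; the auth URL, credentials, and names below are placeholders, not values from this deployment:

# Minimal sketch using python-novaclient; auth URL, credentials and
# names are placeholders for illustration only.
from keystoneauth1 import session
from keystoneauth1.identity import v3
from novaclient import client

auth = v3.Password(auth_url='http://controller:5000/v3',
                   username='admin', password='secret',
                   project_name='admin',
                   user_domain_name='Default',
                   project_domain_name='Default')
sess = session.Session(auth=auth)
nova = client.Client('2.1', session=sess)

server = nova.servers.get('INSTANCE_UUID')
# Equivalent of `nova image-create`: POSTs the createImage server action
# and returns the new image's UUID.
image_uuid = nova.servers.create_image(server, 'NEW_IMAGE_NAME',
                                       metadata={'created_by': 'demo'})
print(image_uuid)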
2. nova/api/openstack/compute/servers.py

The request is dispatched to the API method _action_create_image, which loads the instance and its block device mappings (bdms). Because enable_snapshot_volume_backed is not enabled in our deployment, execution takes the self.compute_api.snapshot branch:

@wsgi.response(202)
@wsgi.expected_errors((400, 403, 404, 409))
@wsgi.action('createImage')
@validation.schema(schema_servers.create_image, '2.0', '2.0')
@validation.schema(schema_servers.create_image, '2.1')
def _action_create_image(self, req, id, body):
    """Snapshot a server instance."""
    context = req.environ['nova.context']
    context.can(server_policies.SERVERS % 'create_image')

    entity = body["createImage"]
    image_name = common.normalize_name(entity["name"])
    metadata = entity.get('metadata', {})

    # Starting from microversion 2.39 we don't check quotas on createImage
    if api_version_request.is_supported(
            req,
            max_version=api_version_request.MAX_IMAGE_META_PROXY_API_VERSION):
        common.check_img_metadata_properties_quota(context, metadata)

    instance = self._get_server(context, req, id)

    bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
        context, instance.uuid)

    try:
        if CONF.api.enable_snapshot_volume_backed and \
                compute_utils.is_volume_backed_instance(context, instance,
                                                        bdms):
            context.can(server_policies.SERVERS %
                        'create_image:allow_volume_backed')
            image = self.compute_api.snapshot_volume_backed(
                context,
                instance,
                image_name,
                extra_properties=metadata)
        else:
            image = self.compute_api.snapshot(context,
                                              instance,
                                              image_name,
                                              extra_properties=metadata)
    except exception.InstanceUnknownCell as e:
        raise exc.HTTPNotFound(explanation=e.format_message())
    except exception.InstanceInvalidState as state_error:
        common.raise_http_conflict_for_instance_invalid_state(
            state_error, 'createImage', id)
    except exception.Invalid as err:
        raise exc.HTTPBadRequest(explanation=err.format_message())
    except exception.OverQuota as e:
        raise exc.HTTPForbidden(explanation=e.format_message())

    # Starting with microversion 2.45 we return a response body containing
    # the snapshot image id without the Location header.
    if api_version_request.is_supported(req, '2.45'):
        return {'image_id': image['id']}

    # build location of newly-created image entity
    image_id = str(image['id'])
    image_ref = image_api.API().generate_image_url(image_id, context)

    resp = webob.Response(status_int=202)
    resp.headers['Location'] = image_ref
    return resp
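For context, the volume-backed branch above is guarded by compute_utils.is_volume_backed_instance. A simplified paraphrase of that check (trimmed for illustration; see nova/compute/utils.py for the real implementation):

# Simplified paraphrase of nova.compute.utils.is_volume_backed_instance.
from nova import objects

def is_volume_backed_instance(context, instance, bdms=None):
    if bdms is None:
        bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
            context, instance.uuid)
    root_bdm = bdms.root_bdm()
    if root_bdm is not None:
        # the root disk is volume-backed when its bdm points at a volume
        return root_bdm.is_volume
    # very old instances may lack a root bdm; assume volume-backed only
    # when the instance was not booted from an image
    return not instance.image_ref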
3. nova/compute/api.py

This calls compute_utils.create_image, which first creates the image in Glance; it then records a create-image action for the instance, and finally sends the snapshot_instance RPC request carrying the instance UUID and image UUID:

def snapshot(self, context, instance, name, extra_properties=None):
    """Snapshot the given instance.

    :param instance: nova.objects.instance.Instance object
    :param name: name of the snapshot
    :param extra_properties: dict of extra image properties to include
                             when creating the image.
    :returns: A dict containing image metadata
    """
    image_meta = compute_utils.create_image(
        context, instance, name, 'snapshot', self.image_api,
        extra_properties=extra_properties)

    # NOTE(comstud): Any changes to this method should also be made
    # to the snapshot_instance() method in nova/cells/messaging.py
    instance.task_state = task_states.IMAGE_SNAPSHOT_PENDING
    try:
        instance.save(expected_task_state=[None])
    except (exception.InstanceNotFound,
            exception.UnexpectedDeletingTaskStateError) as ex:
        # Changing the instance task state to use in raising the
        # InstanceInvalidException below
        LOG.debug('Instance disappeared during snapshot.',
                  instance=instance)
        try:
            image_id = image_meta['id']
            self.image_api.delete(context, image_id)
            LOG.info('Image %s deleted because instance '
                     'deleted before snapshot started.',
                     image_id, instance=instance)
        except exception.ImageNotFound:
            pass
        except Exception as exc:
            LOG.warning("Error while trying to clean up image %(img_id)s: "
                        "%(error_msg)s",
                        {"img_id": image_meta['id'],
                         "error_msg": six.text_type(exc)})
        attr = 'task_state'
        state = task_states.DELETING
        if type(ex) == exception.InstanceNotFound:
            attr = 'vm_state'
            state = vm_states.DELETED
        raise exception.InstanceInvalidState(attr=attr,
                                             instance_uuid=instance.uuid,
                                             state=state,
                                             method='snapshot')

    self._record_action_start(context, instance,
                              instance_actions.CREATE_IMAGE)

    self.compute_rpcapi.snapshot_instance(context, instance,
                                          image_meta['id'])

    return image_meta
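The snapshot_instance RPC is an asynchronous cast routed to the compute host that owns the instance. Roughly what nova/compute/rpcapi.py does, paraphrased from memory (the RPC version pin varies between releases):

# Paraphrase of ComputeAPI.snapshot_instance in nova/compute/rpcapi.py;
# the version string differs by Nova release.
def snapshot_instance(self, ctxt, instance, image_id):
    # _compute_host is the rpcapi helper that picks the target host
    cctxt = self.client.prepare(server=_compute_host(None, instance),
                                version='4.0')
    # cast, not call: the API returns immediately and the compute
    # manager does the work asynchronously
    cctxt.cast(ctxt, 'snapshot_instance', instance=instance,
               image_id=image_id)

Because this is a cast, the API request returns 202 before the snapshot actually starts; progress is tracked through instance.task_state.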
4. nova/compute/manager.py

The compute manager updates the instance's task state and ultimately calls self.driver.snapshot to take the snapshot:

def snapshot_instance(self, context, image_id, instance):
    """Snapshot an instance on this host.

    :param context: security context
    :param image_id: glance.db.sqlalchemy.models.Image.Id
    :param instance: a nova.objects.instance.Instance object
    """
    # NOTE(dave-mcnally) the task state will already be set by the api
    # but if the compute manager has crashed/been restarted prior to the
    # request getting here the task state may have been cleared so we set
    # it again and things continue normally
    try:
        instance.task_state = task_states.IMAGE_SNAPSHOT
        instance.save(
            expected_task_state=task_states.IMAGE_SNAPSHOT_PENDING)
    except exception.InstanceNotFound:
        # possibility instance no longer exists, no point in continuing
        LOG.debug("Instance not found, could not set state %s "
                  "for instance.",
                  task_states.IMAGE_SNAPSHOT, instance=instance)
        return
    except exception.UnexpectedDeletingTaskStateError:
        LOG.debug("Instance being deleted, snapshot cannot continue",
                  instance=instance)
        return

    self._snapshot_instance(context, image_id, instance,
                            task_states.IMAGE_SNAPSHOT)
def _snapshot_instance(self, context, image_id, instance,
                       expected_task_state):
    context = context.elevated()

    instance.power_state = self._get_power_state(context, instance)
    try:
        instance.save()

        LOG.info('instance snapshotting', instance=instance)

        if instance.power_state != power_state.RUNNING:
            state = instance.power_state
            running = power_state.RUNNING
            LOG.warning('trying to snapshot a non-running instance: '
                        '(state: %(state)s expected: %(running)s)',
                        {'state': state, 'running': running},
                        instance=instance)

        self._notify_about_instance_usage(
            context, instance, "snapshot.start")
        compute_utils.notify_about_instance_snapshot(
            context, instance, self.host,
            phase=fields.NotificationPhase.START,
            snapshot_image_id=image_id)

        def update_task_state(task_state,
                              expected_state=expected_task_state):
            instance.task_state = task_state
            instance.save(expected_task_state=expected_state)

        with timeutils.StopWatch() as timer:
            self.driver.snapshot(context, instance, image_id,
                                 update_task_state)
        LOG.info('Took %0.2f seconds to snapshot the instance on '
                 'the hypervisor.', timer.elapsed(), instance=instance)

        instance.task_state = None
        instance.save(expected_task_state=task_states.IMAGE_UPLOADING)

        self._notify_about_instance_usage(context, instance,
                                          "snapshot.end")
        compute_utils.notify_about_instance_snapshot(
            context, instance, self.host,
            phase=fields.NotificationPhase.END,
            snapshot_image_id=image_id)
    except (exception.InstanceNotFound,
            exception.UnexpectedDeletingTaskStateError):
        # the instance got deleted during the snapshot
        # Quickly bail out of here
        msg = 'Instance disappeared during snapshot'
        LOG.debug(msg, instance=instance)
        try:
            image = self.image_api.get(context, image_id)
            if image['status'] != 'active':
                self.image_api.delete(context, image_id)
        except exception.ImageNotFound:
            LOG.debug('Image not found during clean up %s', image_id)
        except Exception:
            LOG.warning("Error while trying to clean up image %s",
                        image_id, instance=instance)
    except exception.ImageNotFound:
        instance.task_state = None
        instance.save()
        LOG.warning("Image not found during snapshot", instance=instance)
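All the instance.save(expected_task_state=...) calls above act as a compare-and-swap on task_state, which is what keeps the state machine (IMAGE_SNAPSHOT_PENDING, IMAGE_SNAPSHOT, IMAGE_PENDING_UPLOAD, IMAGE_UPLOADING, then None) safe against races with concurrent deletes. A toy sketch of the semantics, not Nova code (Nova enforces this atomically in the database layer):

# Toy illustration of the expected_task_state guard.
class UnexpectedTaskStateError(Exception):
    pass

def guarded_save(row, new_state, expected):
    """Move to new_state only if the current state matches expected."""
    expected = expected if isinstance(expected, (list, tuple)) else [expected]
    if row['task_state'] not in expected:
        raise UnexpectedTaskStateError(
            'expected %r, found %r' % (expected, row['task_state']))
    row['task_state'] = new_state

row = {'task_state': 'image_snapshot_pending'}
guarded_save(row, 'image_snapshot', 'image_snapshot_pending')   # ok
guarded_save(row, 'image_pending_upload', 'image_snapshot')     # ok
guarded_save(row, None, 'image_uploading')                      # raises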
5. nova/virt/libvirt/driver.py

This invokes the libvirt driver's snapshot logic; there is quite a lot of code here. In the end it calls root_disk.direct_snapshot to snapshot the Ceph RBD device and obtain a backend URL, and then calls the image API's update to set the image's location:

def snapshot(self, context, instance, image_id, update_task_state):
    """Create snapshot from a running VM instance.

    This command only works with qemu 0.14+
    """
    try:
        guest = self._host.get_guest(instance)

        # TODO(sahid): We are converting all calls from a
        # virDomain object to use nova.virt.libvirt.Guest.
        # We should be able to remove virt_dom at the end.
        virt_dom = guest._domain
    except exception.InstanceNotFound:
        raise exception.InstanceNotRunning(instance_id=instance.uuid)

    snapshot = self._image_api.get(context, image_id)

    # source_format is an on-disk format
    # source_type is a backend type
    disk_path, source_format = libvirt_utils.find_disk(guest)
    source_type = libvirt_utils.get_disk_type_from_path(disk_path)

    # We won't have source_type for raw or qcow2 disks, because we can't
    # determine that from the path. We should have it from the libvirt
    # xml, though.
    if source_type is None:
        source_type = source_format
    # For lxc instances we won't have it either from libvirt xml
    # (because we just gave libvirt the mounted filesystem), or the path,
    # so source_type is still going to be None. In this case,
    # root_disk is going to default to CONF.libvirt.images_type
    # below, which is still safe.

    image_format = CONF.libvirt.snapshot_image_format or source_type

    # NOTE(bfilippov): save lvm and rbd as raw
    if image_format == 'lvm' or image_format == 'rbd':
        image_format = 'raw'

    metadata = self._create_snapshot_metadata(instance.image_meta,
                                              instance,
                                              image_format,
                                              snapshot['name'])

    snapshot_name = uuidutils.generate_uuid(dashed=False)

    state = guest.get_power_state(self._host)

    # NOTE(dgenin): Instances with LVM encrypted ephemeral storage require
    #               cold snapshots. Currently, checking for encryption is
    #               redundant because LVM supports only cold snapshots.
    #               It is necessary in case this situation changes in the
    #               future.
    if (self._host.has_min_version(hv_type=host.HV_DRIVER_QEMU)
            and source_type not in ('lvm', 'qcow2')
            and not CONF.ephemeral_storage_encryption.enabled
            and not CONF.workarounds.disable_libvirt_livesnapshot
            # NOTE(rmk): We cannot perform live snapshots when a
            # managedSave file is present, so we will use the cold/legacy
            # method for instances which are shutdown or paused.
            # NOTE(mriedem): Live snapshot doesn't work with paused
            # instances on older versions of libvirt/qemu. We can likely
            # remove the restriction on PAUSED once we require
            # libvirt>=3.6.0 and qemu>=2.10 since that works with the
            # Pike Ubuntu Cloud Archive testing in Queens.
            and state not in (power_state.SHUTDOWN, power_state.PAUSED)):
        live_snapshot = True
        # Abort is an idempotent operation, so make sure any block
        # jobs which may have failed are ended. This operation also
        # confirms the running instance, as opposed to the system as a
        # whole, has a new enough version of the hypervisor (bug 1193146).
        try:
            guest.get_block_device(disk_path).abort_job()
        except libvirt.libvirtError as ex:
            error_code = ex.get_error_code()
            if error_code == libvirt.VIR_ERR_CONFIG_UNSUPPORTED:
                live_snapshot = False
            else:
                pass
    else:
        live_snapshot = False

    self._prepare_domain_for_snapshot(context, live_snapshot, state,
                                      instance)

    ceph_conf = dict()
    if source_type == "rbd":
        bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
            context, instance.uuid)
        for bdm in bdms:
            if bdm.get('boot_index', -1) == 0 and \
                    bdm.destination_type == 'volume':
                connection_info = jsonutils.loads(bdm.connection_info)
                ceph_conf["hosts"] = connection_info["data"]["hosts"]
                ceph_conf["auth_username"] = \
                    connection_info["data"]["auth_username"]
                LOG.info("Get ceph conf {ceph_conf}".format(
                    ceph_conf=ceph_conf))
                instance.ceph_conf = ceph_conf
                volume = self._volume_api.get(context, bdm['volume_id'])
                if volume.has_key('volume_image_metadata') and \
                        volume['volume_image_metadata'].get("stores", None):
                    metadata["backend"] = \
                        volume['volume_image_metadata']['stores']
                break
            if bdm.get('boot_index', -1) == 0 and \
                    bdm.destination_type == 'local':
                if CONF.libvirt.glance_backend_config:
                    metadata["backend"] = \
                        CONF.libvirt.glance_backend_config

    root_disk = self.image_backend.by_libvirt_path(
        instance, disk_path, image_type=source_type)
    LOG.info("Display image information, such as: instance is "
             "{instance}, disk_path is {disk_path}, source_format is "
             "{source_format}, source_type is {source_type}, "
             "image_format is {image_format}".format(
                 instance=instance, disk_path=disk_path,
                 source_format=source_format, source_type=source_type,
                 image_format=image_format))

    if live_snapshot:
        LOG.info("Beginning live snapshot process", instance=instance)
    else:
        LOG.info("Beginning cold snapshot process", instance=instance)

    update_task_state(task_state=task_states.IMAGE_PENDING_UPLOAD)
    update_task_state(task_state=task_states.IMAGE_UPLOADING,
                      expected_state=task_states.IMAGE_PENDING_UPLOAD)
    try:
        metadata['location'] = root_disk.direct_snapshot(
            context, snapshot_name, image_format, image_id,
            instance.image_ref)
        if CONF.enable_verify_image_md5sum:
            metadata['properties']['md5sum'] = \
                root_disk.calc_image_md5sum(metadata['location'])
        self._snapshot_domain(context, live_snapshot, virt_dom, state,
                              instance)
        self._image_api.update(context, image_id, metadata,
                               purge_props=False)
    except (NotImplementedError, exception.ImageUnacceptable,
            exception.Forbidden) as e:
        if type(e) != NotImplementedError:
            LOG.warning('Performing standard snapshot because direct '
                        'snapshot failed: %(error)s',
                        {'error': encodeutils.exception_to_unicode(e)})
        failed_snap = metadata.pop('location', None)
        if failed_snap:
            failed_snap = {'url': str(failed_snap)}
        root_disk.cleanup_direct_snapshot(failed_snap,
                                          also_destroy_volume=True,
                                          ignore_errors=True)
        update_task_state(task_state=task_states.IMAGE_PENDING_UPLOAD,
                          expected_state=task_states.IMAGE_UPLOADING)

        # TODO(nic): possibly abstract this out to the root_disk
        if source_type == 'rbd' and live_snapshot:
            # Standard snapshot uses qemu-img convert from RBD which is
            # not safe to run with live_snapshot.
            live_snapshot = False
            # Suspend the guest, so this is no longer a live snapshot
            self._prepare_domain_for_snapshot(context, live_snapshot,
                                              state, instance)

        snapshot_directory = CONF.libvirt.snapshots_directory
        fileutils.ensure_tree(snapshot_directory)
        with utils.tempdir(dir=snapshot_directory) as tmpdir:
            try:
                out_path = os.path.join(tmpdir, snapshot_name)
                if live_snapshot:
                    # NOTE(xqueralt): libvirt needs o+x in the tempdir
                    os.chmod(tmpdir, 0o701)
                    self._live_snapshot(context, instance, guest,
                                        disk_path, out_path,
                                        source_format, image_format,
                                        instance.image_meta)
                    if source_format == 'qcow2' and \
                            CONF.libvirt.convert_ceph_raw_for_qcow2:
                        snapshot_path = out_path + '.delta'
                        # use qemu-img convert to convert qcow2 img to
                        # ceph raw
                        metadata['location'] = root_disk.convert_to_ceph(
                            context, snapshot_path, image_format,
                            image_id)
                        if CONF.enable_verify_image_md5sum:
                            metadata['properties']['md5sum'] = \
                                root_disk.calc_image_md5sum(
                                    metadata['location'])
                else:
                    if source_format == 'qcow2' and \
                            CONF.libvirt.convert_ceph_raw_for_qcow2:
                        metadata['location'] = root_disk.convert_to_ceph(
                            context, None, image_format, image_id)
                        if CONF.enable_verify_image_md5sum:
                            metadata['properties']['md5sum'] = \
                                root_disk.calc_image_md5sum(
                                    metadata['location'])
                    else:
                        root_disk.snapshot_extract(out_path, image_format)
                if not CONF.libvirt.convert_ceph_raw_for_qcow2:
                    LOG.info("Snapshot extracted, beginning image upload",
                             instance=instance)
            except libvirt.libvirtError as ex:
                error_code = ex.get_error_code()
                if error_code == libvirt.VIR_ERR_NO_DOMAIN:
                    LOG.info('Instance %(instance_name)s disappeared '
                             'while taking snapshot of it: [Error Code '
                             '%(error_code)s] %(ex)s',
                             {'instance_name': instance.name,
                              'error_code': error_code,
                              'ex': ex},
                             instance=instance)
                    raise exception.InstanceNotFound(
                        instance_id=instance.uuid)
                else:
                    raise
            finally:
                self._snapshot_domain(context, live_snapshot, virt_dom,
                                      state, instance)

            update_task_state(
                task_state=task_states.IMAGE_UPLOADING,
                expected_state=task_states.IMAGE_PENDING_UPLOAD)
            if source_format == 'qcow2' and \
                    CONF.libvirt.convert_ceph_raw_for_qcow2:
                self._image_api.update(context, image_id, metadata,
                                       purge_props=False)
            else:
                # Upload that image to the image service
                LOG.debug('upload to glance, out_path:%s', out_path)
                with libvirt_utils.file_open(out_path, 'rb') as image_file:
                    # execute operation with disk concurrency semaphore
                    with compute_utils.disk_ops_semaphore:
                        self._image_api.update(context,
                                               image_id,
                                               metadata,
                                               image_file)
    except Exception:
        with excutils.save_and_reraise_exception():
            LOG.exception(_("Failed to snapshot image"))
            failed_snap = metadata.pop('location', None)
            if failed_snap:
                failed_snap = {'url': str(failed_snap)}
            root_disk.cleanup_direct_snapshot(
                failed_snap, also_destroy_volume=True,
                ignore_errors=True)

    LOG.info("Snapshot image upload complete", instance=instance)
def direct_snapshot(self, context, snapshot_name, image_format,
                    image_id, base_image_id):
    """Creates an RBD snapshot directly."""
    fsid = self.driver.get_fsid()
    # NOTE(nic): Nova has zero comprehension of how Glance's image store
    # is configured, but we can infer what storage pool Glance is using
    # by looking at the parent image.  If using authx, write access
    # should be enabled on that pool for the Nova user
    # parent_pool = self._get_parent_pool(context, base_image_id, fsid)
    # use instance pool to save image
    parent_pool = self.pool
    LOG.debug('self.path:%s, self.pool:%s', self.path, self.pool)

    # Snapshot the disk and clone it into Glance's storage pool.  librbd
    # requires that snapshots be set to "protected" in order to clone them
    self.driver.create_snap(self.rbd_name, snapshot_name, protect=True)
    location = {'url': 'rbd://%(fsid)s/%(pool)s/%(image)s/%(snap)s' %
                       dict(fsid=fsid,
                            pool=self.pool,
                            image=self.rbd_name,
                            snap=snapshot_name)}
    try:
        self.driver.clone(location, image_id, dest_pool=parent_pool)
        # Flatten the image, which detaches it from the source snapshot
        self.driver.flatten(image_id, pool=parent_pool)
    finally:
        # all done with the source snapshot, clean it up
        self.cleanup_direct_snapshot(location)

    # Glance makes a protected snapshot called 'snap' on uploaded
    # images and hands it out, so we'll do that too.  The name of
    # the snapshot doesn't really matter, this just uses what the
    # glance-store rbd backend sets (which is not configurable).
    self.driver.create_snap(image_id, 'snap', pool=parent_pool,
                            protect=True)
    return ('rbd://%(fsid)s/%(pool)s/%(image)s/snap' %
            dict(fsid=fsid, pool=parent_pool, image=image_id))
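Under the hood, direct_snapshot maps to a snapshot/protect/clone/flatten sequence in librbd. A standalone sketch of that sequence with the Python rbd/rados bindings; the pool and image names are hypothetical and error handling is omitted:

# Standalone sketch of snap/protect/clone/flatten with the python rbd
# bindings; 'vms', 'instance-disk' and 'image-uuid' are hypothetical.
import rados
import rbd

cluster = rados.Rados(conffile='/etc/ceph/ceph.conf')
cluster.connect()
try:
    ioctx = cluster.open_ioctx('vms')
    try:
        with rbd.Image(ioctx, 'instance-disk') as disk:
            disk.create_snap('snap-abc')      # snapshot the root disk
            disk.protect_snap('snap-abc')     # librbd requires this to clone
        rbd.RBD().clone(ioctx, 'instance-disk', 'snap-abc',
                        ioctx, 'image-uuid')  # clone into the new image
        with rbd.Image(ioctx, 'image-uuid') as image:
            image.flatten()                   # detach from the parent snap
            image.create_snap('snap')         # what glance-store exposes
            image.protect_snap('snap')
        with rbd.Image(ioctx, 'instance-disk') as disk:
            disk.unprotect_snap('snap-abc')   # source snapshot cleanup
            disk.remove_snap('snap-abc')
    finally:
        ioctx.close()
finally:
    cluster.shutdown()

# The resulting Glance location follows the rbd URL scheme:
#   rbd://<fsid>/<pool>/<image-uuid>/snap

Note that in the customized direct_snapshot quoted above, parent_pool is the instance's own pool (the upstream _get_parent_pool lookup is commented out), so the image clone lands in the instance pool rather than Glance's pool.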



