Jim Fehlig
2018-12-10 18:01:45 UTC
This can happen via `xl destroy`, for example. When this happens,
libvirt is stuck in an inconsistent state: libvirt believes the domain
is still running, but attempts to use libvirt’s APIs to shut down the
domain fail. The only way out of this situation is to restart libvirt.
Marek asked about this last Friday
https://www.redhat.com/archives/libvir-list/2018-December/msg00196.html
and then spent the day creating a patch :-)
https://www.redhat.com/archives/libvir-list/2018-December/msg00212.html
I'll review/test his patches today, but thank you for taking a stab at this problem!
Regards,
Jim
To prevent this from happening, process LIBXL_EVENT_TYPE_DOMAIN_DEATH as
well as LIBXL_EVENT_TYPE_DOMAIN_SHUTDOWN, but only if libvirt has not
already begun to destroy the domain.
---
src/conf/domain_conf.h | 4 ++++
src/libxl/libxl_domain.c | 24 +++++++++++++++++++-----
2 files changed, 23 insertions(+), 5 deletions(-)
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index b24e6ec3de..d3520bde15 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -2620,6 +2620,10 @@ struct _virDomainObj {
unsigned int updated : 1;
unsigned int removing : 1;
+ /* Only used by the Xen backend */
+ unsigned int being_destroyed_by_libvirt : 1;
+ unsigned int already_destroyed : 1;
+
virDomainDefPtr def; /* The current definition */
virDomainDefPtr newDef; /* New definition to activate at shutdown */
diff --git a/src/libxl/libxl_domain.c b/src/libxl/libxl_domain.c
index 5fe3f44fbe..680e5f209f 100644
--- a/src/libxl/libxl_domain.c
+++ b/src/libxl/libxl_domain.c
@@ -482,9 +482,21 @@ libxlDomainShutdownThread(void *opaque)
goto cleanup;
}
+ VIR_INFO("Domain %d died", event->domid);
+
if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_MODIFY) < 0)
goto cleanup;
+ if (LIBXL_EVENT_TYPE_DOMAIN_DEATH == ev->type) {
+ if (vm->being_destroyed_by_libvirt) {
+ VIR_INFO("VM %d already being destroyed by libvirt",
event->domid);
+ goto cleanup;
+ }
+
+ VIR_INFO("Marking VM %d as already destroyed", event->domid);
+ vm->already_destroyed = true;
+ }
+
if (xl_reason == LIBXL_SHUTDOWN_REASON_POWEROFF) {
virDomainObjSetState(vm, VIR_DOMAIN_SHUTOFF,
VIR_DOMAIN_SHUTOFF_SHUTDOWN);
@@ -620,7 +632,8 @@ libxlDomainEventHandler(void *data,
VIR_LIBXL_EVENT_CONST libxl_event *event)
virThread thread;
libxlDriverConfigPtr cfg;
- if (event->type != LIBXL_EVENT_TYPE_DOMAIN_SHUTDOWN) {
+ if (LIBXL_EVENT_TYPE_DOMAIN_DEATH != event->type &&
+ LIBXL_EVENT_TYPE_DOMAIN_SHUTDOWN != event->type) {
VIR_INFO("Unhandled event type %d", event->type);
goto error;
}
@@ -629,18 +642,16 @@ libxlDomainEventHandler(void *data,
VIR_LIBXL_EVENT_CONST libxl_event *event)
* Start a thread to handle shutdown. We don't want to be tying up
* libxl's event machinery by doing a potentially lengthy shutdown.
*/
- if (VIR_ALLOC(shutdown_info) < 0)
- goto error;
+ while (VIR_ALLOC(shutdown_info) < 0) {}
shutdown_info->driver = driver;
shutdown_info->event = (libxl_event *)event;
- if (virThreadCreate(&thread, false, libxlDomainShutdownThread,
+ while (virThreadCreate(&thread, false, libxlDomainShutdownThread,
shutdown_info) < 0) {
/*
* Not much we can do on error here except log it.
*/
VIR_ERROR(_("Failed to create thread to handle domain shutdown"));
- goto error;
}
/*
@@ -752,6 +763,9 @@ libxlDomainDestroyInternal(libxlDriverPrivatePtr driver,
{
libxlDriverConfigPtr cfg = libxlDriverConfigGet(driver);
int ret = -1;
+ if (vm->already_destroyed)
+ return -1;
+ vm->being_destroyed_by_libvirt = true;
/* Unlock virDomainObj during destroy, which can take considerable
* time on large memory domains.
--
libvir-list mailing list
https://www.redhat.com/mailman/listinfo/libvir-list
well as LIBXL_EVENT_TYPE_DOMAIN_SHUTDOWN, but only if libvirt has not
already begun to destroy the domain.
---
src/conf/domain_conf.h | 4 ++++
src/libxl/libxl_domain.c | 24 +++++++++++++++++++-----
2 files changed, 23 insertions(+), 5 deletions(-)
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index b24e6ec3de..d3520bde15 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -2620,6 +2620,10 @@ struct _virDomainObj {
unsigned int updated : 1;
unsigned int removing : 1;
+ /* Only used by the Xen backend */
+ unsigned int being_destroyed_by_libvirt : 1;
+ unsigned int already_destroyed : 1;
+
virDomainDefPtr def; /* The current definition */
virDomainDefPtr newDef; /* New definition to activate at shutdown */
diff --git a/src/libxl/libxl_domain.c b/src/libxl/libxl_domain.c
index 5fe3f44fbe..680e5f209f 100644
--- a/src/libxl/libxl_domain.c
+++ b/src/libxl/libxl_domain.c
@@ -482,9 +482,21 @@ libxlDomainShutdownThread(void *opaque)
goto cleanup;
}
+ VIR_INFO("Domain %d died", event->domid);
+
if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_MODIFY) < 0)
goto cleanup;
+ if (LIBXL_EVENT_TYPE_DOMAIN_DEATH == ev->type) {
+ if (vm->being_destroyed_by_libvirt) {
+ VIR_INFO("VM %d already being destroyed by libvirt",
event->domid);
+ goto cleanup;
+ }
+
+ VIR_INFO("Marking VM %d as already destroyed", event->domid);
+ vm->already_destroyed = true;
+ }
+
if (xl_reason == LIBXL_SHUTDOWN_REASON_POWEROFF) {
virDomainObjSetState(vm, VIR_DOMAIN_SHUTOFF,
VIR_DOMAIN_SHUTOFF_SHUTDOWN);
@@ -620,7 +632,8 @@ libxlDomainEventHandler(void *data,
VIR_LIBXL_EVENT_CONST libxl_event *event)
virThread thread;
libxlDriverConfigPtr cfg;
- if (event->type != LIBXL_EVENT_TYPE_DOMAIN_SHUTDOWN) {
+ if (LIBXL_EVENT_TYPE_DOMAIN_DEATH != event->type &&
+ LIBXL_EVENT_TYPE_DOMAIN_SHUTDOWN != event->type) {
VIR_INFO("Unhandled event type %d", event->type);
goto error;
}
@@ -629,18 +642,16 @@ libxlDomainEventHandler(void *data,
VIR_LIBXL_EVENT_CONST libxl_event *event)
* Start a thread to handle shutdown. We don't want to be tying up
* libxl's event machinery by doing a potentially lengthy shutdown.
*/
- if (VIR_ALLOC(shutdown_info) < 0)
- goto error;
+ while (VIR_ALLOC(shutdown_info) < 0) {}
shutdown_info->driver = driver;
shutdown_info->event = (libxl_event *)event;
- if (virThreadCreate(&thread, false, libxlDomainShutdownThread,
+ while (virThreadCreate(&thread, false, libxlDomainShutdownThread,
shutdown_info) < 0) {
/*
* Not much we can do on error here except log it.
*/
VIR_ERROR(_("Failed to create thread to handle domain shutdown"));
- goto error;
}
/*
@@ -752,6 +763,9 @@ libxlDomainDestroyInternal(libxlDriverPrivatePtr driver,
{
libxlDriverConfigPtr cfg = libxlDriverConfigGet(driver);
int ret = -1;
+ if (vm->already_destroyed)
+ return -1;
+ vm->being_destroyed_by_libvirt = true;
/* Unlock virDomainObj during destroy, which can take considerable
* time on large memory domains.
--
libvir-list mailing list
https://www.redhat.com/mailman/listinfo/libvir-list