xen: implement save/restore
This patch implements Xen save/restore and migration.
Saving is triggered via xenbus, which is polled in
drivers/xen/manage.c. When a suspend request comes in, the kernel
prepares itself for saving by:
1 - Freeze all processes. This is primarily to prevent any
partially-completed pagetable updates from confusing the suspend
process. If CONFIG_PREEMPT isn't defined, then this isn't necessary.
2 - Suspend xenbus and other devices
3 - Stop_machine, to make sure all the other vcpus are quiescent. The
Xen tools require the domain to run its save off vcpu0.
4 - Within the stop_machine state, it pins any unpinned pgds (under
construction or destruction), performs canonicalizes various other
pieces of state (mostly converting mfns to pfns), and finally
5 - Suspend the domain
Restore reverses the steps used to save the domain, ending when all
the frozen processes are thawed.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 70375a6..73d78dc 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -674,6 +674,89 @@
return ret;
}
+static void restore_cpu_virqs(unsigned int cpu)
+{
+ struct evtchn_bind_virq bind_virq;
+ int virq, irq, evtchn;
+
+ for (virq = 0; virq < NR_VIRQS; virq++) {
+ if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1)
+ continue;
+
+ BUG_ON(irq_info[irq].type != IRQT_VIRQ);
+ BUG_ON(irq_info[irq].index != virq);
+
+ /* Get a new binding from Xen. */
+ bind_virq.virq = virq;
+ bind_virq.vcpu = cpu;
+ if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
+ &bind_virq) != 0)
+ BUG();
+ evtchn = bind_virq.port;
+
+ /* Record the new mapping. */
+ evtchn_to_irq[evtchn] = irq;
+ irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn);
+ bind_evtchn_to_cpu(evtchn, cpu);
+
+ /* Ready for use. */
+ unmask_evtchn(evtchn);
+ }
+}
+
+static void restore_cpu_ipis(unsigned int cpu)
+{
+ struct evtchn_bind_ipi bind_ipi;
+ int ipi, irq, evtchn;
+
+ for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) {
+ if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1)
+ continue;
+
+ BUG_ON(irq_info[irq].type != IRQT_IPI);
+ BUG_ON(irq_info[irq].index != ipi);
+
+ /* Get a new binding from Xen. */
+ bind_ipi.vcpu = cpu;
+ if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
+ &bind_ipi) != 0)
+ BUG();
+ evtchn = bind_ipi.port;
+
+ /* Record the new mapping. */
+ evtchn_to_irq[evtchn] = irq;
+ irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn);
+ bind_evtchn_to_cpu(evtchn, cpu);
+
+ /* Ready for use. */
+ unmask_evtchn(evtchn);
+
+ }
+}
+
+void xen_irq_resume(void)
+{
+ unsigned int cpu, irq, evtchn;
+
+ init_evtchn_cpu_bindings();
+
+ /* New event-channel space is not 'live' yet. */
+ for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
+ mask_evtchn(evtchn);
+
+ /* No IRQ <-> event-channel mappings. */
+ for (irq = 0; irq < NR_IRQS; irq++)
+ irq_info[irq].evtchn = 0; /* zap event-channel binding */
+
+ for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
+ evtchn_to_irq[evtchn] = -1;
+
+ for_each_possible_cpu(cpu) {
+ restore_cpu_virqs(cpu);
+ restore_cpu_ipis(cpu);
+ }
+}
+
static struct irq_chip xen_dynamic_chip __read_mostly = {
.name = "xen-dyn",
.mask = disable_dynirq,
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 52b6b41..e9e1116 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -471,14 +471,14 @@
return 0;
}
-static int gnttab_resume(void)
+int gnttab_resume(void)
{
if (max_nr_grant_frames() < nr_grant_frames)
return -ENOSYS;
return gnttab_map(0, nr_grant_frames - 1);
}
-static int gnttab_suspend(void)
+int gnttab_suspend(void)
{
arch_gnttab_unmap_shared(shared, nr_grant_frames);
return 0;
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index aa7af9e..ba85fa2 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -5,21 +5,113 @@
#include <linux/err.h>
#include <linux/reboot.h>
#include <linux/sysrq.h>
+#include <linux/stop_machine.h>
+#include <linux/freezer.h>
#include <xen/xenbus.h>
+#include <xen/grant_table.h>
+#include <xen/events.h>
+#include <xen/hvc-console.h>
+#include <xen/xen-ops.h>
-#define SHUTDOWN_INVALID -1
-#define SHUTDOWN_POWEROFF 0
-#define SHUTDOWN_SUSPEND 2
-/* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only
- * report a crash, not be instructed to crash!
- * HALT is the same as POWEROFF, as far as we're concerned. The tools use
- * the distinction when we return the reason code to them.
- */
-#define SHUTDOWN_HALT 4
+#include <asm/xen/hypercall.h>
+#include <asm/xen/page.h>
+
+enum shutdown_state {
+ SHUTDOWN_INVALID = -1,
+ SHUTDOWN_POWEROFF = 0,
+ SHUTDOWN_SUSPEND = 2,
+ /* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only
+ report a crash, not be instructed to crash!
+ HALT is the same as POWEROFF, as far as we're concerned. The tools use
+ the distinction when we return the reason code to them. */
+ SHUTDOWN_HALT = 4,
+};
/* Ignore multiple shutdown requests. */
-static int shutting_down = SHUTDOWN_INVALID;
+static enum shutdown_state shutting_down = SHUTDOWN_INVALID;
+
+static int xen_suspend(void *data)
+{
+ int *cancelled = data;
+
+ BUG_ON(!irqs_disabled());
+
+ load_cr3(swapper_pg_dir);
+
+ xen_mm_pin_all();
+ gnttab_suspend();
+ xen_time_suspend();
+ xen_pre_suspend();
+
+ /*
+ * This hypercall returns 1 if suspend was cancelled
+ * or the domain was merely checkpointed, and 0 if it
+ * is resuming in a new domain.
+ */
+ *cancelled = HYPERVISOR_suspend(virt_to_mfn(xen_start_info));
+
+ xen_post_suspend(*cancelled);
+ xen_time_resume();
+ gnttab_resume();
+ xen_mm_unpin_all();
+
+ if (!*cancelled) {
+ xen_irq_resume();
+ xen_console_resume();
+ }
+
+ return 0;
+}
+
+static void do_suspend(void)
+{
+ int err;
+ int cancelled = 1;
+
+ shutting_down = SHUTDOWN_SUSPEND;
+
+#ifdef CONFIG_PREEMPT
+ /* If the kernel is preemptible, we need to freeze all the processes
+ to prevent them from being in the middle of a pagetable update
+ during suspend. */
+ err = freeze_processes();
+ if (err) {
+ printk(KERN_ERR "xen suspend: freeze failed %d\n", err);
+ return;
+ }
+#endif
+
+ err = device_suspend(PMSG_SUSPEND);
+ if (err) {
+ printk(KERN_ERR "xen suspend: device_suspend %d\n", err);
+ goto out;
+ }
+
+ printk("suspending xenbus...\n");
+ /* XXX use normal device tree? */
+ xenbus_suspend();
+
+ err = stop_machine_run(xen_suspend, &cancelled, 0);
+ if (err) {
+ printk(KERN_ERR "failed to start xen_suspend: %d\n", err);
+ goto out;
+ }
+
+ if (!cancelled)
+ xenbus_resume();
+ else
+ xenbus_suspend_cancel();
+
+ device_resume();
+
+
+out:
+#ifdef CONFIG_PREEMPT
+ thaw_processes();
+#endif
+ shutting_down = SHUTDOWN_INVALID;
+}
static void shutdown_handler(struct xenbus_watch *watch,
const char **vec, unsigned int len)
@@ -52,11 +144,17 @@
}
if (strcmp(str, "poweroff") == 0 ||
- strcmp(str, "halt") == 0)
+ strcmp(str, "halt") == 0) {
+ shutting_down = SHUTDOWN_POWEROFF;
orderly_poweroff(false);
- else if (strcmp(str, "reboot") == 0)
+ } else if (strcmp(str, "reboot") == 0) {
+ shutting_down = SHUTDOWN_POWEROFF; /* ? */
ctrl_alt_del();
- else {
+#ifdef CONFIG_PM_SLEEP
+ } else if (strcmp(str, "suspend") == 0) {
+ do_suspend();
+#endif
+ } else {
printk(KERN_INFO "Ignoring shutdown request: %s\n", str);
shutting_down = SHUTDOWN_INVALID;
}