[PATCH v8 5/6] kvm: KVM_IRQ_ACKFD

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]


Enable a mechanism for IRQ ACKs to be exposed through an eventfd.  The
user can specify the GSI and optionally an IRQ source ID and have the
provided eventfd trigger whenever the irqchip resamples it's inputs,
for instance on EOI.

Signed-off-by: Alex Williamson <alex.williamson@xxxxxxxxxx>
---

 Documentation/virtual/kvm/api.txt |   18 ++
 arch/x86/kvm/x86.c                |    2 
 include/linux/kvm.h               |   16 ++
 include/linux/kvm_host.h          |   13 ++
 virt/kvm/eventfd.c                |  285 +++++++++++++++++++++++++++++++++++++
 virt/kvm/kvm_main.c               |   10 +
 6 files changed, 344 insertions(+)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 17cd599..77b4837 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2011,6 +2011,24 @@ of IRQ source IDs.  These IDs are also shared with KVM internal users
 (ex. KVM assigned devices, PIT, shared user ID), therefore not all IDs
 may be allocated through this interface.
 
+4.78 KVM_IRQ_ACKFD
+
+Capability: KVM_CAP_IRQ_ACKFD
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_irq_ackfd (in)
+Returns: 0 on success, -errno on error
+
+Allows userspace notification of IRQ ACKs, or resampling of irqchip
+inputs, often associated with an EOI.  User provided kvm_irq_ackfd.fd
+and kvm_irq_ackfd.gsi are required and result in an eventfd trigger
+whenever the GSI is acknowledged.  When KVM_CAP_IRQ_ACKFD_IRQ_SOURCE_FD
+is available, KVM_IRQ_ACKFD supports the KVM_IRQ_ACKFD_FLAG_IRQ_SOURCE_ID
+flag which indicates kvm_irqfd.irq_source_id is provided.  With this,
+the eventfd is only triggered when the specified IRQ source ID is
+pending.  On deassign, fd, gsi, and irq_source_id (if provided on assign)
+must be provided.
+
 
 5. The kvm_run structure
 ------------------------
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 19680ed..3d98e59 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2176,6 +2176,8 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_KVMCLOCK_CTRL:
 	case KVM_CAP_IRQFD_IRQ_SOURCE_ID:
 	case KVM_CAP_IRQFD_ASSERT_ONLY:
+	case KVM_CAP_IRQ_ACKFD:
+	case KVM_CAP_IRQ_ACKFD_IRQ_SOURCE_ID:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 19b1235..0f53bd5 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -621,6 +621,8 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_NR_IRQ_SOURCE_ID 81
 #define KVM_CAP_IRQFD_IRQ_SOURCE_ID 82
 #define KVM_CAP_IRQFD_ASSERT_ONLY 83
+#define KVM_CAP_IRQ_ACKFD 84
+#define KVM_CAP_IRQ_ACKFD_IRQ_SOURCE_ID 85
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -707,6 +709,18 @@ struct kvm_irq_source_id {
 	__u8 pad[24];
 };
 
+#define KVM_IRQ_ACKFD_FLAG_DEASSIGN (1 << 0)
+/* Available with KVM_CAP_IRQ_ACKFD_IRQ_SOURCE_ID */
+#define KVM_IRQ_ACKFD_FLAG_IRQ_SOURCE_ID (1 << 1)
+
+struct kvm_irq_ackfd {
+	__u32 flags;
+	__u32 fd;
+	__u32 gsi;
+	__u32 irq_source_id;
+	__u8 pad[16];
+};
+
 struct kvm_clock_data {
 	__u64 clock;
 	__u32 flags;
@@ -849,6 +863,8 @@ struct kvm_s390_ucas_mapping {
 #define KVM_PPC_ALLOCATE_HTAB	  _IOWR(KVMIO, 0xa7, __u32)
 /* Available with KVM_CAP_IRQ_SOURCE_ID */
 #define KVM_IRQ_SOURCE_ID         _IOWR(KVMIO, 0xa8, struct kvm_irq_source_id)
+/* Available with KVM_CAP_IRQ_ACKFD */
+#define KVM_IRQ_ACKFD             _IOW(KVMIO,  0xa9, struct kvm_irq_ackfd)
 
 /*
  * ioctls for vcpu fds
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ea6d7a1..cdc55c2 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -285,6 +285,10 @@ struct kvm {
 		struct list_head  items;
 	} irqfds;
 	struct list_head ioeventfds;
+	struct {
+		spinlock_t        lock;
+		struct list_head  items;
+	} irq_ackfds;
 #endif
 	struct kvm_vm_stat stat;
 	struct kvm_arch arch;
@@ -831,6 +835,8 @@ int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args);
 void kvm_irqfd_release(struct kvm *kvm);
 void kvm_irq_routing_update(struct kvm *, struct kvm_irq_routing_table *);
 int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
+int kvm_irq_ackfd(struct kvm *kvm, struct kvm_irq_ackfd *args);
+void kvm_irq_ackfd_release(struct kvm *kvm);
 
 #else
 
@@ -843,6 +849,13 @@ static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
 
 static inline void kvm_irqfd_release(struct kvm *kvm) {}
 
+static inline int kvm_irq_ackfd(struct kvm *kvm, struct kvm_irq_ackfd *args)
+{
+	return -EINVAL;
+}
+
+static inline void kvm_irq_ackfd_release(struct kvm *kvm) {}
+
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 static inline void kvm_irq_routing_update(struct kvm *kvm,
 					  struct kvm_irq_routing_table *irq_rt)
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index df41038..ff5c784 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -300,6 +300,8 @@ kvm_eventfd_init(struct kvm *kvm)
 	spin_lock_init(&kvm->irqfds.lock);
 	INIT_LIST_HEAD(&kvm->irqfds.items);
 	INIT_LIST_HEAD(&kvm->ioeventfds);
+	spin_lock_init(&kvm->irq_ackfds.lock);
+	INIT_LIST_HEAD(&kvm->irq_ackfds.items);
 }
 
 /*
@@ -668,3 +670,286 @@ kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 
 	return kvm_assign_ioeventfd(kvm, args);
 }
+
+/*
+ * --------------------------------------------------------------------
+ * irq_ackfd: Expose IRQ ACKs to userspace via eventfd
+ *
+ * userspace can register to recieve eventfd notification for ACKs of
+ * interrupt lines.
+ * --------------------------------------------------------------------
+ */
+
+struct _irq_ackfd {
+	struct kvm *kvm;
+	struct eventfd_ctx *eventfd; /* signaled on ack */
+	struct kvm_irq_ack_notifier notifier;
+	/* Setup/shutdown */
+	wait_queue_t wait;
+	poll_table pt;
+	struct work_struct shutdown;
+	struct list_head list;
+};
+
+/* Called under irq_ackfds.lock */
+static void irq_ackfd_shutdown(struct work_struct *work)
+{
+	struct _irq_ackfd *ackfd = container_of(work, struct _irq_ackfd,
+						shutdown);
+	struct kvm *kvm = ackfd->kvm;
+	u64 cnt;
+
+	/*
+	 * Stop ACK signaling
+	 */
+	kvm_unregister_irq_ack_notifier(kvm, &ackfd->notifier);
+
+	/*
+	 * Synchronize with the wait-queue and unhook ourselves to prevent
+	 * further events.
+	 */
+	eventfd_ctx_remove_wait_queue(ackfd->eventfd, &ackfd->wait, &cnt);
+
+	/*
+	 * Release resources
+	 */
+	eventfd_ctx_put(ackfd->eventfd);
+	kfree(ackfd);
+}
+
+/* assumes kvm->irq_ackfds.lock is held */
+static bool irq_ackfd_is_active(struct _irq_ackfd *ackfd)
+{
+	return list_empty(&ackfd->list) ? false : true;
+}
+
+/*
+ * Mark the irq_ackfd as inactive and schedule it for removal
+ *
+ * assumes kvm->irq_ackfds.lock is held
+ */
+static void irq_ackfd_deactivate(struct _irq_ackfd *ackfd)
+{
+	BUG_ON(!irq_ackfd_is_active(ackfd));
+
+	list_del_init(&ackfd->list);
+
+	/* Borrow irqfd's workqueue, we don't need our own. */
+	queue_work(irqfd_cleanup_wq, &ackfd->shutdown);
+}
+
+/*
+ * Called with wqh->lock held and interrupts disabled
+ */
+static int irq_ackfd_wakeup(wait_queue_t *wait, unsigned mode,
+			    int sync, void *key)
+{
+	unsigned long flags = (unsigned long)key;
+
+	if (unlikely(flags & POLLHUP)) {
+		/* The eventfd is closing, detach from KVM */
+		struct _irq_ackfd *ackfd;
+		unsigned long flags;
+
+		ackfd = container_of(wait, struct _irq_ackfd, wait);
+
+		spin_lock_irqsave(&ackfd->kvm->irq_ackfds.lock, flags);
+
+		/*
+		 * We must check if someone deactivated the irq_ackfd before
+		 * we could acquire the irq_ackfds.lock since the item is
+		 * deactivated from the KVM side before it is unhooked from
+		 * the wait-queue.  If it is already deactivated, we can
+		 * simply return knowing the other side will cleanup for us.
+		 * We cannot race against the irq_ackfd going away since the
+		 * other side is required to acquire wqh->lock, which we hold
+		 */
+		if (irq_ackfd_is_active(ackfd))
+				irq_ackfd_deactivate(ackfd);
+
+		spin_unlock_irqrestore(&ackfd->kvm->irq_ackfds.lock, flags);
+	}
+
+	return 0;
+}
+
+static void irq_ackfd_ptable_queue_proc(struct file *file,
+					wait_queue_head_t *wqh,
+					poll_table *pt)
+{
+	struct _irq_ackfd *ackfd = container_of(pt, struct _irq_ackfd, pt);
+	add_wait_queue(wqh, &ackfd->wait);
+}
+
+/*
+ * This function is called as the kvm VM fd is being released. Shutdown all
+ * irq_ackfds that still remain open
+ */
+void kvm_irq_ackfd_release(struct kvm *kvm)
+{
+	struct _irq_ackfd *tmp, *ackfd;
+
+	spin_lock_irq(&kvm->irq_ackfds.lock);
+
+	list_for_each_entry_safe(ackfd, tmp, &kvm->irq_ackfds.items, list)
+		irq_ackfd_deactivate(ackfd);
+
+	spin_unlock_irq(&kvm->irq_ackfds.lock);
+
+	flush_workqueue(irqfd_cleanup_wq);
+}
+
+static void irq_ackfd_acked(struct kvm_irq_ack_notifier *notifier)
+{
+	struct _irq_ackfd *ackfd;
+
+	ackfd = container_of(notifier, struct _irq_ackfd, notifier);
+
+	eventfd_signal(ackfd->eventfd, 1);
+}
+
+static int kvm_assign_irq_ackfd(struct kvm *kvm, struct kvm_irq_ackfd *args)
+{
+	struct file *file = NULL;
+	struct eventfd_ctx *eventfd = NULL;
+	struct _irq_ackfd *ackfd = NULL, *tmp;
+	int ret;
+	u64 cnt;
+
+	file = eventfd_fget(args->fd);
+	if (IS_ERR(file)) {
+		ret = PTR_ERR(file);
+		goto fail;
+	}
+
+	eventfd = eventfd_ctx_fileget(file);
+	if (IS_ERR(eventfd)) {
+		ret = PTR_ERR(eventfd);
+		goto fail;
+	}
+
+	ackfd = kzalloc(sizeof(*ackfd), GFP_KERNEL);
+	if (!ackfd) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	INIT_LIST_HEAD(&ackfd->list);
+	INIT_WORK(&ackfd->shutdown, irq_ackfd_shutdown);
+	ackfd->kvm = kvm;
+	ackfd->eventfd = eventfd;
+	ackfd->notifier.gsi = args->gsi;
+	if (args->flags & KVM_IRQ_ACKFD_FLAG_IRQ_SOURCE_ID)
+		ackfd->notifier.irq_source_id = args->irq_source_id;
+	else
+		ackfd->notifier.irq_source_id = -1;
+	ackfd->notifier.irq_acked = irq_ackfd_acked;
+
+	/*
+	 * Install our own custom wake-up handling so we are notified via
+	 * a callback whenever someone releases the underlying eventfd
+	 */
+	init_waitqueue_func_entry(&ackfd->wait, irq_ackfd_wakeup);
+	init_poll_funcptr(&ackfd->pt, irq_ackfd_ptable_queue_proc);
+
+	/*
+	 * Install the wait queue function to allow cleanup when the
+	 * eventfd is closed by the user.  This grabs the wqh lock, so
+	 * we do it out of spinlock, holding the file reference ensures
+	 * we won't see a POLLHUP until setup is complete.
+	 */
+	file->f_op->poll(file, &ackfd->pt);
+
+	spin_lock_irq(&kvm->irq_ackfds.lock);
+
+	/* Check for duplicates.  */
+	list_for_each_entry(tmp, &kvm->irq_ackfds.items, list) {
+		if (ackfd->eventfd == tmp->eventfd &&
+		    ackfd->notifier.gsi == tmp->notifier.gsi &&
+		    ackfd->notifier.irq_source_id ==
+		    tmp->notifier.irq_source_id) {
+			spin_unlock_irq(&kvm->irq_ackfds.lock);
+			ret = -EBUSY;
+			goto fail_unqueue;
+		}
+	}
+
+	list_add_tail(&ackfd->list, &kvm->irq_ackfds.items);
+
+	spin_unlock_irq(&kvm->irq_ackfds.lock);
+
+	/* This can sleep, so register after spinlock.  */
+	kvm_register_irq_ack_notifier(kvm, &ackfd->notifier);
+
+	fput(file); /* Enable POLLHUP */
+
+	return 0;
+
+fail_unqueue:
+	eventfd_ctx_remove_wait_queue(eventfd, &ackfd->wait, &cnt);
+fail:
+	if (eventfd && !IS_ERR(eventfd))
+		eventfd_ctx_put(eventfd);
+
+	if (file && !IS_ERR(file))
+		fput(file);
+
+	kfree(ackfd);
+
+	return ret;
+}
+
+static int kvm_deassign_irq_ackfd(struct kvm *kvm, struct kvm_irq_ackfd *args)
+{
+	struct eventfd_ctx *eventfd = NULL;
+	struct _irq_ackfd *ackfd;
+	int irq_source_id = -1, ret = -ENOENT;
+
+	eventfd = eventfd_ctx_fdget(args->fd);
+	if (IS_ERR(eventfd)) {
+		ret = PTR_ERR(eventfd);
+		goto fail;
+	}
+
+	if (args->flags & KVM_IRQ_ACKFD_FLAG_IRQ_SOURCE_ID)
+		irq_source_id = args->irq_source_id;
+
+	spin_lock_irq(&kvm->irq_ackfds.lock);
+
+	list_for_each_entry(ackfd, &kvm->irq_ackfds.items, list) {
+		if (ackfd->eventfd == eventfd &&
+		    ackfd->notifier.gsi == args->gsi &&
+		    ackfd->notifier.irq_source_id == irq_source_id) {
+			irq_ackfd_deactivate(ackfd);
+			ret = 0;
+			break;
+		}
+	}
+
+	spin_unlock_irq(&kvm->irq_ackfds.lock);
+
+fail:
+	if (eventfd && !IS_ERR(eventfd))
+		eventfd_ctx_put(eventfd);
+
+	/*
+	 * Block until we know all outstanding shutdown jobs have completed
+	 * so that we guarantee there will not be any more ACKs signaled on
+	 * this eventfd once this deassign function returns.
+	 */
+	flush_workqueue(irqfd_cleanup_wq);
+
+	return ret;
+}
+
+int kvm_irq_ackfd(struct kvm *kvm, struct kvm_irq_ackfd *args)
+{
+	if (args->flags & ~(KVM_IRQ_ACKFD_FLAG_DEASSIGN |
+			    KVM_IRQ_ACKFD_FLAG_IRQ_SOURCE_ID))
+		return -EINVAL;
+
+	if (args->flags & KVM_IRQ_ACKFD_FLAG_DEASSIGN)
+		return kvm_deassign_irq_ackfd(kvm, args);
+
+	return kvm_assign_irq_ackfd(kvm, args);
+}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e120cb3..bbe3a20 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -619,6 +619,7 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
 	struct kvm *kvm = filp->private_data;
 
 	kvm_irqfd_release(kvm);
+	kvm_irq_ackfd_release(kvm);
 
 	kvm_put_kvm(kvm);
 	return 0;
@@ -2109,6 +2110,15 @@ static long kvm_vm_ioctl(struct file *filp,
 		break;
 	}
 #endif
+	case KVM_IRQ_ACKFD: {
+		struct kvm_irq_ackfd data;
+
+		r = -EFAULT;
+		if (copy_from_user(&data, argp, sizeof(data)))
+			goto out;
+		r = kvm_irq_ackfd(kvm, &data);
+		break;
+	}
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
 		if (r == -ENOTTY)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[Other Archives]     [Linux Kernel Newbies]     [Linux Driver Development]     [Linux Kbuild]     [Fedora Kernel]     [Linux Kernel Testers]     [Linux SH]     [Linux Omap]     [Linux Tape]     [Linux Input]     [Linux LEDS]     [Linux Kernel Janitors]     [Linux Kernel Packagers]     [Linux Doc]     [Linux Man Pages]     [Linux API]     [Linux Memory Management]     [Linux Modules]     [Linux Standards]     [Kernel Announce]     [Netdev]     [Git]     [Linux PCI]     Linux CAN Development     [Linux I2C]     [Linux RDMA]     [Linux NUMA]     [Netfilter]     [Netfilter Devel]     [SELinux]     [Bugtraq]     [FIO]     [Linux Perf Users]     [Linux Serial]     [Linux PPP]     [Linux ISDN]     [Linux Next]     [Kernel Stable Commits]     [Linux Tip Commits]     [Kernel MM Commits]     [Linux Security Module]     [AutoFS]     [Filesystem Development]     [Ext3 Filesystem]     [Linux bcache]     [Ext4 Filesystem]     [Linux BTRFS]     [Linux CEPH Filesystem]     [Linux XFS]     [XFS]     [Linux NFS]     [Linux CIFS]     [Ecryptfs]     [Linux NILFS]     [Linux Cachefs]     [Reiser FS]     [Initramfs]     [Linux FB Devel]     [Linux OpenGL]     [DRI Devel]     [Fastboot]     [Linux RT Users]     [Linux RT Stable]     [eCos]     [Corosync]     [Linux Clusters]     [LVS Devel]     [Hot Plug]     [Linux Virtualization]     [KVM]     [KVM PPC]     [KVM ia64]     [Linux Containers]     [Linux Hexagon]     [Linux Cgroups]     [Util Linux]     [Wireless]     [Linux Bluetooth]     [Bluez Devel]     [Ethernet Bridging]     [Embedded Linux]     [Barebox]     [Linux MMC]     [Linux IIO]     [Sparse]     [Smatch]     [Linux Arch]     [x86 Platform Driver]     [Linux ACPI]     [Linux IBM ACPI]     [LM Sensors]     [CPU Freq]     [Linux Power Management]     [Linmodems]     [Linux DCCP]     [Linux SCTP]     [ALSA Devel]     [Linux USB]     [Linux PA RISC]     [Linux Samsung SOC]     [MIPS Linux]     [IBM S/390 Linux]     [ARM Linux]     [ARM Kernel]     [ARM MSM]     [Tegra Devel]     [Sparc Linux]     [Linux Security]     [Linux Sound]     [Linux Media]     [Video 4 Linux]     [Linux IRDA Users]     [Linux for the blind]     [Linux RAID]     [Linux ATA RAID]     [Device Mapper]     [Linux SCSI]     [SCSI Target Devel]     [Linux SCSI Target Infrastructure]     [Linux IDE]     [Linux SMP]     [Linux AXP]     [Linux Alpha]     [Linux M68K]     [Linux ia64]     [Linux 8086]     [Linux x86_64]     [Linux Config]     [Linux Apps]     [Linux MSDOS]     [Linux X.25]     [Linux Crypto]     [DM Crypt]     [Linux Trace Users]     [Linux Btrace]     [Linux Watchdog]     [Utrace Devel]     [Linux C Programming]     [Linux Assembly]     [Dash]     [DWARVES]     [Hail Devel]     [Linux Kernel Debugger]     [Linux gcc]     [Gcc Help]     [X.Org]     [Wine]

Add to Google Powered by Linux

[Older Kernel Discussion]     [Yosemite National Park Forum]     [Large Format Photos]     [Gimp]     [Yosemite Photos]     [Stuff]