From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Hao Li <hao.l.li@intel.com>
Date: Fri, 31 Aug 2018 10:58:56 +0800
Subject: [PATCH] VBS-K: virtqueue runtime API.

This patch adds the virtqueue runtime API to the VBS-K framework:
- int virtio_vq_getchain(struct virtio_vq_info *vq, uint16_t *pidx,
                         struct iovec *iov, int n_iov, uint16_t *flags);
- void virtio_vq_retchain(struct virtio_vq_info *vq);
- void virtio_vq_relchain(struct virtio_vq_info *vq, uint16_t idx,
                          uint32_t iolen);
- void virtio_vq_endchains(struct virtio_vq_info *vq, int used_all_avail);
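
A minimal usage sketch (illustration only, not part of this patch;
handle_request() and the 8-entry iovec are hypothetical placeholders)
of a backend draining a queue with these APIs:

  static void process_all_chains(struct virtio_vq_info *vq)
  {
  	struct iovec iov[8];
  	uint16_t idx, flags[8];
  	int n;

  	while (virtio_vq_has_descs(vq)) {
  		/* collect one chain; n is the number of "real"
  		 * descriptors, idx is the chain head to give back */
  		n = virtio_vq_getchain(vq, &idx, iov, 8, flags);
  		if (n <= 0)
  			break;
  		/* if a request cannot be handled yet, a backend would
  		 * call virtio_vq_retchain(vq) and retry later;
  		 * handle_request() (placeholder) consumes iov[0..n-1]
  		 * and returns the number of bytes written back */
  		virtio_vq_relchain(vq, idx, handle_request(iov, n));
  	}
  	/* deliver an interrupt to the guest if one is needed */
  	virtio_vq_endchains(vq, 1);
  }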

Change-Id: Ie7f81d96c895a16e210133c19aca99b185b8682d
Tracked-On: 218445
Signed-off-by: Hao Li <hao.l.li@intel.com>
Reviewed-on:
Reviewed-by: Chi, Mingqiang <mingqiang.chi@intel.com>
Reviewed-by: Dong, Eddie <eddie.dong@intel.com>
Tested-by: Dong, Eddie <eddie.dong@intel.com>
---
 drivers/vbs/vq.c       | 270 +++++++++++++++++++++++++++++++++++++++++
 include/linux/vbs/vq.h |  73 +++++++++++
 2 files changed, 343 insertions(+)

diff --git a/drivers/vbs/vq.c b/drivers/vbs/vq.c
index 95a6757..886f482 100644
--- a/drivers/vbs/vq.c
+++ b/drivers/vbs/vq.c
@@ -71,6 +71,276 @@ void * paddr_guest2host(struct ctx *ctx, uintptr_t gaddr, size_t len)
 	return map_guest_phys(ctx->vmid, gaddr, len);
 }
 
+/*
+ * Helper function for virtio_vq_getchain():
+ * record the i'th "real" descriptor.
+ */
+static inline void _vq_record(int i, volatile struct virtio_desc *vd,
+			      struct ctx *ctx, struct iovec *iov,
+			      int n_iov, uint16_t *flags)
+{
+	if (i >= n_iov)
+		return;
+
+	iov[i].iov_base = paddr_guest2host(ctx, vd->addr, vd->len);
+	iov[i].iov_len = vd->len;
+
+	if (flags != NULL)
+		flags[i] = vd->flags;
+}
+
+/*
+ * Walk the descriptor table and put requests into the iovec.
+ *
+ * Examine the chain of descriptors starting at the "next one" to
+ * make sure that they describe a sensible request. If so, return
+ * the number of "real" descriptors that would be needed/used in
+ * acting on this request. This may be smaller than the number of
+ * available descriptors, e.g., if there are two available but
+ * they are two separate requests, this just returns 1. Or, it
+ * may be larger: if there are indirect descriptors involved,
+ * there may only be one descriptor available but it may be an
+ * indirect pointing to eight more. We return 8 in this case,
+ * i.e., we do not count the indirect descriptors, only the "real"
+ * ones.
+ *
+ * Basically, this vets the flags and next field of each
+ * descriptor and tells you how many are involved. Since some may
+ * be indirect, this also needs the ctx (in the virtio_dev_info
+ * at vq->dev) so that it can find indirect descriptors.
+ *
+ * As we process each descriptor, we copy and adjust it (guest to
+ * host address wise, also using the ctx) into the given iov[]
+ * array (of the given size). If the array overflows, we stop
+ * placing values into the array but keep processing descriptors,
+ * up to VQ_MAX_DESCRIPTORS, before giving up and returning -1.
+ * So you, the caller, must not assume that iov[] is as big as the
+ * return value (you can process the same thing twice to allocate
+ * a larger iov array if needed, or supply a zero length to find
+ * out how much space is needed).
+ *
+ * If you want to verify the WRITE flag on each descriptor, pass a
+ * non-NULL "flags" pointer to an array of "uint16_t" of the same size
+ * as n_iov and we'll copy each flags field after unwinding any
+ * indirects.
+ *
+ * If some descriptor(s) are invalid, this prints a diagnostic message
+ * and returns -1. If no descriptors are ready now it simply returns 0.
+ *
+ * You are assumed to have done a virtio_vq_ring_ready() if needed
+ * (note that virtio_vq_has_descs() does one).
+ */
+int virtio_vq_getchain(struct virtio_vq_info *vq, uint16_t *pidx,
+		       struct iovec *iov, int n_iov, uint16_t *flags)
+{
+	int i;
+	unsigned int ndesc, n_indir;
+	unsigned int idx, next;
+	struct ctx *ctx;
+	struct virtio_dev_info *dev;
+	const char *name;
+
+	volatile struct virtio_desc *vdir, *vindir, *vp;
+
+	dev = vq->dev;
+	name = dev->name;
+
+	/*
+	 * Note: it's the responsibility of the guest not to
+	 * update vq->avail->idx until all of the descriptors
+	 * the guest has written are valid (including all their
+	 * next fields and flags).
+	 *
+	 * Compute (avail->idx - last_avail) in integers mod 2**16. This is
+	 * the number of descriptors the device has made available
+	 * since the last time we updated vq->last_avail.
+	 *
+	 * We just need to do the subtraction as an unsigned int,
+	 * then trim off excess bits.
+	 */
+	idx = vq->last_avail;
+	ndesc = (uint16_t)((unsigned int)vq->avail->idx - idx);
+
+	if (ndesc == 0)
+		return 0;
+
+	if (ndesc > vq->qsize) {
+		/* XXX need better way to diagnose issues */
+		pr_err("%s: ndesc (%u) out of range, driver confused?\r\n",
+		       name, (unsigned int)ndesc);
+		return -1;
+	}
+
+	/*
+	 * Now count/parse "involved" descriptors starting from
+	 * the head of the chain.
+	 *
+	 * To prevent loops, we could be more complicated and
+	 * check whether we're re-visiting a previously visited
+	 * index, but we just abort if the count gets excessive.
+	 */
+	ctx = &dev->_ctx;
+	*pidx = next = vq->avail->ring[idx & (vq->qsize - 1)];
+	vq->last_avail++;
+	for (i = 0; i < VQ_MAX_DESCRIPTORS; next = vdir->next) {
+		if (next >= vq->qsize) {
+			pr_err("%s: descriptor index %u out of range, "
+			       "driver confused?\r\n", name, next);
+			return -1;
+		}
+		vdir = &vq->desc[next];
+		if ((vdir->flags & VRING_DESC_F_INDIRECT) == 0) {
+			_vq_record(i, vdir, ctx, iov, n_iov, flags);
+			i++;
+		} else if ((dev->negotiated_features &
+			    VIRTIO_RING_F_INDIRECT_DESC) == 0) {
+			pr_err("%s: descriptor has forbidden INDIRECT flag, "
+			       "driver confused?\r\n", name);
+			return -1;
+		} else {
+			n_indir = vdir->len / 16;
+			if ((vdir->len & 0xf) || n_indir == 0) {
+				pr_err("%s: invalid indir len 0x%x, "
+				       "driver confused?\r\n", name,
+				       (unsigned int)vdir->len);
+				return -1;
+			}
+			vindir = paddr_guest2host(ctx, vdir->addr, vdir->len);
+			/*
+			 * Indirects start at the 0th, then follow
+			 * their own embedded "next"s until those run
+			 * out. Each one's indirect flag must be off
+			 * (we don't really have to check, could just
+			 * ignore errors...).
+			 */
+			next = 0;
+			for (;;) {
+				vp = &vindir[next];
+				if (vp->flags & VRING_DESC_F_INDIRECT) {
+					pr_err("%s: indirect desc has INDIR flag,"
+					       " driver confused?\r\n", name);
+					return -1;
+				}
+				_vq_record(i, vp, ctx, iov, n_iov, flags);
+				if (++i > VQ_MAX_DESCRIPTORS)
+					goto loopy;
+				if ((vp->flags & VRING_DESC_F_NEXT) == 0)
+					break;
+				next = vp->next;
+				if (next >= n_indir) {
+					pr_err("%s: invalid next %u > %u, "
+					       "driver confused?\r\n",
+					       name, (unsigned int)next, n_indir);
+					return -1;
+				}
+			}
+		}
+		if ((vdir->flags & VRING_DESC_F_NEXT) == 0)
+			return i;
+	}
+loopy:
+	pr_err("%s: descriptor loop? count > %d - driver confused?\r\n",
+	       name, i);
+	return -1;
+}
+
+/*
+ * Return the currently-first request chain back to the available queue.
+ *
+ * (This chain is the one you handled when you called virtio_vq_getchain()
+ * and used its positive return value.)
+ */
+void virtio_vq_retchain(struct virtio_vq_info *vq)
+{
+	vq->last_avail--;
+}
+
+/*
+ * Return the specified request chain to the guest, setting its I/O
+ * length to the provided value.
+ *
+ * (This chain is the one you handled when you called virtio_vq_getchain()
+ * and used its positive return value.)
+ */
+void virtio_vq_relchain(struct virtio_vq_info *vq, uint16_t idx,
+			uint32_t iolen)
+{
+	uint16_t uidx, mask;
+	volatile struct vring_used *vuh;
+	volatile struct virtio_used *vue;
+
+	/*
+	 * Notes:
+	 * - mask is N-1 where N is a power of 2 so computes x % N
+	 * - vuh points to the "used" data shared with the guest
+	 * - vue points to the "used" ring entry we want to update
+	 * - idx is the head of the chain being returned
+	 *
+	 * (vue->idx holds the chain head; the virtio spec calls
+	 * this field "id".)
+	 */
+	mask = vq->qsize - 1;
+	vuh = vq->used;
+
+	uidx = vuh->idx;
+	vue = &vuh->ring[uidx++ & mask];
+	vue->idx = idx;
+	vue->len = iolen;
+	vuh->idx = uidx;
+}
+
+/*
+ * The driver has finished processing "available" chains and calling
+ * virtio_vq_relchain on each one. If the driver used all the available
+ * chains, used_all_avail should be set.
+ *
+ * If the "used" index moved we may need to inform the guest, i.e.,
+ * deliver an interrupt. Even if the used index did NOT move we
+ * may need to deliver an interrupt, if the avail ring is empty and
+ * we are supposed to interrupt on empty.
+ *
+ * Note that used_all_avail is provided by the caller because it is
+ * a snapshot of the ring state at the moment the caller decided to
+ * finish interrupt processing -- it's possible that descriptors became
+ * available after that point. (It's also typically a constant 1/True.)
+ */
+void virtio_vq_endchains(struct virtio_vq_info *vq, int used_all_avail)
+{
+	struct virtio_dev_info *dev;
+	uint16_t event_idx, new_idx, old_idx;
+	int intr;
+
+	/*
+	 * Interrupt generation: if we're using EVENT_IDX,
+	 * interrupt if we've crossed the event threshold.
+	 * Otherwise an interrupt is generated if we added "used" entries,
+	 * but suppressed by VRING_AVAIL_F_NO_INTERRUPT.
+	 *
+	 * In any case, though, if NOTIFY_ON_EMPTY is set and the
+	 * entire avail ring was processed, we need to interrupt always.
+	 */
+	dev = vq->dev;
+	old_idx = vq->save_used;
+	vq->save_used = new_idx = vq->used->idx;
+	if (used_all_avail &&
+	    (dev->negotiated_features & VIRTIO_F_NOTIFY_ON_EMPTY))
+		intr = 1;
+	else if (dev->negotiated_features & VIRTIO_RING_F_EVENT_IDX) {
+		event_idx = VQ_USED_EVENT_IDX(vq);
+		/*
+		 * This calculation is per the virtio docs and the FreeBSD
+		 * kernel (see src/sys/dev/virtio/virtio_ring.h).
+		 */
+		intr = (uint16_t)(new_idx - event_idx - 1) <
+		       (uint16_t)(new_idx - old_idx);
+	} else {
+		intr = new_idx != old_idx &&
+		       !(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT);
+	}
+	if (intr)
+		virtio_vq_interrupt(dev, vq);
+}
+
 /*
  * Initialize the currently-selected virtqueue.
  * The guest just gave us a page frame number, from which we can
diff --git a/include/linux/vbs/vq.h b/include/linux/vbs/vq.h
index 55ff810..9ebde05 100644
--- a/include/linux/vbs/vq.h
+++ b/include/linux/vbs/vq.h
@@ -64,6 +64,7 @@
 
 #include <linux/uio.h>
 #include <linux/vbs/vbs.h>
+#include <linux/vhm/vhm_vm_mngt.h>
 
 /* virtqueue alignment */
 #define VRING_ALIGN 4096
@@ -76,6 +77,30 @@
 #define VQ_ALLOC 0x01
 #define VQ_BROKED 0x02
 
+/*
+ * Feature flags.
+ * Note: bits 0 through 23 are reserved to each device type.
+ */
+#define VIRTIO_F_NOTIFY_ON_EMPTY (1 << 24)
+#define VIRTIO_RING_F_INDIRECT_DESC (1 << 28)
+#define VIRTIO_RING_F_EVENT_IDX (1 << 29)
+
+#define VQ_MAX_DESCRIPTORS 512
+
+/* virtio_desc flags */
+#define VRING_DESC_F_NEXT (1 << 0)
+#define VRING_DESC_F_WRITE (1 << 1)
+#define VRING_DESC_F_INDIRECT (1 << 2)
+
+/* vring_avail flags */
+#define VRING_AVAIL_F_NO_INTERRUPT 1
+
+/* vring_used flags */
+#define VRING_USED_F_NO_NOTIFY 1
+
+/* used-ring event index, stored past the end of the avail ring */
+#define VQ_USED_EVENT_IDX(vq) ((vq)->avail->ring[(vq)->qsize])
+
 /* get virtqueue size according to virtio specification */
 static inline size_t virtio_vq_ring_size(unsigned int qsz)
 {
@@ -92,8 +117,56 @@ static inline size_t virtio_vq_ring_size(unsigned int qsz)
 	return size;
 }
 
+/* Is this ring ready for I/O? */
+static inline int virtio_vq_ring_ready(struct virtio_vq_info *vq)
+{
+	return (vq->flags & VQ_ALLOC);
+}
+
+/*
+ * Are there "available" descriptors? (This does not count
+ * how many, just returns True if there are some.)
+ */
+static inline int virtio_vq_has_descs(struct virtio_vq_info *vq)
+{
+	return (virtio_vq_ring_ready(vq) &&
+		vq->last_avail != vq->avail->idx);
+}
+
+/* Deliver an interrupt to the guest on the given virtual queue */
+static inline void virtio_vq_interrupt(struct virtio_dev_info *dev,
+				       struct virtio_vq_info *vq)
+{
+	uint16_t msix_idx;
+	uint64_t msix_addr;
+	uint32_t msix_data;
+
+	/* Currently we only support MSI-X */
+	msix_idx = vq->msix_idx;
+
+	if (msix_idx == VIRTIO_MSI_NO_VECTOR) {
+		pr_err("msix idx is VIRTIO_MSI_NO_VECTOR!\n");
+		return;
+	}
+
+	msix_addr = vq->msix_addr;
+	msix_data = vq->msix_data;
+
+	pr_debug("virtio_vq_interrupt: vmid is %d\n", dev->_ctx.vmid);
+	vhm_inject_msi(dev->_ctx.vmid, msix_addr, msix_data);
+}
+
+
 /* virtqueue initialization APIs */
 void virtio_vq_init(struct virtio_vq_info *vq, uint32_t pfn);
 void virtio_vq_reset(struct virtio_vq_info *vq);
 
+/* virtqueue runtime APIs */
+int virtio_vq_getchain(struct virtio_vq_info *vq, uint16_t *pidx,
+		       struct iovec *iov, int n_iov, uint16_t *flags);
+void virtio_vq_retchain(struct virtio_vq_info *vq);
+void virtio_vq_relchain(struct virtio_vq_info *vq, uint16_t idx,
+			uint32_t iolen);
+void virtio_vq_endchains(struct virtio_vq_info *vq, int used_all_avail);
+
 #endif
-- 
2.21.0