clear-pkgs-linux-iot-lts2018/0478-VBS-K-virtqueue-runtim...

410 lines
13 KiB
Diff

From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Hao Li <hao.l.li@intel.com>
Date: Fri, 31 Aug 2018 10:58:56 +0800
Subject: [PATCH] VBS-K: virtqueue runtime API.
This patch added the virtqueue runtime API to the VBS-K framework:
- int virtio_vq_getchain(struct virtio_vq_info *vq, uint16_t *pidx,
struct iovec *iov, int n_iov, uint16_t *flags);
- void virtio_vq_retchain(struct virtio_vq_info *vq);
- void virtio_vq_relchain(struct virtio_vq_info *vq, uint16_t idx,
uint32_t iolen);
- void virtio_vq_endchains(struct virtio_vq_info *vq, int used_all_avail);
Change-Id: Ie7f81d96c895a16e210133c19aca99b185b8682d
Tracked-On: #218445
Signed-off-by: Hao Li <hao.l.li@intel.com>
Reviewed-on:
Reviewed-by: Chi, Mingqiang <mingqiang.chi@intel.com>
Reviewed-by: Dong, Eddie <eddie.dong@intel.com>
Tested-by: Dong, Eddie <eddie.dong@intel.com>
---
drivers/vbs/vq.c | 270 +++++++++++++++++++++++++++++++++++++++++
include/linux/vbs/vq.h | 73 +++++++++++
2 files changed, 343 insertions(+)
diff --git a/drivers/vbs/vq.c b/drivers/vbs/vq.c
index 95a6757a1c85..886f48225de9 100644
--- a/drivers/vbs/vq.c
+++ b/drivers/vbs/vq.c
@@ -71,6 +71,276 @@ void * paddr_guest2host(struct ctx *ctx, uintptr_t gaddr, size_t len)
return map_guest_phys(ctx->vmid, gaddr, len);
}
+/*
+ * Helper function for virtio_vq_getchain():
+ * record the i'th "real" descriptor into iov[] (and flags[], if the
+ * caller passed a non-NULL array), translating the guest-physical
+ * buffer address into a host virtual address.  Entries beyond n_iov
+ * are silently dropped; the caller detects overflow from the total
+ * descriptor count it accumulates.
+ */
+static inline void _vq_record(int i, volatile struct virtio_desc *vd,
+ struct ctx *ctx, struct iovec *iov,
+ int n_iov, uint16_t *flags)
+{
+ if (i >= n_iov)
+ return;
+
+ iov[i].iov_base = paddr_guest2host(ctx, vd->addr, vd->len);
+ iov[i].iov_len = vd->len;
+
+ if (flags != NULL)
+ flags[i] = vd->flags;
+}
+
+/*
+ * Walk descriptor table and put requests into iovec.
+ *
+ * Examine the chain of descriptors starting at the "next one" to
+ * make sure that they describe a sensible request. If so, return
+ * the number of "real" descriptors that would be needed/used in
+ * acting on this request. This may be smaller than the number of
+ * available descriptors, e.g., if there are two available but
+ * they are two separate requests, this just returns 1. Or, it
+ * may be larger: if there are indirect descriptors involved,
+ * there may only be one descriptor available but it may be an
+ * indirect pointing to eight more. We return 8 in this case,
+ * i.e., we do not count the indirect descriptors, only the "real"
+ * ones.
+ *
+ * Basically, this vets the flags and next field of each
+ * descriptor and tells you how many are involved. Since some may
+ * be indirect, this also needs the device context (dev->_ctx)
+ * so that it can map and follow indirect descriptors.
+ *
+ * As we process each descriptor, we copy and adjust it (guest to
+ * host address wise, also using the ctx) into the given iov[]
+ * array (of the given size). If the array overflows, we stop
+ * placing values into the array but keep processing descriptors,
+ * up to VQ_MAX_DESCRIPTORS, before giving up and returning -1.
+ * So you, the caller, must not assume that iov[] is as big as the
+ * return value (you can process the same thing twice to allocate
+ * a larger iov array if needed, or supply a zero length to find
+ * out how much space is needed).
+ *
+ * If you want to verify the WRITE flag on each descriptor, pass a
+ * non-NULL "flags" pointer to an array of "uint16_t" of the same size
+ * as n_iov and we'll copy each descriptor's flags field after
+ * unwinding any indirects.
+ *
+ * If some descriptor(s) are invalid, this prints a diagnostic message
+ * and returns -1. If no descriptors are ready now it simply returns 0.
+ *
+ * You are assumed to have done a vq_ring_ready() if needed (note
+ * that vq_has_descs() does one).
+ */
+int virtio_vq_getchain(struct virtio_vq_info *vq, uint16_t *pidx,
+ struct iovec *iov, int n_iov, uint16_t *flags)
+{
+ int i;
+ unsigned int ndesc, n_indir;
+ unsigned int idx, next;
+ struct ctx *ctx;
+ struct virtio_dev_info *dev;
+ const char *name;
+
+ volatile struct virtio_desc *vdir, *vindir, *vp;
+
+ dev = vq->dev;
+ name = dev->name;
+
+ /*
+ * Note: it's the responsibility of the guest not to
+ * update vq->vq_avail->va_idx until all of the descriptors
+ * the guest has written are valid (including all their
+ * vd_next fields and vd_flags).
+ *
+ * Compute (last_avail - va_idx) in integers mod 2**16. This is
+ * the number of descriptors the device has made available
+ * since the last time we updated vq->vq_last_avail.
+ *
+ * We just need to do the subtraction as an unsigned int,
+ * then trim off excess bits.
+ */
+ idx = vq->last_avail;
+ ndesc = (uint16_t)((unsigned int)vq->avail->idx - idx);
+
+ if (ndesc == 0)
+ return 0;
+
+ if (ndesc > vq->qsize) {
+ /* XXX need better way to diagnose issues */
+ pr_err("%s: ndesc (%u) out of range, driver confused?\r\n",
+ name, (unsigned int)ndesc);
+ return -1;
+ }
+
+ /*
+ * Now count/parse "involved" descriptors starting from
+ * the head of the chain.
+ *
+ * To prevent loops, we could be more complicated and
+ * check whether we're re-visiting a previously visited
+ * index, but we just abort if the count gets excessive.
+ */
+ ctx = &dev->_ctx;
+ /* Head of the chain: the avail ring entry at our cursor. */
+ *pidx = next = vq->avail->ring[idx & (vq->qsize - 1)];
+ vq->last_avail++;
+ for (i = 0; i < VQ_MAX_DESCRIPTORS; next = vdir->next) {
+ if (next >= vq->qsize) {
+ pr_err("%s: descriptor index %u out of range, "
+ "driver confused?\r\n", name, next);
+ return -1;
+ }
+ vdir = &vq->desc[next];
+ if ((vdir->flags & VRING_DESC_F_INDIRECT) == 0) {
+ _vq_record(i, vdir, ctx, iov, n_iov, flags);
+ i++;
+ } else if ((dev->negotiated_features &
+ VIRTIO_RING_F_INDIRECT_DESC) == 0) {
+ pr_err("%s: descriptor has forbidden INDIRECT flag, "
+ "driver confused?\r\n", name);
+ return -1;
+ } else {
+ /* 16 == sizeof a virtio descriptor (virtio spec). */
+ n_indir = vdir->len / 16;
+ if ((vdir->len & 0xf) || n_indir == 0) {
+ pr_err("%s: invalid indir len 0x%x, "
+ "driver confused?\r\n", name,
+ (unsigned int)vdir->len);
+ return -1;
+ }
+ vindir = paddr_guest2host(ctx, vdir->addr, vdir->len);
+ /*
+ * Indirects start at the 0th, then follow
+ * their own embedded "next"s until those run
+ * out. Each one's indirect flag must be off
+ * (we don't really have to check, could just
+ * ignore errors...).
+ */
+ next = 0;
+ for (;;) {
+ vp = &vindir[next];
+ if (vp->flags & VRING_DESC_F_INDIRECT) {
+ pr_err("%s: indirect desc has INDIR flag,"
+ " driver confused?\r\n", name);
+ return -1;
+ }
+ _vq_record(i, vp, ctx, iov, n_iov, flags);
+ if (++i > VQ_MAX_DESCRIPTORS)
+ goto loopy;
+ if ((vp->flags & VRING_DESC_F_NEXT) == 0)
+ break;
+ next = vp->next;
+ if (next >= n_indir) {
+ pr_err("%s: invalid next %u > %u, "
+ "driver confused?\r\n",
+ name, (unsigned int)next, n_indir);
+ return -1;
+ }
+ }
+ }
+ if ((vdir->flags & VRING_DESC_F_NEXT) == 0)
+ return i;
+ }
+loopy:
+ pr_err("%s: descriptor loop? count > %d - driver confused?\r\n",
+ name, i);
+ return -1;
+}
+
+/*
+ * Return the currently-first request chain back to the available queue.
+ *
+ * (This chain is the one you handled when you called vq_getchain()
+ * and used its positive return value.)
+ */
+void virtio_vq_retchain(struct virtio_vq_info *vq)
+{
+ /* Rewind the cursor so the head descriptor is delivered again. */
+ vq->last_avail--;
+}
+
+/*
+ * Return specified request chain to the guest, setting its I/O length
+ * to the provided value.
+ *
+ * (This chain is the one you handled when you called vq_getchain()
+ * and used its positive return value.)
+ */
+void virtio_vq_relchain(struct virtio_vq_info *vq, uint16_t idx,
+ uint32_t iolen)
+{
+ uint16_t uidx, mask;
+ volatile struct vring_used *vuh;
+ volatile struct virtio_used *vue;
+
+ /*
+ * Notes:
+ * - mask is N-1 where N is a power of 2 so computes x % N
+ * - vuh points to the "used" data shared with guest
+ * - vue points to the "used" ring entry we want to update
+ * - vue->idx holds the chain head index (the virtio spec
+ * calls this field "id"); vuh->idx is the running used count.
+ *
+ * NOTE(review): there is no explicit write barrier between
+ * filling in vue and publishing vuh->idx — confirm ordering
+ * guarantees against a guest polling the used ring.
+ */
+ mask = vq->qsize - 1;
+ vuh = vq->used;
+
+ uidx = vuh->idx;
+ vue = &vuh->ring[uidx++ & mask];
+ vue->idx = idx;
+ vue->len = iolen;
+ vuh->idx = uidx;
+}
+
+/*
+ * Driver has finished processing "available" chains and calling
+ * vq_relchain on each one. If driver used all the available
+ * chains, used_all_avail should be set.
+ *
+ * If the "used" index moved we may need to inform the guest, i.e.,
+ * deliver an interrupt. Even if the used index did NOT move we
+ * may need to deliver an interrupt, if the avail ring is empty and
+ * we are supposed to interrupt on empty.
+ *
+ * Note that used_all_avail is provided by the caller because it's
+ * a snapshot of the ring state when he decided to finish interrupt
+ * processing -- it's possible that descriptors became available after
+ * that point. (It's also typically a constant 1/True as well.)
+ */
+void virtio_vq_endchains(struct virtio_vq_info *vq, int used_all_avail)
+{
+ struct virtio_dev_info *dev;
+ uint16_t event_idx, new_idx, old_idx;
+ int intr;
+
+ /*
+ * Interrupt generation: if we're using EVENT_IDX,
+ * interrupt if we've crossed the event threshold.
+ * Otherwise interrupt is generated if we added "used" entries,
+ * but suppressed by VRING_AVAIL_F_NO_INTERRUPT.
+ *
+ * In any case, though, if NOTIFY_ON_EMPTY is set and the
+ * entire avail was processed, we need to interrupt always.
+ */
+ dev = vq->dev;
+ old_idx = vq->save_used;
+ vq->save_used = new_idx = vq->used->idx;
+ if (used_all_avail &&
+ (dev->negotiated_features & VIRTIO_F_NOTIFY_ON_EMPTY))
+ intr = 1;
+ else if (dev->negotiated_features & VIRTIO_RING_F_EVENT_IDX) {
+ event_idx = VQ_USED_EVENT_IDX(vq);
+ /*
+ * This calculation is per docs and the kernel
+ * (see src/sys/dev/virtio/virtio_ring.h).
+ * It is true exactly when event_idx lies in the
+ * half-open window (old_idx, new_idx], mod 2**16.
+ */
+ intr = (uint16_t)(new_idx - event_idx - 1) <
+ (uint16_t)(new_idx - old_idx);
+ } else {
+ intr = new_idx != old_idx &&
+ !(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT);
+ }
+ if (intr)
+ virtio_vq_interrupt(dev, vq);
+}
+
/*
* Initialize the currently-selected virtqueue.
* The guest just gave us a page frame number, from which we can
diff --git a/include/linux/vbs/vq.h b/include/linux/vbs/vq.h
index 55ff810fa094..9ebde05e4663 100644
--- a/include/linux/vbs/vq.h
+++ b/include/linux/vbs/vq.h
@@ -64,6 +64,7 @@
#include <linux/uio.h>
#include <linux/vbs/vbs.h>
+#include <linux/vhm/vhm_vm_mngt.h>
/* virtqueue alignment */
#define VRING_ALIGN 4096
@@ -76,6 +77,30 @@
#define VQ_ALLOC 0x01
#define VQ_BROKED 0x02
+/*
+ * Feature flags.
+ * Note: bits 0 through 23 are reserved to each device type.
+ */
+#define VIRTIO_F_NOTIFY_ON_EMPTY (1 << 24)
+#define VIRTIO_RING_F_INDIRECT_DESC (1 << 28)
+#define VIRTIO_RING_F_EVENT_IDX (1 << 29)
+
+/* Upper bound on descriptors walked per chain (loop-abort guard). */
+#define VQ_MAX_DESCRIPTORS 512
+
+/* virtio_desc flags */
+#define VRING_DESC_F_NEXT (1 << 0)
+#define VRING_DESC_F_WRITE (1 << 1)
+#define VRING_DESC_F_INDIRECT (1 << 2)
+
+/* vring_avail flags */
+#define VRING_AVAIL_F_NO_INTERRUPT 1
+
+/* vring_used flags */
+#define VRING_USED_F_NO_NOTIFY 1
+
+/*
+ * EVENT_IDX support: per the virtio spec, the 16-bit used_event
+ * field is stored immediately after the avail ring, i.e., at
+ * avail->ring[qsize].
+ */
+#define VQ_USED_EVENT_IDX(vq) ((vq)->avail->ring[(vq)->qsize])
+
+
/* get virtqueue size according to virtio specification */
static inline size_t virtio_vq_ring_size(unsigned int qsz)
{
@@ -92,8 +117,56 @@ static inline size_t virtio_vq_ring_size(unsigned int qsz)
return size;
}
+/* Is this ring ready for I/O?  True iff the VQ_ALLOC flag is set. */
+static inline int virtio_vq_ring_ready(struct virtio_vq_info *vq)
+{
+ return (vq->flags & VQ_ALLOC);
+}
+
+/*
+ * Are there "available" descriptors? (This does not count
+ * how many, just returns True if there are some): the ring is
+ * ready and our last_avail cursor has not caught up with the
+ * guest-written avail index.
+ */
+static inline int virtio_vq_has_descs(struct virtio_vq_info *vq)
+{
+ return (virtio_vq_ring_ready(vq) &&
+ vq->last_avail != vq->avail->idx);
+}
+
+/*
+ * Deliver an interrupt to guest on the given virtual queue by
+ * injecting the queue's MSI-X message via the VHM layer.  Bails
+ * out with an error log if no MSI-X vector was configured.
+ */
+static inline void virtio_vq_interrupt(struct virtio_dev_info *dev,
+ struct virtio_vq_info *vq)
+{
+ uint16_t msix_idx;
+ uint64_t msix_addr;
+ uint32_t msix_data;
+
+ /* Currently we only support MSIx */
+ msix_idx = vq->msix_idx;
+
+ if (msix_idx == VIRTIO_MSI_NO_VECTOR) {
+ pr_err("msix idx is VIRTIO_MSI_NO_VECTOR!\n");
+ return;
+ }
+
+ msix_addr = vq->msix_addr;
+ msix_data = vq->msix_data;
+
+ pr_debug("virtio_vq_interrupt: vmid is %d\n", dev->_ctx.vmid);
+ vhm_inject_msi(dev->_ctx.vmid, msix_addr, msix_data);
+}
+
+
/* virtqueue initialization APIs */
void virtio_vq_init(struct virtio_vq_info *vq, uint32_t pfn);
void virtio_vq_reset(struct virtio_vq_info *vq);
+/* virtqueue runtime APIs */
+int virtio_vq_getchain(struct virtio_vq_info *vq, uint16_t *pidx,
+ struct iovec *iov, int n_iov, uint16_t *flags);
+void virtio_vq_retchain(struct virtio_vq_info *vq);
+void virtio_vq_relchain(struct virtio_vq_info *vq, uint16_t idx,
+ uint32_t iolen);
+void virtio_vq_endchains(struct virtio_vq_info *vq, int used_all_avail);
+
#endif
--
https://clearlinux.org