From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Hao Li Date: Fri, 31 Aug 2018 10:58:56 +0800 Subject: [PATCH] VBS-K: virtqueue runtime API. This patch added the virtqueue runtime API to the VBS-K framework: - int virtio_vq_getchain(struct virtio_vq_info *vq, uint16_t *pidx, struct iovec *iov, int n_iov, uint16_t *flags); - void virtio_vq_retchain(struct virtio_vq_info *vq); - void virtio_vq_relchain(struct virtio_vq_info *vq, uint16_t idx, uint32_t iolen); - void virtio_vq_endchains(struct virtio_vq_info *vq, int used_all_avail); Change-Id: Ie7f81d96c895a16e210133c19aca99b185b8682d Tracked-On:218445 Signed-off-by: Hao Li Reviewed-on: Reviewed-by: Chi, Mingqiang Reviewed-by: Dong, Eddie Tested-by: Dong, Eddie --- drivers/vbs/vq.c | 270 +++++++++++++++++++++++++++++++++++++++++ include/linux/vbs/vq.h | 73 +++++++++++ 2 files changed, 343 insertions(+) diff --git a/drivers/vbs/vq.c b/drivers/vbs/vq.c index 95a6757a1c85..886f48225de9 100644 --- a/drivers/vbs/vq.c +++ b/drivers/vbs/vq.c @@ -71,6 +71,276 @@ void * paddr_guest2host(struct ctx *ctx, uintptr_t gaddr, size_t len) return map_guest_phys(ctx->vmid, gaddr, len); } +/* + * Helper for virtio_vq_getchain(): store the i'th "real" descriptor in + * iov[i] (and flags[i] if flags is non-NULL); a no-op once i >= n_iov. + */ +static inline void _vq_record(int i, volatile struct virtio_desc *vd, + struct ctx *ctx, struct iovec *iov, + int n_iov, uint16_t *flags) +{ + if (i >= n_iov) + return; + + iov[i].iov_base = paddr_guest2host(ctx, vd->addr, vd->len); + iov[i].iov_len = vd->len; + + if (flags != NULL) + flags[i] = vd->flags; +} + +/* + * Walk descriptor table and put requests into iovec; the chain's head index is stored in *pidx. + * + * Examine the chain of descriptors starting at the "next one" to + * make sure that they describe a sensible request. If so, return + * the number of "real" descriptors that would be needed/used in + * acting on this request.
This may be smaller than the number of + * available descriptors, e.g., if there are two available but + * they are two separate requests, this just returns 1. Or, it + * may be larger: if there are indirect descriptors involved, + * there may only be one descriptor available but it may be an + * indirect pointing to eight more. We return 8 in this case, + * i.e., we do not count the indirect descriptors, only the "real" + * ones. + * + * Basically, this vets the flags and next fields of each + * descriptor and tells you how many are involved. Since some may + * be indirect, this also needs the ctx (the _ctx member of + * vq->dev) so that it can find indirect descriptors. + * + * As we process each descriptor, we copy and adjust it (guest to + * host address wise, also using the ctx) into the given iov[] + * array (of the given size). If the array overflows, we stop + * placing values into the array but keep processing descriptors, + * up to VQ_MAX_DESCRIPTORS, before giving up and returning -1. + * So you, the caller, must not assume that iov[] is as big as the + * return value (you can process the same thing twice to allocate + * a larger iov array if needed, or supply a zero length to find + * out how much space is needed). + * + * If you want to verify the WRITE flag on each descriptor, pass a + * non-NULL "flags" pointer to an array of "uint16_t" of the same size + * as n_iov and we'll copy each descriptor's flags field after unwinding + * any indirects. + * + * If some descriptor(s) are invalid, this prints a diagnostic message + * and returns -1. If no descriptors are ready now it simply returns 0. + * + * You are assumed to have done a virtio_vq_ring_ready() if needed (note + * that virtio_vq_has_descs() does one).
+ */ +int virtio_vq_getchain(struct virtio_vq_info *vq, uint16_t *pidx, + struct iovec *iov, int n_iov, uint16_t *flags) +{ + int i; + unsigned int ndesc, n_indir; + unsigned int idx, next; + struct ctx *ctx; + struct virtio_dev_info *dev; + const char *name; + + volatile struct virtio_desc *vdir, *vindir, *vp; + + dev = vq->dev; + name = dev->name; + + /* + * Note: it's the responsibility of the guest not to + * update vq->avail->idx until all of the descriptors + * the guest has written are valid (including all their + * next and flags fields). + * + * Compute (avail->idx - last_avail) in integers mod 2**16. This is + * the number of avail ring entries the guest has made available + * since the last time we updated vq->last_avail. + * + * We just need to do the subtraction as an unsigned int, + * then trim off excess bits. + */ + idx = vq->last_avail; + ndesc = (uint16_t)((unsigned int)vq->avail->idx - idx); + + if (ndesc == 0) + return 0; + + if (ndesc > vq->qsize) { + /* XXX need better way to diagnose issues */ + pr_err("%s: ndesc (%u) out of range, driver confused?\r\n", + name, (unsigned int)ndesc); + return -1; + } + + /* + * Now count/parse "involved" descriptors starting from + * the head of the chain. + * + * To prevent loops, we could be more complicated and + * check whether we're re-visiting a previously visited + * index, but we just abort if the count gets excessive.
+ */ + ctx = &dev->_ctx; + *pidx = next = vq->avail->ring[idx & (vq->qsize - 1)]; + vq->last_avail++; + for (i = 0; i < VQ_MAX_DESCRIPTORS; next = vdir->next) { + if (next >= vq->qsize) { + pr_err("%s: descriptor index %u out of range, " + "driver confused?\r\n", name, next); + return -1; + } + vdir = &vq->desc[next]; + if ((vdir->flags & VRING_DESC_F_INDIRECT) == 0) { + _vq_record(i, vdir, ctx, iov, n_iov, flags); + i++; + } else if ((dev->negotiated_features & + VIRTIO_RING_F_INDIRECT_DESC) == 0) { + pr_err("%s: descriptor has forbidden INDIRECT flag, " + "driver confused?\r\n", name); + return -1; + } else { + n_indir = vdir->len / 16; + if ((vdir->len & 0xf) || n_indir == 0) { + pr_err("%s: invalid indir len 0x%x, " + "driver confused?\r\n", name, + (unsigned int)vdir->len); + return -1; + } + vindir = paddr_guest2host(ctx, vdir->addr, vdir->len); + /* + * Indirects start at the 0th, then follow + * their own embedded "next"s until those run + * out. Each one's indirect flag must be off + * (we don't really have to check, could just + * ignore errors...). + */ + next = 0; + for (;;) { + vp = &vindir[next]; + if (vp->flags & VRING_DESC_F_INDIRECT) { + pr_err("%s: indirect desc has INDIR flag," + " driver confused?\r\n", name); + return -1; + } + _vq_record(i, vp, ctx, iov, n_iov, flags); + if (++i > VQ_MAX_DESCRIPTORS) + goto loopy; + if ((vp->flags & VRING_DESC_F_NEXT) == 0) + break; + next = vp->next; + if (next >= n_indir) { + pr_err("%s: invalid next %u > %u, " + "driver confused?\r\n", + name, (unsigned int)next, n_indir); + return -1; + } + } + } + if ((vdir->flags & VRING_DESC_F_NEXT) == 0) + return i; + } +loopy: + pr_err("%s: descriptor loop? count > %d - driver confused?\r\n", + name, i); + return -1; +} + +/* + * Return the currently-first request chain back to the available queue. + * + * (This chain is the one you handled when you called virtio_vq_getchain() + * and used its positive return value.)
+ */ +void virtio_vq_retchain(struct virtio_vq_info *vq) +{ + vq->last_avail--; +} + +/* + * Return specified request chain to the guest, setting its I/O length + * to the provided value. + * + * (This chain is the one you handled when you called virtio_vq_getchain() + * and used its positive return value.) + */ +void virtio_vq_relchain(struct virtio_vq_info *vq, uint16_t idx, + uint32_t iolen) +{ + uint16_t uidx, mask; + volatile struct vring_used *vuh; + volatile struct virtio_used *vue; + + /* + * Notes: + * - mask is N-1 where N is a power of 2 so computes x % N + * - vuh points to the "used" data shared with guest + * - vue points to the "used" ring entry we want to update + * - idx should be the chain head virtio_vq_getchain() gave you in *pidx. + * + * (I apologize for the two fields named idx; the + * virtio spec calls the one that vue points to, "id"...) + */ + mask = vq->qsize - 1; + vuh = vq->used; + + uidx = vuh->idx; + vue = &vuh->ring[uidx++ & mask]; + vue->idx = idx; + vue->len = iolen; + vuh->idx = uidx; +} + +/* + * Driver has finished processing "available" chains and calling + * virtio_vq_relchain on each one. If driver used all the available + * chains, used_all_avail should be set. + * + * If the "used" index moved we may need to inform the guest, i.e., + * deliver an interrupt. Even if the used index did NOT move we + * may need to deliver an interrupt, if the avail ring is empty and + * we are supposed to interrupt on empty. + * + * Note that used_all_avail is provided by the caller because it's + * a snapshot of the ring state when the caller decided to finish interrupt + * processing -- it's possible that descriptors became available after + * that point. (It's also typically a constant 1/True as well.) + */ +void virtio_vq_endchains(struct virtio_vq_info *vq, int used_all_avail) +{ + struct virtio_dev_info *dev; + uint16_t event_idx, new_idx, old_idx; + int intr; + + /* + * Interrupt generation: if we're using EVENT_IDX, + * interrupt if we've crossed the event threshold.
+ * Otherwise interrupt is generated if we added "used" entries, + * unless suppressed by VRING_AVAIL_F_NO_INTERRUPT. + * + * In any case, though, if NOTIFY_ON_EMPTY is set and the + * entire avail was processed, we need to interrupt always. + */ + dev = vq->dev; + old_idx = vq->save_used; + vq->save_used = new_idx = vq->used->idx; + if (used_all_avail && + (dev->negotiated_features & VIRTIO_F_NOTIFY_ON_EMPTY)) + intr = 1; + else if (dev->negotiated_features & VIRTIO_RING_F_EVENT_IDX) { + event_idx = VQ_USED_EVENT_IDX(vq); + /* + * This calculation is per docs and the kernel + * (see src/sys/dev/virtio/virtio_ring.h). + */ + intr = (uint16_t)(new_idx - event_idx - 1) < + (uint16_t)(new_idx - old_idx); + } else { + intr = new_idx != old_idx && + !(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT); + } + if (intr) + virtio_vq_interrupt(dev, vq); +} + /* * Initialize the currently-selected virtqueue. * The guest just gave us a page frame number, from which we can diff --git a/include/linux/vbs/vq.h b/include/linux/vbs/vq.h index 55ff810fa094..9ebde05e4663 100644 --- a/include/linux/vbs/vq.h +++ b/include/linux/vbs/vq.h @@ -64,6 +64,7 @@ #include #include +#include /* virtqueue alignment */ #define VRING_ALIGN 4096 @@ -76,6 +77,30 @@ #define VQ_ALLOC 0x01 #define VQ_BROKED 0x02 +/* + * Feature flags. + * Note: bits 0 through 23 are reserved to each device type.
+ */ +#define VIRTIO_F_NOTIFY_ON_EMPTY (1 << 24) +#define VIRTIO_RING_F_INDIRECT_DESC (1 << 28) +#define VIRTIO_RING_F_EVENT_IDX (1 << 29) + +#define VQ_MAX_DESCRIPTORS 512 + +/* virtio_desc flags */ +#define VRING_DESC_F_NEXT (1 << 0) +#define VRING_DESC_F_WRITE (1 << 1) +#define VRING_DESC_F_INDIRECT (1 << 2) + +/* vring_avail flags */ +#define VRING_AVAIL_F_NO_INTERRUPT 1 + +/* vring_used flags */ +#define VRING_USED_F_NO_NOTIFY 1 + +/* Event index kept in the slot just past the last avail ring entry */ +#define VQ_USED_EVENT_IDX(vq) ((vq)->avail->ring[(vq)->qsize]) + /* get virtqueue size according to virtio specification */ static inline size_t virtio_vq_ring_size(unsigned int qsz) { @@ -92,8 +117,56 @@ static inline size_t virtio_vq_ring_size(unsigned int qsz) return size; } +/* Is this ring ready for I/O, i.e. is VQ_ALLOC set in vq->flags? */ +static inline int virtio_vq_ring_ready(struct virtio_vq_info *vq) +{ + return (vq->flags & VQ_ALLOC); +} + +/* + * Are there "available" descriptors? (This does not count + * how many, just returns True if there are some).
+ */ +static inline int virtio_vq_has_descs(struct virtio_vq_info *vq) +{ + return (virtio_vq_ring_ready(vq) && + vq->last_avail != vq->avail->idx); +} + +/* Deliver the virtqueue's MSI-X interrupt to the guest via vhm_inject_msi() */ +static inline void virtio_vq_interrupt(struct virtio_dev_info *dev, + struct virtio_vq_info *vq) +{ + uint16_t msix_idx; + uint64_t msix_addr; + uint32_t msix_data; + + /* Currently we only support MSI-X */ + msix_idx = vq->msix_idx; + + if (msix_idx == VIRTIO_MSI_NO_VECTOR) { + pr_err("msix idx is VIRTIO_MSI_NO_VECTOR!\n"); + return; + } + + msix_addr = vq->msix_addr; + msix_data = vq->msix_data; + + pr_debug("virtio_vq_interrupt: vmid is %d\n", dev->_ctx.vmid); + vhm_inject_msi(dev->_ctx.vmid, msix_addr, msix_data); +} + + /* virtqueue initialization APIs */ void virtio_vq_init(struct virtio_vq_info *vq, uint32_t pfn); void virtio_vq_reset(struct virtio_vq_info *vq); +/* virtqueue runtime APIs */ +int virtio_vq_getchain(struct virtio_vq_info *vq, uint16_t *pidx, + struct iovec *iov, int n_iov, uint16_t *flags); +void virtio_vq_retchain(struct virtio_vq_info *vq); +void virtio_vq_relchain(struct virtio_vq_info *vq, uint16_t idx, + uint32_t iolen); +void virtio_vq_endchains(struct virtio_vq_info *vq, int used_all_avail); + #endif -- https://clearlinux.org