/*
 * Copyright (C) 2016 Red Hat, Inc.
 * Author: Michael S. Tsirkin <mst@redhat.com>
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 * Simple descriptor-based ring. virtio 0.9 compatible event index is used for
 * signalling, unconditionally.
 */
#define _GNU_SOURCE
#include "main.h"
#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/* Next - where the next entry will be written.
 * Prev - the value of "next" when the event was last triggered.
 * Event - the peer requested an event after writing this entry.
 */
static inline bool need_event(unsigned short event,
                              unsigned short next,
                              unsigned short prev)
{
        return (unsigned short)(next - event - 1) < (unsigned short)(next - prev);
}
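
/* Worked example (all arithmetic is mod 2^16): with prev = 0 and next = 3,
 * entries 0, 1 and 2 were written since the last event.  If event = 1, that
 * entry is among them: (3 - 1 - 1) = 1 is below (3 - 0) = 3, so an event is
 * due.  If event = 5, (3 - 5 - 1) wraps to 65533, which is not below 3: no
 * event yet.
 */
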
/* Design:
 * Guest adds descriptors with unique index values and DESC_HW set in flags.
 * Host overwrites used descriptors with the correct len and index, and with
 * DESC_HW clear.  Flags are always written last.
 */

#define DESC_HW 0x1

struct desc {
        unsigned short flags;
        unsigned short index;
        unsigned len;
        unsigned long long addr;
};

/* how much padding is needed to avoid false sharing between host and guest */
#define HOST_GUEST_PADDING 0x80

/* Mostly read */
struct event {
        unsigned short kick_index;
        unsigned char reserved0[HOST_GUEST_PADDING - 2];
        unsigned short call_index;
        unsigned char reserved1[HOST_GUEST_PADDING - 2];
};
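
/* Compile-time layout checks (a sketch, not part of the original code): these
 * assume 4-byte unsigned, 8-byte long long and no internal struct padding,
 * which holds on the usual 32- and 64-bit ABIs.  _Static_assert requires C11.
 */
_Static_assert(sizeof(struct desc) == 16, "desc expected to be 16 bytes");
_Static_assert(sizeof(struct event) == 2 * HOST_GUEST_PADDING,
               "kick_index and call_index should sit on separate cache lines");
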
struct data {
        void *buf; /* the host may overwrite the descriptor, so buf can't be
                    * recovered from there */
        void *data;
} *data;

struct desc *ring;
struct event *event;

struct guest {
        unsigned avail_idx;
        unsigned last_used_idx;
        unsigned num_free;
        unsigned kicked_avail_idx;
        /* pad the four indices above out to a full cache line */
        unsigned char reserved[HOST_GUEST_PADDING - 4 * sizeof(unsigned)];
} guest;

struct host {
        /* we do not need to track the last avail index
         * unless we have more than one in flight.
         */
        unsigned used_idx;
        unsigned called_used_idx;
        /* pad the two indices above out to a full cache line */
        unsigned char reserved[HOST_GUEST_PADDING - 2 * sizeof(unsigned)];
} host;

/* implemented by ring */
void alloc_ring(void)
{
        int ret;
        int i;

        ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring);
        if (ret) {
                /* posix_memalign returns the error code, it does not set errno */
                errno = ret;
                perror("Unable to allocate ring buffer");
                exit(3);
        }
        event = calloc(1, sizeof *event);
        if (!event) {
                perror("Unable to allocate event buffer");
                exit(3);
        }
        guest.avail_idx = 0;
        guest.kicked_avail_idx = -1;
        guest.last_used_idx = 0;
        host.used_idx = 0;
        host.called_used_idx = -1;
        for (i = 0; i < ring_size; ++i) {
                struct desc desc = {
                        .index = i,
                };
                ring[i] = desc;
        }
        guest.num_free = ring_size;
        data = calloc(ring_size, sizeof *data);
        if (!data) {
                perror("Unable to allocate data buffer");
                exit(3);
        }
}
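
/* After alloc_ring() every descriptor has DESC_HW clear and a unique index,
 * so the host sees an empty ring and the guest owns all ring_size entries
 * (guest.num_free == ring_size).
 */
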
/* guest side */
int add_inbuf(unsigned len, void *buf, void *datap)
{
        unsigned head, index;

        if (!guest.num_free)
                return -1;

        guest.num_free--;
        head = (ring_size - 1) & (guest.avail_idx++);

        /* Start with a write. On MESI architectures this helps
         * avoid a shared state with a consumer that is polling
         * this descriptor.
         */
        ring[head].addr = (unsigned long long)(uintptr_t)buf;
        ring[head].len = len;
        /* The read below might be reordered before the writes above. That is
         * OK because it is just an optimization: if it happens, we get the
         * cache line in a shared state, which is unfortunate, but probably
         * not worth an explicit full barrier to avoid.
         */
        barrier();
        index = ring[head].index;
        data[index].buf = buf;
        data[index].data = datap;
        /* Barrier A (for pairing) */
        smp_release();
        ring[head].flags = DESC_HW;
        return 0;
}
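
/* Typical producer loop (a sketch; next_buf() is hypothetical and buffer
 * management is entirely up to the caller):
 *
 *        while (add_inbuf(buf_len, buf, buf) == 0)
 *                buf = next_buf();
 *        kick_available();
 */
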
void *get_buf(unsigned *lenp, void **bufp)
{
        unsigned head = (ring_size - 1) & guest.last_used_idx;
        unsigned index;
        void *datap;

        if (ring[head].flags & DESC_HW)
                return NULL;
        /* Barrier B (for pairing) */
        smp_acquire();
        *lenp = ring[head].len;
        index = ring[head].index & (ring_size - 1);
        datap = data[index].data;
        *bufp = data[index].buf;
        data[index].buf = NULL;
        data[index].data = NULL;
        guest.num_free++;
        guest.last_used_idx++;
        return datap;
}
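
/* Typical completion path on the guest side (a sketch; process() is
 * hypothetical):
 *
 *        while ((datap = get_buf(&len, &buf)))
 *                process(datap, buf, len);
 */
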
bool used_empty(void)
{
        unsigned head = (ring_size - 1) & guest.last_used_idx;

        return ring[head].flags & DESC_HW;
}

void disable_call(void)
{
        /* Doing nothing to disable calls might cause
         * extra interrupts, but reduces the number of cache misses.
         */
}

bool enable_call(void)
{
        event->call_index = guest.last_used_idx;
        /* Flush out the call index write */
        /* Barrier D (for pairing) */
        smp_mb();
        return used_empty();
}
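
/* Poll-to-sleep transition on the guest side (a sketch; wait_for_call() is a
 * hypothetical blocking primitive).  Re-checking the return value of
 * enable_call() closes the race with a host that completes a buffer between
 * the last poll and the call index write:
 *
 *        while (used_empty()) {
 *                if (enable_call())
 *                        wait_for_call();
 *        }
 */
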
void kick_available(void)
{
        /* Flush out the previous flags write */
        /* Barrier C (for pairing) */
        smp_mb();
        if (!need_event(event->kick_index,
                        guest.avail_idx,
                        guest.kicked_avail_idx))
                return;

        guest.kicked_avail_idx = guest.avail_idx;
        kick();
}

/* host side */
void disable_kick(void)
{
        /* Doing nothing to disable kicks might cause
         * extra notifications, but reduces the number of cache misses.
         */
}

bool enable_kick(void)
{
        event->kick_index = host.used_idx;
        /* Barrier C (for pairing) */
        smp_mb();
        return avail_empty();
}

bool avail_empty(void)
{
        unsigned head = (ring_size - 1) & host.used_idx;

        return !(ring[head].flags & DESC_HW);
}

bool use_buf(unsigned *lenp, void **bufp)
{
        unsigned head = (ring_size - 1) & host.used_idx;

        if (!(ring[head].flags & DESC_HW))
                return false;

        /* make sure the length read below is not speculated */
        /* Barrier A (for pairing) */
        smp_acquire();

        /* simple in-order completion: we don't need
         * to touch the index at all. This also means we
         * can just modify the descriptor in place.
         */
        ring[head].len--;
        /* Fill in the outputs for API consistency; the stock harness is free
         * to ignore them.
         */
        *lenp = ring[head].len;
        *bufp = (void *)(uintptr_t)ring[head].addr;
        /* Make sure len is valid before we clear the flags.
         * Note: the alternative is to write len and flags in one access -
         * possible on 64 bit architectures, but a write barrier is free on
         * Intel anyway, so there is no easy way to test whether it is a gain.
         */
        /* Barrier B (for pairing) */
        smp_release();
        ring[head].flags = 0;
        host.used_idx++;
        return true;
}
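
/* Typical host-side loop (a sketch; how the harness drives it is assumed):
 *
 *        do {
 *                while (use_buf(&len, &buf))
 *                        ;
 *                call_used();
 *        } while (!enable_kick());
 *
 * then block until the next kick arrives.
 */
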
void call_used(void)
{
        /* Flush out the previous flags write */
        /* Barrier D (for pairing) */
        smp_mb();
        if (!need_event(event->call_index,
                        host.used_idx,
                        host.called_used_idx))
                return;

        host.called_used_idx = host.used_idx;
        call();
}
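
/* Barrier pairing summary, as labelled above:
 * A: smp_release() in add_inbuf()  <->  smp_acquire() in use_buf()
 *    (descriptor contents are complete before the guest sets DESC_HW)
 * B: smp_release() in use_buf()    <->  smp_acquire() in get_buf()
 *    (len is valid before the host clears DESC_HW)
 * C: smp_mb() in kick_available()  <->  smp_mb() in enable_kick()
 *    (order the flags write vs. the kick_index read, and the reverse)
 * D: smp_mb() in call_used()       <->  smp_mb() in enable_call()
 *    (order the flags write vs. the call_index read, and the reverse)
 */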