ring.c

/*
 * Copyright (C) 2016 Red Hat, Inc.
 * Author: Michael S. Tsirkin <mst@redhat.com>
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 * Simple descriptor-based ring. virtio 0.9 compatible event index is used for
 * signalling, unconditionally.
 */
#define _GNU_SOURCE
#include "main.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/* Next - Where next entry will be written.
 * Prev - "Next" value when event triggered previously.
 * Event - Peer requested event after writing this entry.
 */
static inline bool need_event(unsigned short event,
                              unsigned short next,
                              unsigned short prev)
{
        return (unsigned short)(next - event - 1) < (unsigned short)(next - prev);
}
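/* Worked example (illustrative values only): with prev = 10 and next = 13,
 * entries 10, 11 and 12 were written since the last notification. A peer that
 * set event = 11 must be signalled: (unsigned short)(13 - 11 - 1) = 1 is less
 * than (13 - 10) = 3. A peer that set event = 13 has not been reached yet:
 * 13 - 13 - 1 wraps to 65535, which is not less than 3, so no signal is sent.
 * The 16-bit wraparound keeps the comparison correct across index overflow.
 */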
/* Design:
 * Guest adds descriptors with unique index values and DESC_HW in flags.
 * Host overwrites used descriptors with correct len, index, and DESC_HW clear.
 * Flags are always set last.
 */
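/* Barrier pairing, as labelled in the functions below:
 * A - smp_release() in add_inbuf() pairs with smp_acquire() in use_buf():
 *     the guest publishes addr/len/index before setting DESC_HW.
 * B - smp_release() in use_buf() pairs with smp_acquire() in get_buf():
 *     the host publishes the used len before clearing DESC_HW.
 * C - smp_mb() in enable_kick() pairs with smp_mb() in kick_available().
 * D - smp_mb() in enable_call() pairs with smp_mb() in call_used().
 */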
#define DESC_HW 0x1

struct desc {
        unsigned short flags;
        unsigned short index;
        unsigned len;
        unsigned long long addr;
};

/* how much padding is needed to avoid false cache sharing */
#define HOST_GUEST_PADDING 0x80

/* Mostly read */
struct event {
        unsigned short kick_index;
        unsigned char reserved0[HOST_GUEST_PADDING - 2];
        unsigned short call_index;
        unsigned char reserved1[HOST_GUEST_PADDING - 2];
};

struct data {
        void *buf; /* descriptor is writeable, we can't get buf from there */
        void *data;
} *data;

struct desc *ring;
struct event *event;

struct guest {
        unsigned avail_idx;
        unsigned last_used_idx;
        unsigned num_free;
        unsigned kicked_avail_idx;
        unsigned char reserved[HOST_GUEST_PADDING - 12];
} guest;

struct host {
        /* we do not need to track last avail index
         * unless we have more than one in flight.
         */
        unsigned used_idx;
        unsigned called_used_idx;
        unsigned char reserved[HOST_GUEST_PADDING - 4];
} host;

/* implemented by ring */
void alloc_ring(void)
{
        int ret;
        int i;

        ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring);
        if (ret) {
                perror("Unable to allocate ring buffer.\n");
                exit(3);
        }
        event = malloc(sizeof *event);
        if (!event) {
                perror("Unable to allocate event buffer.\n");
                exit(3);
        }
        memset(event, 0, sizeof *event);
        guest.avail_idx = 0;
        guest.kicked_avail_idx = -1;
        guest.last_used_idx = 0;
        host.used_idx = 0;
        host.called_used_idx = -1;
        for (i = 0; i < ring_size; ++i) {
                struct desc desc = {
                        .index = i,
                };
                ring[i] = desc;
        }
        guest.num_free = ring_size;
        data = malloc(ring_size * sizeof *data);
        if (!data) {
                perror("Unable to allocate data buffer.\n");
                exit(3);
        }
        memset(data, 0, ring_size * sizeof *data);
}

/* guest side */
int add_inbuf(unsigned len, void *buf, void *datap)
{
        unsigned head, index;

        if (!guest.num_free)
                return -1;
        guest.num_free--;
        head = (ring_size - 1) & (guest.avail_idx++);
        /* Start with a write. On MESI architectures this helps
         * avoid a shared state with consumer that is polling this descriptor.
         */
        ring[head].addr = (unsigned long)(void *)buf;
        ring[head].len = len;
        /* read below might bypass write above. That is OK because it's just an
         * optimization. If this happens, we will get the cache line in a
         * shared state which is unfortunate, but probably not worth it to
         * add an explicit full barrier to avoid this.
         */
        barrier();
        index = ring[head].index;
        data[index].buf = buf;
        data[index].data = datap;
        /* Barrier A (for pairing) */
        smp_release();
        ring[head].flags = DESC_HW;
        return 0;
}

void *get_buf(unsigned *lenp, void **bufp)
{
        unsigned head = (ring_size - 1) & guest.last_used_idx;
        unsigned index;
        void *datap;

        if (ring[head].flags & DESC_HW)
                return NULL;
        /* Barrier B (for pairing) */
        smp_acquire();
        *lenp = ring[head].len;
        index = ring[head].index & (ring_size - 1);
        datap = data[index].data;
        *bufp = data[index].buf;
        data[index].buf = NULL;
        data[index].data = NULL;
        guest.num_free++;
        guest.last_used_idx++;
        return datap;
}

bool used_empty()
{
        unsigned head = (ring_size - 1) & guest.last_used_idx;

        return (ring[head].flags & DESC_HW);
}

void disable_call()
{
        /* Doing nothing to disable calls might cause
         * extra interrupts, but reduces the number of cache misses.
         */
}

bool enable_call()
{
        event->call_index = guest.last_used_idx;
        /* Flush call index write */
        /* Barrier D (for pairing) */
        smp_mb();
        return used_empty();
}

void kick_available(void)
{
        /* Flush in previous flags write */
        /* Barrier C (for pairing) */
        smp_mb();
        if (!need_event(event->kick_index,
                        guest.avail_idx,
                        guest.kicked_avail_idx))
                return;

        guest.kicked_avail_idx = guest.avail_idx;
        kick();
}
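/* Illustrative sketch of the guest-side calling sequence; it is not part of
 * the benchmark harness in main.c. It assumes a caller-supplied buffer and
 * busy-polls instead of sleeping between enable_call()/call events.
 */
static inline void example_guest_cycle(void *buf, unsigned buf_len)
{
        unsigned len;
        void *bufp;

        /* Post the buffer; add_inbuf() sets DESC_HW last. */
        if (add_inbuf(buf_len, buf, buf /* opaque cookie */) < 0)
                return; /* ring full: reclaim used entries and retry */

        /* Notify the host, but only if it asked for an event past this entry. */
        kick_available();

        /* Busy-poll until the host clears DESC_HW, then reclaim the entry. */
        while (used_empty())
                barrier();
        get_buf(&len, &bufp);
}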
/* host side */
void disable_kick()
{
        /* Doing nothing to disable kicks might cause
         * extra interrupts, but reduces the number of cache misses.
         */
}

bool enable_kick()
{
        event->kick_index = host.used_idx;
        /* Barrier C (for pairing) */
        smp_mb();
        return avail_empty();
}

bool avail_empty()
{
        unsigned head = (ring_size - 1) & host.used_idx;

        return !(ring[head].flags & DESC_HW);
}

bool use_buf(unsigned *lenp, void **bufp)
{
        unsigned head = (ring_size - 1) & host.used_idx;

        if (!(ring[head].flags & DESC_HW))
                return false;
        /* make sure length read below is not speculated */
        /* Barrier A (for pairing) */
        smp_acquire();
        /* simple in-order completion: we don't need
         * to touch index at all. This also means we
         * can just modify the descriptor in-place.
         */
        ring[head].len--;
        /* Make sure len is valid before flags.
         * Note: alternative is to write len and flags in one access -
         * possible on 64 bit architectures but wmb is free on Intel anyway
         * so I have no way to test whether it's a gain.
         */
        /* Barrier B (for pairing) */
        smp_release();
        ring[head].flags = 0;
        host.used_idx++;
        return true;
}

void call_used(void)
{
        /* Flush in previous flags write */
        /* Barrier D (for pairing) */
        smp_mb();
        if (!need_event(event->call_index,
                        host.used_idx,
                        host.called_used_idx))
                return;

        host.called_used_idx = host.used_idx;
        call();
}
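/* Illustrative sketch of the host-side calling sequence; it is not part of
 * the benchmark harness in main.c. It busy-polls for a posted descriptor,
 * completes it in place with use_buf(), then signals the guest if needed.
 */
static inline void example_host_cycle(void)
{
        unsigned len;
        void *buf;

        /* Busy-poll until the guest publishes a descriptor with DESC_HW set. */
        while (avail_empty())
                barrier();

        /* Consume it: use_buf() updates len and clears DESC_HW last. */
        if (use_buf(&len, &buf))
                call_used();
}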