ruc.c
/*
 * Copyright(c) 2015 - 2017 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in
 *   the documentation and/or other materials provided with the
 *   distribution.
 * - Neither the name of Intel Corporation nor the names of its
 *   contributors may be used to endorse or promote products derived
 *   from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */
#include <linux/spinlock.h>

#include "hfi.h"
#include "mad.h"
#include "qp.h"
#include "verbs_txreq.h"
#include "trace.h"
/*
 * Convert the AETH RNR timeout code into the number of microseconds.
 */
const u32 ib_hfi1_rnr_table[32] = {
	655360,	/* 00: 655.36 */
	10,	/* 01: .01 */
	20,	/* 02: .02 */
	30,	/* 03: .03 */
	40,	/* 04: .04 */
	60,	/* 05: .06 */
	80,	/* 06: .08 */
	120,	/* 07: .12 */
	160,	/* 08: .16 */
	240,	/* 09: .24 */
	320,	/* 0A: .32 */
	480,	/* 0B: .48 */
	640,	/* 0C: .64 */
	960,	/* 0D: .96 */
	1280,	/* 0E: 1.28 */
	1920,	/* 0F: 1.92 */
	2560,	/* 10: 2.56 */
	3840,	/* 11: 3.84 */
	5120,	/* 12: 5.12 */
	7680,	/* 13: 7.68 */
	10240,	/* 14: 10.24 */
	15360,	/* 15: 15.36 */
	20480,	/* 16: 20.48 */
	30720,	/* 17: 30.72 */
	40960,	/* 18: 40.96 */
	61440,	/* 19: 61.44 */
	81920,	/* 1A: 81.92 */
	122880,	/* 1B: 122.88 */
	163840,	/* 1C: 163.84 */
	245760,	/* 1D: 245.76 */
	327680,	/* 1E: 327.68 */
	491520	/* 1F: 491.52 */
};
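
/*
 * Illustrative sketch, not part of the driver: the 5-bit AETH RNR code is
 * simply an index into the table above, so converting a received code to a
 * delay in microseconds is a masked table lookup. The rnr_code_to_usec()
 * helper below is hypothetical and only shows the arithmetic; the driver
 * itself indexes the table directly (see ruc_loopback()'s rnr_nak path).
 */
#if 0	/* example only */
static u32 rnr_code_to_usec(u8 aeth_rnr_code)
{
	/* Only the low 5 bits of the AETH field encode the timeout. */
	return ib_hfi1_rnr_table[aeth_rnr_code & 0x1f];
}
#endif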

/*
 * Validate a RWQE and fill in the SGE state.
 * Return 1 if OK.
 */
static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)
{
	int i, j, ret;
	struct ib_wc wc;
	struct rvt_lkey_table *rkt;
	struct rvt_pd *pd;
	struct rvt_sge_state *ss;

	rkt = &to_idev(qp->ibqp.device)->rdi.lkey_table;
	pd = ibpd_to_rvtpd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd);
	ss = &qp->r_sge;
	ss->sg_list = qp->r_sg_list;
	qp->r_len = 0;
	for (i = j = 0; i < wqe->num_sge; i++) {
		if (wqe->sg_list[i].length == 0)
			continue;
		/* Check LKEY */
		if (!rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
				 &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
			goto bad_lkey;
		qp->r_len += wqe->sg_list[i].length;
		j++;
	}
	ss->num_sge = j;
	ss->total_len = qp->r_len;
	ret = 1;
	goto bail;

bad_lkey:
	while (j) {
		struct rvt_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;

		rvt_put_mr(sge->mr);
	}
	ss->num_sge = 0;
	memset(&wc, 0, sizeof(wc));
	wc.wr_id = wqe->wr_id;
	wc.status = IB_WC_LOC_PROT_ERR;
	wc.opcode = IB_WC_RECV;
	wc.qp = &qp->ibqp;
	/* Signal solicited completion event. */
	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
	ret = 0;
bail:
	return ret;
}

/**
 * hfi1_rvt_get_rwqe - copy the next RWQE into the QP's RWQE
 * @qp: the QP
 * @wr_id_only: update qp->r_wr_id only, not qp->r_sge
 *
 * Return -1 if there is a local error, 0 if no RWQE is available,
 * otherwise return 1.
 *
 * Can be called from interrupt level.
 */
int hfi1_rvt_get_rwqe(struct rvt_qp *qp, int wr_id_only)
{
	unsigned long flags;
	struct rvt_rq *rq;
	struct rvt_rwq *wq;
	struct rvt_srq *srq;
	struct rvt_rwqe *wqe;
	void (*handler)(struct ib_event *, void *);
	u32 tail;
	int ret;

	if (qp->ibqp.srq) {
		srq = ibsrq_to_rvtsrq(qp->ibqp.srq);
		handler = srq->ibsrq.event_handler;
		rq = &srq->rq;
	} else {
		srq = NULL;
		handler = NULL;
		rq = &qp->r_rq;
	}

	spin_lock_irqsave(&rq->lock, flags);
	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
		ret = 0;
		goto unlock;
	}

	wq = rq->wq;
	tail = wq->tail;
	/* Validate tail before using it since it is user writable. */
	if (tail >= rq->size)
		tail = 0;
	if (unlikely(tail == wq->head)) {
		ret = 0;
		goto unlock;
	}
	/* Make sure entry is read after head index is read. */
	smp_rmb();
	wqe = rvt_get_rwqe_ptr(rq, tail);
	/*
	 * Even though we update the tail index in memory, the verbs
	 * consumer is not supposed to post more entries until a
	 * completion is generated.
	 */
	if (++tail >= rq->size)
		tail = 0;
	wq->tail = tail;
	if (!wr_id_only && !init_sge(qp, wqe)) {
		ret = -1;
		goto unlock;
	}
	qp->r_wr_id = wqe->wr_id;

	ret = 1;
	set_bit(RVT_R_WRID_VALID, &qp->r_aflags);
	if (handler) {
		u32 n;

		/*
		 * Validate head pointer value and compute
		 * the number of remaining WQEs.
		 */
		n = wq->head;
		if (n >= rq->size)
			n = 0;
		if (n < tail)
			n += rq->size - tail;
		else
			n -= tail;
		if (n < srq->limit) {
			struct ib_event ev;

			srq->limit = 0;
			spin_unlock_irqrestore(&rq->lock, flags);
			ev.device = qp->ibqp.device;
			ev.element.srq = qp->ibqp.srq;
			ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
			handler(&ev, srq->ibsrq.srq_context);
			goto bail;
		}
	}
unlock:
	spin_unlock_irqrestore(&rq->lock, flags);
bail:
	return ret;
}
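
/*
 * Illustrative sketch, not part of the driver: the SRQ limit check above
 * computes how many receive WQEs remain in the circular ring. With head and
 * tail already clamped to [0, size), the count is the usual ring-buffer
 * distance; the helper below is hypothetical and just restates that
 * computation in one expression.
 */
#if 0	/* example only */
static u32 srq_wqes_remaining(u32 head, u32 tail, u32 size)
{
	return (head >= tail) ? head - tail : head + size - tail;
}
#endif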

static __be64 get_sguid(struct hfi1_ibport *ibp, unsigned index)
{
	if (!index) {
		struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);

		return cpu_to_be64(ppd->guid);
	}
	return ibp->guids[index - 1];
}

static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
{
	return (gid->global.interface_id == id &&
		(gid->global.subnet_prefix == gid_prefix ||
		 gid->global.subnet_prefix == IB_DEFAULT_GID_PREFIX));
}

/*
 *
 * This should be called with the QP r_lock held.
 *
 * The s_lock will be acquired around the hfi1_migrate_qp() call.
 */
int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr,
		       int has_grh, struct rvt_qp *qp, u32 bth0)
{
	__be64 guid;
	unsigned long flags;
	u8 sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl];

	if (qp->s_mig_state == IB_MIG_ARMED && (bth0 & IB_BTH_MIG_REQ)) {
		if (!has_grh) {
			if (qp->alt_ah_attr.ah_flags & IB_AH_GRH)
				goto err;
		} else {
			if (!(qp->alt_ah_attr.ah_flags & IB_AH_GRH))
				goto err;
			guid = get_sguid(ibp, qp->alt_ah_attr.grh.sgid_index);
			if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix,
				    guid))
				goto err;
			if (!gid_ok(
				&hdr->u.l.grh.sgid,
				qp->alt_ah_attr.grh.dgid.global.subnet_prefix,
				qp->alt_ah_attr.grh.dgid.global.interface_id))
				goto err;
		}
		if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0,
					    sc5, be16_to_cpu(hdr->lrh[3])))) {
			hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY,
				       (u16)bth0,
				       (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
				       0, qp->ibqp.qp_num,
				       be16_to_cpu(hdr->lrh[3]),
				       be16_to_cpu(hdr->lrh[1]));
			goto err;
		}
		/* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */
		if (be16_to_cpu(hdr->lrh[3]) != qp->alt_ah_attr.dlid ||
		    ppd_from_ibp(ibp)->port != qp->alt_ah_attr.port_num)
			goto err;
		spin_lock_irqsave(&qp->s_lock, flags);
		hfi1_migrate_qp(qp);
		spin_unlock_irqrestore(&qp->s_lock, flags);
	} else {
		if (!has_grh) {
			if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
				goto err;
		} else {
			if (!(qp->remote_ah_attr.ah_flags & IB_AH_GRH))
				goto err;
			guid = get_sguid(ibp,
					 qp->remote_ah_attr.grh.sgid_index);
			if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix,
				    guid))
				goto err;
			if (!gid_ok(
			     &hdr->u.l.grh.sgid,
			     qp->remote_ah_attr.grh.dgid.global.subnet_prefix,
			     qp->remote_ah_attr.grh.dgid.global.interface_id))
				goto err;
		}
		if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0,
					    sc5, be16_to_cpu(hdr->lrh[3])))) {
			hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY,
				       (u16)bth0,
				       (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
				       0, qp->ibqp.qp_num,
				       be16_to_cpu(hdr->lrh[3]),
				       be16_to_cpu(hdr->lrh[1]));
			goto err;
		}
		/* Validate the SLID. See Ch. 9.6.1.5 */
		if (be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid ||
		    ppd_from_ibp(ibp)->port != qp->port_num)
			goto err;
		if (qp->s_mig_state == IB_MIG_REARM &&
		    !(bth0 & IB_BTH_MIG_REQ))
			qp->s_mig_state = IB_MIG_ARMED;
	}

	return 0;

err:
	return 1;
}

/**
 * ruc_loopback - handle UC and RC loopback requests
 * @sqp: the sending QP
 *
 * This is called from hfi1_do_send() to
 * forward a WQE addressed to the same HFI.
 * Note that although we are single threaded due to the send engine, we still
 * have to protect against post_send(). We don't have to worry about
 * receive interrupts since this is a connected protocol and all packets
 * will pass through here.
 */
static void ruc_loopback(struct rvt_qp *sqp)
{
	struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
	struct rvt_qp *qp;
	struct rvt_swqe *wqe;
	struct rvt_sge *sge;
	unsigned long flags;
	struct ib_wc wc;
	u64 sdata;
	atomic64_t *maddr;
	enum ib_wc_status send_status;
	int release;
	int ret;
	int copy_last = 0;
	u32 to;
	int local_ops = 0;

	rcu_read_lock();

	/*
	 * Note that we check the responder QP state after
	 * checking the requester's state.
	 */
	qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), &ibp->rvp,
			    sqp->remote_qpn);

	spin_lock_irqsave(&sqp->s_lock, flags);

	/* Return if we are already busy processing a work request. */
	if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) ||
	    !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND))
		goto unlock;

	sqp->s_flags |= RVT_S_BUSY;

again:
	smp_read_barrier_depends(); /* see post_one_send() */
	if (sqp->s_last == ACCESS_ONCE(sqp->s_head))
		goto clr_busy;
	wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);

	/* Return if it is not OK to start a new work request. */
	if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
		if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND))
			goto clr_busy;
		/* We are in the error state, flush the work request. */
		send_status = IB_WC_WR_FLUSH_ERR;
		goto flush_send;
	}

	/*
	 * We can rely on the entry not changing without the s_lock
	 * being held until we update s_last.
	 * We increment s_cur to indicate s_last is in progress.
	 */
	if (sqp->s_last == sqp->s_cur) {
		if (++sqp->s_cur >= sqp->s_size)
			sqp->s_cur = 0;
	}
	spin_unlock_irqrestore(&sqp->s_lock, flags);

	if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) ||
	    qp->ibqp.qp_type != sqp->ibqp.qp_type) {
		ibp->rvp.n_pkt_drops++;
		/*
		 * For RC, the requester would timeout and retry so
		 * shortcut the timeouts and just signal too many retries.
		 */
		if (sqp->ibqp.qp_type == IB_QPT_RC)
			send_status = IB_WC_RETRY_EXC_ERR;
		else
			send_status = IB_WC_SUCCESS;
		goto serr;
	}

	memset(&wc, 0, sizeof(wc));
	send_status = IB_WC_SUCCESS;

	release = 1;
	sqp->s_sge.sge = wqe->sg_list[0];
	sqp->s_sge.sg_list = wqe->sg_list + 1;
	sqp->s_sge.num_sge = wqe->wr.num_sge;
	sqp->s_len = wqe->length;
	switch (wqe->wr.opcode) {
	case IB_WR_REG_MR:
		goto send_comp;

	case IB_WR_LOCAL_INV:
		if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) {
			if (rvt_invalidate_rkey(sqp,
						wqe->wr.ex.invalidate_rkey))
				send_status = IB_WC_LOC_PROT_ERR;
			local_ops = 1;
		}
		goto send_comp;

	case IB_WR_SEND_WITH_INV:
		if (!rvt_invalidate_rkey(qp, wqe->wr.ex.invalidate_rkey)) {
			wc.wc_flags = IB_WC_WITH_INVALIDATE;
			wc.ex.invalidate_rkey = wqe->wr.ex.invalidate_rkey;
		}
		goto send;

	case IB_WR_SEND_WITH_IMM:
		wc.wc_flags = IB_WC_WITH_IMM;
		wc.ex.imm_data = wqe->wr.ex.imm_data;
		/* FALLTHROUGH */
	case IB_WR_SEND:
send:
		ret = hfi1_rvt_get_rwqe(qp, 0);
		if (ret < 0)
			goto op_err;
		if (!ret)
			goto rnr_nak;
		break;

	case IB_WR_RDMA_WRITE_WITH_IMM:
		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
			goto inv_err;
		wc.wc_flags = IB_WC_WITH_IMM;
		wc.ex.imm_data = wqe->wr.ex.imm_data;
		ret = hfi1_rvt_get_rwqe(qp, 1);
		if (ret < 0)
			goto op_err;
		if (!ret)
			goto rnr_nak;
		/* skip copy_last set and qp_access_flags recheck */
		goto do_write;
	case IB_WR_RDMA_WRITE:
		copy_last = ibpd_to_rvtpd(qp->ibqp.pd)->user;
		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
			goto inv_err;
do_write:
		if (wqe->length == 0)
			break;
		if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
					  wqe->rdma_wr.remote_addr,
					  wqe->rdma_wr.rkey,
					  IB_ACCESS_REMOTE_WRITE)))
			goto acc_err;
		qp->r_sge.sg_list = NULL;
		qp->r_sge.num_sge = 1;
		qp->r_sge.total_len = wqe->length;
		break;

	case IB_WR_RDMA_READ:
		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
			goto inv_err;
		if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
					  wqe->rdma_wr.remote_addr,
					  wqe->rdma_wr.rkey,
					  IB_ACCESS_REMOTE_READ)))
			goto acc_err;
		release = 0;
		sqp->s_sge.sg_list = NULL;
		sqp->s_sge.num_sge = 1;
		qp->r_sge.sge = wqe->sg_list[0];
		qp->r_sge.sg_list = wqe->sg_list + 1;
		qp->r_sge.num_sge = wqe->wr.num_sge;
		qp->r_sge.total_len = wqe->length;
		break;

	case IB_WR_ATOMIC_CMP_AND_SWP:
	case IB_WR_ATOMIC_FETCH_AND_ADD:
		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
			goto inv_err;
		if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
					  wqe->atomic_wr.remote_addr,
					  wqe->atomic_wr.rkey,
					  IB_ACCESS_REMOTE_ATOMIC)))
			goto acc_err;
		/* Perform atomic OP and save result. */
		maddr = (atomic64_t *)qp->r_sge.sge.vaddr;
		sdata = wqe->atomic_wr.compare_add;
		*(u64 *)sqp->s_sge.sge.vaddr =
			(wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
			(u64)atomic64_add_return(sdata, maddr) - sdata :
			(u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr,
				     sdata, wqe->atomic_wr.swap);
		rvt_put_mr(qp->r_sge.sge.mr);
		qp->r_sge.num_sge = 0;
		goto send_comp;

	default:
		send_status = IB_WC_LOC_QP_OP_ERR;
		goto serr;
	}

	sge = &sqp->s_sge.sge;
	while (sqp->s_len) {
		u32 len = sqp->s_len;

		if (len > sge->length)
			len = sge->length;
		if (len > sge->sge_length)
			len = sge->sge_length;
		WARN_ON_ONCE(len == 0);
		hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, release, copy_last);
		sge->vaddr += len;
		sge->length -= len;
		sge->sge_length -= len;
		if (sge->sge_length == 0) {
			if (!release)
				rvt_put_mr(sge->mr);
			if (--sqp->s_sge.num_sge)
				*sge = *sqp->s_sge.sg_list++;
		} else if (sge->length == 0 && sge->mr->lkey) {
			if (++sge->n >= RVT_SEGSZ) {
				if (++sge->m >= sge->mr->mapsz)
					break;
				sge->n = 0;
			}
			sge->vaddr =
				sge->mr->map[sge->m]->segs[sge->n].vaddr;
			sge->length =
				sge->mr->map[sge->m]->segs[sge->n].length;
		}
		sqp->s_len -= len;
	}
	if (release)
		rvt_put_ss(&qp->r_sge);

	if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
		goto send_comp;

	if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
		wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
	else
		wc.opcode = IB_WC_RECV;
	wc.wr_id = qp->r_wr_id;
	wc.status = IB_WC_SUCCESS;
	wc.byte_len = wqe->length;
	wc.qp = &qp->ibqp;
	wc.src_qp = qp->remote_qpn;
	wc.slid = qp->remote_ah_attr.dlid;
	wc.sl = qp->remote_ah_attr.sl;
	wc.port_num = 1;
	/* Signal completion event if the solicited bit is set. */
	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
		     wqe->wr.send_flags & IB_SEND_SOLICITED);

send_comp:
	spin_lock_irqsave(&sqp->s_lock, flags);
	ibp->rvp.n_loop_pkts++;
flush_send:
	sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
	hfi1_send_complete(sqp, wqe, send_status);
	if (local_ops) {
		atomic_dec(&sqp->local_ops_pending);
		local_ops = 0;
	}
	goto again;

rnr_nak:
	/* Handle RNR NAK */
	if (qp->ibqp.qp_type == IB_QPT_UC)
		goto send_comp;
	ibp->rvp.n_rnr_naks++;
	/*
	 * Note: we don't need the s_lock held since the BUSY flag
	 * makes this single threaded.
	 */
	if (sqp->s_rnr_retry == 0) {
		send_status = IB_WC_RNR_RETRY_EXC_ERR;
		goto serr;
	}
	if (sqp->s_rnr_retry_cnt < 7)
		sqp->s_rnr_retry--;
	spin_lock_irqsave(&sqp->s_lock, flags);
	if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
		goto clr_busy;
	to = ib_hfi1_rnr_table[qp->r_min_rnr_timer];
	hfi1_add_rnr_timer(sqp, to);
	goto clr_busy;

op_err:
	send_status = IB_WC_REM_OP_ERR;
	wc.status = IB_WC_LOC_QP_OP_ERR;
	goto err;

inv_err:
	send_status = IB_WC_REM_INV_REQ_ERR;
	wc.status = IB_WC_LOC_QP_OP_ERR;
	goto err;

acc_err:
	send_status = IB_WC_REM_ACCESS_ERR;
	wc.status = IB_WC_LOC_PROT_ERR;
err:
	/* responder goes to error state */
	hfi1_rc_error(qp, wc.status);

serr:
	spin_lock_irqsave(&sqp->s_lock, flags);
	hfi1_send_complete(sqp, wqe, send_status);
	if (sqp->ibqp.qp_type == IB_QPT_RC) {
		int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);

		sqp->s_flags &= ~RVT_S_BUSY;
		spin_unlock_irqrestore(&sqp->s_lock, flags);
		if (lastwqe) {
			struct ib_event ev;

			ev.device = sqp->ibqp.device;
			ev.element.qp = &sqp->ibqp;
			ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
			sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
		}
		goto done;
	}
clr_busy:
	sqp->s_flags &= ~RVT_S_BUSY;
unlock:
	spin_unlock_irqrestore(&sqp->s_lock, flags);
done:
	rcu_read_unlock();
}

/**
 * hfi1_make_grh - construct a GRH header
 * @ibp: a pointer to the IB port
 * @hdr: a pointer to the GRH header being constructed
 * @grh: the global route address to send to
 * @hwords: the number of 32 bit words of header being sent
 * @nwords: the number of 32 bit words of data being sent
 *
 * Return the size of the header in 32 bit words.
 */
u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
		  struct ib_global_route *grh, u32 hwords, u32 nwords)
{
	hdr->version_tclass_flow =
		cpu_to_be32((IB_GRH_VERSION << IB_GRH_VERSION_SHIFT) |
			    (grh->traffic_class << IB_GRH_TCLASS_SHIFT) |
			    (grh->flow_label << IB_GRH_FLOW_SHIFT));
	hdr->paylen = cpu_to_be16((hwords - 2 + nwords + SIZE_OF_CRC) << 2);
	/* next_hdr is defined by C8-7 in ch. 8.4.1 */
	hdr->next_hdr = IB_GRH_NEXT_HDR;
	hdr->hop_limit = grh->hop_limit;
	/* The SGID is 32-bit aligned. */
	hdr->sgid.global.subnet_prefix = ibp->rvp.gid_prefix;
	hdr->sgid.global.interface_id =
		grh->sgid_index && grh->sgid_index < ARRAY_SIZE(ibp->guids) ?
		ibp->guids[grh->sgid_index - 1] :
		cpu_to_be64(ppd_from_ibp(ibp)->guid);
	hdr->dgid = grh->dgid;

	/* GRH header size in 32-bit words. */
	return sizeof(struct ib_grh) / sizeof(u32);
}
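
/*
 * Illustrative sketch, not part of the driver: the GRH PayLen written above
 * counts everything after the GRH itself, in bytes. The callers in this file
 * pass @hwords before the GRH words have been added to the header count, so
 * the two 32-bit LRH words are subtracted and the payload words plus the ICRC
 * word are added before converting words to bytes. The helper name below is
 * hypothetical and only restates that arithmetic.
 */
#if 0	/* example only */
static u16 grh_paylen_bytes(u32 hdr_words_excl_grh, u32 payload_words)
{
	return (hdr_words_excl_grh - 2 /* LRH */ +
		payload_words + SIZE_OF_CRC /* ICRC */) << 2;
}
#endif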

#define BTH2_OFFSET (offsetof(struct hfi1_sdma_header, hdr.u.oth.bth[2]) / 4)

/**
 * build_ahg - create ahg in s_ahg
 * @qp: a pointer to QP
 * @npsn: the next PSN for the request/response
 *
 * This routine handles the AHG by allocating an ahg entry and causing the
 * copy of the first middle.
 *
 * Subsequent middles use the copied entry, editing the
 * PSN with 1 or 2 edits.
 */
static inline void build_ahg(struct rvt_qp *qp, u32 npsn)
{
	struct hfi1_qp_priv *priv = qp->priv;

	if (unlikely(qp->s_flags & RVT_S_AHG_CLEAR))
		clear_ahg(qp);
	if (!(qp->s_flags & RVT_S_AHG_VALID)) {
		/* first middle that needs copy */
		if (qp->s_ahgidx < 0)
			qp->s_ahgidx = sdma_ahg_alloc(priv->s_sde);
		if (qp->s_ahgidx >= 0) {
			qp->s_ahgpsn = npsn;
			priv->s_ahg->tx_flags |= SDMA_TXREQ_F_AHG_COPY;
			/* save to protect a change in another thread */
			priv->s_ahg->ahgidx = qp->s_ahgidx;
			qp->s_flags |= RVT_S_AHG_VALID;
		}
	} else {
		/* subsequent middle after valid */
		if (qp->s_ahgidx >= 0) {
			priv->s_ahg->tx_flags |= SDMA_TXREQ_F_USE_AHG;
			priv->s_ahg->ahgidx = qp->s_ahgidx;
			priv->s_ahg->ahgcount++;
			priv->s_ahg->ahgdesc[0] =
				sdma_build_ahg_descriptor(
					(__force u16)cpu_to_be16((u16)npsn),
					BTH2_OFFSET,
					16,
					16);
			if ((npsn & 0xffff0000) !=
			    (qp->s_ahgpsn & 0xffff0000)) {
				priv->s_ahg->ahgcount++;
				priv->s_ahg->ahgdesc[1] =
					sdma_build_ahg_descriptor(
						(__force u16)cpu_to_be16(
							(u16)(npsn >> 16)),
						BTH2_OFFSET,
						0,
						16);
			}
		}
	}
}
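
/*
 * Illustrative sketch, not part of the driver: the AHG descriptors built
 * above patch the 32-bit PSN in BTH2 as one or two 16-bit edits. The low
 * half always changes between middle packets; the high half is only edited
 * when the PSN crosses a 64K boundary relative to the PSN saved in s_ahgpsn,
 * which is the test restated by this hypothetical helper.
 */
#if 0	/* example only */
static int psn_needs_second_edit(u32 npsn, u32 saved_ahgpsn)
{
	return (npsn & 0xffff0000) != (saved_ahgpsn & 0xffff0000);
}
#endif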

void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
			  u32 bth0, u32 bth2, int middle,
			  struct hfi1_pkt_state *ps)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct hfi1_ibport *ibp = ps->ibp;
	u16 lrh0;
	u32 nwords;
	u32 extra_bytes;
	u32 bth1;

	/* Construct the header. */
	extra_bytes = -qp->s_cur_size & 3;
	nwords = (qp->s_cur_size + extra_bytes) >> 2;
	lrh0 = HFI1_LRH_BTH;
	if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
		qp->s_hdrwords += hfi1_make_grh(ibp,
						&ps->s_txreq->phdr.hdr.u.l.grh,
						&qp->remote_ah_attr.grh,
						qp->s_hdrwords, nwords);
		lrh0 = HFI1_LRH_GRH;
		middle = 0;
	}
	lrh0 |= (priv->s_sc & 0xf) << 12 | (qp->remote_ah_attr.sl & 0xf) << 4;
	/*
	 * reset s_ahg/AHG fields
	 *
	 * This ensures that the ahgentry/ahgcount
	 * are at a non-AHG default to protect
	 * build_verbs_tx_desc() from using
	 * a stale ahgidx.
	 *
	 * build_ahg() will modify as appropriate
	 * to use the AHG feature.
	 */
	priv->s_ahg->tx_flags = 0;
	priv->s_ahg->ahgcount = 0;
	priv->s_ahg->ahgidx = 0;
	if (qp->s_mig_state == IB_MIG_MIGRATED)
		bth0 |= IB_BTH_MIG_REQ;
	else
		middle = 0;
	if (middle)
		build_ahg(qp, bth2);
	else
		qp->s_flags &= ~RVT_S_AHG_VALID;
	ps->s_txreq->phdr.hdr.lrh[0] = cpu_to_be16(lrh0);
	ps->s_txreq->phdr.hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
	ps->s_txreq->phdr.hdr.lrh[2] =
		cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
	ps->s_txreq->phdr.hdr.lrh[3] =
		cpu_to_be16(ppd_from_ibp(ibp)->lid |
			    qp->remote_ah_attr.src_path_bits);
	bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index);
	bth0 |= extra_bytes << 20;
	ohdr->bth[0] = cpu_to_be32(bth0);
	bth1 = qp->remote_qpn;
	if (qp->s_flags & RVT_S_ECN) {
		qp->s_flags &= ~RVT_S_ECN;
		/* we recently received a FECN, so return a BECN */
		bth1 |= (HFI1_BECN_MASK << HFI1_BECN_SHIFT);
	}
	ohdr->bth[1] = cpu_to_be32(bth1);
	ohdr->bth[2] = cpu_to_be32(bth2);
}
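
/*
 * Illustrative sketch, not part of the driver: the pad computation at the top
 * of hfi1_make_ruc_header() rounds the payload up to a 32-bit boundary.
 * "-len & 3" yields the number of pad bytes (0-3) carried in the BTH PadCnt
 * field, and the padded length divided by 4 gives the payload word count used
 * for the LRH packet length. The helper below is hypothetical and only shows
 * the arithmetic.
 */
#if 0	/* example only */
static void pad_example(u32 len, u32 *pad_bytes, u32 *payload_words)
{
	*pad_bytes = -len & 3;			/* e.g. len = 13 -> 3 pad bytes */
	*payload_words = (len + *pad_bytes) >> 2;	/* 13 -> 4 words */
}
#endif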

/* when sending, force a reschedule every one of these periods */
#define SEND_RESCHED_TIMEOUT (5 * HZ)  /* 5s in jiffies */

void hfi1_do_send_from_rvt(struct rvt_qp *qp)
{
	hfi1_do_send(qp, false);
}

void _hfi1_do_send(struct work_struct *work)
{
	struct iowait *wait = container_of(work, struct iowait, iowork);
	struct rvt_qp *qp = iowait_to_qp(wait);

	hfi1_do_send(qp, true);
}

/**
 * hfi1_do_send - perform a send on a QP
 * @qp: a pointer to the QP
 * @in_thread: true if in a workqueue thread
 *
 * Process entries in the send work queue until credit or queue is
 * exhausted. Only allow one CPU to send a packet per QP.
 * Otherwise, two threads could send packets out of order.
 */
void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
{
	struct hfi1_pkt_state ps;
	struct hfi1_qp_priv *priv = qp->priv;
	int (*make_req)(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
	unsigned long timeout;
	unsigned long timeout_int;
	int cpu;

	ps.dev = to_idev(qp->ibqp.device);
	ps.ibp = to_iport(qp->ibqp.device, qp->port_num);
	ps.ppd = ppd_from_ibp(ps.ibp);

	switch (qp->ibqp.qp_type) {
	case IB_QPT_RC:
		if (!loopback &&
		    ((qp->remote_ah_attr.dlid & ~((1 << ps.ppd->lmc) - 1)) ==
		     ps.ppd->lid)) {
			ruc_loopback(qp);
			return;
		}
		make_req = hfi1_make_rc_req;
		timeout_int = (qp->timeout_jiffies);
		break;
	case IB_QPT_UC:
		if (!loopback &&
		    ((qp->remote_ah_attr.dlid & ~((1 << ps.ppd->lmc) - 1)) ==
		     ps.ppd->lid)) {
			ruc_loopback(qp);
			return;
		}
		make_req = hfi1_make_uc_req;
		timeout_int = SEND_RESCHED_TIMEOUT;
		break;
	default:
		make_req = hfi1_make_ud_req;
		timeout_int = SEND_RESCHED_TIMEOUT;
	}

	spin_lock_irqsave(&qp->s_lock, ps.flags);

	/* Return if we are already busy processing a work request. */
	if (!hfi1_send_ok(qp)) {
		spin_unlock_irqrestore(&qp->s_lock, ps.flags);
		return;
	}

	qp->s_flags |= RVT_S_BUSY;

	timeout = jiffies + (timeout_int) / 8;
	cpu = priv->s_sde ? priv->s_sde->cpu :
			    cpumask_first(cpumask_of_node(ps.ppd->dd->node));
	/* ensure a pre-built packet is handled */
	ps.s_txreq = get_waiting_verbs_txreq(qp);
	do {
		/* Check for a constructed packet to be sent. */
		if (qp->s_hdrwords != 0) {
			spin_unlock_irqrestore(&qp->s_lock, ps.flags);
			/*
			 * If the packet cannot be sent now, return and
			 * the send engine will be woken up later.
			 */
			if (hfi1_verbs_send(qp, &ps))
				return;
			/* Record that s_ahg is empty. */
			qp->s_hdrwords = 0;
			/* allow other tasks to run */
			if (unlikely(time_after(jiffies, timeout))) {
				if (!in_thread ||
				    workqueue_congested(cpu,
							ps.ppd->hfi1_wq)) {
					spin_lock_irqsave(&qp->s_lock,
							  ps.flags);
					qp->s_flags &= ~RVT_S_BUSY;
					hfi1_schedule_send(qp);
					spin_unlock_irqrestore(&qp->s_lock,
							       ps.flags);
					this_cpu_inc(
						*ps.ppd->dd->send_schedule);
					return;
				}
				cond_resched();
				this_cpu_inc(*ps.ppd->dd->send_schedule);
				timeout = jiffies + (timeout_int) / 8;
			}
			spin_lock_irqsave(&qp->s_lock, ps.flags);
		}
	} while (make_req(qp, &ps));

	spin_unlock_irqrestore(&qp->s_lock, ps.flags);
}

/*
 * This should be called with s_lock held.
 */
void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
			enum ib_wc_status status)
{
	u32 old_last, last;
	unsigned i;

	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
		return;

	last = qp->s_last;
	old_last = last;
	if (++last >= qp->s_size)
		last = 0;
	qp->s_last = last;
	/* See post_send() */
	barrier();
	for (i = 0; i < wqe->wr.num_sge; i++) {
		struct rvt_sge *sge = &wqe->sg_list[i];

		rvt_put_mr(sge->mr);
	}
	if (qp->ibqp.qp_type == IB_QPT_UD ||
	    qp->ibqp.qp_type == IB_QPT_SMI ||
	    qp->ibqp.qp_type == IB_QPT_GSI)
		atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);

	/* See ch. 11.2.4.1 and 10.7.3.1 */
	if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
	    (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
	    status != IB_WC_SUCCESS) {
		struct ib_wc wc;

		memset(&wc, 0, sizeof(wc));
		wc.wr_id = wqe->wr.wr_id;
		wc.status = status;
		wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode];
		wc.qp = &qp->ibqp;
		if (status == IB_WC_SUCCESS)
			wc.byte_len = wqe->length;
		rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc,
			     status != IB_WC_SUCCESS);
	}

	if (qp->s_acked == old_last)
		qp->s_acked = last;
	if (qp->s_cur == old_last)
		qp->s_cur = last;
	if (qp->s_tail == old_last)
		qp->s_tail = last;
	if (qp->state == IB_QPS_SQD && last == qp->s_cur)
		qp->s_draining = 0;
}