wordcopy.c 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416
  1. /* _memcopy.c -- subroutines for memory copy functions.
  2. Copyright (C) 1991-2019 Free Software Foundation, Inc.
  3. This file is part of the GNU C Library.
  4. Contributed by Torbjorn Granlund (tege@sics.se).
  5. The GNU C Library is free software; you can redistribute it and/or
  6. modify it under the terms of the GNU Lesser General Public
  7. License as published by the Free Software Foundation; either
  8. version 2.1 of the License, or (at your option) any later version.
  9. The GNU C Library is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. Lesser General Public License for more details.
  13. You should have received a copy of the GNU Lesser General Public
  14. License along with the GNU C Library; if not, see
  15. <http://www.gnu.org/licenses/>. */
  16. /* BE VERY CAREFUL IF YOU CHANGE THIS CODE...! */
  17. #include <stddef.h>
  18. #include <memcopy.h>
  19. /* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to
  20. block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
  21. Both SRCP and DSTP should be aligned for memory operations on `op_t's.

  DSTP and SRCP are addresses carried in `long int' and cast to
  `op_t *' at every access.  The copy runs from low to high addresses
  through an 8-way unrolled loop that is entered in the middle: the
  switch on LEN % 8 preloads the first source word, biases SRCP and
  DSTP so that the fixed indices 0..7 used in the loop body land on
  the right words, and adjusts LEN to a multiple of 8.  Inside the
  loop each step loads the next source word before storing the word
  loaded by the previous step, alternating between a0 and a1, so one
  word is always pending; the store at do0 flushes it.

  NOTE(review): the LEN == 0 tests (on entry, and after the
  LEN % 8 == 1 case has set one word aside) are only active when
  OP_T_THRES <= 3 * OPSIZ.  Otherwise the code proceeds as if LEN were
  large enough -- presumably guaranteed by the callers; confirm
  against memcopy.h, which is not visible here.  */
  22. #ifndef WORDCOPY_FWD_ALIGNED
  23. # define WORDCOPY_FWD_ALIGNED _wordcopy_fwd_aligned
  24. #endif
  25. void
  26. WORDCOPY_FWD_ALIGNED (long int dstp, long int srcp, size_t len)
  27. {
  28. op_t a0, a1; /* Two-word pipeline: one just loaded, one about to be stored. */
  29. switch (len % 8) /* Select the entry point into the unrolled loop. */
  30. {
  31. case 2:
  32. a0 = ((op_t *) srcp)[0]; /* Preload source word 0. */
  33. srcp -= 6 * OPSIZ; /* Bias: index 7 at do1 is source word 1. */
  34. dstp -= 7 * OPSIZ; /* Bias: index 7 at do1 is destination word 0. */
  35. len += 6; /* Make LEN a multiple of 8 for the loop test. */
  36. goto do1;
  37. case 3: /* Cases 3..7 follow the same preload/bias/round-up pattern. */
  38. a1 = ((op_t *) srcp)[0];
  39. srcp -= 5 * OPSIZ;
  40. dstp -= 6 * OPSIZ;
  41. len += 5;
  42. goto do2;
  43. case 4:
  44. a0 = ((op_t *) srcp)[0];
  45. srcp -= 4 * OPSIZ;
  46. dstp -= 5 * OPSIZ;
  47. len += 4;
  48. goto do3;
  49. case 5:
  50. a1 = ((op_t *) srcp)[0];
  51. srcp -= 3 * OPSIZ;
  52. dstp -= 4 * OPSIZ;
  53. len += 3;
  54. goto do4;
  55. case 6:
  56. a0 = ((op_t *) srcp)[0];
  57. srcp -= 2 * OPSIZ;
  58. dstp -= 3 * OPSIZ;
  59. len += 2;
  60. goto do5;
  61. case 7:
  62. a1 = ((op_t *) srcp)[0];
  63. srcp -= 1 * OPSIZ;
  64. dstp -= 2 * OPSIZ;
  65. len += 1;
  66. goto do6;
  67. case 0: /* LEN is already a multiple of 8, so it is left unchanged. */
  68. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  69. return; /* Nothing to copy. */
  70. a0 = ((op_t *) srcp)[0];
  71. srcp -= 0 * OPSIZ;
  72. dstp -= 1 * OPSIZ; /* Bias: index 1 at do7 is destination word 0. */
  73. goto do7;
  74. case 1:
  75. a1 = ((op_t *) srcp)[0];
  76. srcp -=-1 * OPSIZ; /* Net effect: advance SRCP by one word. */
  77. dstp -= 0 * OPSIZ;
  78. len -= 1; /* The preloaded word is accounted for here; it is stored at do8 or do0. */
  79. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  80. goto do0; /* Single-word copy: only the pending store remains. */
  81. goto do8; /* No-op. */
  82. }
  83. do
  84. {
  85. do8: /* Each doN step: load the next source word, then store the pending one. */
  86. a0 = ((op_t *) srcp)[0];
  87. ((op_t *) dstp)[0] = a1;
  88. do7:
  89. a1 = ((op_t *) srcp)[1];
  90. ((op_t *) dstp)[1] = a0;
  91. do6:
  92. a0 = ((op_t *) srcp)[2];
  93. ((op_t *) dstp)[2] = a1;
  94. do5:
  95. a1 = ((op_t *) srcp)[3];
  96. ((op_t *) dstp)[3] = a0;
  97. do4:
  98. a0 = ((op_t *) srcp)[4];
  99. ((op_t *) dstp)[4] = a1;
  100. do3:
  101. a1 = ((op_t *) srcp)[5];
  102. ((op_t *) dstp)[5] = a0;
  103. do2:
  104. a0 = ((op_t *) srcp)[6];
  105. ((op_t *) dstp)[6] = a1;
  106. do1:
  107. a1 = ((op_t *) srcp)[7];
  108. ((op_t *) dstp)[7] = a0;
  109. srcp += 8 * OPSIZ; /* Advance both pointers by the 8 words just processed. */
  110. dstp += 8 * OPSIZ;
  111. len -= 8;
  112. }
  113. while (len != 0);
  114. /* This is the right position for do0. Please don't move
  115. it into the loop. */
  116. do0:
  117. ((op_t *) dstp)[0] = a1; /* Flush the last loaded word, which is still pending in a1. */
  118. }
  119. /* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to
  120. block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
  121. DSTP should be aligned for memory operations on `op_t's, but SRCP must
  122. *not* be aligned.

  SRCP is rounded down to a word boundary and only aligned words are
  loaded; each destination word is built by MERGE from two adjacent
  aligned source words, always passed lower-addressed word first
  together with the shift counts sh_1 and sh_2.  Producing LEN
  destination words therefore loads LEN + 1 aligned source words.
  The loop is unrolled 4 ways and entered in the middle according to
  LEN % 4; each case preloads two source words, biases SRCP and DSTP
  for the fixed indices 0..3 in the loop body, and adjusts LEN to a
  multiple of 4.

  NOTE(review): MERGE, OPSIZ and OP_T_THRES come from memcopy.h, which
  is not visible here.  MERGE presumably shifts the two words by sh_1
  and sh_2 and combines them; if so, an aligned SRCP would give
  sh_2 == 8 * OPSIZ, a full-width shift, which would explain the
  "must *not* be aligned" requirement -- confirm.  The LEN == 0 tests
  are only active when OP_T_THRES <= 3 * OPSIZ.  */
  123. #ifndef WORDCOPY_FWD_DEST_ALIGNED
  124. # define WORDCOPY_FWD_DEST_ALIGNED _wordcopy_fwd_dest_aligned
  125. #endif
  126. void
  127. WORDCOPY_FWD_DEST_ALIGNED (long int dstp, long int srcp, size_t len)
  128. {
  129. op_t a0, a1, a2, a3; /* Rotating window of aligned source words. */
  130. int sh_1, sh_2;
  131. /* Calculate how to shift a word read at the memory operation
  132. aligned srcp to make it aligned for copy. */
  133. sh_1 = 8 * (srcp % OPSIZ); /* Offset of SRCP within its word, times 8 (bits, for 8-bit bytes). */
  134. sh_2 = 8 * OPSIZ - sh_1; /* Complement of sh_1 with respect to 8 * OPSIZ. */
  135. /* Make SRCP aligned by rounding it down to the beginning of the `op_t'
  136. it points in the middle of. */
  137. srcp &= -OPSIZ;
  138. switch (len % 4) /* Select the entry point into the unrolled loop. */
  139. {
  140. case 2:
  141. a1 = ((op_t *) srcp)[0]; /* Preload aligned source words 0 and 1. */
  142. a2 = ((op_t *) srcp)[1];
  143. srcp -= 1 * OPSIZ; /* Bias: index 3 at do1 is aligned source word 2. */
  144. dstp -= 3 * OPSIZ; /* Bias: index 3 at do1 is destination word 0. */
  145. len += 2; /* Make LEN a multiple of 4 for the loop test. */
  146. goto do1;
  147. case 3:
  148. a0 = ((op_t *) srcp)[0];
  149. a1 = ((op_t *) srcp)[1];
  150. srcp -= 0 * OPSIZ;
  151. dstp -= 2 * OPSIZ;
  152. len += 1;
  153. goto do2;
  154. case 0:
  155. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  156. return; /* Nothing to copy. */
  157. a3 = ((op_t *) srcp)[0];
  158. a0 = ((op_t *) srcp)[1];
  159. srcp -=-1 * OPSIZ; /* Net effect: advance SRCP by one word. */
  160. dstp -= 1 * OPSIZ;
  161. len += 0; /* LEN is already a multiple of 4. */
  162. goto do3;
  163. case 1:
  164. a2 = ((op_t *) srcp)[0];
  165. a3 = ((op_t *) srcp)[1];
  166. srcp -=-2 * OPSIZ; /* Net effect: advance SRCP by two words. */
  167. dstp -= 0 * OPSIZ;
  168. len -= 1; /* One destination word is produced by the do4/do0 store. */
  169. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  170. goto do0; /* Single-word copy: only the pending merge remains. */
  171. goto do4; /* No-op. */
  172. }
  173. do
  174. {
  175. do4: /* Each doN step: load the next aligned word, then store a merge of the two oldest. */
  176. a0 = ((op_t *) srcp)[0];
  177. ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2);
  178. do3:
  179. a1 = ((op_t *) srcp)[1];
  180. ((op_t *) dstp)[1] = MERGE (a3, sh_1, a0, sh_2);
  181. do2:
  182. a2 = ((op_t *) srcp)[2];
  183. ((op_t *) dstp)[2] = MERGE (a0, sh_1, a1, sh_2);
  184. do1:
  185. a3 = ((op_t *) srcp)[3];
  186. ((op_t *) dstp)[3] = MERGE (a1, sh_1, a2, sh_2);
  187. srcp += 4 * OPSIZ; /* Advance both pointers by the 4 words just processed. */
  188. dstp += 4 * OPSIZ;
  189. len -= 4;
  190. }
  191. while (len != 0);
  192. /* This is the right position for do0. Please don't move
  193. it into the loop. */
  194. do0:
  195. ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2); /* Final destination word, from the two words still held in a2 and a3. */
  196. }
  197. /* _wordcopy_bwd_aligned -- Copy block finishing right before
  198. SRCP to block finishing right before DSTP with LEN `op_t' words
  199. (not LEN bytes!). Both SRCP and DSTP should be aligned for memory
  200. operations on `op_t's.

  SRCP and DSTP are one-past-the-end addresses carried in `long int'.
  The copy runs from high to low addresses through an 8-way unrolled
  loop that is entered in the middle: the switch on LEN % 8 first
  moves SRCP and DSTP down so that the fixed indices 7..0 in the loop
  body land on the right words, preloads the last source word, and
  adjusts LEN to a multiple of 8.  Inside the loop each step loads
  the next lower source word before storing the word loaded by the
  previous step, alternating between a0 and a1; the store at do0
  flushes the word still pending when the loop ends.

  NOTE(review): the LEN == 0 tests are only active when
  OP_T_THRES <= 3 * OPSIZ; otherwise the code proceeds as if LEN were
  large enough -- presumably guaranteed by the callers; confirm
  against memcopy.h, which is not visible here.  */
  201. #ifndef WORDCOPY_BWD_ALIGNED
  202. # define WORDCOPY_BWD_ALIGNED _wordcopy_bwd_aligned
  203. #endif
  204. void
  205. WORDCOPY_BWD_ALIGNED (long int dstp, long int srcp, size_t len)
  206. {
  207. op_t a0, a1; /* Two-word pipeline: one just loaded, one about to be stored. */
  208. switch (len % 8) /* Select the entry point into the unrolled loop. */
  209. {
  210. case 2:
  211. srcp -= 2 * OPSIZ; /* Bias: index 1 is the last source word, index 0 the one before it. */
  212. dstp -= 1 * OPSIZ; /* Bias: index 0 at do1 is the last destination word. */
  213. a0 = ((op_t *) srcp)[1]; /* Preload the last source word. */
  214. len += 6; /* Make LEN a multiple of 8 for the loop test. */
  215. goto do1;
  216. case 3: /* Cases 3..7 follow the same bias/preload/round-up pattern. */
  217. srcp -= 3 * OPSIZ;
  218. dstp -= 2 * OPSIZ;
  219. a1 = ((op_t *) srcp)[2];
  220. len += 5;
  221. goto do2;
  222. case 4:
  223. srcp -= 4 * OPSIZ;
  224. dstp -= 3 * OPSIZ;
  225. a0 = ((op_t *) srcp)[3];
  226. len += 4;
  227. goto do3;
  228. case 5:
  229. srcp -= 5 * OPSIZ;
  230. dstp -= 4 * OPSIZ;
  231. a1 = ((op_t *) srcp)[4];
  232. len += 3;
  233. goto do4;
  234. case 6:
  235. srcp -= 6 * OPSIZ;
  236. dstp -= 5 * OPSIZ;
  237. a0 = ((op_t *) srcp)[5];
  238. len += 2;
  239. goto do5;
  240. case 7:
  241. srcp -= 7 * OPSIZ;
  242. dstp -= 6 * OPSIZ;
  243. a1 = ((op_t *) srcp)[6];
  244. len += 1;
  245. goto do6;
  246. case 0: /* LEN is already a multiple of 8, so it is left unchanged. */
  247. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  248. return; /* Nothing to copy. */
  249. srcp -= 8 * OPSIZ;
  250. dstp -= 7 * OPSIZ;
  251. a0 = ((op_t *) srcp)[7];
  252. goto do7;
  253. case 1:
  254. srcp -= 9 * OPSIZ;
  255. dstp -= 8 * OPSIZ;
  256. a1 = ((op_t *) srcp)[8]; /* Index 8 is the last source word after the 9-word bias. */
  257. len -= 1; /* The preloaded word is accounted for here; it is stored at do8 or do0. */
  258. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  259. goto do0; /* Single-word copy: only the pending store remains. */
  260. goto do8; /* No-op. */
  261. }
  262. do
  263. {
  264. do8: /* Each doN step: load the next lower source word, then store the pending one. */
  265. a0 = ((op_t *) srcp)[7];
  266. ((op_t *) dstp)[7] = a1;
  267. do7:
  268. a1 = ((op_t *) srcp)[6];
  269. ((op_t *) dstp)[6] = a0;
  270. do6:
  271. a0 = ((op_t *) srcp)[5];
  272. ((op_t *) dstp)[5] = a1;
  273. do5:
  274. a1 = ((op_t *) srcp)[4];
  275. ((op_t *) dstp)[4] = a0;
  276. do4:
  277. a0 = ((op_t *) srcp)[3];
  278. ((op_t *) dstp)[3] = a1;
  279. do3:
  280. a1 = ((op_t *) srcp)[2];
  281. ((op_t *) dstp)[2] = a0;
  282. do2:
  283. a0 = ((op_t *) srcp)[1];
  284. ((op_t *) dstp)[1] = a1;
  285. do1:
  286. a1 = ((op_t *) srcp)[0];
  287. ((op_t *) dstp)[0] = a0;
  288. srcp -= 8 * OPSIZ; /* Step both pointers down by the 8 words just processed. */
  289. dstp -= 8 * OPSIZ;
  290. len -= 8;
  291. }
  292. while (len != 0);
  293. /* This is the right position for do0. Please don't move
  294. it into the loop. */
  295. do0:
  296. ((op_t *) dstp)[7] = a1; /* Flush the last loaded word, which is still pending in a1. */
  297. }
  298. /* _wordcopy_bwd_dest_aligned -- Copy block finishing right
  299. before SRCP to block finishing right before DSTP with LEN `op_t'
  300. words (not LEN bytes!). DSTP should be aligned for memory
  301. operations on `op_t', but SRCP must *not* be aligned.

  SRCP and DSTP are one-past-the-end addresses.  SRCP is moved to the
  word boundary just above it and only aligned words are loaded, from
  high to low addresses; each destination word is built by MERGE from
  two adjacent aligned source words, always passed lower-addressed
  word first together with the shift counts sh_1 and sh_2.  Producing
  LEN destination words therefore loads LEN + 1 aligned source words.
  The loop is unrolled 4 ways and entered in the middle according to
  LEN % 4; each case biases SRCP and DSTP for the fixed indices 3..0
  in the loop body, preloads two source words, and adjusts LEN to a
  multiple of 4.

  NOTE(review): MERGE, OPSIZ and OP_T_THRES come from memcopy.h, which
  is not visible here.  MERGE presumably shifts the two words by sh_1
  and sh_2 and combines them; if so, an aligned SRCP would give
  sh_2 == 8 * OPSIZ, a full-width shift, which would explain the
  "must *not* be aligned" requirement -- confirm.  The LEN == 0 tests
  are only active when OP_T_THRES <= 3 * OPSIZ.  */
  302. #ifndef WORDCOPY_BWD_DEST_ALIGNED
  303. # define WORDCOPY_BWD_DEST_ALIGNED _wordcopy_bwd_dest_aligned
  304. #endif
  305. void
  306. WORDCOPY_BWD_DEST_ALIGNED (long int dstp, long int srcp, size_t len)
  307. {
  308. op_t a0, a1, a2, a3; /* Rotating window of aligned source words. */
  309. int sh_1, sh_2;
  310. /* Calculate how to shift a word read at the memory operation
  311. aligned srcp to make it aligned for copy. */
  312. sh_1 = 8 * (srcp % OPSIZ); /* Offset of SRCP within its word, times 8 (bits, for 8-bit bytes). */
  313. sh_2 = 8 * OPSIZ - sh_1; /* Complement of sh_1 with respect to 8 * OPSIZ. */
  314. /* Make srcp aligned by rounding it down to the beginning of the op_t
  315. it points in the middle of. */
  316. srcp &= -OPSIZ;
  317. srcp += OPSIZ; /* Then step up one word: SRCP is now the boundary just above the word it pointed into, which is the first word each case loads. */
  318. switch (len % 4) /* Select the entry point into the unrolled loop. */
  319. {
  320. case 2:
  321. srcp -= 3 * OPSIZ; /* Bias: index 2 is the word the original SRCP pointed into. */
  322. dstp -= 1 * OPSIZ; /* Bias: index 0 at do1 is the last destination word. */
  323. a2 = ((op_t *) srcp)[2]; /* Preload the two highest aligned source words. */
  324. a1 = ((op_t *) srcp)[1];
  325. len += 2; /* Make LEN a multiple of 4 for the loop test. */
  326. goto do1;
  327. case 3:
  328. srcp -= 4 * OPSIZ;
  329. dstp -= 2 * OPSIZ;
  330. a3 = ((op_t *) srcp)[3];
  331. a2 = ((op_t *) srcp)[2];
  332. len += 1;
  333. goto do2;
  334. case 0: /* LEN is already a multiple of 4, so it is left unchanged. */
  335. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  336. return; /* Nothing to copy. */
  337. srcp -= 5 * OPSIZ;
  338. dstp -= 3 * OPSIZ;
  339. a0 = ((op_t *) srcp)[4];
  340. a3 = ((op_t *) srcp)[3];
  341. goto do3;
  342. case 1:
  343. srcp -= 6 * OPSIZ;
  344. dstp -= 4 * OPSIZ;
  345. a1 = ((op_t *) srcp)[5];
  346. a0 = ((op_t *) srcp)[4];
  347. len -= 1; /* One destination word is produced by the do4/do0 store. */
  348. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  349. goto do0; /* Single-word copy: only the pending merge remains. */
  350. goto do4; /* No-op. */
  351. }
  352. do
  353. {
  354. do4: /* Each doN step: load the next lower aligned word, then store a merge of the two oldest. */
  355. a3 = ((op_t *) srcp)[3];
  356. ((op_t *) dstp)[3] = MERGE (a0, sh_1, a1, sh_2);
  357. do3:
  358. a2 = ((op_t *) srcp)[2];
  359. ((op_t *) dstp)[2] = MERGE (a3, sh_1, a0, sh_2);
  360. do2:
  361. a1 = ((op_t *) srcp)[1];
  362. ((op_t *) dstp)[1] = MERGE (a2, sh_1, a3, sh_2);
  363. do1:
  364. a0 = ((op_t *) srcp)[0];
  365. ((op_t *) dstp)[0] = MERGE (a1, sh_1, a2, sh_2);
  366. srcp -= 4 * OPSIZ; /* Step both pointers down by the 4 words just processed. */
  367. dstp -= 4 * OPSIZ;
  368. len -= 4;
  369. }
  370. while (len != 0);
  371. /* This is the right position for do0. Please don't move
  372. it into the loop. */
  373. do0:
  374. ((op_t *) dstp)[3] = MERGE (a0, sh_1, a1, sh_2); /* Final (lowest) destination word, from the two words still held in a0 and a1. */
  375. }