/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* x86 64-bit arch dependent functions. */

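/* Loads a full 64-bit immediate into a register using the MOV r64, imm64
   form (REX.W + B8+rd followed by the 8-byte immediate). Registers r8-r15
   additionally need the REX.B bit. */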
static sljit_s32 emit_load_imm64(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm)
{
    sljit_u8 *inst;

    inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_sw));
    FAIL_IF(!inst);
    INC_SIZE(2 + sizeof(sljit_sw));
    *inst++ = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B);
    *inst++ = MOV_r_i32 + (reg_map[reg] & 0x7);
    sljit_unaligned_store_sw(inst, imm);
    return SLJIT_SUCCESS;
}

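/* Emits a jump or call through TMP_REG2 when the target is out of 32-bit
   relative range: the target is loaded into TMP_REG2 (as a 32- or 64-bit
   immediate) and followed by an indirect JMP/CALL. For conditional jumps an
   inverted Jcc with an 8-bit displacement skips the whole sequence. */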
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr)
{
    sljit_s32 type = jump->flags >> TYPE_SHIFT;
    int short_addr = !(jump->flags & SLJIT_REWRITABLE_JUMP) && !(jump->flags & JUMP_LABEL) && (jump->u.target <= 0xffffffff);

    /* The relative jump below is specialized for this case. */
    SLJIT_ASSERT(reg_map[TMP_REG2] >= 8);

    if (type < SLJIT_JUMP) {
        /* Invert type. */
        *code_ptr++ = get_jump_code(type ^ 0x1) - 0x10;
        *code_ptr++ = short_addr ? (6 + 3) : (10 + 3);
    }

    *code_ptr++ = short_addr ? REX_B : (REX_W | REX_B);
    *code_ptr++ = MOV_r_i32 | reg_lmap[TMP_REG2];
    jump->addr = (sljit_uw)code_ptr;

    if (jump->flags & JUMP_LABEL)
        jump->flags |= PATCH_MD;
    else if (short_addr)
        sljit_unaligned_store_s32(code_ptr, (sljit_s32)jump->u.target);
    else
        sljit_unaligned_store_sw(code_ptr, jump->u.target);

    code_ptr += short_addr ? sizeof(sljit_s32) : sizeof(sljit_sw);

    *code_ptr++ = REX_B;
    *code_ptr++ = GROUP_FF;
    *code_ptr++ = MOD_REG | (type >= SLJIT_FAST_CALL ? CALL_rm : JMP_rm) | reg_lmap[TMP_REG2];

    return code_ptr;
}

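/* When every label address fits in 32 bits, the previously emitted 64-bit
   "mov reg/mem, imm64" sequence is rewritten in place into its shorter
   32-bit immediate form, and put_label->addr is updated to point at the
   relocated immediate. Otherwise the 64-bit form is kept and patched later. */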
static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, sljit_u8 *code_ptr, sljit_uw max_label)
{
    if (max_label > HALFWORD_MAX) {
        put_label->addr -= put_label->flags;
        put_label->flags = PATCH_MD;
        return code_ptr;
    }

    if (put_label->flags == 0) {
        /* Destination is register. */
        code_ptr = (sljit_u8*)put_label->addr - 2 - sizeof(sljit_uw);

        SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W);
        SLJIT_ASSERT((code_ptr[1] & 0xf8) == MOV_r_i32);

        if ((code_ptr[0] & 0x07) != 0) {
            code_ptr[0] = (sljit_u8)(code_ptr[0] & ~0x08);
            code_ptr += 2 + sizeof(sljit_s32);
        }
        else {
            code_ptr[0] = code_ptr[1];
            code_ptr += 1 + sizeof(sljit_s32);
        }

        put_label->addr = (sljit_uw)code_ptr;
        return code_ptr;
    }

    code_ptr -= put_label->flags + (2 + sizeof(sljit_uw));
    SLJIT_MEMMOVE(code_ptr, code_ptr + (2 + sizeof(sljit_uw)), put_label->flags);

    SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W);

    if ((code_ptr[1] & 0xf8) == MOV_r_i32) {
        code_ptr += 2 + sizeof(sljit_uw);
        SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W);
    }

    SLJIT_ASSERT(code_ptr[1] == MOV_rm_r);

    code_ptr[0] = (sljit_u8)(code_ptr[0] & ~0x4);
    code_ptr[1] = MOV_rm_i32;
    code_ptr[2] = (sljit_u8)(code_ptr[2] & ~(0x7 << 3));

    code_ptr = (sljit_u8*)(put_label->addr - (2 + sizeof(sljit_uw)) + sizeof(sljit_s32));
    put_label->addr = (sljit_uw)code_ptr;
    put_label->flags = 0;
    return code_ptr;
}

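/* Function prologue: pushes the saved registers and the callee-saved
   scratch registers, moves the incoming argument registers into
   SLJIT_S0..S2, and allocates the 16-byte aligned local area. On Windows
   the stack is touched page by page when the local area exceeds 4 KB,
   and xmm6 is spilled below the home space when required. */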
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
    sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
    sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
    sljit_s32 args, i, tmp, size, saved_register_size;
    sljit_u8 *inst;

    CHECK_ERROR();
    CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
    set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

    /* Emit ENDBR64 at function entry if needed. */
    FAIL_IF(emit_endbranch(compiler));

    compiler->mode32 = 0;

#ifdef _WIN64
    /* Two/four register slots for parameters plus space for xmm6 register if needed. */
    if (fscratches >= 6 || fsaveds >= 1)
        compiler->locals_offset = 6 * sizeof(sljit_sw);
    else
        compiler->locals_offset = ((scratches > 2) ? 4 : 2) * sizeof(sljit_sw);
#endif

    /* Including the return address saved by the call instruction. */
    saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);

    tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
    for (i = SLJIT_S0; i >= tmp; i--) {
        size = reg_map[i] >= 8 ? 2 : 1;
        inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
        FAIL_IF(!inst);
        INC_SIZE(size);
        if (reg_map[i] >= 8)
            *inst++ = REX_B;
        PUSH_REG(reg_lmap[i]);
    }

    for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
        size = reg_map[i] >= 8 ? 2 : 1;
        inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
        FAIL_IF(!inst);
        INC_SIZE(size);
        if (reg_map[i] >= 8)
            *inst++ = REX_B;
        PUSH_REG(reg_lmap[i]);
    }

    args = get_arg_count(arg_types);

    if (args > 0) {
        size = args * 3;
        inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
        FAIL_IF(!inst);

        INC_SIZE(size);

#ifndef _WIN64
        if (args > 0) {
            inst[0] = REX_W;
            inst[1] = MOV_r_rm;
            inst[2] = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x7 /* rdi */;
            inst += 3;
        }
        if (args > 1) {
            inst[0] = REX_W | REX_R;
            inst[1] = MOV_r_rm;
            inst[2] = MOD_REG | (reg_lmap[SLJIT_S1] << 3) | 0x6 /* rsi */;
            inst += 3;
        }
        if (args > 2) {
            inst[0] = REX_W | REX_R;
            inst[1] = MOV_r_rm;
            inst[2] = MOD_REG | (reg_lmap[SLJIT_S2] << 3) | 0x2 /* rdx */;
        }
#else
        if (args > 0) {
            inst[0] = REX_W;
            inst[1] = MOV_r_rm;
            inst[2] = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x1 /* rcx */;
            inst += 3;
        }
        if (args > 1) {
            inst[0] = REX_W;
            inst[1] = MOV_r_rm;
            inst[2] = MOD_REG | (reg_map[SLJIT_S1] << 3) | 0x2 /* rdx */;
            inst += 3;
        }
        if (args > 2) {
            inst[0] = REX_W | REX_B;
            inst[1] = MOV_r_rm;
            inst[2] = MOD_REG | (reg_map[SLJIT_S2] << 3) | 0x0 /* r8 */;
        }
#endif
    }

    local_size = ((local_size + SLJIT_LOCALS_OFFSET + saved_register_size + 15) & ~15) - saved_register_size;
    compiler->local_size = local_size;

#ifdef _WIN64
    if (local_size > 0) {
        if (local_size <= 4 * 4096) {
            if (local_size > 4096)
                EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096);
            if (local_size > 2 * 4096)
                EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
            if (local_size > 3 * 4096)
                EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
        }
        else {
            EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0);
            EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, (local_size - 1) >> 12);

            SLJIT_ASSERT(reg_map[SLJIT_R0] == 0);

            EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_MEM1(SLJIT_R0), -4096);
            FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
                SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4096));
            FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
                TMP_REG1, 0, TMP_REG1, 0, SLJIT_IMM, 1));

            inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
            FAIL_IF(!inst);
            INC_SIZE(2);
            inst[0] = JNE_i8;
            inst[1] = (sljit_s8) -19;
        }

        EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
    }
#endif

    if (local_size > 0) {
        FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
            SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size));
    }

#ifdef _WIN64
    /* Save xmm6 register: movaps [rsp + 0x20], xmm6 */
    if (fscratches >= 6 || fsaveds >= 1) {
        inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
        FAIL_IF(!inst);
        INC_SIZE(5);
        *inst++ = GROUP_0F;
        sljit_unaligned_store_s32(inst, 0x20247429);
    }
#endif

    return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
    sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
    sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
    sljit_s32 saved_register_size;

    CHECK_ERROR();
    CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
    set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

#ifdef _WIN64
    /* Two/four register slots for parameters plus space for xmm6 register if needed. */
    if (fscratches >= 6 || fsaveds >= 1)
        compiler->locals_offset = 6 * sizeof(sljit_sw);
    else
        compiler->locals_offset = ((scratches > 2) ? 4 : 2) * sizeof(sljit_sw);
#endif

    /* Including the return address saved by the call instruction. */
    saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
    compiler->local_size = ((local_size + SLJIT_LOCALS_OFFSET + saved_register_size + 15) & ~15) - saved_register_size;
    return SLJIT_SUCCESS;
}

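/* Function epilogue: restores xmm6 on Windows, releases the local area by
   adding local_size to rsp, pops the saved registers in reverse order of the
   prologue and returns. */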
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
    sljit_s32 i, tmp, size;
    sljit_u8 *inst;

    CHECK_ERROR();
    CHECK(check_sljit_emit_return(compiler, op, src, srcw));

    FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));

#ifdef _WIN64
    /* Restore xmm6 register: movaps xmm6, [rsp + 0x20] */
    if (compiler->fscratches >= 6 || compiler->fsaveds >= 1) {
        inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
        FAIL_IF(!inst);
        INC_SIZE(5);
        *inst++ = GROUP_0F;
        sljit_unaligned_store_s32(inst, 0x20247428);
    }
#endif

    if (compiler->local_size > 0) {
        if (compiler->local_size <= 127) {
            inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
            FAIL_IF(!inst);
            INC_SIZE(4);
            *inst++ = REX_W;
            *inst++ = GROUP_BINARY_83;
            *inst++ = MOD_REG | ADD | 4;
            *inst = compiler->local_size;
        }
        else {
            inst = (sljit_u8*)ensure_buf(compiler, 1 + 7);
            FAIL_IF(!inst);
            INC_SIZE(7);
            *inst++ = REX_W;
            *inst++ = GROUP_BINARY_81;
            *inst++ = MOD_REG | ADD | 4;
            sljit_unaligned_store_s32(inst, compiler->local_size);
        }
    }

    tmp = compiler->scratches;
    for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
        size = reg_map[i] >= 8 ? 2 : 1;
        inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
        FAIL_IF(!inst);
        INC_SIZE(size);
        if (reg_map[i] >= 8)
            *inst++ = REX_B;
        POP_REG(reg_lmap[i]);
    }

    tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
    for (i = tmp; i <= SLJIT_S0; i++) {
        size = reg_map[i] >= 8 ? 2 : 1;
        inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
        FAIL_IF(!inst);
        INC_SIZE(size);
        if (reg_map[i] >= 8)
            *inst++ = REX_B;
        POP_REG(reg_lmap[i]);
    }

    inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
    FAIL_IF(!inst);
    INC_SIZE(1);
    RET();
    return SLJIT_SUCCESS;
}

/* --------------------------------------------------------------------- */
/*  Operators                                                             */
/* --------------------------------------------------------------------- */

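/* Emits a single-byte opcode with an optional REX prefix and a trailing
   32-bit immediate operand. */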
static sljit_s32 emit_do_imm32(struct sljit_compiler *compiler, sljit_u8 rex, sljit_u8 opcode, sljit_sw imm)
{
    sljit_u8 *inst;
    sljit_s32 length = 1 + (rex ? 1 : 0) + sizeof(sljit_s32);

    inst = (sljit_u8*)ensure_buf(compiler, 1 + length);
    FAIL_IF(!inst);
    INC_SIZE(length);
    if (rex)
        *inst++ = rex;
    *inst++ = opcode;
    sljit_unaligned_store_s32(inst, imm);
    return SLJIT_SUCCESS;
}

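/* Central instruction encoder: reserves space for one instruction, then emits
   the optional 0x66/0xF2/0xF3 prefixes, the REX byte, the ModRM byte (plus SIB
   byte and displacement for memory operands) and a trailing immediate when the
   first operand is SLJIT_IMM. The opcode byte(s) are left for the caller to
   fill in through the returned pointer, except for the binary and shift groups
   where the group opcode is selected here. */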
static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 size,
    /* The register or immediate operand. */
    sljit_s32 a, sljit_sw imma,
    /* The general operand (not immediate). */
    sljit_s32 b, sljit_sw immb)
{
    sljit_u8 *inst;
    sljit_u8 *buf_ptr;
    sljit_u8 rex = 0;
    sljit_s32 flags = size & ~0xf;
    sljit_s32 inst_size;

    /* The immediate operand must be 32 bit. */
    SLJIT_ASSERT(!(a & SLJIT_IMM) || compiler->mode32 || IS_HALFWORD(imma));
    /* Both cannot be switched on. */
    SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
    /* Size flags not allowed for typed instructions. */
    SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
    /* Both size flags cannot be switched on. */
    SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
    /* SSE2 and immediate is not possible. */
    SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
    SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
        && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
        && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));

    size &= 0xf;
    inst_size = size;

    if (!compiler->mode32 && !(flags & EX86_NO_REXW))
        rex |= REX_W;
    else if (flags & EX86_REX)
        rex |= REX;

    if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
        inst_size++;
    if (flags & EX86_PREF_66)
        inst_size++;

    /* Calculate size of b. */
    inst_size += 1; /* mod r/m byte. */
    if (b & SLJIT_MEM) {
        if (!(b & OFFS_REG_MASK)) {
            if (NOT_HALFWORD(immb)) {
                PTR_FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immb));
                immb = 0;
                if (b & REG_MASK)
                    b |= TO_OFFS_REG(TMP_REG2);
                else
                    b |= TMP_REG2;
            }
            else if (reg_lmap[b & REG_MASK] == 4)
                b |= TO_OFFS_REG(SLJIT_SP);
        }

        if ((b & REG_MASK) == SLJIT_UNUSED)
            inst_size += 1 + sizeof(sljit_s32); /* SIB byte required to avoid RIP based addressing. */
        else {
            if (reg_map[b & REG_MASK] >= 8)
                rex |= REX_B;

            if (immb != 0 && (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP))) {
                /* Immediate operand. */
                if (immb <= 127 && immb >= -128)
                    inst_size += sizeof(sljit_s8);
                else
                    inst_size += sizeof(sljit_s32);
            }
            else if (reg_lmap[b & REG_MASK] == 5)
                inst_size += sizeof(sljit_s8);

            if ((b & OFFS_REG_MASK) != SLJIT_UNUSED) {
                inst_size += 1; /* SIB byte. */
                if (reg_map[OFFS_REG(b)] >= 8)
                    rex |= REX_X;
            }
        }
    }
    else if (!(flags & EX86_SSE2_OP2)) {
        if (reg_map[b] >= 8)
            rex |= REX_B;
    }
    else if (freg_map[b] >= 8)
        rex |= REX_B;

    if (a & SLJIT_IMM) {
        if (flags & EX86_BIN_INS) {
            if (imma <= 127 && imma >= -128) {
                inst_size += 1;
                flags |= EX86_BYTE_ARG;
            } else
                inst_size += 4;
        }
        else if (flags & EX86_SHIFT_INS) {
            imma &= compiler->mode32 ? 0x1f : 0x3f;
            if (imma != 1) {
                inst_size++;
                flags |= EX86_BYTE_ARG;
            }
        } else if (flags & EX86_BYTE_ARG)
            inst_size++;
        else if (flags & EX86_HALF_ARG)
            inst_size += sizeof(short);
        else
            inst_size += sizeof(sljit_s32);
    }
    else {
        SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
        /* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */
        if (!(flags & EX86_SSE2_OP1)) {
            if (reg_map[a] >= 8)
                rex |= REX_R;
        }
        else if (freg_map[a] >= 8)
            rex |= REX_R;
    }

    if (rex)
        inst_size++;

    inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
    PTR_FAIL_IF(!inst);

    /* Encoding the byte. */
    INC_SIZE(inst_size);
    if (flags & EX86_PREF_F2)
        *inst++ = 0xf2;
    if (flags & EX86_PREF_F3)
        *inst++ = 0xf3;
    if (flags & EX86_PREF_66)
        *inst++ = 0x66;
    if (rex)
        *inst++ = rex;
    buf_ptr = inst + size;

    /* Encode mod/rm byte. */
    if (!(flags & EX86_SHIFT_INS)) {
        if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
            *inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;

        if (a & SLJIT_IMM)
            *buf_ptr = 0;
        else if (!(flags & EX86_SSE2_OP1))
            *buf_ptr = reg_lmap[a] << 3;
        else
            *buf_ptr = freg_lmap[a] << 3;
    }
    else {
        if (a & SLJIT_IMM) {
            if (imma == 1)
                *inst = GROUP_SHIFT_1;
            else
                *inst = GROUP_SHIFT_N;
        } else
            *inst = GROUP_SHIFT_CL;
        *buf_ptr = 0;
    }

    if (!(b & SLJIT_MEM))
        *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_lmap[b] : freg_lmap[b]);
    else if ((b & REG_MASK) != SLJIT_UNUSED) {
        if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
            if (immb != 0 || reg_lmap[b & REG_MASK] == 5) {
                if (immb <= 127 && immb >= -128)
                    *buf_ptr |= 0x40;
                else
                    *buf_ptr |= 0x80;
            }

            if ((b & OFFS_REG_MASK) == SLJIT_UNUSED)
                *buf_ptr++ |= reg_lmap[b & REG_MASK];
            else {
                *buf_ptr++ |= 0x04;
                *buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3);
            }

            if (immb != 0 || reg_lmap[b & REG_MASK] == 5) {
                if (immb <= 127 && immb >= -128)
                    *buf_ptr++ = immb; /* 8 bit displacement. */
                else {
                    sljit_unaligned_store_s32(buf_ptr, immb); /* 32 bit displacement. */
                    buf_ptr += sizeof(sljit_s32);
                }
            }
        }
        else {
            if (reg_lmap[b & REG_MASK] == 5)
                *buf_ptr |= 0x40;
            *buf_ptr++ |= 0x04;
            *buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6);
            if (reg_lmap[b & REG_MASK] == 5)
                *buf_ptr++ = 0;
        }
    }
    else {
        *buf_ptr++ |= 0x04;
        *buf_ptr++ = 0x25;
        sljit_unaligned_store_s32(buf_ptr, immb); /* 32 bit displacement. */
        buf_ptr += sizeof(sljit_s32);
    }

    if (a & SLJIT_IMM) {
        if (flags & EX86_BYTE_ARG)
            *buf_ptr = imma;
        else if (flags & EX86_HALF_ARG)
            sljit_unaligned_store_s16(buf_ptr, imma);
        else if (!(flags & EX86_SHIFT_INS))
            sljit_unaligned_store_s32(buf_ptr, imma);
    }

    return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
}

/* --------------------------------------------------------------------- */
/*  Call / return instructions                                            */
/* --------------------------------------------------------------------- */

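/* Shuffles the word arguments from the sljit scratch registers into the
   argument registers of the native calling convention (rdi/rsi/rdx on
   System V, rcx/rdx/r8/r9 on Windows); on Windows the floating point
   arguments are also moved to the xmm register matching their slot when
   needed. If the indirect call target overlaps one of those registers,
   it is moved to a temporary register first. */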
#ifndef _WIN64

static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr, sljit_sw srcw)
{
    sljit_s32 src = src_ptr ? (*src_ptr) : 0;
    sljit_s32 word_arg_count = 0;

    SLJIT_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R3] == 1 && reg_map[TMP_REG1] == 2);

    compiler->mode32 = 0;

    /* Remove return value. */
    arg_types >>= SLJIT_DEF_SHIFT;

    while (arg_types) {
        if ((arg_types & SLJIT_DEF_MASK) < SLJIT_ARG_TYPE_F32)
            word_arg_count++;
        arg_types >>= SLJIT_DEF_SHIFT;
    }

    if (word_arg_count == 0)
        return SLJIT_SUCCESS;

    if (src & SLJIT_MEM) {
        ADJUST_LOCAL_OFFSET(src, srcw);
        EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
        *src_ptr = TMP_REG2;
    }
    else if (src == SLJIT_R2 && word_arg_count >= SLJIT_R2)
        *src_ptr = TMP_REG1;

    if (word_arg_count >= 3)
        EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R2, 0);
    return emit_mov(compiler, SLJIT_R2, 0, SLJIT_R0, 0);
}

#else

static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr, sljit_sw srcw)
{
    sljit_s32 src = src_ptr ? (*src_ptr) : 0;
    sljit_s32 arg_count = 0;
    sljit_s32 word_arg_count = 0;
    sljit_s32 float_arg_count = 0;
    sljit_s32 types = 0;
    sljit_s32 data_transfer = 0;
    static sljit_u8 word_arg_regs[5] = { 0, SLJIT_R3, SLJIT_R1, SLJIT_R2, TMP_REG1 };

    SLJIT_ASSERT(reg_map[SLJIT_R3] == 1 && reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R2] == 8 && reg_map[TMP_REG1] == 9);

    compiler->mode32 = 0;
    arg_types >>= SLJIT_DEF_SHIFT;

    while (arg_types) {
        types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK);

        switch (arg_types & SLJIT_DEF_MASK) {
        case SLJIT_ARG_TYPE_F32:
        case SLJIT_ARG_TYPE_F64:
            arg_count++;
            float_arg_count++;

            if (arg_count != float_arg_count)
                data_transfer = 1;
            break;
        default:
            arg_count++;
            word_arg_count++;

            if (arg_count != word_arg_count || arg_count != word_arg_regs[arg_count]) {
                data_transfer = 1;

                if (src == word_arg_regs[arg_count]) {
                    EMIT_MOV(compiler, TMP_REG2, 0, src, 0);
                    *src_ptr = TMP_REG2;
                }
            }
            break;
        }

        arg_types >>= SLJIT_DEF_SHIFT;
    }

    if (!data_transfer)
        return SLJIT_SUCCESS;

    if (src & SLJIT_MEM) {
        ADJUST_LOCAL_OFFSET(src, srcw);
        EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
        *src_ptr = TMP_REG2;
    }

    while (types) {
        switch (types & SLJIT_DEF_MASK) {
        case SLJIT_ARG_TYPE_F32:
            if (arg_count != float_arg_count)
                FAIL_IF(emit_sse2_load(compiler, 1, arg_count, float_arg_count, 0));
            arg_count--;
            float_arg_count--;
            break;
        case SLJIT_ARG_TYPE_F64:
            if (arg_count != float_arg_count)
                FAIL_IF(emit_sse2_load(compiler, 0, arg_count, float_arg_count, 0));
            arg_count--;
            float_arg_count--;
            break;
        default:
            if (arg_count != word_arg_count || arg_count != word_arg_regs[arg_count])
                EMIT_MOV(compiler, word_arg_regs[arg_count], 0, word_arg_count, 0);
            arg_count--;
            word_arg_count--;
            break;
        }

        types >>= SLJIT_DEF_SHIFT;
    }

    return SLJIT_SUCCESS;
}

#endif

SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
    sljit_s32 arg_types)
{
    CHECK_ERROR_PTR();
    CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

    PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL, 0));

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
        || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
    compiler->skip_checks = 1;
#endif

    return sljit_emit_jump(compiler, type);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
    sljit_s32 arg_types,
    sljit_s32 src, sljit_sw srcw)
{
    CHECK_ERROR();
    CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

    FAIL_IF(call_with_args(compiler, arg_types, &src, srcw));

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
        || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
    compiler->skip_checks = 1;
#endif

    return sljit_emit_ijump(compiler, type, src, srcw);
}

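/* Pops the return address stored by a fast call (SLJIT_FAST_CALL) into dst,
   which may be a register or a memory operand. */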
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
    sljit_u8 *inst;

    CHECK_ERROR();
    CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
    ADJUST_LOCAL_OFFSET(dst, dstw);

    /* For UNUSED dst. Uncommon, but possible. */
    if (dst == SLJIT_UNUSED)
        dst = TMP_REG1;

    if (FAST_IS_REG(dst)) {
        if (reg_map[dst] < 8) {
            inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
            FAIL_IF(!inst);
            INC_SIZE(1);
            POP_REG(reg_lmap[dst]);
            return SLJIT_SUCCESS;
        }

        inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
        FAIL_IF(!inst);
        INC_SIZE(2);
        *inst++ = REX_B;
        POP_REG(reg_lmap[dst]);
        return SLJIT_SUCCESS;
    }

    /* REX_W is not necessary (src is not immediate). */
    compiler->mode32 = 1;
    inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
    FAIL_IF(!inst);
    *inst++ = POP_rm;
    return SLJIT_SUCCESS;
}

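/* Counterpart of sljit_emit_fast_enter: pushes the saved return address back
   onto the stack (from a register or memory operand) and returns with RET. */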
static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
    sljit_u8 *inst;

    if (FAST_IS_REG(src)) {
        if (reg_map[src] < 8) {
            inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);
            FAIL_IF(!inst);

            INC_SIZE(1 + 1);
            PUSH_REG(reg_lmap[src]);
        }
        else {
            inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 1);
            FAIL_IF(!inst);

            INC_SIZE(2 + 1);
            *inst++ = REX_B;
            PUSH_REG(reg_lmap[src]);
        }
    }
    else {
        /* REX_W is not necessary (src is not immediate). */
        compiler->mode32 = 1;
        inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
        FAIL_IF(!inst);
        *inst++ = GROUP_FF;
        *inst |= PUSH_rm;

        inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
        FAIL_IF(!inst);
        INC_SIZE(1);
    }

    RET();
    return SLJIT_SUCCESS;
}

/* --------------------------------------------------------------------- */
/*  Extend input                                                          */
/* --------------------------------------------------------------------- */

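/* Moves a 32-bit value into a 64-bit destination, either sign-extending it
   with MOVSXD or zero-extending it with a plain 32-bit MOV (which clears the
   upper half). Immediates are emitted directly; 32-bit stores are used for
   memory destinations. */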
static sljit_s32 emit_mov_int(struct sljit_compiler *compiler, sljit_s32 sign,
    sljit_s32 dst, sljit_sw dstw,
    sljit_s32 src, sljit_sw srcw)
{
    sljit_u8* inst;
    sljit_s32 dst_r;

    compiler->mode32 = 0;

    if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
        return SLJIT_SUCCESS; /* Empty instruction. */

    if (src & SLJIT_IMM) {
        if (FAST_IS_REG(dst)) {
            if (sign || ((sljit_uw)srcw <= 0x7fffffff)) {
                inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw);
                FAIL_IF(!inst);
                *inst = MOV_rm_i32;
                return SLJIT_SUCCESS;
            }
            return emit_load_imm64(compiler, dst, srcw);
        }
        compiler->mode32 = 1;
        inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw);
        FAIL_IF(!inst);
        *inst = MOV_rm_i32;
        compiler->mode32 = 0;
        return SLJIT_SUCCESS;
    }

    dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

    if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
        dst_r = src;
    else {
        if (sign) {
            inst = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw);
            FAIL_IF(!inst);
            *inst++ = MOVSXD_r_rm;
        } else {
            compiler->mode32 = 1;
            FAIL_IF(emit_mov(compiler, dst_r, 0, src, srcw));
            compiler->mode32 = 0;
        }
    }

    if (dst & SLJIT_MEM) {
        compiler->mode32 = 1;
        inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
        FAIL_IF(!inst);
        *inst = MOV_rm_r;
        compiler->mode32 = 0;
    }

    return SLJIT_SUCCESS;
}

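/* With shadow stacks enabled, the shadow stack pointer has to be moved past
   the frames the generated code is about to abandon; the adjustment covers
   the local area plus every register slot popped by the epilogue. */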
static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler)
{
    sljit_s32 tmp, size;

    /* Don't adjust shadow stack if it isn't enabled. */
    if (!cpu_has_shadow_stack())
        return SLJIT_SUCCESS;

    size = compiler->local_size;
    tmp = compiler->scratches;
    if (tmp >= SLJIT_FIRST_SAVED_REG)
        size += (tmp - SLJIT_FIRST_SAVED_REG + 1) * sizeof(sljit_uw);
    tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
    if (SLJIT_S0 >= tmp)
        size += (SLJIT_S0 - tmp + 1) * sizeof(sljit_uw);

    return adjust_shadow_stack(compiler, SLJIT_UNUSED, 0, SLJIT_SP, size);
}