sljitNativeX86_common.c

/*
 * Stack-less Just-In-Time compiler
 *
 * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this list of
 *    conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
 *    of conditions and the following disclaimer in the documentation and/or other materials
 *    provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
{
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	return "x86" SLJIT_CPUINFO " ABI:fastcall";
#else
	return "x86" SLJIT_CPUINFO;
#endif
}

/*
   32b register indexes:
     0 - EAX
     1 - ECX
     2 - EDX
     3 - EBX
     4 - ESP
     5 - EBP
     6 - ESI
     7 - EDI
*/

/*
   64b register indexes:
     0 - RAX
     1 - RCX
     2 - RDX
     3 - RBX
     4 - RSP
     5 - RBP
     6 - RSI
     7 - RDI
     8 - R8   - From now on REX prefix is required
     9 - R9
    10 - R10
    11 - R11
    12 - R12
    13 - R13
    14 - R14
    15 - R15
*/

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)

/* Last register + 1. */
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)

static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
	0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 7, 6, 3, 4, 5
};
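
/* On x86-32 only six general purpose registers are available to the compiler
   (see reg_map above: SLJIT_R3-SLJIT_S3 map to 0), so those extra registers
   are virtual and live in the stack frame. CHECK_EXTRA_REGS rewrites such an
   operand into an SLJIT_MEM1(SLJIT_SP) access at the proper frame offset. */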
#define CHECK_EXTRA_REGS(p, w, do) \
	if (p >= SLJIT_R3 && p <= SLJIT_S3) { \
		if (p <= compiler->scratches) \
			w = compiler->saveds_offset - ((p) - SLJIT_R2) * (sljit_sw)sizeof(sljit_sw); \
		else \
			w = compiler->locals_offset + ((p) - SLJIT_S2) * (sljit_sw)sizeof(sljit_sw); \
		p = SLJIT_MEM1(SLJIT_SP); \
		do; \
	}

#else /* SLJIT_CONFIG_X86_32 */

/* Last register + 1. */
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
/* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present".
   Note: avoid using r12 and r13 for memory addressing,
   therefore r12 is better used as a higher saved register. */
#ifndef _WIN64
/* Args: rdi(=7), rsi(=6), rdx(=2), rcx(=1), r8, r9. Scratches: rax(=0), r10, r11 */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
	0, 0, 6, 7, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 9
};
/* low-map. reg_map & 0x7. */
static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
	0, 0, 6, 7, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 1
};
#else
/* Args: rcx(=1), rdx(=2), r8, r9. Scratches: rax(=0), r10, r11 */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
	0, 0, 2, 8, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 9, 10
};
/* low-map. reg_map & 0x7. */
static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
	0, 0, 2, 0, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 1, 2
};
#endif

/* Args: xmm0-xmm3 */
static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
	4, 0, 1, 2, 3, 5, 6
};
/* low-map. freg_map & 0x7. */
static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
	4, 0, 1, 2, 3, 5, 6
};

#define REX_W 0x48
#define REX_R 0x44
#define REX_X 0x42
#define REX_B 0x41
#define REX 0x40
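
/* REX layout: 0100WRXB. The W bit selects a 64-bit operand size, while R, X
   and B supply the fourth bit of the ModRM reg field, the SIB index and the
   ModRM rm / SIB base respectively, so for example (REX_W | REX_B) == 0x49
   accesses r8-r15 with a 64-bit operand. */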
#ifndef _WIN64
#define HALFWORD_MAX 0x7fffffffl
#define HALFWORD_MIN -0x80000000l
#else
#define HALFWORD_MAX 0x7fffffffll
#define HALFWORD_MIN -0x80000000ll
#endif

#define IS_HALFWORD(x) ((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
#define NOT_HALFWORD(x) ((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)
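
/* x86-64 immediates and displacements are sign-extended 32-bit values, so
   IS_HALFWORD() decides whether a sljit_sw constant can be encoded directly
   or has to be materialized in a register first (emit_load_imm64). */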
#define CHECK_EXTRA_REGS(p, w, do)

#endif /* SLJIT_CONFIG_X86_32 */

#define TMP_FREG (0)

/* Size flags for emit_x86_instruction: */
#define EX86_BIN_INS 0x0010
#define EX86_SHIFT_INS 0x0020
#define EX86_REX 0x0040
#define EX86_NO_REXW 0x0080
#define EX86_BYTE_ARG 0x0100
#define EX86_HALF_ARG 0x0200
#define EX86_PREF_66 0x0400
#define EX86_PREF_F2 0x0800
#define EX86_PREF_F3 0x1000
#define EX86_SSE2_OP1 0x2000
#define EX86_SSE2_OP2 0x4000
#define EX86_SSE2 (EX86_SSE2_OP1 | EX86_SSE2_OP2)
/* --------------------------------------------------------------------- */
/*  Instruction forms                                                     */
/* --------------------------------------------------------------------- */
#define ADD (/* BINARY */ 0 << 3)
#define ADD_EAX_i32 0x05
#define ADD_r_rm 0x03
#define ADD_rm_r 0x01
#define ADDSD_x_xm 0x58
#define ADC (/* BINARY */ 2 << 3)
#define ADC_EAX_i32 0x15
#define ADC_r_rm 0x13
#define ADC_rm_r 0x11
#define AND (/* BINARY */ 4 << 3)
#define AND_EAX_i32 0x25
#define AND_r_rm 0x23
#define AND_rm_r 0x21
#define ANDPD_x_xm 0x54
#define BSR_r_rm (/* GROUP_0F */ 0xbd)
#define CALL_i32 0xe8
#define CALL_rm (/* GROUP_FF */ 2 << 3)
#define CDQ 0x99
#define CMOVE_r_rm (/* GROUP_0F */ 0x44)
#define CMP (/* BINARY */ 7 << 3)
#define CMP_EAX_i32 0x3d
#define CMP_r_rm 0x3b
#define CMP_rm_r 0x39
#define CVTPD2PS_x_xm 0x5a
#define CVTSI2SD_x_rm 0x2a
#define CVTTSD2SI_r_xm 0x2c
#define DIV (/* GROUP_F7 */ 6 << 3)
#define DIVSD_x_xm 0x5e
#define FSTPS 0xd9
#define FSTPD 0xdd
#define INT3 0xcc
#define IDIV (/* GROUP_F7 */ 7 << 3)
#define IMUL (/* GROUP_F7 */ 5 << 3)
#define IMUL_r_rm (/* GROUP_0F */ 0xaf)
#define IMUL_r_rm_i8 0x6b
#define IMUL_r_rm_i32 0x69
#define JE_i8 0x74
#define JNE_i8 0x75
#define JMP_i8 0xeb
#define JMP_i32 0xe9
#define JMP_rm (/* GROUP_FF */ 4 << 3)
#define LEA_r_m 0x8d
#define MOV_r_rm 0x8b
#define MOV_r_i32 0xb8
#define MOV_rm_r 0x89
#define MOV_rm_i32 0xc7
#define MOV_rm8_i8 0xc6
#define MOV_rm8_r8 0x88
#define MOVSD_x_xm 0x10
#define MOVSD_xm_x 0x11
#define MOVSXD_r_rm 0x63
#define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe)
#define MOVSX_r_rm16 (/* GROUP_0F */ 0xbf)
#define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6)
#define MOVZX_r_rm16 (/* GROUP_0F */ 0xb7)
#define MUL (/* GROUP_F7 */ 4 << 3)
#define MULSD_x_xm 0x59
#define NEG_rm (/* GROUP_F7 */ 3 << 3)
#define NOP 0x90
#define NOT_rm (/* GROUP_F7 */ 2 << 3)
#define OR (/* BINARY */ 1 << 3)
#define OR_r_rm 0x0b
#define OR_EAX_i32 0x0d
#define OR_rm_r 0x09
#define OR_rm8_r8 0x08
#define POP_r 0x58
#define POP_rm 0x8f
#define POPF 0x9d
#define PREFETCH 0x18
#define PUSH_i32 0x68
#define PUSH_r 0x50
#define PUSH_rm (/* GROUP_FF */ 6 << 3)
#define PUSHF 0x9c
#define RET_near 0xc3
#define RET_i16 0xc2
#define SBB (/* BINARY */ 3 << 3)
#define SBB_EAX_i32 0x1d
#define SBB_r_rm 0x1b
#define SBB_rm_r 0x19
#define SAR (/* SHIFT */ 7 << 3)
#define SHL (/* SHIFT */ 4 << 3)
#define SHR (/* SHIFT */ 5 << 3)
#define SUB (/* BINARY */ 5 << 3)
#define SUB_EAX_i32 0x2d
#define SUB_r_rm 0x2b
#define SUB_rm_r 0x29
#define SUBSD_x_xm 0x5c
#define TEST_EAX_i32 0xa9
#define TEST_rm_r 0x85
#define UCOMISD_x_xm 0x2e
#define UNPCKLPD_x_xm 0x14
#define XCHG_EAX_r 0x90
#define XCHG_r_rm 0x87
#define XOR (/* BINARY */ 6 << 3)
#define XOR_EAX_i32 0x35
#define XOR_r_rm 0x33
#define XOR_rm_r 0x31
#define XORPD_x_xm 0x57

#define GROUP_0F 0x0f
#define GROUP_F7 0xf7
#define GROUP_FF 0xff
#define GROUP_BINARY_81 0x81
#define GROUP_BINARY_83 0x83
#define GROUP_SHIFT_1 0xd1
#define GROUP_SHIFT_N 0xc1
#define GROUP_SHIFT_CL 0xd3

#define MOD_REG 0xc0
#define MOD_DISP8 0x40

#define INC_SIZE(s) (*inst++ = (s), compiler->size += (s))

#define PUSH_REG(r) (*inst++ = (PUSH_r + (r)))
#define POP_REG(r) (*inst++ = (POP_r + (r)))
#define RET() (*inst++ = (RET_near))
#define RET_I16(n) (*inst++ = (RET_i16), *inst++ = n, *inst++ = 0)
/* r32, r/m32 */
#define MOV_RM(mod, reg, rm) (*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm))
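
/* The second byte emitted by MOV_RM is a standard ModRM byte: the top two
   bits hold the mod field (3 = register direct, 1 = 8-bit displacement
   follows), then the reg and rm fields. For example MOV_RM(0x3, 0, 1)
   emits 0x8b 0xc1, i.e. "mov eax, ecx". */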
/* Multithreading does not affect these static variables, since they store
   built-in CPU features. Therefore they can safely be overwritten by
   different threads if they detect the CPU features at the same time. */
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
static sljit_s32 cpu_has_sse2 = -1;
#endif
static sljit_s32 cpu_has_cmov = -1;

#ifdef _WIN32_WCE
#include <cmnintrin.h>
#elif defined(_MSC_VER) && _MSC_VER >= 1400
#include <intrin.h>
#endif

/******************************************************/
/*    Unaligned-store functions                       */
/******************************************************/

static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value)
{
	SLJIT_MEMCPY(addr, &value, sizeof(value));
}

static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value)
{
	SLJIT_MEMCPY(addr, &value, sizeof(value));
}

static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value)
{
	SLJIT_MEMCPY(addr, &value, sizeof(value));
}

/******************************************************/
/*    Utility functions                               */
/******************************************************/

static void get_cpu_features(void)
{
	sljit_u32 features;

#if defined(_MSC_VER) && _MSC_VER >= 1400

	int CPUInfo[4];
	__cpuid(CPUInfo, 1);
	features = (sljit_u32)CPUInfo[3];

#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)

	/* AT&T syntax. */
	__asm__ (
		"movl $0x1, %%eax\n"
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* On x86-32, there is no red zone, so this
		   should work (no need for a local variable). */
		"push %%ebx\n"
#endif
		"cpuid\n"
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		"pop %%ebx\n"
#endif
		"movl %%edx, %0\n"
		: "=g" (features)
		:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		: "%eax", "%ecx", "%edx"
#else
		: "%rax", "%rbx", "%rcx", "%rdx"
#endif
	);

#else /* _MSC_VER && _MSC_VER >= 1400 */

	/* Intel syntax. */
	__asm {
		mov eax, 1
		cpuid
		mov features, edx
	}

#endif /* _MSC_VER && _MSC_VER >= 1400 */

#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
	cpu_has_sse2 = (features >> 26) & 0x1;
#endif
	cpu_has_cmov = (features >> 15) & 0x1;
}
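
/* The bit tests above read the EDX feature flags of CPUID leaf 1: bit 26 is
   the SSE2 flag and bit 15 is the CMOV flag. */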
static sljit_u8 get_jump_code(sljit_s32 type)
{
	switch (type) {
	case SLJIT_EQUAL:
	case SLJIT_EQUAL_F64:
		return 0x84 /* je */;
	case SLJIT_NOT_EQUAL:
	case SLJIT_NOT_EQUAL_F64:
		return 0x85 /* jne */;
	case SLJIT_LESS:
	case SLJIT_LESS_F64:
		return 0x82 /* jc */;
	case SLJIT_GREATER_EQUAL:
	case SLJIT_GREATER_EQUAL_F64:
		return 0x83 /* jae */;
	case SLJIT_GREATER:
	case SLJIT_GREATER_F64:
		return 0x87 /* jnbe */;
	case SLJIT_LESS_EQUAL:
	case SLJIT_LESS_EQUAL_F64:
		return 0x86 /* jbe */;
	case SLJIT_SIG_LESS:
		return 0x8c /* jl */;
	case SLJIT_SIG_GREATER_EQUAL:
		return 0x8d /* jnl */;
	case SLJIT_SIG_GREATER:
		return 0x8f /* jnle */;
	case SLJIT_SIG_LESS_EQUAL:
		return 0x8e /* jle */;
	case SLJIT_OVERFLOW:
	case SLJIT_MUL_OVERFLOW:
		return 0x80 /* jo */;
	case SLJIT_NOT_OVERFLOW:
	case SLJIT_MUL_NOT_OVERFLOW:
		return 0x81 /* jno */;
	case SLJIT_UNORDERED_F64:
		return 0x8a /* jp */;
	case SLJIT_ORDERED_F64:
		return 0x8b /* jpo */;
	}
	return 0;
}
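
/* The values returned above are the second byte of the two-byte (0F 8x) Jcc
   forms with a 32-bit displacement; subtracting 0x10 yields the corresponding
   short (8-bit displacement) 7x opcodes, which is what
   generate_near_jump_code() relies on. */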
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type, sljit_sw executable_offset);
#else
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type);
#endif

static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_s32 type, sljit_sw executable_offset)
{
	sljit_s32 short_jump;
	sljit_uw label_addr;

	if (jump->flags & JUMP_LABEL)
		label_addr = (sljit_uw)(code + jump->u.label->size);
	else
		label_addr = jump->u.target - executable_offset;

	short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
		return generate_far_jump_code(jump, code_ptr, type);
#endif

	if (type == SLJIT_JUMP) {
		if (short_jump)
			*code_ptr++ = JMP_i8;
		else
			*code_ptr++ = JMP_i32;
		jump->addr++;
	}
	else if (type >= SLJIT_FAST_CALL) {
		short_jump = 0;
		*code_ptr++ = CALL_i32;
		jump->addr++;
	}
	else if (short_jump) {
		*code_ptr++ = get_jump_code(type) - 0x10;
		jump->addr++;
	}
	else {
		*code_ptr++ = GROUP_0F;
		*code_ptr++ = get_jump_code(type);
		jump->addr += 2;
	}

	if (short_jump) {
		jump->flags |= PATCH_MB;
		code_ptr += sizeof(sljit_s8);
	} else {
		jump->flags |= PATCH_MW;
		code_ptr += sizeof(sljit_s32);
	}

	return code_ptr;
}
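
/* Only the opcode bytes are written above: the displacement field is merely
   reserved (code_ptr advances by one or four bytes) and the jump is marked
   PATCH_MB or PATCH_MW. The actual offsets are filled in by the patching
   loop at the end of sljit_generate_code() below. */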
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_memory_fragment *buf;
	sljit_u8 *code;
	sljit_u8 *code_ptr;
	sljit_u8 *buf_ptr;
	sljit_u8 *buf_end;
	sljit_u8 len;
	sljit_sw executable_offset;
	sljit_sw jump_addr;

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler));
	reverse_buf(compiler);

	/* Second code generation pass. */
	code = (sljit_u8*)SLJIT_MALLOC_EXEC(compiler->size);
	PTR_FAIL_WITH_EXEC_IF(code);
	buf = compiler->buf;

	code_ptr = code;
	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	executable_offset = SLJIT_EXEC_OFFSET(code);

	do {
		buf_ptr = buf->memory;
		buf_end = buf_ptr + buf->used_size;
		do {
			len = *buf_ptr++;
			if (len > 0) {
				/* The code is already generated. */
				SLJIT_MEMCPY(code_ptr, buf_ptr, len);
				code_ptr += len;
				buf_ptr += len;
			}
			else {
				if (*buf_ptr >= 2) {
					jump->addr = (sljit_uw)code_ptr;
					if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
						code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 2, executable_offset);
					else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
						code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 2, executable_offset);
#else
						code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 2);
#endif
					}
					jump = jump->next;
				}
				else if (*buf_ptr == 0) {
					label->addr = ((sljit_uw)code_ptr) + executable_offset;
					label->size = code_ptr - code;
					label = label->next;
				}
				else { /* *buf_ptr is 1 */
					const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
					const_ = const_->next;
				}
				buf_ptr++;
			}
		} while (buf_ptr < buf_end);
		SLJIT_ASSERT(buf_ptr == buf_end);
		buf = buf->next;
	} while (buf);

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);

	jump = compiler->jumps;
	while (jump) {
		jump_addr = jump->addr + executable_offset;

		if (jump->flags & PATCH_MB) {
			SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) <= 127);
			*(sljit_u8*)jump->addr = (sljit_u8)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8)));
		} else if (jump->flags & PATCH_MW) {
			if (jump->flags & JUMP_LABEL) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_sw))));
#else
				SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
				sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))));
#endif
			}
			else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_sw))));
#else
				SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
				sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.target - (jump_addr + sizeof(sljit_s32))));
#endif
			}
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		else if (jump->flags & PATCH_MD)
			sljit_unaligned_store_sw((void*)jump->addr, jump->u.label->addr);
#endif

		jump = jump->next;
	}

	/* Some space may be wasted because of short jumps. */
	SLJIT_ASSERT(code_ptr <= code + compiler->size);
	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_offset = executable_offset;
	compiler->executable_size = code_ptr - code;
	return (void*)(code + executable_offset);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
{
	switch (feature_type) {
	case SLJIT_HAS_FPU:
#ifdef SLJIT_IS_FPU_AVAILABLE
		return SLJIT_IS_FPU_AVAILABLE;
#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
		if (cpu_has_sse2 == -1)
			get_cpu_features();
		return cpu_has_sse2;
#else /* SLJIT_DETECT_SSE2 */
		return 1;
#endif /* SLJIT_DETECT_SSE2 */

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	case SLJIT_HAS_VIRTUAL_REGISTERS:
		return 1;
#endif

	case SLJIT_HAS_CLZ:
	case SLJIT_HAS_CMOV:
		if (cpu_has_cmov == -1)
			get_cpu_features();
		return cpu_has_cmov;

	case SLJIT_HAS_SSE2:
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
		if (cpu_has_sse2 == -1)
			get_cpu_features();
		return cpu_has_sse2;
#else
		return 1;
#endif

	default:
		return 0;
	}
}

/* --------------------------------------------------------------------- */
/*  Operators                                                            */
/* --------------------------------------------------------------------- */

#define BINARY_OPCODE(opcode) (((opcode ## _EAX_i32) << 24) | ((opcode ## _r_rm) << 16) | ((opcode ## _rm_r) << 8) | (opcode))
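
/* BINARY_OPCODE packs the four encodings of a binary ALU operation into one
   sljit_u32: the "op eax, imm32" short form in the top byte, then the
   "op r, r/m" and "op r/m, r" opcodes, and the /digit extension used with the
   GROUP_BINARY_81/83 immediate group in the low byte. For example
   BINARY_OPCODE(ADD) is 0x05030100; emit_cum_binary() and
   emit_non_cum_binary() unpack the fields below. */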
static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
	sljit_u32 op_types,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w);

static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_u32 op_types,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w);

static sljit_s32 emit_mov(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw);

#define EMIT_MOV(compiler, dst, dstw, src, srcw) \
	FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));

static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
	sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src);

static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
	sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw);

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#include "sljitNativeX86_32.c"
#else
#include "sljitNativeX86_64.c"
#endif

static sljit_s32 emit_mov(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;

	SLJIT_ASSERT(dst != SLJIT_UNUSED);

	if (FAST_IS_REG(src)) {
		inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
		return SLJIT_SUCCESS;
	}
	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			if (!compiler->mode32) {
				if (NOT_HALFWORD(srcw))
					return emit_load_imm64(compiler, dst, srcw);
			}
			else
				return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw);
#endif
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
			/* Immediate to memory move. Only SLJIT_MOV operation copies
			   an immediate directly into memory so TMP_REG1 can be used. */
			FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm_r;
			return SLJIT_SUCCESS;
		}
#endif
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
		FAIL_IF(!inst);
		*inst = MOV_r_rm;
		return SLJIT_SUCCESS;
	}

	/* Memory to memory move. Only SLJIT_MOV operation copies
	   data from memory to memory so TMP_REG1 can be used. */
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
	FAIL_IF(!inst);
	*inst = MOV_r_rm;
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst = MOV_rm_r;
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
	sljit_u8 *inst;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_s32 size;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	switch (GET_OPCODE(op)) {
	case SLJIT_BREAKPOINT:
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		*inst = INT3;
		break;
	case SLJIT_NOP:
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		*inst = NOP;
		break;
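	/* LMUL/DIVMOD/DIV map directly to the one-operand mul/imul/div/idiv
	   instructions: the other operand is implicitly EDX:EAX (RDX:RAX in
	   64-bit mode), the low product or quotient ends up in EAX and the high
	   product or remainder in EDX. Unsigned division needs EDX cleared first
	   and signed division needs it sign-extended with cdq/cqo, which is what
	   the code below emits before the GROUP_F7 instruction. */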
	case SLJIT_LMUL_UW:
	case SLJIT_LMUL_SW:
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
	case SLJIT_DIV_UW:
	case SLJIT_DIV_SW:
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#ifdef _WIN64
		SLJIT_ASSERT(
			reg_map[SLJIT_R0] == 0
			&& reg_map[SLJIT_R1] == 2
			&& reg_map[TMP_REG1] > 7);
#else
		SLJIT_ASSERT(
			reg_map[SLJIT_R0] == 0
			&& reg_map[SLJIT_R1] < 7
			&& reg_map[TMP_REG1] == 2);
#endif
		compiler->mode32 = op & SLJIT_I32_OP;
#endif
		SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);

		op = GET_OPCODE(op);
		if ((op | 0x2) == SLJIT_DIV_UW) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
			inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
#else
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
#endif
			FAIL_IF(!inst);
			*inst = XOR_r_rm;
		}

		if ((op | 0x2) == SLJIT_DIV_SW) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
#endif

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = CDQ;
#else
			if (compiler->mode32) {
				inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
				FAIL_IF(!inst);
				INC_SIZE(1);
				*inst = CDQ;
			} else {
				inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
				FAIL_IF(!inst);
				INC_SIZE(2);
				*inst++ = REX_W;
				*inst = CDQ;
			}
#endif
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!inst);
		INC_SIZE(2);
		*inst++ = GROUP_F7;
		*inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
#else
#ifdef _WIN64
		size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2;
#else
		size = (!compiler->mode32) ? 3 : 2;
#endif
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
#ifdef _WIN64
		if (!compiler->mode32)
			*inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? REX_B : 0);
		else if (op >= SLJIT_DIVMOD_UW)
			*inst++ = REX_B;
		*inst++ = GROUP_F7;
		*inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
#else
		if (!compiler->mode32)
			*inst++ = REX_W;
		*inst++ = GROUP_F7;
		*inst = MOD_REG | reg_map[SLJIT_R1];
#endif
#endif
		switch (op) {
		case SLJIT_LMUL_UW:
			*inst |= MUL;
			break;
		case SLJIT_LMUL_SW:
			*inst |= IMUL;
			break;
		case SLJIT_DIVMOD_UW:
		case SLJIT_DIV_UW:
			*inst |= DIV;
			break;
		case SLJIT_DIVMOD_SW:
		case SLJIT_DIV_SW:
			*inst |= IDIV;
			break;
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
		if (op <= SLJIT_DIVMOD_SW)
			EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
#else
		if (op >= SLJIT_DIV_UW)
			EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
#endif
		break;
	}

	return SLJIT_SUCCESS;
}

#define ENCODE_PREFIX(prefix) \
	do { \
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); \
		FAIL_IF(!inst); \
		INC_SIZE(1); \
		*inst = (prefix); \
	} while (0)

static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 dst_r;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_s32 work_r;
#endif

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
#endif
		}
		inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm8_i8;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
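	/* Without a REX prefix only AL, CL, DL and BL (register codes 0-3) are
	   byte addressable, so on x86-32 a register whose reg_map value is 4 or
	   above has to be shuffled through TMP_REG1 or a temporarily borrowed
	   work register before MOV_rm8_r8 can be used. */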
	if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (reg_map[src] >= 4) {
			SLJIT_ASSERT(dst_r == TMP_REG1);
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
		} else
			dst_r = src;
#else
		dst_r = src;
#endif
	}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
		/* src, dst are registers. */
		SLJIT_ASSERT(SLOW_IS_REG(dst));
		if (reg_map[dst] < 4) {
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
		}
		else {
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			if (sign) {
				/* shl reg, 24 */
				inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!inst);
				*inst |= SHL;
				/* sar reg, 24 */
				inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!inst);
				*inst |= SAR;
			}
			else {
				inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
				FAIL_IF(!inst);
				*(inst + 1) |= AND;
			}
		}
		return SLJIT_SUCCESS;
	}
#endif
	else {
		/* src is either a memory operand or, on x86-32, a register with reg_map[src] < 4. */
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
	}

	if (dst & SLJIT_MEM) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (dst_r == TMP_REG1) {
			/* Find an unused register whose reg_map value is < 4. */
			if ((dst & REG_MASK) == SLJIT_R0) {
				if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1))
					work_r = SLJIT_R2;
				else
					work_r = SLJIT_R1;
			}
			else {
				if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
					work_r = SLJIT_R0;
				else if ((dst & REG_MASK) == SLJIT_R1)
					work_r = SLJIT_R2;
				else
					work_r = SLJIT_R1;
			}

			if (work_r == SLJIT_R0) {
				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
			}
			else {
				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!inst);
				*inst = XCHG_r_rm;
			}

			inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm8_r8;

			if (work_r == SLJIT_R0) {
				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
			}
			else {
				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!inst);
				*inst = XCHG_r_rm;
			}
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm8_r8;
		}
#else
		inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm8_r8;
#endif
	}

	return SLJIT_SUCCESS;
}
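
/* The prefetch below is encoded as 0F 18 /hint, where the ModRM reg field
   selects the hint: /1 is prefetcht0, /2 is prefetcht1 and /3 is prefetcht2
   (/0 would be prefetchnta). Byte sized accesses therefore get the t2 hint,
   16-bit accesses t1 and everything else t0. */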
static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	inst = emit_x86_instruction(compiler, 2, 0, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst++ = PREFETCH;

	if (op >= SLJIT_MOV_U8 && op <= SLJIT_MOV_S8)
		*inst |= (3 << 3);
	else if (op >= SLJIT_MOV_U16 && op <= SLJIT_MOV_S16)
		*inst |= (2 << 3);
	else
		*inst |= (1 << 3);

	return SLJIT_SUCCESS;
}

static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 dst_r;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
#endif
		}
		inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
		dst_r = src;
	else {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
	}

	if (dst & SLJIT_MEM) {
		inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
	}

	return SLJIT_SUCCESS;
}

static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;

	if (dst == src && dstw == srcw) {
		/* Same input and output */
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED))
		dst = TMP_REG1;

	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}

	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst++ = GROUP_F7;
	*inst |= opcode;
	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}
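
/* The x86 "not" instruction does not modify the flags, so when SLJIT_SET_Z is
   requested for SLJIT_NOT the variant below follows the inversion with
   "or reg, reg", whose only purpose is to set the zero flag from the result. */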
static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;

	if (dst == SLJIT_UNUSED)
		dst = TMP_REG1;

	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
		inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
		FAIL_IF(!inst);
		*inst = OR_r_rm;
		return SLJIT_SUCCESS;
	}

	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst++ = GROUP_F7;
	*inst |= NOT_rm;
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst = OR_r_rm;
	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
static const sljit_sw emit_clz_arg = 32 + 31;
#endif
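
/* CLZ is built from BSR: bsr returns the index of the highest set bit, so
   clz(x) == bsr(x) ^ 31 (or ^ 63 in 64-bit mode). When the source is zero the
   bsr result is undefined and ZF is set, so a cmove (or the generic cmov
   emulation) first loads 32 + 31 (resp. 64 + 63), which the final xor turns
   into 32 (resp. 64). */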
static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 dst_r;

	SLJIT_UNUSED_ARG(op_flags);

	if (cpu_has_cmov == -1)
		get_cpu_features();

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = BSR_r_rm;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (cpu_has_cmov) {
		if (dst_r != TMP_REG1) {
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 32 + 31);
			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
		}
		else
			inst = emit_x86_instruction(compiler, 2, dst_r, 0, SLJIT_MEM0(), (sljit_sw)&emit_clz_arg);

		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = CMOVE_r_rm;
	}
	else
		FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, 32 + 31));

	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
#else
	if (cpu_has_cmov) {
		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? (64 + 63) : (32 + 31));
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = CMOVE_r_rm;
	}
	else
		FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? (64 + 63) : (32 + 31)));

	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, dst_r, 0);
#endif

	FAIL_IF(!inst);
	*(inst + 1) |= XOR;

	if (dst & SLJIT_MEM)
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 op_flags = GET_ALL_FLAGS(op);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_s32 dst_is_ereg = 0;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
	CHECK_EXTRA_REGS(src, srcw, (void)0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op_flags & SLJIT_I32_OP;
#endif

	if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) {
		if (op <= SLJIT_MOV_P && (src & SLJIT_MEM))
			return emit_prefetch(compiler, op, src, srcw);
		return SLJIT_SUCCESS;
	}

	op = GET_OPCODE(op);

	if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		compiler->mode32 = 0;
#endif

		if (FAST_IS_REG(src) && src == dst) {
			if (!TYPE_CAST_NEEDED(op))
				return SLJIT_SUCCESS;
		}

		if (op_flags & SLJIT_I32_OP) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if (src & SLJIT_MEM) {
				if (op == SLJIT_MOV_S32)
					op = SLJIT_MOV_U32;
			}
			else if (src & SLJIT_IMM) {
				if (op == SLJIT_MOV_U32)
					op = SLJIT_MOV_S32;
			}
#endif
		}

		if (src & SLJIT_IMM) {
			switch (op) {
			case SLJIT_MOV_U8:
				srcw = (sljit_u8)srcw;
				break;
			case SLJIT_MOV_S8:
				srcw = (sljit_s8)srcw;
				break;
			case SLJIT_MOV_U16:
				srcw = (sljit_u16)srcw;
				break;
			case SLJIT_MOV_S16:
				srcw = (sljit_s16)srcw;
				break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			case SLJIT_MOV_U32:
				srcw = (sljit_u32)srcw;
				break;
			case SLJIT_MOV_S32:
				srcw = (sljit_s32)srcw;
				break;
#endif
			}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			if (SLJIT_UNLIKELY(dst_is_ereg))
				return emit_mov(compiler, dst, dstw, src, srcw);
#endif
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
			SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
			dst = TMP_REG1;
		}
#endif

		switch (op) {
		case SLJIT_MOV:
		case SLJIT_MOV_P:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		case SLJIT_MOV_U32:
		case SLJIT_MOV_S32:
#endif
			FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_U8:
			FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_S8:
			FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_U16:
			FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_S16:
			FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
			break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		case SLJIT_MOV_U32:
			FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_S32:
			FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
			break;
#endif
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
			return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
#endif
		return SLJIT_SUCCESS;
	}

	switch (op) {
	case SLJIT_NOT:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_Z))
			return emit_not_with_flags(compiler, dst, dstw, src, srcw);
		return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);

	case SLJIT_NEG:
		return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);

	case SLJIT_CLZ:
		return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
	}

	return SLJIT_SUCCESS;
}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	if (IS_HALFWORD(immw) || compiler->mode32) { \
		inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
		FAIL_IF(!inst); \
		*(inst + 1) |= (op_imm); \
	} \
	else { \
		FAIL_IF(emit_load_imm64(compiler, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, immw)); \
		inst = emit_x86_instruction(compiler, 1, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \
		FAIL_IF(!inst); \
		*inst = (op_mr); \
	}

#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))

#else

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
	FAIL_IF(!inst); \
	*(inst + 1) |= (op_imm);

#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))

#endif
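
/* BINARY_IMM emits the immediate form of an ALU operation (the
   GROUP_BINARY_81/83 group, with op_imm ORed into the ModRM reg field as the
   /digit extension); on x86-64 an immediate that does not fit in a
   sign-extended 32-bit value is first loaded into a temporary register and
   the register form (op_mr) is used instead. BINARY_EAX_IMM emits the shorter
   EAX-specific encoding (e.g. 0x05 for add), which the callers below select
   when the destination is SLJIT_R0 and the immediate does not fit in a byte. */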
  1237. static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
  1238. sljit_u32 op_types,
  1239. sljit_s32 dst, sljit_sw dstw,
  1240. sljit_s32 src1, sljit_sw src1w,
  1241. sljit_s32 src2, sljit_sw src2w)
  1242. {
  1243. sljit_u8* inst;
  1244. sljit_u8 op_eax_imm = (op_types >> 24);
  1245. sljit_u8 op_rm = (op_types >> 16) & 0xff;
  1246. sljit_u8 op_mr = (op_types >> 8) & 0xff;
  1247. sljit_u8 op_imm = op_types & 0xff;
  1248. if (dst == SLJIT_UNUSED) {
  1249. EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
  1250. if (src2 & SLJIT_IMM) {
  1251. BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
  1252. }
  1253. else {
  1254. inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
  1255. FAIL_IF(!inst);
  1256. *inst = op_rm;
  1257. }
  1258. return SLJIT_SUCCESS;
  1259. }
  1260. if (dst == src1 && dstw == src1w) {
  1261. if (src2 & SLJIT_IMM) {
  1262. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  1263. if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
  1264. #else
  1265. if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
  1266. #endif
  1267. BINARY_EAX_IMM(op_eax_imm, src2w);
  1268. }
  1269. else {
  1270. BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
  1271. }
  1272. }
  1273. else if (FAST_IS_REG(dst)) {
  1274. inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
  1275. FAIL_IF(!inst);
  1276. *inst = op_rm;
  1277. }
  1278. else if (FAST_IS_REG(src2)) {
  1279. /* Special exception for sljit_emit_op_flags. */
  1280. inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
  1281. FAIL_IF(!inst);
  1282. *inst = op_mr;
  1283. }
  1284. else {
  1285. EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
  1286. inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
  1287. FAIL_IF(!inst);
  1288. *inst = op_mr;
  1289. }
  1290. return SLJIT_SUCCESS;
  1291. }
  1292. /* Only for cumulative operations. */
  1293. if (dst == src2 && dstw == src2w) {
  1294. if (src1 & SLJIT_IMM) {
  1295. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  1296. if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
  1297. #else
  1298. if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
  1299. #endif
  1300. BINARY_EAX_IMM(op_eax_imm, src1w);
  1301. }
  1302. else {
  1303. BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
  1304. }
  1305. }
  1306. else if (FAST_IS_REG(dst)) {
  1307. inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
  1308. FAIL_IF(!inst);
  1309. *inst = op_rm;
  1310. }
  1311. else if (FAST_IS_REG(src1)) {
  1312. inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
  1313. FAIL_IF(!inst);
  1314. *inst = op_mr;
  1315. }
  1316. else {
  1317. EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
  1318. inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
  1319. FAIL_IF(!inst);
  1320. *inst = op_mr;
  1321. }
  1322. return SLJIT_SUCCESS;
  1323. }
  1324. /* General version. */
  1325. if (FAST_IS_REG(dst)) {
  1326. EMIT_MOV(compiler, dst, 0, src1, src1w);
  1327. if (src2 & SLJIT_IMM) {
  1328. BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
  1329. }
  1330. else {
  1331. inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
  1332. FAIL_IF(!inst);
  1333. *inst = op_rm;
  1334. }
  1335. }
  1336. else {
/* This version requires fewer memory writes. */
  1338. EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
  1339. if (src2 & SLJIT_IMM) {
  1340. BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
  1341. }
  1342. else {
  1343. inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
  1344. FAIL_IF(!inst);
  1345. *inst = op_rm;
  1346. }
  1347. EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
  1348. }
  1349. return SLJIT_SUCCESS;
  1350. }
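/* Non-cumulative (non-commutative) variant: unlike emit_cum_binary above,
   it cannot swap the operands, so the in-place "dst == src2" case marked
   "Only for cumulative operations" has no counterpart here; such inputs
   fall through to the general version, which builds the result in dst or
   in TMP_REG1 and stores it afterwards. */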
  1351. static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
  1352. sljit_u32 op_types,
  1353. sljit_s32 dst, sljit_sw dstw,
  1354. sljit_s32 src1, sljit_sw src1w,
  1355. sljit_s32 src2, sljit_sw src2w)
  1356. {
  1357. sljit_u8* inst;
  1358. sljit_u8 op_eax_imm = (op_types >> 24);
  1359. sljit_u8 op_rm = (op_types >> 16) & 0xff;
  1360. sljit_u8 op_mr = (op_types >> 8) & 0xff;
  1361. sljit_u8 op_imm = op_types & 0xff;
  1362. if (dst == SLJIT_UNUSED) {
  1363. EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
  1364. if (src2 & SLJIT_IMM) {
  1365. BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
  1366. }
  1367. else {
  1368. inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
  1369. FAIL_IF(!inst);
  1370. *inst = op_rm;
  1371. }
  1372. return SLJIT_SUCCESS;
  1373. }
  1374. if (dst == src1 && dstw == src1w) {
  1375. if (src2 & SLJIT_IMM) {
  1376. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  1377. if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
  1378. #else
  1379. if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
  1380. #endif
  1381. BINARY_EAX_IMM(op_eax_imm, src2w);
  1382. }
  1383. else {
  1384. BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
  1385. }
  1386. }
  1387. else if (FAST_IS_REG(dst)) {
  1388. inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
  1389. FAIL_IF(!inst);
  1390. *inst = op_rm;
  1391. }
  1392. else if (FAST_IS_REG(src2)) {
  1393. inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
  1394. FAIL_IF(!inst);
  1395. *inst = op_mr;
  1396. }
  1397. else {
  1398. EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
  1399. inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
  1400. FAIL_IF(!inst);
  1401. *inst = op_mr;
  1402. }
  1403. return SLJIT_SUCCESS;
  1404. }
  1405. /* General version. */
  1406. if (FAST_IS_REG(dst) && dst != src2) {
  1407. EMIT_MOV(compiler, dst, 0, src1, src1w);
  1408. if (src2 & SLJIT_IMM) {
  1409. BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
  1410. }
  1411. else {
  1412. inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
  1413. FAIL_IF(!inst);
  1414. *inst = op_rm;
  1415. }
  1416. }
  1417. else {
/* This version requires fewer memory writes. */
  1419. EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
  1420. if (src2 & SLJIT_IMM) {
  1421. BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
  1422. }
  1423. else {
  1424. inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
  1425. FAIL_IF(!inst);
  1426. *inst = op_rm;
  1427. }
  1428. EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
  1429. }
  1430. return SLJIT_SUCCESS;
  1431. }
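/* Signed multiply helper. A minimal sketch of the standard x86 encodings
   selected below (operand order as used by this emitter):

     imul r, r/m          0F AF /r    neither operand is an immediate
     imul r, r/m, imm8    6B /r ib    immediate fits in a signed byte
     imul r, r/m, imm32   69 /r id    larger immediates; on x86-64 a value
                                      that does not fit in 32 bits is first
                                      loaded into TMP_REG2 */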
  1432. static sljit_s32 emit_mul(struct sljit_compiler *compiler,
  1433. sljit_s32 dst, sljit_sw dstw,
  1434. sljit_s32 src1, sljit_sw src1w,
  1435. sljit_s32 src2, sljit_sw src2w)
  1436. {
  1437. sljit_u8* inst;
  1438. sljit_s32 dst_r;
  1439. dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
  1440. /* Register destination. */
  1441. if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
  1442. inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
  1443. FAIL_IF(!inst);
  1444. *inst++ = GROUP_0F;
  1445. *inst = IMUL_r_rm;
  1446. }
  1447. else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
  1448. inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
  1449. FAIL_IF(!inst);
  1450. *inst++ = GROUP_0F;
  1451. *inst = IMUL_r_rm;
  1452. }
  1453. else if (src1 & SLJIT_IMM) {
  1454. if (src2 & SLJIT_IMM) {
  1455. EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
  1456. src2 = dst_r;
  1457. src2w = 0;
  1458. }
  1459. if (src1w <= 127 && src1w >= -128) {
  1460. inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
  1461. FAIL_IF(!inst);
  1462. *inst = IMUL_r_rm_i8;
  1463. inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
  1464. FAIL_IF(!inst);
  1465. INC_SIZE(1);
  1466. *inst = (sljit_s8)src1w;
  1467. }
  1468. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  1469. else {
  1470. inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
  1471. FAIL_IF(!inst);
  1472. *inst = IMUL_r_rm_i32;
  1473. inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
  1474. FAIL_IF(!inst);
  1475. INC_SIZE(4);
  1476. sljit_unaligned_store_sw(inst, src1w);
  1477. }
  1478. #else
  1479. else if (IS_HALFWORD(src1w)) {
  1480. inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
  1481. FAIL_IF(!inst);
  1482. *inst = IMUL_r_rm_i32;
  1483. inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
  1484. FAIL_IF(!inst);
  1485. INC_SIZE(4);
  1486. sljit_unaligned_store_s32(inst, (sljit_s32)src1w);
  1487. }
  1488. else {
  1489. if (dst_r != src2)
  1490. EMIT_MOV(compiler, dst_r, 0, src2, src2w);
  1491. FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
  1492. inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
  1493. FAIL_IF(!inst);
  1494. *inst++ = GROUP_0F;
  1495. *inst = IMUL_r_rm;
  1496. }
  1497. #endif
  1498. }
  1499. else if (src2 & SLJIT_IMM) {
  1500. /* Note: src1 is NOT immediate. */
  1501. if (src2w <= 127 && src2w >= -128) {
  1502. inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
  1503. FAIL_IF(!inst);
  1504. *inst = IMUL_r_rm_i8;
  1505. inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
  1506. FAIL_IF(!inst);
  1507. INC_SIZE(1);
  1508. *inst = (sljit_s8)src2w;
  1509. }
  1510. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  1511. else {
  1512. inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
  1513. FAIL_IF(!inst);
  1514. *inst = IMUL_r_rm_i32;
  1515. inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
  1516. FAIL_IF(!inst);
  1517. INC_SIZE(4);
  1518. sljit_unaligned_store_sw(inst, src2w);
  1519. }
  1520. #else
  1521. else if (IS_HALFWORD(src2w)) {
  1522. inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
  1523. FAIL_IF(!inst);
  1524. *inst = IMUL_r_rm_i32;
  1525. inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
  1526. FAIL_IF(!inst);
  1527. INC_SIZE(4);
  1528. sljit_unaligned_store_s32(inst, (sljit_s32)src2w);
  1529. }
  1530. else {
  1531. if (dst_r != src1)
  1532. EMIT_MOV(compiler, dst_r, 0, src1, src1w);
  1533. FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
  1534. inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
  1535. FAIL_IF(!inst);
  1536. *inst++ = GROUP_0F;
  1537. *inst = IMUL_r_rm;
  1538. }
  1539. #endif
  1540. }
  1541. else {
  1542. /* Neither argument is immediate. */
  1543. if (ADDRESSING_DEPENDS_ON(src2, dst_r))
  1544. dst_r = TMP_REG1;
  1545. EMIT_MOV(compiler, dst_r, 0, src1, src1w);
  1546. inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
  1547. FAIL_IF(!inst);
  1548. *inst++ = GROUP_0F;
  1549. *inst = IMUL_r_rm;
  1550. }
  1551. if (dst & SLJIT_MEM)
  1552. EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
  1553. return SLJIT_SUCCESS;
  1554. }
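/* Flag-free addition helper: when the operands allow it, an ADD that does
   not need to set flags can be emitted as a single LEA
   (lea dst, [src1 + src2] or lea dst, [src1 + imm]), since LEA leaves the
   status flags untouched. Returns SLJIT_ERR_UNSUPPORTED when no suitable
   addressing form exists, and the caller falls back to the normal path. */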
  1555. static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler,
  1556. sljit_s32 dst, sljit_sw dstw,
  1557. sljit_s32 src1, sljit_sw src1w,
  1558. sljit_s32 src2, sljit_sw src2w)
  1559. {
  1560. sljit_u8* inst;
  1561. sljit_s32 dst_r, done = 0;
/* These cases are better left to the normal code path. */
  1563. if (dst == src1 && dstw == src1w)
  1564. return SLJIT_ERR_UNSUPPORTED;
  1565. if (dst == src2 && dstw == src2w)
  1566. return SLJIT_ERR_UNSUPPORTED;
  1567. dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
  1568. if (FAST_IS_REG(src1)) {
  1569. if (FAST_IS_REG(src2)) {
  1570. inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
  1571. FAIL_IF(!inst);
  1572. *inst = LEA_r_m;
  1573. done = 1;
  1574. }
  1575. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  1576. if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
  1577. inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w);
  1578. #else
  1579. if (src2 & SLJIT_IMM) {
  1580. inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
  1581. #endif
  1582. FAIL_IF(!inst);
  1583. *inst = LEA_r_m;
  1584. done = 1;
  1585. }
  1586. }
  1587. else if (FAST_IS_REG(src2)) {
  1588. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  1589. if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
  1590. inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w);
  1591. #else
  1592. if (src1 & SLJIT_IMM) {
  1593. inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
  1594. #endif
  1595. FAIL_IF(!inst);
  1596. *inst = LEA_r_m;
  1597. done = 1;
  1598. }
  1599. }
  1600. if (done) {
  1601. if (dst_r == TMP_REG1)
  1602. return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
  1603. return SLJIT_SUCCESS;
  1604. }
  1605. return SLJIT_ERR_UNSUPPORTED;
  1606. }
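/* Compare helper, used when the result of a subtraction is not needed
   (dst == SLJIT_UNUSED in sljit_emit_op2): only the flags are produced.
   The short "cmp eax, imm32" form is preferred for SLJIT_R0 when the
   immediate does not fit in a signed byte. */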
  1607. static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
  1608. sljit_s32 src1, sljit_sw src1w,
  1609. sljit_s32 src2, sljit_sw src2w)
  1610. {
  1611. sljit_u8* inst;
  1612. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  1613. if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
  1614. #else
  1615. if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
  1616. #endif
  1617. BINARY_EAX_IMM(CMP_EAX_i32, src2w);
  1618. return SLJIT_SUCCESS;
  1619. }
  1620. if (FAST_IS_REG(src1)) {
  1621. if (src2 & SLJIT_IMM) {
  1622. BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
  1623. }
  1624. else {
  1625. inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
  1626. FAIL_IF(!inst);
  1627. *inst = CMP_r_rm;
  1628. }
  1629. return SLJIT_SUCCESS;
  1630. }
  1631. if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
  1632. inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
  1633. FAIL_IF(!inst);
  1634. *inst = CMP_rm_r;
  1635. return SLJIT_SUCCESS;
  1636. }
  1637. if (src2 & SLJIT_IMM) {
  1638. if (src1 & SLJIT_IMM) {
  1639. EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
  1640. src1 = TMP_REG1;
  1641. src1w = 0;
  1642. }
  1643. BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
  1644. }
  1645. else {
  1646. EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
  1647. inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
  1648. FAIL_IF(!inst);
  1649. *inst = CMP_r_rm;
  1650. }
  1651. return SLJIT_SUCCESS;
  1652. }
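/* TEST helper, used for SLJIT_AND with an unused destination: the AND is
   performed purely for its flag effects, again preferring the short
   "test eax, imm32" encoding when either operand is SLJIT_R0. */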
  1653. static sljit_s32 emit_test_binary(struct sljit_compiler *compiler,
  1654. sljit_s32 src1, sljit_sw src1w,
  1655. sljit_s32 src2, sljit_sw src2w)
  1656. {
  1657. sljit_u8* inst;
  1658. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  1659. if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
  1660. #else
  1661. if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
  1662. #endif
  1663. BINARY_EAX_IMM(TEST_EAX_i32, src2w);
  1664. return SLJIT_SUCCESS;
  1665. }
  1666. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  1667. if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
  1668. #else
  1669. if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
  1670. #endif
  1671. BINARY_EAX_IMM(TEST_EAX_i32, src1w);
  1672. return SLJIT_SUCCESS;
  1673. }
  1674. if (!(src1 & SLJIT_IMM)) {
  1675. if (src2 & SLJIT_IMM) {
  1676. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  1677. if (IS_HALFWORD(src2w) || compiler->mode32) {
  1678. inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
  1679. FAIL_IF(!inst);
  1680. *inst = GROUP_F7;
  1681. }
  1682. else {
  1683. FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src2w));
  1684. inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src1, src1w);
  1685. FAIL_IF(!inst);
  1686. *inst = TEST_rm_r;
  1687. }
  1688. #else
  1689. inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
  1690. FAIL_IF(!inst);
  1691. *inst = GROUP_F7;
  1692. #endif
  1693. return SLJIT_SUCCESS;
  1694. }
  1695. else if (FAST_IS_REG(src1)) {
  1696. inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
  1697. FAIL_IF(!inst);
  1698. *inst = TEST_rm_r;
  1699. return SLJIT_SUCCESS;
  1700. }
  1701. }
  1702. if (!(src2 & SLJIT_IMM)) {
  1703. if (src1 & SLJIT_IMM) {
  1704. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  1705. if (IS_HALFWORD(src1w) || compiler->mode32) {
  1706. inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w);
  1707. FAIL_IF(!inst);
  1708. *inst = GROUP_F7;
  1709. }
  1710. else {
  1711. FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src1w));
  1712. inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
  1713. FAIL_IF(!inst);
  1714. *inst = TEST_rm_r;
  1715. }
  1716. #else
  1717. inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w);
  1718. FAIL_IF(!inst);
  1719. *inst = GROUP_F7;
  1720. #endif
  1721. return SLJIT_SUCCESS;
  1722. }
  1723. else if (FAST_IS_REG(src2)) {
  1724. inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
  1725. FAIL_IF(!inst);
  1726. *inst = TEST_rm_r;
  1727. return SLJIT_SUCCESS;
  1728. }
  1729. }
  1730. EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
  1731. if (src2 & SLJIT_IMM) {
  1732. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  1733. if (IS_HALFWORD(src2w) || compiler->mode32) {
  1734. inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
  1735. FAIL_IF(!inst);
  1736. *inst = GROUP_F7;
  1737. }
  1738. else {
  1739. FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
  1740. inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
  1741. FAIL_IF(!inst);
  1742. *inst = TEST_rm_r;
  1743. }
  1744. #else
  1745. inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
  1746. FAIL_IF(!inst);
  1747. *inst = GROUP_F7;
  1748. #endif
  1749. }
  1750. else {
  1751. inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
  1752. FAIL_IF(!inst);
  1753. *inst = TEST_rm_r;
  1754. }
  1755. return SLJIT_SUCCESS;
  1756. }
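/* Variable shift helper. Hardware shifts take their count either as an
   immediate or in cl, so when src2 is neither an immediate nor
   SLJIT_PREF_SHIFT_REG (ecx), the current ecx value must be saved, the
   count moved into ecx, the shift performed, and ecx restored; the
   branches below also cover the cases where dst or the addressing of the
   operands overlaps with ecx. */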
  1757. static sljit_s32 emit_shift(struct sljit_compiler *compiler,
  1758. sljit_u8 mode,
  1759. sljit_s32 dst, sljit_sw dstw,
  1760. sljit_s32 src1, sljit_sw src1w,
  1761. sljit_s32 src2, sljit_sw src2w)
  1762. {
  1763. sljit_u8* inst;
  1764. if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
  1765. if (dst == src1 && dstw == src1w) {
  1766. inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
  1767. FAIL_IF(!inst);
  1768. *inst |= mode;
  1769. return SLJIT_SUCCESS;
  1770. }
  1771. if (dst == SLJIT_UNUSED) {
  1772. EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
  1773. inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
  1774. FAIL_IF(!inst);
  1775. *inst |= mode;
  1776. return SLJIT_SUCCESS;
  1777. }
  1778. if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
  1779. EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
  1780. inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
  1781. FAIL_IF(!inst);
  1782. *inst |= mode;
  1783. EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
  1784. return SLJIT_SUCCESS;
  1785. }
  1786. if (FAST_IS_REG(dst)) {
  1787. EMIT_MOV(compiler, dst, 0, src1, src1w);
  1788. inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
  1789. FAIL_IF(!inst);
  1790. *inst |= mode;
  1791. return SLJIT_SUCCESS;
  1792. }
  1793. EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
  1794. inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
  1795. FAIL_IF(!inst);
  1796. *inst |= mode;
  1797. EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
  1798. return SLJIT_SUCCESS;
  1799. }
  1800. if (dst == SLJIT_PREF_SHIFT_REG) {
  1801. EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
  1802. EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
  1803. inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
  1804. FAIL_IF(!inst);
  1805. *inst |= mode;
  1806. EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
  1807. }
  1808. else if (SLOW_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
  1809. if (src1 != dst)
  1810. EMIT_MOV(compiler, dst, 0, src1, src1w);
  1811. EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
  1812. EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
  1813. inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
  1814. FAIL_IF(!inst);
  1815. *inst |= mode;
  1816. EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
  1817. }
  1818. else {
/* This case is complex, since ecx itself may be used for addressing,
   and that case must be supported as well. */
  1821. EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
  1822. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  1823. EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
  1824. EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
  1825. inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
  1826. FAIL_IF(!inst);
  1827. *inst |= mode;
  1828. EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0);
  1829. #else
  1830. EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
  1831. EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
  1832. inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
  1833. FAIL_IF(!inst);
  1834. *inst |= mode;
  1835. EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
  1836. #endif
  1837. if (dst != SLJIT_UNUSED)
  1838. return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
  1839. }
  1840. return SLJIT_SUCCESS;
  1841. }
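/* A shift by zero leaves the flags unchanged, so when flags are requested
   the wrapper below either forwards directly to emit_shift (non-zero
   immediate count), emits an "or dst, 0" for a zero immediate count, or
   brackets a variable-count shift with a compare against zero so the Z
   flag still reflects the result. */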
static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler,
	sljit_u8 mode, sljit_s32 set_flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	/* The CPU does not set flags if the shift count is 0. */
	if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
#else
		if ((src2w & 0x1f) != 0)
			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
#endif
		if (!set_flags)
			return emit_mov(compiler, dst, dstw, src1, src1w);
		/* OR dst, src, 0 */
		return emit_cum_binary(compiler, BINARY_OPCODE(OR),
			dst, dstw, src1, src1w, SLJIT_IMM, 0);
	}

	if (!set_flags)
		return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);

	if (!FAST_IS_REG(dst))
		FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));

	FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));

	if (FAST_IS_REG(dst))
		return emit_cmp_binary(compiler, (dst == SLJIT_UNUSED) ? TMP_REG1 : dst, dstw, SLJIT_IMM, 0);
	return SLJIT_SUCCESS;
}
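/* Illustrative use of the entry point below from client code (a sketch
   only; creation and setup of the compiler object are assumed to happen
   elsewhere):

     // R0 = R0 + R1, no flags requested, so the ADD case may be
     // emitted as a single LEA:
     sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_R1, 0);

     // R2 = R2 - [SP + 16], Z flag requested:
     sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0,
         SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 16);
*/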
  1872. SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
  1873. sljit_s32 dst, sljit_sw dstw,
  1874. sljit_s32 src1, sljit_sw src1w,
  1875. sljit_s32 src2, sljit_sw src2w)
  1876. {
  1877. CHECK_ERROR();
  1878. CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
  1879. ADJUST_LOCAL_OFFSET(dst, dstw);
  1880. ADJUST_LOCAL_OFFSET(src1, src1w);
  1881. ADJUST_LOCAL_OFFSET(src2, src2w);
  1882. CHECK_EXTRA_REGS(dst, dstw, (void)0);
  1883. CHECK_EXTRA_REGS(src1, src1w, (void)0);
  1884. CHECK_EXTRA_REGS(src2, src2w, (void)0);
  1885. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  1886. compiler->mode32 = op & SLJIT_I32_OP;
  1887. #endif
  1888. if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
  1889. return SLJIT_SUCCESS;
  1890. switch (GET_OPCODE(op)) {
  1891. case SLJIT_ADD:
  1892. if (!HAS_FLAGS(op)) {
  1893. if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
  1894. return compiler->error;
  1895. }
  1896. return emit_cum_binary(compiler, BINARY_OPCODE(ADD),
  1897. dst, dstw, src1, src1w, src2, src2w);
  1898. case SLJIT_ADDC:
  1899. return emit_cum_binary(compiler, BINARY_OPCODE(ADC),
  1900. dst, dstw, src1, src1w, src2, src2w);
  1901. case SLJIT_SUB:
  1902. if (!HAS_FLAGS(op)) {
  1903. if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
  1904. return compiler->error;
  1905. }
  1906. if (dst == SLJIT_UNUSED)
  1907. return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
  1908. return emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
  1909. dst, dstw, src1, src1w, src2, src2w);
  1910. case SLJIT_SUBC:
  1911. return emit_non_cum_binary(compiler, BINARY_OPCODE(SBB),
  1912. dst, dstw, src1, src1w, src2, src2w);
  1913. case SLJIT_MUL:
  1914. return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
  1915. case SLJIT_AND:
  1916. if (dst == SLJIT_UNUSED)
  1917. return emit_test_binary(compiler, src1, src1w, src2, src2w);
  1918. return emit_cum_binary(compiler, BINARY_OPCODE(AND),
  1919. dst, dstw, src1, src1w, src2, src2w);
  1920. case SLJIT_OR:
  1921. return emit_cum_binary(compiler, BINARY_OPCODE(OR),
  1922. dst, dstw, src1, src1w, src2, src2w);
  1923. case SLJIT_XOR:
  1924. return emit_cum_binary(compiler, BINARY_OPCODE(XOR),
  1925. dst, dstw, src1, src1w, src2, src2w);
  1926. case SLJIT_SHL:
  1927. return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op),
  1928. dst, dstw, src1, src1w, src2, src2w);
  1929. case SLJIT_LSHR:
  1930. return emit_shift_with_flags(compiler, SHR, HAS_FLAGS(op),
  1931. dst, dstw, src1, src1w, src2, src2w);
  1932. case SLJIT_ASHR:
  1933. return emit_shift_with_flags(compiler, SAR, HAS_FLAGS(op),
  1934. dst, dstw, src1, src1w, src2, src2w);
  1935. }
  1936. return SLJIT_SUCCESS;
  1937. }
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (reg >= SLJIT_R3 && reg <= SLJIT_R8)
		return -1;
#endif
	return reg_map[reg];
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	return reg;
#else
	return freg_map[reg];
#endif
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_s32 size)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));

	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);
	INC_SIZE(size);
	SLJIT_MEMCPY(inst, instruction, size);
	return SLJIT_SUCCESS;
}
/* --------------------------------------------------------------------- */
/* Floating point operators */
/* --------------------------------------------------------------------- */

/* Alignment(3) + 4 * 16 bytes. */
static sljit_s32 sse2_data[3 + (4 * 4)];
static sljit_s32 *sse2_buffer;

static void init_compiler(void)
{
	/* Align to 16 bytes. */
	sse2_buffer = (sljit_s32*)(((sljit_uw)sse2_data + 15) & ~0xf);
	/* Single precision constants (each constant is 16 bytes long). */
	sse2_buffer[0] = 0x80000000;
	sse2_buffer[4] = 0x7fffffff;
	/* Double precision constants (each constant is 16 bytes long). */
	sse2_buffer[8] = 0;
	sse2_buffer[9] = 0x80000000;
	sse2_buffer[12] = 0xffffffff;
	sse2_buffer[13] = 0x7fffffff;
}
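/* Resulting layout of sse2_buffer (each constant sits in its own aligned
   16 byte slot so it can be used directly as an xmm memory operand):
     +0  : 0x80000000                 float sign mask  (negation)
     +16 : 0x7fffffff                 float abs mask
     +32 : 0x8000000000000000         double sign mask (negation)
     +48 : 0x7fffffffffffffff         double abs mask
   These are the memory operands of the XORPD/ANDPD instructions emitted
   for SLJIT_NEG_F64 and SLJIT_ABS_F64 in sljit_emit_fop1 below. */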
static sljit_s32 emit_sse2(struct sljit_compiler *compiler, sljit_u8 opcode,
	sljit_s32 single, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
{
	sljit_u8 *inst;

	inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = opcode;
	return SLJIT_SUCCESS;
}

static sljit_s32 emit_sse2_logic(struct sljit_compiler *compiler, sljit_u8 opcode,
	sljit_s32 pref66, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
{
	sljit_u8 *inst;

	inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = opcode;
	return SLJIT_SUCCESS;
}

static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
	sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
{
	return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
}

static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
	sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src)
{
	return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
}
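/* The two wrappers above only fix the operand direction: emit_sse2_load
   produces a movss/movsd xmm, xmm/m (MOVSD_x_xm) and emit_sse2_store a
   movss/movsd xmm/m, xmm (MOVSD_xm_x), with the F3 or F2 prefix selecting
   the single or double precision form. */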
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
	sljit_u8 *inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
		compiler->mode32 = 0;
#endif

	inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = CVTTSD2SI_r_xm;

	if (dst & SLJIT_MEM)
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}
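/* The conversion above uses cvttss2si/cvttsd2si, so the floating point
   value is truncated toward zero; clearing mode32 on x86-64 selects the
   REX.W form, giving a 64 bit integer result for SLJIT_CONV_SW_FROM_F64. */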
  2035. static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
  2036. sljit_s32 dst, sljit_sw dstw,
  2037. sljit_s32 src, sljit_sw srcw)
  2038. {
  2039. sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
  2040. sljit_u8 *inst;
  2041. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  2042. if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
  2043. compiler->mode32 = 0;
  2044. #endif
  2045. if (src & SLJIT_IMM) {
  2046. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  2047. if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
  2048. srcw = (sljit_s32)srcw;
  2049. #endif
  2050. EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
  2051. src = TMP_REG1;
  2052. srcw = 0;
  2053. }
  2054. inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw);
  2055. FAIL_IF(!inst);
  2056. *inst++ = GROUP_0F;
  2057. *inst = CVTSI2SD_x_rm;
  2058. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  2059. compiler->mode32 = 1;
  2060. #endif
  2061. if (dst_r == TMP_FREG)
  2062. return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
  2063. return SLJIT_SUCCESS;
  2064. }
  2065. static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
  2066. sljit_s32 src1, sljit_sw src1w,
  2067. sljit_s32 src2, sljit_sw src2w)
  2068. {
  2069. if (!FAST_IS_REG(src1)) {
  2070. FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
  2071. src1 = TMP_FREG;
  2072. }
  2073. return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_F32_OP), src1, src2, src2w);
  2074. }
  2075. SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
  2076. sljit_s32 dst, sljit_sw dstw,
  2077. sljit_s32 src, sljit_sw srcw)
  2078. {
  2079. sljit_s32 dst_r;
  2080. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  2081. compiler->mode32 = 1;
  2082. #endif
  2083. CHECK_ERROR();
  2084. SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
  2085. if (GET_OPCODE(op) == SLJIT_MOV_F64) {
  2086. if (FAST_IS_REG(dst))
  2087. return emit_sse2_load(compiler, op & SLJIT_F32_OP, dst, src, srcw);
  2088. if (FAST_IS_REG(src))
  2089. return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, src);
  2090. FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src, srcw));
  2091. return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
  2092. }
  2093. if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) {
  2094. dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
  2095. if (FAST_IS_REG(src)) {
/* We overwrite the high bits of the source register. From the SLJIT
   point of view this is not an issue.
   Note: in SSE3, MOVDDUP and MOVSLDUP could also be used here. */
  2099. FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_F32_OP, src, src, 0));
  2100. }
  2101. else {
  2102. FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_F32_OP), TMP_FREG, src, srcw));
  2103. src = TMP_FREG;
  2104. }
  2105. FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_F32_OP, dst_r, src, 0));
  2106. if (dst_r == TMP_FREG)
  2107. return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
  2108. return SLJIT_SUCCESS;
  2109. }
  2110. if (FAST_IS_REG(dst)) {
  2111. dst_r = dst;
  2112. if (dst != src)
  2113. FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
  2114. }
  2115. else {
  2116. dst_r = TMP_FREG;
  2117. FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
  2118. }
  2119. switch (GET_OPCODE(op)) {
  2120. case SLJIT_NEG_F64:
  2121. FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer : sse2_buffer + 8)));
  2122. break;
  2123. case SLJIT_ABS_F64:
  2124. FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer + 4 : sse2_buffer + 12)));
  2125. break;
  2126. }
  2127. if (dst_r == TMP_FREG)
  2128. return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
  2129. return SLJIT_SUCCESS;
  2130. }
  2131. SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
  2132. sljit_s32 dst, sljit_sw dstw,
  2133. sljit_s32 src1, sljit_sw src1w,
  2134. sljit_s32 src2, sljit_sw src2w)
  2135. {
  2136. sljit_s32 dst_r;
  2137. CHECK_ERROR();
  2138. CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
  2139. ADJUST_LOCAL_OFFSET(dst, dstw);
  2140. ADJUST_LOCAL_OFFSET(src1, src1w);
  2141. ADJUST_LOCAL_OFFSET(src2, src2w);
  2142. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  2143. compiler->mode32 = 1;
  2144. #endif
  2145. if (FAST_IS_REG(dst)) {
  2146. dst_r = dst;
  2147. if (dst == src1)
  2148. ; /* Do nothing here. */
  2149. else if (dst == src2 && (op == SLJIT_ADD_F64 || op == SLJIT_MUL_F64)) {
  2150. /* Swap arguments. */
  2151. src2 = src1;
  2152. src2w = src1w;
  2153. }
  2154. else if (dst != src2)
  2155. FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src1, src1w));
  2156. else {
  2157. dst_r = TMP_FREG;
  2158. FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
  2159. }
  2160. }
  2161. else {
  2162. dst_r = TMP_FREG;
  2163. FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
  2164. }
  2165. switch (GET_OPCODE(op)) {
  2166. case SLJIT_ADD_F64:
  2167. FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
  2168. break;
  2169. case SLJIT_SUB_F64:
  2170. FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
  2171. break;
  2172. case SLJIT_MUL_F64:
  2173. FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
  2174. break;
  2175. case SLJIT_DIV_F64:
  2176. FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
  2177. break;
  2178. }
  2179. if (dst_r == TMP_FREG)
  2180. return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
  2181. return SLJIT_SUCCESS;
  2182. }
/* --------------------------------------------------------------------- */
/* Conditional instructions */
/* --------------------------------------------------------------------- */

SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
{
	sljit_u8 *inst;
	struct sljit_label *label;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_label(compiler));

	if (compiler->last_label && compiler->last_label->size == compiler->size)
		return compiler->last_label;

	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
	PTR_FAIL_IF(!label);
	set_label(label, compiler);

	inst = (sljit_u8*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!inst);

	*inst++ = 0;
	*inst++ = 0;

	return label;
}

SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
	sljit_u8 *inst;
	struct sljit_jump *jump;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_jump(compiler, type));

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF_NULL(jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
#else
	compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
#endif

	inst = (sljit_u8*)ensure_buf(compiler, 2);
	PTR_FAIL_IF_NULL(inst);

	*inst++ = 0;
	*inst++ = type + 2;
	return jump;
}
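/* The two bytes emitted above (0 followed by type + 2) appear to be an
   in-buffer marker rather than machine code: a zero length byte followed
   by a tag that the code generation pass later replaces with the real
   jump once label addresses are known, which is why only the worst case
   size is reserved here. This reading is inferred from the matching
   zero-byte markers written by sljit_emit_label and sljit_emit_const. */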
  2225. SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
  2226. {
  2227. sljit_u8 *inst;
  2228. struct sljit_jump *jump;
  2229. CHECK_ERROR();
  2230. CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
  2231. ADJUST_LOCAL_OFFSET(src, srcw);
  2232. CHECK_EXTRA_REGS(src, srcw, (void)0);
  2233. if (src == SLJIT_IMM) {
  2234. jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
  2235. FAIL_IF_NULL(jump);
  2236. set_jump(jump, compiler, JUMP_ADDR);
  2237. jump->u.target = srcw;
  2238. /* Worst case size. */
  2239. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  2240. compiler->size += 5;
  2241. #else
  2242. compiler->size += 10 + 3;
  2243. #endif
  2244. inst = (sljit_u8*)ensure_buf(compiler, 2);
  2245. FAIL_IF_NULL(inst);
  2246. *inst++ = 0;
  2247. *inst++ = type + 2;
  2248. }
  2249. else {
  2250. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  2251. /* REX_W is not necessary (src is not immediate). */
  2252. compiler->mode32 = 1;
  2253. #endif
  2254. inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
  2255. FAIL_IF(!inst);
  2256. *inst++ = GROUP_FF;
  2257. *inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm;
  2258. }
  2259. return SLJIT_SUCCESS;
  2260. }
  2261. SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
  2262. sljit_s32 dst, sljit_sw dstw,
  2263. sljit_s32 type)
  2264. {
  2265. sljit_u8 *inst;
  2266. sljit_u8 cond_set = 0;
  2267. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  2268. sljit_s32 reg;
  2269. #endif
  2270. /* ADJUST_LOCAL_OFFSET and CHECK_EXTRA_REGS might overwrite these values. */
  2271. sljit_s32 dst_save = dst;
  2272. sljit_sw dstw_save = dstw;
  2273. CHECK_ERROR();
  2274. CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
  2275. ADJUST_LOCAL_OFFSET(dst, dstw);
  2276. CHECK_EXTRA_REGS(dst, dstw, (void)0);
  2277. type &= 0xff;
  2278. /* setcc = jcc + 0x10. */
  2279. cond_set = get_jump_code(type) + 0x10;
  2280. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  2281. if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst)) {
  2282. inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 3);
  2283. FAIL_IF(!inst);
  2284. INC_SIZE(4 + 3);
  2285. /* Set low register to conditional flag. */
  2286. *inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
  2287. *inst++ = GROUP_0F;
  2288. *inst++ = cond_set;
  2289. *inst++ = MOD_REG | reg_lmap[TMP_REG1];
  2290. *inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
  2291. *inst++ = OR_rm8_r8;
  2292. *inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst];
  2293. return SLJIT_SUCCESS;
  2294. }
  2295. reg = (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG1;
  2296. inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 4);
  2297. FAIL_IF(!inst);
  2298. INC_SIZE(4 + 4);
  2299. /* Set low register to conditional flag. */
  2300. *inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
  2301. *inst++ = GROUP_0F;
  2302. *inst++ = cond_set;
  2303. *inst++ = MOD_REG | reg_lmap[reg];
  2304. *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
  2305. /* The movzx instruction does not affect flags. */
  2306. *inst++ = GROUP_0F;
  2307. *inst++ = MOVZX_r_rm8;
  2308. *inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];
  2309. if (reg != TMP_REG1)
  2310. return SLJIT_SUCCESS;
  2311. if (GET_OPCODE(op) < SLJIT_ADD) {
  2312. compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
  2313. return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
  2314. }
  2315. #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
  2316. || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
  2317. compiler->skip_checks = 1;
  2318. #endif
  2319. return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
  2320. #else
  2321. /* The SLJIT_CONFIG_X86_32 code path starts here. */
  2322. if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
  2323. if (reg_map[dst] <= 4) {
  2324. /* Low byte is accessible. */
  2325. inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
  2326. FAIL_IF(!inst);
  2327. INC_SIZE(3 + 3);
  2328. /* Set low byte to conditional flag. */
  2329. *inst++ = GROUP_0F;
  2330. *inst++ = cond_set;
  2331. *inst++ = MOD_REG | reg_map[dst];
  2332. *inst++ = GROUP_0F;
  2333. *inst++ = MOVZX_r_rm8;
  2334. *inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
  2335. return SLJIT_SUCCESS;
  2336. }
  2337. /* Low byte is not accessible. */
  2338. if (cpu_has_cmov == -1)
  2339. get_cpu_features();
  2340. if (cpu_has_cmov) {
  2341. EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
/* Using "xor reg, reg" to clear dst would overwrite the flags, so a mov is used instead. */
  2343. EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);
  2344. inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
  2345. FAIL_IF(!inst);
  2346. INC_SIZE(3);
  2347. *inst++ = GROUP_0F;
  2348. /* cmovcc = setcc - 0x50. */
  2349. *inst++ = cond_set - 0x50;
  2350. *inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1];
  2351. return SLJIT_SUCCESS;
  2352. }
  2353. inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
  2354. FAIL_IF(!inst);
  2355. INC_SIZE(1 + 3 + 3 + 1);
  2356. *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
  2357. /* Set al to conditional flag. */
  2358. *inst++ = GROUP_0F;
  2359. *inst++ = cond_set;
  2360. *inst++ = MOD_REG | 0 /* eax */;
  2361. *inst++ = GROUP_0F;
  2362. *inst++ = MOVZX_r_rm8;
  2363. *inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
  2364. *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
  2365. return SLJIT_SUCCESS;
  2366. }
  2367. if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && reg_map[dst] <= 4) {
  2368. SLJIT_ASSERT(reg_map[SLJIT_R0] == 0);
  2369. if (dst != SLJIT_R0) {
  2370. inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
  2371. FAIL_IF(!inst);
  2372. INC_SIZE(1 + 3 + 2 + 1);
  2373. /* Set low register to conditional flag. */
  2374. *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
  2375. *inst++ = GROUP_0F;
  2376. *inst++ = cond_set;
  2377. *inst++ = MOD_REG | 0 /* eax */;
  2378. *inst++ = OR_rm8_r8;
  2379. *inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
  2380. *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
  2381. }
  2382. else {
  2383. inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
  2384. FAIL_IF(!inst);
  2385. INC_SIZE(2 + 3 + 2 + 2);
  2386. /* Set low register to conditional flag. */
  2387. *inst++ = XCHG_r_rm;
  2388. *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
  2389. *inst++ = GROUP_0F;
  2390. *inst++ = cond_set;
  2391. *inst++ = MOD_REG | 1 /* ecx */;
  2392. *inst++ = OR_rm8_r8;
  2393. *inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
  2394. *inst++ = XCHG_r_rm;
  2395. *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
  2396. }
  2397. return SLJIT_SUCCESS;
  2398. }
  2399. /* Set TMP_REG1 to the bit. */
  2400. inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
  2401. FAIL_IF(!inst);
  2402. INC_SIZE(1 + 3 + 3 + 1);
  2403. *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
  2404. /* Set al to conditional flag. */
  2405. *inst++ = GROUP_0F;
  2406. *inst++ = cond_set;
  2407. *inst++ = MOD_REG | 0 /* eax */;
  2408. *inst++ = GROUP_0F;
  2409. *inst++ = MOVZX_r_rm8;
  2410. *inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;
  2411. *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
  2412. if (GET_OPCODE(op) < SLJIT_ADD)
  2413. return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
  2414. #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
  2415. || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
  2416. compiler->skip_checks = 1;
  2417. #endif
  2418. return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
  2419. #endif /* SLJIT_CONFIG_X86_64 */
  2420. }
  2421. SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
  2422. sljit_s32 dst_reg,
  2423. sljit_s32 src, sljit_sw srcw)
  2424. {
  2425. sljit_u8* inst;
  2426. CHECK_ERROR();
  2427. CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
  2428. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  2429. dst_reg &= ~SLJIT_I32_OP;
  2430. if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV) || (dst_reg >= SLJIT_R3 && dst_reg <= SLJIT_S3))
  2431. return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
  2432. #else
  2433. if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
  2434. return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
  2435. #endif
  2436. /* ADJUST_LOCAL_OFFSET is not needed. */
  2437. CHECK_EXTRA_REGS(src, srcw, (void)0);
  2438. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  2439. compiler->mode32 = dst_reg & SLJIT_I32_OP;
  2440. dst_reg &= ~SLJIT_I32_OP;
  2441. #endif
  2442. if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
  2443. EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
  2444. src = TMP_REG1;
  2445. srcw = 0;
  2446. }
  2447. inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
  2448. FAIL_IF(!inst);
  2449. *inst++ = GROUP_0F;
  2450. *inst = get_jump_code(type & 0xff) - 0x40;
  2451. return SLJIT_SUCCESS;
  2452. }
  2453. SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
  2454. {
  2455. CHECK_ERROR();
  2456. CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
  2457. ADJUST_LOCAL_OFFSET(dst, dstw);
  2458. CHECK_EXTRA_REGS(dst, dstw, (void)0);
  2459. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  2460. compiler->mode32 = 0;
  2461. #endif
  2462. ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);
  2463. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  2464. if (NOT_HALFWORD(offset)) {
  2465. FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
  2466. #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
  2467. SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
  2468. return compiler->error;
  2469. #else
  2470. return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
  2471. #endif
  2472. }
  2473. #endif
  2474. if (offset != 0)
  2475. return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
  2476. return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
  2477. }
  2478. SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
  2479. {
  2480. sljit_u8 *inst;
  2481. struct sljit_const *const_;
  2482. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  2483. sljit_s32 reg;
  2484. #endif
  2485. CHECK_ERROR_PTR();
  2486. CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
  2487. ADJUST_LOCAL_OFFSET(dst, dstw);
  2488. CHECK_EXTRA_REGS(dst, dstw, (void)0);
  2489. const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
  2490. PTR_FAIL_IF(!const_);
  2491. set_const(const_, compiler);
  2492. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  2493. compiler->mode32 = 0;
  2494. reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
  2495. if (emit_load_imm64(compiler, reg, init_value))
  2496. return NULL;
  2497. #else
  2498. if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
  2499. return NULL;
  2500. #endif
  2501. inst = (sljit_u8*)ensure_buf(compiler, 2);
  2502. PTR_FAIL_IF(!inst);
  2503. *inst++ = 0;
  2504. *inst++ = 1;
  2505. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  2506. if (dst & SLJIT_MEM)
  2507. if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
  2508. return NULL;
  2509. #endif
  2510. return const_;
  2511. }
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
	SLJIT_UNUSED_ARG(executable_offset);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_unaligned_store_sw((void*)addr, new_target - (addr + 4) - (sljit_uw)executable_offset);
#else
	sljit_unaligned_store_sw((void*)addr, (sljit_sw) new_target);
#endif
}
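/* Patching note: on x86-32 the stored word is a rel32 displacement,
   relative to the end of the 4 byte field and adjusted by the executable
   offset, while on x86-64 the target was loaded with a mov r64, imm64, so
   an absolute address is stored. sljit_set_const below always patches an
   absolute machine word. */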
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
	SLJIT_UNUSED_ARG(executable_offset);
	sljit_unaligned_store_sw((void*)addr, new_constant);
}