/* sljitNativeX86_common.c */

  1. /*
  2. * Stack-less Just-In-Time compiler
  3. *
  4. * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without modification, are
  7. * permitted provided that the following conditions are met:
  8. *
  9. * 1. Redistributions of source code must retain the above copyright notice, this list of
  10. * conditions and the following disclaimer.
  11. *
  12. * 2. Redistributions in binary form must reproduce the above copyright notice, this list
  13. * of conditions and the following disclaimer in the documentation and/or other materials
  14. * provided with the distribution.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
  17. * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  18. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
  19. * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  20. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
  21. * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  22. * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  23. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  24. * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
  27. {
  28. return "x86" SLJIT_CPUINFO;
  29. }
  30. /*
  31. 32b register indexes:
  32. 0 - EAX
  33. 1 - ECX
  34. 2 - EDX
  35. 3 - EBX
  36. 4 - none
  37. 5 - EBP
  38. 6 - ESI
  39. 7 - EDI
  40. */
  41. /*
  42. 64b register indexes:
  43. 0 - RAX
  44. 1 - RCX
  45. 2 - RDX
  46. 3 - RBX
  47. 4 - none
  48. 5 - RBP
  49. 6 - RSI
  50. 7 - RDI
8 - R8 (from here on, a REX prefix is required)
  52. 9 - R9
  53. 10 - R10
  54. 11 - R11
  55. 12 - R12
  56. 13 - R13
  57. 14 - R14
  58. 15 - R15
  59. */
  60. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  61. /* Last register + 1. */
  62. #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
  63. static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
  64. 0, 0, 2, 1, 0, 0, 0, 0, 7, 6, 3, 4, 5
  65. };
  66. #define CHECK_EXTRA_REGS(p, w, do) \
  67. if (p >= SLJIT_R3 && p <= SLJIT_R6) { \
  68. w = SLJIT_LOCALS_OFFSET + ((p) - (SLJIT_R3 + 4)) * sizeof(sljit_sw); \
  69. p = SLJIT_MEM1(SLJIT_SP); \
  70. do; \
  71. }
  72. #else /* SLJIT_CONFIG_X86_32 */
  73. /* Last register + 1. */
  74. #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
  75. #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
  76. #define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4)
/* Note: r12 & 0x7 == 0b100, which is decoded as a SIB byte being present.
   Note: avoid using r12 and r13 for memory addressing;
   therefore r12 is better suited for SAVED_EREG than SAVED_REG. */
  80. #ifndef _WIN64
/* 1st argument passed in rdi, 2nd in rsi, 3rd in rdx. */
  82. static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
  83. 0, 0, 6, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 7, 9
  84. };
  85. /* low-map. reg_map & 0x7. */
  86. static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
  87. 0, 0, 6, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 7, 1
  88. };
  89. #else
/* 1st argument passed in rcx, 2nd in rdx, 3rd in r8. */
  91. static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
  92. 0, 0, 2, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 10, 8, 9
  93. };
  94. /* low-map. reg_map & 0x7. */
  95. static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
  96. 0, 0, 2, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 2, 0, 1
  97. };
  98. #endif
  99. #define REX_W 0x48
  100. #define REX_R 0x44
  101. #define REX_X 0x42
  102. #define REX_B 0x41
  103. #define REX 0x40
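/* Editor's note (illustrative, not part of the original sources): for hardware
   registers 8-15 the fourth bit of the register number travels in the REX
   prefix (REX_R extends the ModRM reg field, REX_X the SIB index, REX_B the
   ModRM rm / SIB base / opcode register field), while the low three bits - the
   values kept in reg_lmap above - stay in the ModRM byte.
   Example: "mov r10, r9" is encoded as 0x4d 0x8b 0xd1, i.e.
   (REX_W | REX_R | REX_B), MOV_r_rm, ModRM 11 010 001. */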
  104. #ifndef _WIN64
  105. #define HALFWORD_MAX 0x7fffffffl
  106. #define HALFWORD_MIN -0x80000000l
  107. #else
  108. #define HALFWORD_MAX 0x7fffffffll
  109. #define HALFWORD_MIN -0x80000000ll
  110. #endif
  111. #define IS_HALFWORD(x) ((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
  112. #define NOT_HALFWORD(x) ((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)
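/* Editor's note (illustrative example): IS_HALFWORD() decides whether an
   immediate still fits the sign-extended 32-bit immediate forms of the 64-bit
   instructions. 0x7fffffff and -0x80000000 qualify, while a value such as
   0x100000000 does not and has to be loaded into a temporary register with
   emit_load_imm64() before it can be used as an operand. */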
  113. #define CHECK_EXTRA_REGS(p, w, do)
  114. #endif /* SLJIT_CONFIG_X86_32 */
  115. #define TMP_FREG (0)
  116. /* Size flags for emit_x86_instruction: */
  117. #define EX86_BIN_INS 0x0010
  118. #define EX86_SHIFT_INS 0x0020
  119. #define EX86_REX 0x0040
  120. #define EX86_NO_REXW 0x0080
  121. #define EX86_BYTE_ARG 0x0100
  122. #define EX86_HALF_ARG 0x0200
  123. #define EX86_PREF_66 0x0400
  124. #define EX86_PREF_F2 0x0800
  125. #define EX86_PREF_F3 0x1000
  126. #define EX86_SSE2_OP1 0x2000
  127. #define EX86_SSE2_OP2 0x4000
  128. #define EX86_SSE2 (EX86_SSE2_OP1 | EX86_SSE2_OP2)
  129. /* --------------------------------------------------------------------- */
/* Instruction forms */
  131. /* --------------------------------------------------------------------- */
  132. #define ADD (/* BINARY */ 0 << 3)
  133. #define ADD_EAX_i32 0x05
  134. #define ADD_r_rm 0x03
  135. #define ADD_rm_r 0x01
  136. #define ADDSD_x_xm 0x58
  137. #define ADC (/* BINARY */ 2 << 3)
  138. #define ADC_EAX_i32 0x15
  139. #define ADC_r_rm 0x13
  140. #define ADC_rm_r 0x11
  141. #define AND (/* BINARY */ 4 << 3)
  142. #define AND_EAX_i32 0x25
  143. #define AND_r_rm 0x23
  144. #define AND_rm_r 0x21
  145. #define ANDPD_x_xm 0x54
  146. #define BSR_r_rm (/* GROUP_0F */ 0xbd)
  147. #define CALL_i32 0xe8
  148. #define CALL_rm (/* GROUP_FF */ 2 << 3)
  149. #define CDQ 0x99
  150. #define CMOVNE_r_rm (/* GROUP_0F */ 0x45)
  151. #define CMP (/* BINARY */ 7 << 3)
  152. #define CMP_EAX_i32 0x3d
  153. #define CMP_r_rm 0x3b
  154. #define CMP_rm_r 0x39
  155. #define CVTPD2PS_x_xm 0x5a
  156. #define CVTSI2SD_x_rm 0x2a
  157. #define CVTTSD2SI_r_xm 0x2c
  158. #define DIV (/* GROUP_F7 */ 6 << 3)
  159. #define DIVSD_x_xm 0x5e
  160. #define INT3 0xcc
  161. #define IDIV (/* GROUP_F7 */ 7 << 3)
  162. #define IMUL (/* GROUP_F7 */ 5 << 3)
  163. #define IMUL_r_rm (/* GROUP_0F */ 0xaf)
  164. #define IMUL_r_rm_i8 0x6b
  165. #define IMUL_r_rm_i32 0x69
  166. #define JE_i8 0x74
  167. #define JNE_i8 0x75
  168. #define JMP_i8 0xeb
  169. #define JMP_i32 0xe9
  170. #define JMP_rm (/* GROUP_FF */ 4 << 3)
  171. #define LEA_r_m 0x8d
  172. #define MOV_r_rm 0x8b
  173. #define MOV_r_i32 0xb8
  174. #define MOV_rm_r 0x89
  175. #define MOV_rm_i32 0xc7
  176. #define MOV_rm8_i8 0xc6
  177. #define MOV_rm8_r8 0x88
  178. #define MOVSD_x_xm 0x10
  179. #define MOVSD_xm_x 0x11
  180. #define MOVSXD_r_rm 0x63
  181. #define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe)
  182. #define MOVSX_r_rm16 (/* GROUP_0F */ 0xbf)
  183. #define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6)
  184. #define MOVZX_r_rm16 (/* GROUP_0F */ 0xb7)
  185. #define MUL (/* GROUP_F7 */ 4 << 3)
  186. #define MULSD_x_xm 0x59
  187. #define NEG_rm (/* GROUP_F7 */ 3 << 3)
  188. #define NOP 0x90
  189. #define NOT_rm (/* GROUP_F7 */ 2 << 3)
  190. #define OR (/* BINARY */ 1 << 3)
  191. #define OR_r_rm 0x0b
  192. #define OR_EAX_i32 0x0d
  193. #define OR_rm_r 0x09
  194. #define OR_rm8_r8 0x08
  195. #define POP_r 0x58
  196. #define POP_rm 0x8f
  197. #define POPF 0x9d
  198. #define PUSH_i32 0x68
  199. #define PUSH_r 0x50
  200. #define PUSH_rm (/* GROUP_FF */ 6 << 3)
  201. #define PUSHF 0x9c
  202. #define RET_near 0xc3
  203. #define RET_i16 0xc2
  204. #define SBB (/* BINARY */ 3 << 3)
  205. #define SBB_EAX_i32 0x1d
  206. #define SBB_r_rm 0x1b
  207. #define SBB_rm_r 0x19
  208. #define SAR (/* SHIFT */ 7 << 3)
  209. #define SHL (/* SHIFT */ 4 << 3)
  210. #define SHR (/* SHIFT */ 5 << 3)
  211. #define SUB (/* BINARY */ 5 << 3)
  212. #define SUB_EAX_i32 0x2d
  213. #define SUB_r_rm 0x2b
  214. #define SUB_rm_r 0x29
  215. #define SUBSD_x_xm 0x5c
  216. #define TEST_EAX_i32 0xa9
  217. #define TEST_rm_r 0x85
  218. #define UCOMISD_x_xm 0x2e
  219. #define UNPCKLPD_x_xm 0x14
  220. #define XCHG_EAX_r 0x90
  221. #define XCHG_r_rm 0x87
  222. #define XOR (/* BINARY */ 6 << 3)
  223. #define XOR_EAX_i32 0x35
  224. #define XOR_r_rm 0x33
  225. #define XOR_rm_r 0x31
  226. #define XORPD_x_xm 0x57
  227. #define GROUP_0F 0x0f
  228. #define GROUP_F7 0xf7
  229. #define GROUP_FF 0xff
  230. #define GROUP_BINARY_81 0x81
  231. #define GROUP_BINARY_83 0x83
  232. #define GROUP_SHIFT_1 0xd1
  233. #define GROUP_SHIFT_N 0xc1
  234. #define GROUP_SHIFT_CL 0xd3
  235. #define MOD_REG 0xc0
  236. #define MOD_DISP8 0x40
  237. #define INC_SIZE(s) (*inst++ = (s), compiler->size += (s))
  238. #define PUSH_REG(r) (*inst++ = (PUSH_r + (r)))
  239. #define POP_REG(r) (*inst++ = (POP_r + (r)))
  240. #define RET() (*inst++ = (RET_near))
  241. #define RET_I16(n) (*inst++ = (RET_i16), *inst++ = n, *inst++ = 0)
  242. /* r32, r/m32 */
  243. #define MOV_RM(mod, reg, rm) (*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm))
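/* Editor's sketch (hypothetical helper, not part of the original sources): the
   ModRM byte built by MOV_RM() packs mod (2 bits), reg (3 bits) and rm (3 bits).
   A helper with the same layout, shown only for illustration: */
static SLJIT_INLINE sljit_ub x86_example_modrm(sljit_ub mod, sljit_ub reg, sljit_ub rm)
{
	/* mod is the 2-bit addressing mode (0x3 = register-direct). */
	return (sljit_ub)((mod << 6) | (reg << 3) | rm);
}
/* Example: MOV_RM(0x3, 0, 1) (register-direct, reg = eax, rm = ecx) emits
   0x8b 0xc1, i.e. "mov eax, ecx". */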
/* Multithreading does not affect these static variables, since they store
   built-in CPU features. Therefore they can safely be overwritten by
   different threads if they detect the CPU features at the same time. */
  247. #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
  248. static sljit_si cpu_has_sse2 = -1;
  249. #endif
  250. static sljit_si cpu_has_cmov = -1;
  251. #ifdef _WIN32_WCE
  252. #include <cmnintrin.h>
  253. #elif defined(_MSC_VER) && _MSC_VER >= 1400
  254. #include <intrin.h>
  255. #endif
  256. static void get_cpu_features(void)
  257. {
  258. sljit_ui features;
  259. #if defined(_MSC_VER) && _MSC_VER >= 1400
  260. int CPUInfo[4];
  261. __cpuid(CPUInfo, 1);
  262. features = (sljit_ui)CPUInfo[3];
  263. #elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)
  264. /* AT&T syntax. */
  265. __asm__ (
  266. "movl $0x1, %%eax\n"
  267. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  268. /* On x86-32, there is no red zone, so this
  269. should work (no need for a local variable). */
  270. "push %%ebx\n"
  271. #endif
  272. "cpuid\n"
  273. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  274. "pop %%ebx\n"
  275. #endif
  276. "movl %%edx, %0\n"
  277. : "=g" (features)
  278. :
  279. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  280. : "%eax", "%ecx", "%edx"
  281. #else
  282. : "%rax", "%rbx", "%rcx", "%rdx"
  283. #endif
  284. );
  285. #else /* _MSC_VER && _MSC_VER >= 1400 */
  286. /* Intel syntax. */
  287. __asm {
  288. mov eax, 1
  289. cpuid
  290. mov features, edx
  291. }
  292. #endif /* _MSC_VER && _MSC_VER >= 1400 */
  293. #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
  294. cpu_has_sse2 = (features >> 26) & 0x1;
  295. #endif
  296. cpu_has_cmov = (features >> 15) & 0x1;
  297. }
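/* Editor's sketch (hypothetical helper, not part of the original sources): the
   feature words above are filled in lazily. Callers in this file follow the
   pattern below before relying on cpu_has_cmov (or cpu_has_sse2). */
static SLJIT_INLINE sljit_si x86_example_cmov_available(void)
{
	if (cpu_has_cmov == -1)
		get_cpu_features();
	return cpu_has_cmov;
}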
  298. static sljit_ub get_jump_code(sljit_si type)
  299. {
  300. switch (type) {
  301. case SLJIT_EQUAL:
  302. case SLJIT_D_EQUAL:
  303. return 0x84 /* je */;
  304. case SLJIT_NOT_EQUAL:
  305. case SLJIT_D_NOT_EQUAL:
  306. return 0x85 /* jne */;
  307. case SLJIT_LESS:
  308. case SLJIT_D_LESS:
  309. return 0x82 /* jc */;
  310. case SLJIT_GREATER_EQUAL:
  311. case SLJIT_D_GREATER_EQUAL:
  312. return 0x83 /* jae */;
  313. case SLJIT_GREATER:
  314. case SLJIT_D_GREATER:
  315. return 0x87 /* jnbe */;
  316. case SLJIT_LESS_EQUAL:
  317. case SLJIT_D_LESS_EQUAL:
  318. return 0x86 /* jbe */;
  319. case SLJIT_SIG_LESS:
  320. return 0x8c /* jl */;
  321. case SLJIT_SIG_GREATER_EQUAL:
  322. return 0x8d /* jnl */;
  323. case SLJIT_SIG_GREATER:
  324. return 0x8f /* jnle */;
  325. case SLJIT_SIG_LESS_EQUAL:
  326. return 0x8e /* jle */;
  327. case SLJIT_OVERFLOW:
  328. case SLJIT_MUL_OVERFLOW:
  329. return 0x80 /* jo */;
  330. case SLJIT_NOT_OVERFLOW:
  331. case SLJIT_MUL_NOT_OVERFLOW:
  332. return 0x81 /* jno */;
  333. case SLJIT_D_UNORDERED:
  334. return 0x8a /* jp */;
  335. case SLJIT_D_ORDERED:
  336. return 0x8b /* jpo */;
  337. }
  338. return 0;
  339. }
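/* Editor's note (illustrative): the values returned above are the second
   opcode bytes of the two-byte "0x0f 0x8x" near conditional jumps. The
   corresponding short (rel8) forms use the same condition code 0x10 lower,
   e.g. long "je" is 0x0f 0x84 while short "je" is 0x74 (JE_i8). This is the
   relationship generate_near_jump_code() below relies on when it emits
   get_jump_code(type) - 0x10 for a short jump. */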
  340. static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_si type);
  341. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  342. static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_sw addr, sljit_si type);
  343. #endif
  344. static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_ub *code, sljit_si type)
  345. {
  346. sljit_si short_jump;
  347. sljit_uw label_addr;
  348. if (jump->flags & JUMP_LABEL)
  349. label_addr = (sljit_uw)(code + jump->u.label->size);
  350. else
  351. label_addr = jump->u.target;
  352. short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;
  353. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  354. if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
  355. return generate_far_jump_code(jump, code_ptr, type);
  356. #endif
  357. if (type == SLJIT_JUMP) {
  358. if (short_jump)
  359. *code_ptr++ = JMP_i8;
  360. else
  361. *code_ptr++ = JMP_i32;
  362. jump->addr++;
  363. }
  364. else if (type >= SLJIT_FAST_CALL) {
  365. short_jump = 0;
  366. *code_ptr++ = CALL_i32;
  367. jump->addr++;
  368. }
  369. else if (short_jump) {
  370. *code_ptr++ = get_jump_code(type) - 0x10;
  371. jump->addr++;
  372. }
  373. else {
  374. *code_ptr++ = GROUP_0F;
  375. *code_ptr++ = get_jump_code(type);
  376. jump->addr += 2;
  377. }
  378. if (short_jump) {
  379. jump->flags |= PATCH_MB;
  380. code_ptr += sizeof(sljit_sb);
  381. } else {
  382. jump->flags |= PATCH_MW;
  383. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  384. code_ptr += sizeof(sljit_sw);
  385. #else
  386. code_ptr += sizeof(sljit_si);
  387. #endif
  388. }
  389. return code_ptr;
  390. }
  391. SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
  392. {
  393. struct sljit_memory_fragment *buf;
  394. sljit_ub *code;
  395. sljit_ub *code_ptr;
  396. sljit_ub *buf_ptr;
  397. sljit_ub *buf_end;
  398. sljit_ub len;
  399. struct sljit_label *label;
  400. struct sljit_jump *jump;
  401. struct sljit_const *const_;
  402. CHECK_ERROR_PTR();
  403. CHECK_PTR(check_sljit_generate_code(compiler));
  404. reverse_buf(compiler);
  405. /* Second code generation pass. */
  406. code = (sljit_ub*)SLJIT_MALLOC_EXEC(compiler->size);
  407. PTR_FAIL_WITH_EXEC_IF(code);
  408. buf = compiler->buf;
  409. code_ptr = code;
  410. label = compiler->labels;
  411. jump = compiler->jumps;
  412. const_ = compiler->consts;
  413. do {
  414. buf_ptr = buf->memory;
  415. buf_end = buf_ptr + buf->used_size;
  416. do {
  417. len = *buf_ptr++;
  418. if (len > 0) {
  419. /* The code is already generated. */
  420. SLJIT_MEMMOVE(code_ptr, buf_ptr, len);
  421. code_ptr += len;
  422. buf_ptr += len;
  423. }
  424. else {
  425. if (*buf_ptr >= 4) {
  426. jump->addr = (sljit_uw)code_ptr;
  427. if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
  428. code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
  429. else
  430. code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
  431. jump = jump->next;
  432. }
  433. else if (*buf_ptr == 0) {
  434. label->addr = (sljit_uw)code_ptr;
  435. label->size = code_ptr - code;
  436. label = label->next;
  437. }
  438. else if (*buf_ptr == 1) {
  439. const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
  440. const_ = const_->next;
  441. }
  442. else {
  443. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  444. *code_ptr++ = (*buf_ptr == 2) ? CALL_i32 : JMP_i32;
  445. buf_ptr++;
  446. *(sljit_sw*)code_ptr = *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw));
  447. code_ptr += sizeof(sljit_sw);
  448. buf_ptr += sizeof(sljit_sw) - 1;
  449. #else
  450. code_ptr = generate_fixed_jump(code_ptr, *(sljit_sw*)(buf_ptr + 1), *buf_ptr);
  451. buf_ptr += sizeof(sljit_sw);
  452. #endif
  453. }
  454. buf_ptr++;
  455. }
  456. } while (buf_ptr < buf_end);
  457. SLJIT_ASSERT(buf_ptr == buf_end);
  458. buf = buf->next;
  459. } while (buf);
  460. SLJIT_ASSERT(!label);
  461. SLJIT_ASSERT(!jump);
  462. SLJIT_ASSERT(!const_);
  463. jump = compiler->jumps;
  464. while (jump) {
  465. if (jump->flags & PATCH_MB) {
  466. SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) <= 127);
  467. *(sljit_ub*)jump->addr = (sljit_ub)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb)));
  468. } else if (jump->flags & PATCH_MW) {
  469. if (jump->flags & JUMP_LABEL) {
  470. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  471. *(sljit_sw*)jump->addr = (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw)));
  472. #else
  473. SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) <= HALFWORD_MAX);
  474. *(sljit_si*)jump->addr = (sljit_si)(jump->u.label->addr - (jump->addr + sizeof(sljit_si)));
  475. #endif
  476. }
  477. else {
  478. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  479. *(sljit_sw*)jump->addr = (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw)));
  480. #else
  481. SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) <= HALFWORD_MAX);
  482. *(sljit_si*)jump->addr = (sljit_si)(jump->u.target - (jump->addr + sizeof(sljit_si)));
  483. #endif
  484. }
  485. }
  486. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  487. else if (jump->flags & PATCH_MD)
  488. *(sljit_sw*)jump->addr = jump->u.label->addr;
  489. #endif
  490. jump = jump->next;
  491. }
  492. /* Maybe we waste some space because of short jumps. */
  493. SLJIT_ASSERT(code_ptr <= code + compiler->size);
  494. compiler->error = SLJIT_ERR_COMPILED;
  495. compiler->executable_size = code_ptr - code;
  496. return (void*)code;
  497. }
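/* Editor's note (usage sketch; the exact public entry points live in
   sljitLir.h and may differ between sljit versions): a typical embedder
   creates a compiler, emits operations into it, calls sljit_generate_code()
   as above, casts the returned pointer to a function pointer in order to
   execute it, and finally releases it with sljit_free_code(). */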
  498. /* --------------------------------------------------------------------- */
  499. /* Operators */
  500. /* --------------------------------------------------------------------- */
  501. static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
  502. sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
  503. sljit_si dst, sljit_sw dstw,
  504. sljit_si src1, sljit_sw src1w,
  505. sljit_si src2, sljit_sw src2w);
  506. static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler,
  507. sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
  508. sljit_si dst, sljit_sw dstw,
  509. sljit_si src1, sljit_sw src1w,
  510. sljit_si src2, sljit_sw src2w);
  511. static sljit_si emit_mov(struct sljit_compiler *compiler,
  512. sljit_si dst, sljit_sw dstw,
  513. sljit_si src, sljit_sw srcw);
  514. static SLJIT_INLINE sljit_si emit_save_flags(struct sljit_compiler *compiler)
  515. {
  516. sljit_ub *inst;
  517. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  518. inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
  519. FAIL_IF(!inst);
  520. INC_SIZE(5);
  521. #else
  522. inst = (sljit_ub*)ensure_buf(compiler, 1 + 6);
  523. FAIL_IF(!inst);
  524. INC_SIZE(6);
  525. *inst++ = REX_W;
  526. #endif
  527. *inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp + sizeof(sljit_sw)] */
  528. *inst++ = 0x64;
  529. *inst++ = 0x24;
  530. *inst++ = (sljit_ub)sizeof(sljit_sw);
  531. *inst++ = PUSHF;
  532. compiler->flags_saved = 1;
  533. return SLJIT_SUCCESS;
  534. }
  535. static SLJIT_INLINE sljit_si emit_restore_flags(struct sljit_compiler *compiler, sljit_si keep_flags)
  536. {
  537. sljit_ub *inst;
  538. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  539. inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
  540. FAIL_IF(!inst);
  541. INC_SIZE(5);
  542. *inst++ = POPF;
  543. #else
  544. inst = (sljit_ub*)ensure_buf(compiler, 1 + 6);
  545. FAIL_IF(!inst);
  546. INC_SIZE(6);
  547. *inst++ = POPF;
  548. *inst++ = REX_W;
  549. #endif
  550. *inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp - sizeof(sljit_sw)] */
  551. *inst++ = 0x64;
  552. *inst++ = 0x24;
  553. *inst++ = (sljit_ub)-(sljit_sb)sizeof(sljit_sw);
  554. compiler->flags_saved = keep_flags;
  555. return SLJIT_SUCCESS;
  556. }
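/* Editor's note (illustrative byte sequences, x86-32 shown): emit_save_flags()
   produces "lea esp, [esp + 4]; pushfd" (8d 64 24 04 9c) and
   emit_restore_flags() produces "popfd; lea esp, [esp - 4]" (9d 8d 64 24 fc).
   lea is used instead of add/sub because it does not modify the very flags
   being saved or just restored; the net stack pointer change is zero, the
   flags ending up in the word at the original [esp] while esp itself is left
   unchanged. */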
  557. #ifdef _WIN32
  558. #include <malloc.h>
  559. static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size)
  560. {
/* Workaround for calling the internal _chkstk() function on Windows.
   This function touches all 4K pages belonging to the requested stack space,
   whose size is passed in local_size. This is necessary on Windows, where
   the stack can only grow in 4K steps. The function just burns CPU cycles
   if the stack is already large enough, but since that cannot be known in
   advance, it must always be called. I think this is a bad design in
   general, even if it has its reasons. */
  568. *(volatile sljit_si*)alloca(local_size) = 0;
  569. }
  570. #endif
  571. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  572. #include "sljitNativeX86_32.c"
  573. #else
  574. #include "sljitNativeX86_64.c"
  575. #endif
  576. static sljit_si emit_mov(struct sljit_compiler *compiler,
  577. sljit_si dst, sljit_sw dstw,
  578. sljit_si src, sljit_sw srcw)
  579. {
  580. sljit_ub* inst;
  581. if (dst == SLJIT_UNUSED) {
/* No destination: no need to set up flags. */
  583. if (src & SLJIT_MEM) {
  584. inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
  585. FAIL_IF(!inst);
  586. *inst = MOV_r_rm;
  587. }
  588. return SLJIT_SUCCESS;
  589. }
  590. if (FAST_IS_REG(src)) {
  591. inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
  592. FAIL_IF(!inst);
  593. *inst = MOV_rm_r;
  594. return SLJIT_SUCCESS;
  595. }
  596. if (src & SLJIT_IMM) {
  597. if (FAST_IS_REG(dst)) {
  598. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  599. return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
  600. #else
  601. if (!compiler->mode32) {
  602. if (NOT_HALFWORD(srcw))
  603. return emit_load_imm64(compiler, dst, srcw);
  604. }
  605. else
  606. return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw);
  607. #endif
  608. }
  609. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  610. if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
  611. FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
  612. inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
  613. FAIL_IF(!inst);
  614. *inst = MOV_rm_r;
  615. return SLJIT_SUCCESS;
  616. }
  617. #endif
  618. inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
  619. FAIL_IF(!inst);
  620. *inst = MOV_rm_i32;
  621. return SLJIT_SUCCESS;
  622. }
  623. if (FAST_IS_REG(dst)) {
  624. inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
  625. FAIL_IF(!inst);
  626. *inst = MOV_r_rm;
  627. return SLJIT_SUCCESS;
  628. }
/* Memory to memory move. Requires two instructions. */
  630. inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
  631. FAIL_IF(!inst);
  632. *inst = MOV_r_rm;
  633. inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
  634. FAIL_IF(!inst);
  635. *inst = MOV_rm_r;
  636. return SLJIT_SUCCESS;
  637. }
  638. #define EMIT_MOV(compiler, dst, dstw, src, srcw) \
  639. FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
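/* Editor's note (illustrative): emit_mov() above never emits a memory-to-memory
   form (x86 has none for MOV); such a request is split into two instructions
   through TMP_REG1, roughly "mov tmp, [src]; mov [dst], tmp". */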
  640. SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
  641. {
  642. sljit_ub *inst;
  643. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  644. sljit_si size;
  645. #endif
  646. CHECK_ERROR();
  647. CHECK(check_sljit_emit_op0(compiler, op));
  648. switch (GET_OPCODE(op)) {
  649. case SLJIT_BREAKPOINT:
  650. inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
  651. FAIL_IF(!inst);
  652. INC_SIZE(1);
  653. *inst = INT3;
  654. break;
  655. case SLJIT_NOP:
  656. inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
  657. FAIL_IF(!inst);
  658. INC_SIZE(1);
  659. *inst = NOP;
  660. break;
  661. case SLJIT_LUMUL:
  662. case SLJIT_LSMUL:
  663. case SLJIT_UDIVMOD:
  664. case SLJIT_SDIVMOD:
  665. case SLJIT_UDIVI:
  666. case SLJIT_SDIVI:
  667. compiler->flags_saved = 0;
  668. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  669. #ifdef _WIN64
  670. SLJIT_COMPILE_ASSERT(
  671. reg_map[SLJIT_R0] == 0
  672. && reg_map[SLJIT_R1] == 2
  673. && reg_map[TMP_REG1] > 7,
  674. invalid_register_assignment_for_div_mul);
  675. #else
  676. SLJIT_COMPILE_ASSERT(
  677. reg_map[SLJIT_R0] == 0
  678. && reg_map[SLJIT_R1] < 7
  679. && reg_map[TMP_REG1] == 2,
  680. invalid_register_assignment_for_div_mul);
  681. #endif
  682. compiler->mode32 = op & SLJIT_INT_OP;
  683. #endif
  684. SLJIT_COMPILE_ASSERT((SLJIT_UDIVMOD & 0x2) == 0 && SLJIT_UDIVI - 0x2 == SLJIT_UDIVMOD, bad_div_opcode_assignments);
  685. op = GET_OPCODE(op);
  686. if ((op | 0x2) == SLJIT_UDIVI) {
  687. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
  688. EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
  689. inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
  690. #else
  691. inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
  692. #endif
  693. FAIL_IF(!inst);
  694. *inst = XOR_r_rm;
  695. }
  696. if ((op | 0x2) == SLJIT_SDIVI) {
  697. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
  698. EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
  699. #endif
  700. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  701. inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
  702. FAIL_IF(!inst);
  703. INC_SIZE(1);
  704. *inst = CDQ;
  705. #else
  706. if (compiler->mode32) {
  707. inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
  708. FAIL_IF(!inst);
  709. INC_SIZE(1);
  710. *inst = CDQ;
  711. } else {
  712. inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
  713. FAIL_IF(!inst);
  714. INC_SIZE(2);
  715. *inst++ = REX_W;
  716. *inst = CDQ;
  717. }
  718. #endif
  719. }
  720. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  721. inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
  722. FAIL_IF(!inst);
  723. INC_SIZE(2);
  724. *inst++ = GROUP_F7;
  725. *inst = MOD_REG | ((op >= SLJIT_UDIVMOD) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
  726. #else
  727. #ifdef _WIN64
  728. size = (!compiler->mode32 || op >= SLJIT_UDIVMOD) ? 3 : 2;
  729. #else
  730. size = (!compiler->mode32) ? 3 : 2;
  731. #endif
  732. inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
  733. FAIL_IF(!inst);
  734. INC_SIZE(size);
  735. #ifdef _WIN64
  736. if (!compiler->mode32)
  737. *inst++ = REX_W | ((op >= SLJIT_UDIVMOD) ? REX_B : 0);
  738. else if (op >= SLJIT_UDIVMOD)
  739. *inst++ = REX_B;
  740. *inst++ = GROUP_F7;
  741. *inst = MOD_REG | ((op >= SLJIT_UDIVMOD) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
  742. #else
  743. if (!compiler->mode32)
  744. *inst++ = REX_W;
  745. *inst++ = GROUP_F7;
  746. *inst = MOD_REG | reg_map[SLJIT_R1];
  747. #endif
  748. #endif
  749. switch (op) {
  750. case SLJIT_LUMUL:
  751. *inst |= MUL;
  752. break;
  753. case SLJIT_LSMUL:
  754. *inst |= IMUL;
  755. break;
  756. case SLJIT_UDIVMOD:
  757. case SLJIT_UDIVI:
  758. *inst |= DIV;
  759. break;
  760. case SLJIT_SDIVMOD:
  761. case SLJIT_SDIVI:
  762. *inst |= IDIV;
  763. break;
  764. }
  765. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
  766. if (op <= SLJIT_SDIVMOD)
  767. EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
  768. #else
  769. if (op >= SLJIT_UDIVI)
  770. EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
  771. #endif
  772. break;
  773. }
  774. return SLJIT_SUCCESS;
  775. }
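/* Editor's note (illustrative, SysV x86-64 case): a 32-bit SLJIT_UDIVI is
   lowered to roughly "xor edx, edx; div esi" - the dividend is taken from
   SLJIT_R0 (eax), edx is cleared because it forms the upper half of the
   dividend, and the quotient is left in eax. SLJIT_UDIVMOD additionally
   copies the remainder from edx back into SLJIT_R1 afterwards. */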
  776. #define ENCODE_PREFIX(prefix) \
  777. do { \
  778. inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); \
  779. FAIL_IF(!inst); \
  780. INC_SIZE(1); \
  781. *inst = (prefix); \
  782. } while (0)
  783. static sljit_si emit_mov_byte(struct sljit_compiler *compiler, sljit_si sign,
  784. sljit_si dst, sljit_sw dstw,
  785. sljit_si src, sljit_sw srcw)
  786. {
  787. sljit_ub* inst;
  788. sljit_si dst_r;
  789. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  790. sljit_si work_r;
  791. #endif
  792. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  793. compiler->mode32 = 0;
  794. #endif
  795. if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
  796. return SLJIT_SUCCESS; /* Empty instruction. */
  797. if (src & SLJIT_IMM) {
  798. if (FAST_IS_REG(dst)) {
  799. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  800. return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
  801. #else
  802. inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
  803. FAIL_IF(!inst);
  804. *inst = MOV_rm_i32;
  805. return SLJIT_SUCCESS;
  806. #endif
  807. }
  808. inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
  809. FAIL_IF(!inst);
  810. *inst = MOV_rm8_i8;
  811. return SLJIT_SUCCESS;
  812. }
  813. dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
  814. if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
  815. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  816. if (reg_map[src] >= 4) {
  817. SLJIT_ASSERT(dst_r == TMP_REG1);
  818. EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
  819. } else
  820. dst_r = src;
  821. #else
  822. dst_r = src;
  823. #endif
  824. }
  825. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  826. else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
  827. /* src, dst are registers. */
  828. SLJIT_ASSERT(SLOW_IS_REG(dst));
  829. if (reg_map[dst] < 4) {
  830. if (dst != src)
  831. EMIT_MOV(compiler, dst, 0, src, 0);
  832. inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
  833. FAIL_IF(!inst);
  834. *inst++ = GROUP_0F;
  835. *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
  836. }
  837. else {
  838. if (dst != src)
  839. EMIT_MOV(compiler, dst, 0, src, 0);
  840. if (sign) {
  841. /* shl reg, 24 */
  842. inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
  843. FAIL_IF(!inst);
  844. *inst |= SHL;
  845. /* sar reg, 24 */
  846. inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
  847. FAIL_IF(!inst);
  848. *inst |= SAR;
  849. }
  850. else {
  851. inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
  852. FAIL_IF(!inst);
  853. *(inst + 1) |= AND;
  854. }
  855. }
  856. return SLJIT_SUCCESS;
  857. }
  858. #endif
  859. else {
/* src is either a memory address or, on x86-32, a register with reg_map[src] < 4. */
  861. inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
  862. FAIL_IF(!inst);
  863. *inst++ = GROUP_0F;
  864. *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
  865. }
  866. if (dst & SLJIT_MEM) {
  867. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  868. if (dst_r == TMP_REG1) {
/* Find an unused register whose reg_map value is < 4. */
  870. if ((dst & REG_MASK) == SLJIT_R0) {
  871. if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1))
  872. work_r = SLJIT_R2;
  873. else
  874. work_r = SLJIT_R1;
  875. }
  876. else {
  877. if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
  878. work_r = SLJIT_R0;
  879. else if ((dst & REG_MASK) == SLJIT_R1)
  880. work_r = SLJIT_R2;
  881. else
  882. work_r = SLJIT_R1;
  883. }
  884. if (work_r == SLJIT_R0) {
  885. ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
  886. }
  887. else {
  888. inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
  889. FAIL_IF(!inst);
  890. *inst = XCHG_r_rm;
  891. }
  892. inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
  893. FAIL_IF(!inst);
  894. *inst = MOV_rm8_r8;
  895. if (work_r == SLJIT_R0) {
  896. ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
  897. }
  898. else {
  899. inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
  900. FAIL_IF(!inst);
  901. *inst = XCHG_r_rm;
  902. }
  903. }
  904. else {
  905. inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
  906. FAIL_IF(!inst);
  907. *inst = MOV_rm8_r8;
  908. }
  909. #else
  910. inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
  911. FAIL_IF(!inst);
  912. *inst = MOV_rm8_r8;
  913. #endif
  914. }
  915. return SLJIT_SUCCESS;
  916. }
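/* Editor's note (illustrative): the reg_map[...] >= 4 checks above exist
   because, without a REX prefix, the 8-bit register encodings 4-7 select
   ah/ch/dh/bh rather than the low byte of esp/ebp/esi/edi. On x86-32 a value
   held in such a register therefore has to be masked (and 0xff), sign-extended
   (shl/sar by 24), or exchanged through eax before its low byte can be
   stored. */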
  917. static sljit_si emit_mov_half(struct sljit_compiler *compiler, sljit_si sign,
  918. sljit_si dst, sljit_sw dstw,
  919. sljit_si src, sljit_sw srcw)
  920. {
  921. sljit_ub* inst;
  922. sljit_si dst_r;
  923. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  924. compiler->mode32 = 0;
  925. #endif
  926. if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
  927. return SLJIT_SUCCESS; /* Empty instruction. */
  928. if (src & SLJIT_IMM) {
  929. if (FAST_IS_REG(dst)) {
  930. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  931. return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
  932. #else
  933. inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
  934. FAIL_IF(!inst);
  935. *inst = MOV_rm_i32;
  936. return SLJIT_SUCCESS;
  937. #endif
  938. }
  939. inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
  940. FAIL_IF(!inst);
  941. *inst = MOV_rm_i32;
  942. return SLJIT_SUCCESS;
  943. }
  944. dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
  945. if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
  946. dst_r = src;
  947. else {
  948. inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
  949. FAIL_IF(!inst);
  950. *inst++ = GROUP_0F;
  951. *inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
  952. }
  953. if (dst & SLJIT_MEM) {
  954. inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
  955. FAIL_IF(!inst);
  956. *inst = MOV_rm_r;
  957. }
  958. return SLJIT_SUCCESS;
  959. }
  960. static sljit_si emit_unary(struct sljit_compiler *compiler, sljit_ub opcode,
  961. sljit_si dst, sljit_sw dstw,
  962. sljit_si src, sljit_sw srcw)
  963. {
  964. sljit_ub* inst;
  965. if (dst == SLJIT_UNUSED) {
  966. EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
  967. inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
  968. FAIL_IF(!inst);
  969. *inst++ = GROUP_F7;
  970. *inst |= opcode;
  971. return SLJIT_SUCCESS;
  972. }
  973. if (dst == src && dstw == srcw) {
  974. /* Same input and output */
  975. inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
  976. FAIL_IF(!inst);
  977. *inst++ = GROUP_F7;
  978. *inst |= opcode;
  979. return SLJIT_SUCCESS;
  980. }
  981. if (FAST_IS_REG(dst)) {
  982. EMIT_MOV(compiler, dst, 0, src, srcw);
  983. inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
  984. FAIL_IF(!inst);
  985. *inst++ = GROUP_F7;
  986. *inst |= opcode;
  987. return SLJIT_SUCCESS;
  988. }
  989. EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
  990. inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
  991. FAIL_IF(!inst);
  992. *inst++ = GROUP_F7;
  993. *inst |= opcode;
  994. EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
  995. return SLJIT_SUCCESS;
  996. }
  997. static sljit_si emit_not_with_flags(struct sljit_compiler *compiler,
  998. sljit_si dst, sljit_sw dstw,
  999. sljit_si src, sljit_sw srcw)
  1000. {
  1001. sljit_ub* inst;
  1002. if (dst == SLJIT_UNUSED) {
  1003. EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
  1004. inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
  1005. FAIL_IF(!inst);
  1006. *inst++ = GROUP_F7;
  1007. *inst |= NOT_rm;
  1008. inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
  1009. FAIL_IF(!inst);
  1010. *inst = OR_r_rm;
  1011. return SLJIT_SUCCESS;
  1012. }
  1013. if (FAST_IS_REG(dst)) {
  1014. EMIT_MOV(compiler, dst, 0, src, srcw);
  1015. inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
  1016. FAIL_IF(!inst);
  1017. *inst++ = GROUP_F7;
  1018. *inst |= NOT_rm;
  1019. inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
  1020. FAIL_IF(!inst);
  1021. *inst = OR_r_rm;
  1022. return SLJIT_SUCCESS;
  1023. }
  1024. EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
  1025. inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
  1026. FAIL_IF(!inst);
  1027. *inst++ = GROUP_F7;
  1028. *inst |= NOT_rm;
  1029. inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
  1030. FAIL_IF(!inst);
  1031. *inst = OR_r_rm;
  1032. EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
  1033. return SLJIT_SUCCESS;
  1034. }
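/* Editor's note (illustrative): "not" does not update the status flags, so
   the extra "or reg, reg" emitted above is what actually sets the zero/sign
   flags when SLJIT_NOT is used together with SLJIT_SET_E. */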
  1035. static sljit_si emit_clz(struct sljit_compiler *compiler, sljit_si op_flags,
  1036. sljit_si dst, sljit_sw dstw,
  1037. sljit_si src, sljit_sw srcw)
  1038. {
  1039. sljit_ub* inst;
  1040. sljit_si dst_r;
  1041. SLJIT_UNUSED_ARG(op_flags);
  1042. if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
  1043. /* Just set the zero flag. */
  1044. EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
  1045. inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
  1046. FAIL_IF(!inst);
  1047. *inst++ = GROUP_F7;
  1048. *inst |= NOT_rm;
  1049. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  1050. inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REG1, 0);
  1051. #else
  1052. inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, TMP_REG1, 0);
  1053. #endif
  1054. FAIL_IF(!inst);
  1055. *inst |= SHR;
  1056. return SLJIT_SUCCESS;
  1057. }
  1058. if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
  1059. EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
  1060. src = TMP_REG1;
  1061. srcw = 0;
  1062. }
  1063. inst = emit_x86_instruction(compiler, 2, TMP_REG1, 0, src, srcw);
  1064. FAIL_IF(!inst);
  1065. *inst++ = GROUP_0F;
  1066. *inst = BSR_r_rm;
  1067. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  1068. if (FAST_IS_REG(dst))
  1069. dst_r = dst;
  1070. else {
  1071. /* Find an unused temporary register. */
  1072. if ((dst & REG_MASK) != SLJIT_R0 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
  1073. dst_r = SLJIT_R0;
  1074. else if ((dst & REG_MASK) != SLJIT_R1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R1))
  1075. dst_r = SLJIT_R1;
  1076. else
  1077. dst_r = SLJIT_R2;
  1078. EMIT_MOV(compiler, dst, dstw, dst_r, 0);
  1079. }
  1080. EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
  1081. #else
  1082. dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
  1083. compiler->mode32 = 0;
  1084. EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 64 + 63 : 32 + 31);
  1085. compiler->mode32 = op_flags & SLJIT_INT_OP;
  1086. #endif
  1087. if (cpu_has_cmov == -1)
  1088. get_cpu_features();
  1089. if (cpu_has_cmov) {
  1090. inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
  1091. FAIL_IF(!inst);
  1092. *inst++ = GROUP_0F;
  1093. *inst = CMOVNE_r_rm;
  1094. } else {
  1095. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  1096. inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
  1097. FAIL_IF(!inst);
  1098. INC_SIZE(4);
  1099. *inst++ = JE_i8;
  1100. *inst++ = 2;
  1101. *inst++ = MOV_r_rm;
  1102. *inst++ = MOD_REG | (reg_map[dst_r] << 3) | reg_map[TMP_REG1];
  1103. #else
  1104. inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
  1105. FAIL_IF(!inst);
  1106. INC_SIZE(5);
  1107. *inst++ = JE_i8;
  1108. *inst++ = 3;
  1109. *inst++ = REX_W | (reg_map[dst_r] >= 8 ? REX_R : 0) | (reg_map[TMP_REG1] >= 8 ? REX_B : 0);
  1110. *inst++ = MOV_r_rm;
  1111. *inst++ = MOD_REG | (reg_lmap[dst_r] << 3) | reg_lmap[TMP_REG1];
  1112. #endif
  1113. }
  1114. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  1115. inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
  1116. #else
  1117. inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, dst_r, 0);
  1118. #endif
  1119. FAIL_IF(!inst);
  1120. *(inst + 1) |= XOR;
  1121. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  1122. if (dst & SLJIT_MEM) {
  1123. inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
  1124. FAIL_IF(!inst);
  1125. *inst = XCHG_r_rm;
  1126. }
  1127. #else
  1128. if (dst & SLJIT_MEM)
  1129. EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
  1130. #endif
  1131. return SLJIT_SUCCESS;
  1132. }
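/* Editor's worked example (illustrative): for a 32-bit source of 0x000000f0,
   bsr returns bit index 7 and 7 ^ 31 = 24, the leading-zero count. For a zero
   source bsr leaves its destination undefined (and sets ZF), which is why the
   destination is preloaded with 32 + 31 (or 64 + 63 in 64-bit mode) and only
   overwritten via cmovne or the short "je" fallback; 63 ^ 31 then yields 32,
   the expected clz(0). */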
  1133. SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
  1134. sljit_si dst, sljit_sw dstw,
  1135. sljit_si src, sljit_sw srcw)
  1136. {
  1137. sljit_ub* inst;
  1138. sljit_si update = 0;
  1139. sljit_si op_flags = GET_ALL_FLAGS(op);
  1140. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  1141. sljit_si dst_is_ereg = 0;
  1142. sljit_si src_is_ereg = 0;
  1143. #else
  1144. # define src_is_ereg 0
  1145. #endif
  1146. CHECK_ERROR();
  1147. CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
  1148. ADJUST_LOCAL_OFFSET(dst, dstw);
  1149. ADJUST_LOCAL_OFFSET(src, srcw);
  1150. CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
  1151. CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);
  1152. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  1153. compiler->mode32 = op_flags & SLJIT_INT_OP;
  1154. #endif
  1155. op = GET_OPCODE(op);
  1156. if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
  1157. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  1158. compiler->mode32 = 0;
  1159. #endif
  1160. if (op_flags & SLJIT_INT_OP) {
  1161. if (FAST_IS_REG(src) && src == dst) {
  1162. if (!TYPE_CAST_NEEDED(op))
  1163. return SLJIT_SUCCESS;
  1164. }
  1165. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  1166. if (op == SLJIT_MOV_SI && (src & SLJIT_MEM))
  1167. op = SLJIT_MOV_UI;
  1168. if (op == SLJIT_MOVU_SI && (src & SLJIT_MEM))
  1169. op = SLJIT_MOVU_UI;
  1170. if (op == SLJIT_MOV_UI && (src & SLJIT_IMM))
  1171. op = SLJIT_MOV_SI;
  1172. if (op == SLJIT_MOVU_UI && (src & SLJIT_IMM))
  1173. op = SLJIT_MOVU_SI;
  1174. #endif
  1175. }
  1176. SLJIT_COMPILE_ASSERT(SLJIT_MOV + 8 == SLJIT_MOVU, movu_offset);
		if (op >= SLJIT_MOVU) {
			update = 1;
			op -= 8;
		}

		if (src & SLJIT_IMM) {
			switch (op) {
			case SLJIT_MOV_UB:
				srcw = (sljit_ub)srcw;
				break;
			case SLJIT_MOV_SB:
				srcw = (sljit_sb)srcw;
				break;
			case SLJIT_MOV_UH:
				srcw = (sljit_uh)srcw;
				break;
			case SLJIT_MOV_SH:
				srcw = (sljit_sh)srcw;
				break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			case SLJIT_MOV_UI:
				srcw = (sljit_ui)srcw;
				break;
			case SLJIT_MOV_SI:
				srcw = (sljit_si)srcw;
				break;
#endif
			}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			if (SLJIT_UNLIKELY(dst_is_ereg))
				return emit_mov(compiler, dst, dstw, src, srcw);
#endif
		}

		if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & REG_MASK) && (srcw != 0 || (src & OFFS_REG_MASK) != 0)) {
			inst = emit_x86_instruction(compiler, 1, src & REG_MASK, 0, src, srcw);
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			src &= SLJIT_MEM | 0xf;
			srcw = 0;
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
			SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
			dst = TMP_REG1;
		}
#endif

		switch (op) {
		case SLJIT_MOV:
		case SLJIT_MOV_P:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		case SLJIT_MOV_UI:
		case SLJIT_MOV_SI:
#endif
			FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_UB:
			FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_SB:
			FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_UH:
			FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_SH:
			FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
			break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		case SLJIT_MOV_UI:
			FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_SI:
			FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
			break;
#endif
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
			return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
#endif

		if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK) && (dstw != 0 || (dst & OFFS_REG_MASK) != 0)) {
			inst = emit_x86_instruction(compiler, 1, dst & REG_MASK, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = LEA_r_m;
		}
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(GET_FLAGS(op_flags)))
		compiler->flags_saved = 0;

	switch (op) {
	case SLJIT_NOT:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_E))
			return emit_not_with_flags(compiler, dst, dstw, src, srcw);
		return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);

	case SLJIT_NEG:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);

	case SLJIT_CLZ:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
	}

	return SLJIT_SUCCESS;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#	undef src_is_ereg
#endif
}

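/* Helper macros for the binary operation emitters below. BINARY_IMM encodes
   "op arg, imm"; on 64 bit targets an immediate which does not fit into a
   signed 32 bit field is loaded into TMP_REG2 first. BINARY_EAX_IMM uses the
   short EAX/RAX specific opcode form (op_eax_imm). */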
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	if (IS_HALFWORD(immw) || compiler->mode32) { \
		inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
		FAIL_IF(!inst); \
		*(inst + 1) |= (op_imm); \
	} \
	else { \
		FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
		inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
		FAIL_IF(!inst); \
		*inst = (op_mr); \
	}

#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))

#else

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
	FAIL_IF(!inst); \
	*(inst + 1) |= (op_imm);

#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))

#endif

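/* Emits a commutative ("cumulative") two operand operation (ADD, ADC, AND, OR,
   XOR). op_rm, op_mr, op_imm and op_eax_imm are the reg <- reg/mem,
   reg/mem <- reg, group immediate and EAX immediate opcode forms. */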
static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src2)) {
			/* Special exception for sljit_emit_op_flags. */
			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* Only for cumulative operations. */
	if (dst == src2 && dstw == src2w) {
		if (src1 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
			if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src1w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src1)) {
			inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
	}
	else {
		/* This version requires fewer memory writes. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}

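/* Same idea as emit_cum_binary, but for the non-commutative SUB and SBB:
   when dst == src2 the operation cannot be done in place, so the general
   path below computes the result in TMP_REG1 first. */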
static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src2)) {
			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (FAST_IS_REG(dst) && dst != src2) {
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
	}
	else {
		/* This version requires fewer memory writes. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}

static sljit_si emit_mul(struct sljit_compiler *compiler,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;
	sljit_si dst_r;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	/* Register destination. */
	if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}
	else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}
	else if (src1 & SLJIT_IMM) {
		if (src2 & SLJIT_IMM) {
			EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
			src2 = dst_r;
			src2w = 0;
		}

		if (src1w <= 127 && src1w >= -128) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i8;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = (sljit_sb)src1w;
		}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*(sljit_sw*)inst = src1w;
		}
#else
		else if (IS_HALFWORD(src1w)) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*(sljit_si*)inst = (sljit_si)src1w;
		}
		else {
			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
			if (dst_r != src2)
				EMIT_MOV(compiler, dst_r, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = IMUL_r_rm;
		}
#endif
	}
	else if (src2 & SLJIT_IMM) {
		/* Note: src1 is NOT immediate. */

		if (src2w <= 127 && src2w >= -128) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i8;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = (sljit_sb)src2w;
		}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*(sljit_sw*)inst = src2w;
		}
#else
		else if (IS_HALFWORD(src2w)) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*(sljit_si*)inst = (sljit_si)src2w;
		}
		else {
			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src2w);
			if (dst_r != src1)
				EMIT_MOV(compiler, dst_r, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = IMUL_r_rm;
		}
#endif
	}
	else {
		/* Neither argument is immediate. */
		if (ADDRESSING_DEPENDS_ON(src2, dst_r))
			dst_r = TMP_REG1;
		EMIT_MOV(compiler, dst_r, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}

	if (dst_r == TMP_REG1)
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);

	return SLJIT_SUCCESS;
}

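/* Tries to encode an addition as a single LEA (reg + reg or reg + immediate).
   LEA does not modify the status flags, which also makes it suitable when
   SLJIT_KEEP_FLAGS is requested; returns SLJIT_ERR_UNSUPPORTED when the
   operands cannot be expressed as an addressing mode. */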
static sljit_si emit_lea_binary(struct sljit_compiler *compiler, sljit_si keep_flags,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;
	sljit_si dst_r, done = 0;

	/* These cases are better left to be handled the normal way. */
	if (!keep_flags) {
		if (dst == src1 && dstw == src1w)
			return SLJIT_ERR_UNSUPPORTED;
		if (dst == src2 && dstw == src2w)
			return SLJIT_ERR_UNSUPPORTED;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (FAST_IS_REG(src1)) {
		if (FAST_IS_REG(src2)) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			done = 1;
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_si)src2w);
#else
		if (src2 & SLJIT_IMM) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
#endif
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			done = 1;
		}
	}
	else if (FAST_IS_REG(src2)) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_si)src1w);
#else
		if (src1 & SLJIT_IMM) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
#endif
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			done = 1;
		}
	}

	if (done) {
		if (dst_r == TMP_REG1)
			return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
		return SLJIT_SUCCESS;
	}
	return SLJIT_ERR_UNSUPPORTED;
}

static sljit_si emit_cmp_binary(struct sljit_compiler *compiler,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
#endif
		BINARY_EAX_IMM(CMP_EAX_i32, src2w);
		return SLJIT_SUCCESS;
	}

	if (FAST_IS_REG(src1)) {
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = CMP_r_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
		inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
		FAIL_IF(!inst);
		*inst = CMP_rm_r;
		return SLJIT_SUCCESS;
	}

	if (src2 & SLJIT_IMM) {
		if (src1 & SLJIT_IMM) {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			src1 = TMP_REG1;
			src1w = 0;
		}
		BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
	}
	else {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst = CMP_r_rm;
	}
	return SLJIT_SUCCESS;
}

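/* Emits TEST, i.e. an AND which only sets the flags; used by SLJIT_AND when
   the destination is SLJIT_UNUSED. */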
static sljit_si emit_test_binary(struct sljit_compiler *compiler,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
#endif
		BINARY_EAX_IMM(TEST_EAX_i32, src2w);
		return SLJIT_SUCCESS;
	}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
	if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
#endif
		BINARY_EAX_IMM(TEST_EAX_i32, src1w);
		return SLJIT_SUCCESS;
	}

	if (!(src1 & SLJIT_IMM)) {
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if (IS_HALFWORD(src2w) || compiler->mode32) {
				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
				FAIL_IF(!inst);
				*inst = GROUP_F7;
			}
			else {
				FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
				inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, src1w);
				FAIL_IF(!inst);
				*inst = TEST_rm_r;
			}
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
			FAIL_IF(!inst);
			*inst = GROUP_F7;
#endif
			return SLJIT_SUCCESS;
		}
		else if (FAST_IS_REG(src1)) {
			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = TEST_rm_r;
			return SLJIT_SUCCESS;
		}
	}

	if (!(src2 & SLJIT_IMM)) {
		if (src1 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if (IS_HALFWORD(src1w) || compiler->mode32) {
				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w);
				FAIL_IF(!inst);
				*inst = GROUP_F7;
			}
			else {
				FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
				inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, src2w);
				FAIL_IF(!inst);
				*inst = TEST_rm_r;
			}
#else
			inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w);
			FAIL_IF(!inst);
			*inst = GROUP_F7;
#endif
			return SLJIT_SUCCESS;
		}
		else if (FAST_IS_REG(src2)) {
			inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = TEST_rm_r;
			return SLJIT_SUCCESS;
		}
	}

	EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
	if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (IS_HALFWORD(src2w) || compiler->mode32) {
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst = GROUP_F7;
		}
		else {
			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
			inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst = TEST_rm_r;
		}
#else
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst = GROUP_F7;
#endif
	}
	else {
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst = TEST_rm_r;
	}
	return SLJIT_SUCCESS;
}

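/* Emits SHL/SHR/SAR. A variable shift count must be in ECX
   (SLJIT_PREF_SHIFT_REG), so the cases below shuffle the count, the shifted
   value and the destination around that constraint. */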
static sljit_si emit_shift(struct sljit_compiler *compiler,
	sljit_ub mode,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;

	if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
		if (dst == src1 && dstw == src1w) {
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_UNUSED) {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst |= mode;
			EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
			return SLJIT_SUCCESS;
		}
		if (FAST_IS_REG(dst)) {
			EMIT_MOV(compiler, dst, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
			FAIL_IF(!inst);
			*inst |= mode;
			return SLJIT_SUCCESS;
		}

		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
		return SLJIT_SUCCESS;
	}

	if (dst == SLJIT_PREF_SHIFT_REG) {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
	}
	else if (FAST_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
		if (src1 != dst)
			EMIT_MOV(compiler, dst, 0, src1, src1w);
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
	}
	else {
		/* This case is really difficult, since ecx itself may be used for
		   addressing, and we must make sure it works even in that case. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
#else
		/* [esp+0] contains the flags. */
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0);
#endif
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst |= mode;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
#else
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw));
#endif
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}

static sljit_si emit_shift_with_flags(struct sljit_compiler *compiler,
	sljit_ub mode, sljit_si set_flags,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	/* The CPU does not set flags if the shift count is 0. */
	if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
#else
		if ((src2w & 0x1f) != 0)
			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
#endif
		if (!set_flags)
			return emit_mov(compiler, dst, dstw, src1, src1w);
		/* OR dst, src, 0 */
		return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
			dst, dstw, src1, src1w, SLJIT_IMM, 0);
	}

	if (!set_flags)
		return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);

	if (!FAST_IS_REG(dst))
		FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));

	FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));

	if (FAST_IS_REG(dst))
		return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	CHECK_EXTRA_REGS(src1, src1w, (void)0);
	CHECK_EXTRA_REGS(src2, src2w, (void)0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op & SLJIT_INT_OP;
#endif

	if (GET_OPCODE(op) >= SLJIT_MUL) {
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD:
		if (!GET_FLAGS(op)) {
			if (emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		else
			compiler->flags_saved = 0;
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ADDC:
		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
			FAIL_IF(emit_restore_flags(compiler, 1));
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
			FAIL_IF(emit_save_flags(compiler));
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUB:
		if (!GET_FLAGS(op)) {
			if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		else
			compiler->flags_saved = 0;
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		if (dst == SLJIT_UNUSED)
			return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
		return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUBC:
		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
			FAIL_IF(emit_restore_flags(compiler, 1));
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
			FAIL_IF(emit_save_flags(compiler));
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_MUL:
		return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_AND:
		if (dst == SLJIT_UNUSED)
			return emit_test_binary(compiler, src1, src1w, src2, src2w);
		return emit_cum_binary(compiler, AND_r_rm, AND_rm_r, AND, AND_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_OR:
		return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_XOR:
		return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SHL:
		return emit_shift_with_flags(compiler, SHL, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_LSHR:
		return emit_shift_with_flags(compiler, SHR, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ASHR:
		return emit_shift_with_flags(compiler, SAR, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	}

	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
{
	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (reg >= SLJIT_R3 && reg <= SLJIT_R6)
		return -1;
#endif
	return reg_map[reg];
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
{
	CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
	return reg;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_si size)
{
	sljit_ub *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));

	inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);
	INC_SIZE(size);
	SLJIT_MEMMOVE(inst, instruction, size);
	return SLJIT_SUCCESS;
}

/* --------------------------------------------------------------------- */
/*  Floating point operators                                             */
/* --------------------------------------------------------------------- */

/* Alignment + 2 * 16 bytes. */
static sljit_si sse2_data[3 + (4 + 4) * 2];
static sljit_si *sse2_buffer;

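/* init_compiler() aligns sse2_buffer within sse2_data to 16 bytes and fills it
   with the sign bit constants and absolute value masks used by SLJIT_DNEG
   (XORPD) and SLJIT_DABS (ANDPD) in sljit_emit_fop1(). */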
static void init_compiler(void)
{
	sse2_buffer = (sljit_si*)(((sljit_uw)sse2_data + 15) & ~0xf);
	/* Single precision constants. */
	sse2_buffer[0] = 0x80000000;
	sse2_buffer[4] = 0x7fffffff;
	/* Double precision constants. */
	sse2_buffer[8] = 0;
	sse2_buffer[9] = 0x80000000;
	sse2_buffer[12] = 0xffffffff;
	sse2_buffer[13] = 0x7fffffff;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
{
#ifdef SLJIT_IS_FPU_AVAILABLE
	return SLJIT_IS_FPU_AVAILABLE;
#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
	if (cpu_has_sse2 == -1)
		get_cpu_features();
	return cpu_has_sse2;
#else /* SLJIT_DETECT_SSE2 */
	return 1;
#endif /* SLJIT_DETECT_SSE2 */
}

static sljit_si emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode,
	sljit_si single, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
{
	sljit_ub *inst;

	inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = opcode;
	return SLJIT_SUCCESS;
}

static sljit_si emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode,
	sljit_si pref66, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
{
	sljit_ub *inst;

	inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = opcode;
	return SLJIT_SUCCESS;
}

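/* MOVSD_x_xm / MOVSD_xm_x below encode the MOVSD register <-> memory moves;
   the F3 prefix selected by the 'single' argument turns them into MOVSS. */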
static SLJIT_INLINE sljit_si emit_sse2_load(struct sljit_compiler *compiler,
	sljit_si single, sljit_si dst, sljit_si src, sljit_sw srcw)
{
	return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
}

static SLJIT_INLINE sljit_si emit_sse2_store(struct sljit_compiler *compiler,
	sljit_si single, sljit_si dst, sljit_sw dstw, sljit_si src)
{
	return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
}

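/* Converts a double (or single) value to a signed integer with truncation:
   CVTTSD2SI, or CVTTSS2SI when the F3 prefix is selected. */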
static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_si dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
	sljit_ub *inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (GET_OPCODE(op) == SLJIT_CONVW_FROMD)
		compiler->mode32 = 0;
#endif

	inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_SINGLE_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = CVTTSD2SI_r_xm;

	if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED)
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}

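/* Converts a signed integer to a double (or single) value (CVTSI2SD /
   CVTSI2SS). An immediate source is moved into TMP_REG1 first. */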
static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
	sljit_ub *inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (GET_OPCODE(op) == SLJIT_CONVD_FROMW)
		compiler->mode32 = 0;
#endif

	if (src & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (GET_OPCODE(op) == SLJIT_CONVD_FROMI)
			srcw = (sljit_si)srcw;
#endif
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		src = TMP_REG1;
		srcw = 0;
	}

	inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_SINGLE_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = CVTSI2SD_x_rm;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif
	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}

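/* Floating point compare: UCOMISD / UCOMISS set ZF, PF and CF; an unordered
   result (NaN operand) sets all three flags. */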
static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	compiler->flags_saved = 0;
	if (!FAST_IS_REG(src1)) {
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
		src1 = TMP_FREG;
	}
	return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_SINGLE_OP), src1, src2, src2w);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_si dst_r;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	CHECK_ERROR();
	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	if (GET_OPCODE(op) == SLJIT_DMOV) {
		if (FAST_IS_REG(dst))
			return emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst, src, srcw);
		if (FAST_IS_REG(src))
			return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, src);
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src, srcw));
		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
	}

	if (GET_OPCODE(op) == SLJIT_CONVD_FROMS) {
		dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
		if (FAST_IS_REG(src)) {
			/* We overwrite the high bits of the source register. From SLJIT's
			   point of view this is not an issue.
			   Note: in SSE3, we could also use MOVDDUP and MOVSLDUP. */
			FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_SINGLE_OP, src, src, 0));
		}
		else {
			FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_SINGLE_OP), TMP_FREG, src, srcw));
			src = TMP_FREG;
		}

		FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_SINGLE_OP, dst_r, src, 0));
		if (dst_r == TMP_FREG)
			return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
		return SLJIT_SUCCESS;
	}

	if (SLOW_IS_REG(dst)) {
		dst_r = dst;
		if (dst != src)
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw));
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_DNEG:
		FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer : sse2_buffer + 8)));
		break;

	case SLJIT_DABS:
		FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer + 4 : sse2_buffer + 12)));
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_si dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	if (FAST_IS_REG(dst)) {
		dst_r = dst;
		if (dst == src1)
			; /* Do nothing here. */
		else if (dst == src2 && (op == SLJIT_DADD || op == SLJIT_DMUL)) {
			/* Swap arguments. */
			src2 = src1;
			src2w = src1w;
		}
		else if (dst != src2)
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src1, src1w));
		else {
			dst_r = TMP_FREG;
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
		}
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_DADD:
		FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
		break;

	case SLJIT_DSUB:
		FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
		break;

	case SLJIT_DMUL:
		FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
		break;

	case SLJIT_DDIV:
		FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}

/* --------------------------------------------------------------------- */
/*  Conditional instructions                                             */
/* --------------------------------------------------------------------- */

SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
{
	sljit_ub *inst;
	struct sljit_label *label;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_label(compiler));

	/* We should restore the flags before the label,
	   since other taken jumps have their own flags as well. */
	if (SLJIT_UNLIKELY(compiler->flags_saved))
		PTR_FAIL_IF(emit_restore_flags(compiler, 0));

	if (compiler->last_label && compiler->last_label->size == compiler->size)
		return compiler->last_label;

	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
	PTR_FAIL_IF(!label);
	set_label(label, compiler);

	inst = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!inst);

	*inst++ = 0;
	*inst++ = 0;

	return label;
}

SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
{
	sljit_ub *inst;
	struct sljit_jump *jump;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_jump(compiler, type));

	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
		if ((type & 0xff) <= SLJIT_JUMP)
			PTR_FAIL_IF(emit_restore_flags(compiler, 0));
		compiler->flags_saved = 0;
	}

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF_NULL(jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	if (type >= SLJIT_CALL1)
		PTR_FAIL_IF(call_with_args(compiler, type));

	/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
#else
	compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
#endif

	inst = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF_NULL(inst);

	*inst++ = 0;
	*inst++ = type + 4;
	return jump;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
{
	sljit_ub *inst;
	struct sljit_jump *jump;

	CHECK_ERROR();
	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
		if (type <= SLJIT_JUMP)
			FAIL_IF(emit_restore_flags(compiler, 0));
		compiler->flags_saved = 0;
	}

	if (type >= SLJIT_CALL1) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		if (src == SLJIT_R2) {
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
			src = TMP_REG1;
		}
		if (src == SLJIT_MEM1(SLJIT_SP) && type >= SLJIT_CALL3)
			srcw += sizeof(sljit_sw);
#endif
#endif
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
		if (src == SLJIT_R2) {
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
			src = TMP_REG1;
		}
#endif
		FAIL_IF(call_with_args(compiler, type));
	}

	if (src == SLJIT_IMM) {
		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
		FAIL_IF_NULL(jump);
		set_jump(jump, compiler, JUMP_ADDR);
		jump->u.target = srcw;

		/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		compiler->size += 5;
#else
		compiler->size += 10 + 3;
#endif

		inst = (sljit_ub*)ensure_buf(compiler, 2);
		FAIL_IF_NULL(inst);

		*inst++ = 0;
		*inst++ = type + 4;
	}
	else {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* REX_W is not necessary (src is not immediate). */
		compiler->mode32 = 1;
#endif
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_FF;
		*inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm;
	}
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw,
	sljit_si type)
{
	sljit_ub *inst;
	sljit_ub cond_set = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_si reg;
#else
	/* CHECK_EXTRA_REGS might overwrite these values. */
	sljit_si dst_save = dst;
	sljit_sw dstw_save = dstw;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
	SLJIT_UNUSED_ARG(srcw);

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	ADJUST_LOCAL_OFFSET(dst, dstw);
	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	if (SLJIT_UNLIKELY(compiler->flags_saved))
		FAIL_IF(emit_restore_flags(compiler, op & SLJIT_KEEP_FLAGS));

	type &= 0xff;
	/* setcc = jcc + 0x10. */
	cond_set = get_jump_code(type) + 0x10;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src) {
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 3);
		FAIL_IF(!inst);
		INC_SIZE(4 + 3);
		/* Set low register to conditional flag. */
		*inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
		*inst++ = GROUP_0F;
		*inst++ = cond_set;
		*inst++ = MOD_REG | reg_lmap[TMP_REG1];
		*inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
		*inst++ = OR_rm8_r8;
		*inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst];
		return SLJIT_SUCCESS;
	}

	reg = (op == SLJIT_MOV && FAST_IS_REG(dst)) ? dst : TMP_REG1;

	inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4);
	FAIL_IF(!inst);
	INC_SIZE(4 + 4);
	/* Set low register to conditional flag. */
	*inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
	*inst++ = GROUP_0F;
	*inst++ = cond_set;
	*inst++ = MOD_REG | reg_lmap[reg];
	*inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
	*inst++ = GROUP_0F;
	*inst++ = MOVZX_r_rm8;
	*inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];

	if (reg != TMP_REG1)
		return SLJIT_SUCCESS;

	if (GET_OPCODE(op) < SLJIT_ADD) {
		compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
	}
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif
	return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REG1, 0);

#else /* SLJIT_CONFIG_X86_64 */
	if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
		if (reg_map[dst] <= 4) {
			/* Low byte is accessible. */
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
			FAIL_IF(!inst);
			INC_SIZE(3 + 3);
			/* Set low byte to conditional flag. */
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | reg_map[dst];

			*inst++ = GROUP_0F;
			*inst++ = MOVZX_r_rm8;
			*inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
			return SLJIT_SUCCESS;
		}

		/* Low byte is not accessible. */
		if (cpu_has_cmov == -1)
			get_cpu_features();

		if (cpu_has_cmov) {
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
			/* a xor reg, reg operation would overwrite the flags. */
			EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);

			inst = (sljit_ub*)ensure_buf(compiler, 1 + 3);
			FAIL_IF(!inst);
			INC_SIZE(3);

			*inst++ = GROUP_0F;
			/* cmovcc = setcc - 0x50. */
			*inst++ = cond_set - 0x50;
			*inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1];
			return SLJIT_SUCCESS;
		}

		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1 + 3 + 3 + 1);
		*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		/* Set al to conditional flag. */
		*inst++ = GROUP_0F;
		*inst++ = cond_set;
		*inst++ = MOD_REG | 0 /* eax */;

		*inst++ = GROUP_0F;
		*inst++ = MOVZX_r_rm8;
		*inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
		*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		return SLJIT_SUCCESS;
	}

	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src && reg_map[dst] <= 4) {
		SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] == 0, scratch_reg1_must_be_eax);
		if (dst != SLJIT_R0) {
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1 + 3 + 2 + 1);
			/* Set low register to conditional flag. */
			*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | 0 /* eax */;
			*inst++ = OR_rm8_r8;
			*inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
			*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		}
		else {
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
			FAIL_IF(!inst);
			INC_SIZE(2 + 3 + 2 + 2);
			/* Set low register to conditional flag. */
			*inst++ = XCHG_r_rm;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | 1 /* ecx */;
			*inst++ = OR_rm8_r8;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
			*inst++ = XCHG_r_rm;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
		}
		return SLJIT_SUCCESS;
	}

	/* Set TMP_REG1 to the bit. */
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
	FAIL_IF(!inst);
	INC_SIZE(1 + 3 + 3 + 1);
	*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
	/* Set al to conditional flag. */
	*inst++ = GROUP_0F;
	*inst++ = cond_set;
	*inst++ = MOD_REG | 0 /* eax */;

	*inst++ = GROUP_0F;
	*inst++ = MOVZX_r_rm8;
	*inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;
	*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];

	if (GET_OPCODE(op) < SLJIT_ADD)
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif
	return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
#endif /* SLJIT_CONFIG_X86_64 */
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset)
{
	CHECK_ERROR();
	CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (NOT_HALFWORD(offset)) {
		FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
		SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
		return compiler->error;
#else
		return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
#endif
	}
#endif

	if (offset != 0)
		return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
	return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
}

SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
{
	sljit_ub *inst;
	struct sljit_const *const_;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_si reg;
#endif

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
	reg = SLOW_IS_REG(dst) ? dst : TMP_REG1;

	if (emit_load_imm64(compiler, reg, init_value))
		return NULL;
#else
	if (dst == SLJIT_UNUSED)
		dst = TMP_REG1;

	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
		return NULL;
#endif

	inst = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!inst);

	*inst++ = 0;
	*inst++ = 1;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (dst & SLJIT_MEM)
		if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
			return NULL;
#endif

	return const_;
}

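/* On x86-32 the patched word is a rel32 displacement counted from the end of
   the 4 byte field; on x86-64 it is the absolute target address used by the
   MOV r64, imm64 of rewritable jumps. */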
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
{
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	*(sljit_sw*)addr = new_addr - (addr + 4);
#else
	*(sljit_uw*)addr = new_addr;
#endif
}

SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
{
	*(sljit_sw*)addr = new_constant;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_sse2_available(void)
{
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
	if (cpu_has_sse2 == -1)
		get_cpu_features();
	return cpu_has_sse2;
#else
	return 1;
#endif
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_cmov_available(void)
{
	if (cpu_has_cmov == -1)
		get_cpu_features();
	return cpu_has_cmov;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_emit_cmov(struct sljit_compiler *compiler,
	sljit_si type,
	sljit_si dst_reg,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;

	CHECK_ERROR();
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	CHECK_ARGUMENT(sljit_x86_is_cmov_available());
	CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_INT_OP)));
	CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_D_ORDERED);
	CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_INT_OP));
	FUNCTION_CHECK_SRC(src, srcw);
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
	if (SLJIT_UNLIKELY(!!compiler->verbose)) {
		fprintf(compiler->verbose, " x86_cmov%s %s%s, ",
			!(dst_reg & SLJIT_INT_OP) ? "" : ".i",
			JUMP_PREFIX(type), jump_names[type & 0xff]);
		sljit_verbose_reg(compiler, dst_reg & ~SLJIT_INT_OP);
		fprintf(compiler->verbose, ", ");
		sljit_verbose_param(compiler, src, srcw);
		fprintf(compiler->verbose, "\n");
	}
#endif

	ADJUST_LOCAL_OFFSET(src, srcw);
	CHECK_EXTRA_REGS(src, srcw, (void)0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = dst_reg & SLJIT_INT_OP;
#endif
	dst_reg &= ~SLJIT_INT_OP;

	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
		src = TMP_REG1;
		srcw = 0;
	}

	inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = get_jump_code(type & 0xff) - 0x40;
	return SLJIT_SUCCESS;
}