sljitNativeARM_32.c 78 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566
  1. /*
  2. * Stack-less Just-In-Time compiler
  3. *
  4. * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without modification, are
  7. * permitted provided that the following conditions are met:
  8. *
  9. * 1. Redistributions of source code must retain the above copyright notice, this list of
  10. * conditions and the following disclaimer.
  11. *
  12. * 2. Redistributions in binary form must reproduce the above copyright notice, this list
  13. * of conditions and the following disclaimer in the documentation and/or other materials
  14. * provided with the distribution.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
  17. * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  18. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
  19. * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  20. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
  21. * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  22. * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  23. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  24. * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
  27. {
  28. #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
  29. return "ARMv7" SLJIT_CPUINFO;
  30. #elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
  31. return "ARMv5" SLJIT_CPUINFO;
  32. #else
  33. #error "Internal error: Unknown ARM architecture"
  34. #endif
  35. }
  36. /* Indices of the temporary registers (last register + 1 and up). They are used as indices into reg_map through RM() / RD(). */
  37. #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
  38. #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
  39. #define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4)
  40. #define TMP_PC (SLJIT_NUMBER_OF_REGISTERS + 5) /* Highest index; reg_map is declared with SLJIT_NUMBER_OF_REGISTERS + 6 entries. */
  41. #define TMP_FREG1 (0)
  42. #define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
  43. /* Constant pool alignment, measured in ARM instruction words.
  44. Cache lines are usually 32 byte aligned. */
  45. #define CONST_POOL_ALIGNMENT 8
  46. #define CONST_POOL_EMPTY 0xffffffff /* Value of compiler->cpool_diff while no literal is waiting in the constant pool. */
  47. #define ALIGN_INSTRUCTION(ptr) \
  48. (sljit_uw*)(((sljit_uw)(ptr) + (CONST_POOL_ALIGNMENT * sizeof(sljit_uw)) - 1) & ~((CONST_POOL_ALIGNMENT * sizeof(sljit_uw)) - 1)) /* Rounds ptr up to the next multiple of CONST_POOL_ALIGNMENT * sizeof(sljit_uw) bytes. */
  49. #define MAX_DIFFERENCE(max_diff) \
  50. (((max_diff) / (sljit_si)sizeof(sljit_uw)) - (CONST_POOL_ALIGNMENT - 1)) /* Converts the byte distance max_diff to words and subtracts the CONST_POOL_ALIGNMENT - 1 filler words that push_cpool emits. */
  51. /* Translation table from register indices (including the TMP_* indices) to the register numbers placed into instructions. See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
  52. static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = {
  53. 0, 0, 1, 2, 11, 10, 9, 8, 7, 6, 5, 4, 13, 3, 12, 14, 15
  54. };
  55. #define RM(rm) (reg_map[rm]) /* Mapped register number, unshifted. */
  56. #define RD(rd) (reg_map[rd] << 12) /* Mapped register number shifted to bit 12. */
  57. #define RN(rn) (reg_map[rn] << 16) /* Mapped register number shifted to bit 16. */
  58. /* --------------------------------------------------------------------- */
  59. /* Instruction forms */
  60. /* --------------------------------------------------------------------- */
  61. /* The full instruction constants below include the AL condition (CONDITIONAL).
  62. Subtracting it (INST_NAME - CONDITIONAL) removes this flag, so a condition selected with COND_MASK can be OR-ed in instead. */
  63. #define COND_MASK 0xf0000000
  64. #define CONDITIONAL 0xe0000000
  65. #define PUSH_POOL 0xff000000 /* Top-byte marker of the word that announces a constant pool inside the instruction buffer. */
  66. /* DP - Data Processing instruction (use with EMIT_DATA_PROCESS_INS). */
  67. #define ADC_DP 0x5
  68. #define ADD_DP 0x4
  69. #define AND_DP 0x0
  70. #define B 0xea000000
  71. #define BIC_DP 0xe
  72. #define BL 0xeb000000
  73. #define BLX 0xe12fff30
  74. #define BX 0xe12fff10
  75. #define CLZ 0xe16f0f10
  76. #define CMP_DP 0xa
  77. #define BKPT 0xe1200070
  78. #define EOR_DP 0x1
  79. #define MOV_DP 0xd
  80. #define MUL 0xe0000090
  81. #define MVN_DP 0xf
  82. #define NOP 0xe1a00000
  83. #define ORR_DP 0xc
  84. #define PUSH 0xe92d0000
  85. #define POP 0xe8bd0000
  86. #define RSB_DP 0x3
  87. #define RSC_DP 0x7
  88. #define SBC_DP 0x6
  89. #define SMULL 0xe0c00090
  90. #define SUB_DP 0x2
  91. #define UMULL 0xe0800090
  92. #define VABS_F32 0xeeb00ac0
  93. #define VADD_F32 0xee300a00
  94. #define VCMP_F32 0xeeb40a40
  95. #define VCVT_F32_S32 0xeeb80ac0
  96. #define VCVT_F64_F32 0xeeb70ac0
  97. #define VCVT_S32_F32 0xeebd0ac0
  98. #define VDIV_F32 0xee800a00
  99. #define VMOV_F32 0xeeb00a40
  100. #define VMOV 0xee000a10
  101. #define VMRS 0xeef1fa10
  102. #define VMUL_F32 0xee200a00
  103. #define VNEG_F32 0xeeb10a40
  104. #define VSTR_F32 0xed000a00
  105. #define VSUB_F32 0xee300a40
  106. #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
  107. /* Arm v7 specific instructions. */
  108. #define MOVW 0xe3000000
  109. #define MOVT 0xe3400000
  110. #define SXTB 0xe6af0070
  111. #define SXTH 0xe6bf0070
  112. #define UXTB 0xe6ef0070
  113. #define UXTH 0xe6ff0070
  114. #endif
  115. #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
  116. static sljit_si push_cpool(struct sljit_compiler *compiler)
  117. {
  118. /* Pushing the constant pool into the instruction stream. */
  119. sljit_uw* inst;
  120. sljit_uw* cpool_ptr;
  121. sljit_uw* cpool_end;
  122. sljit_si i;
  123. /* The label could point the address after the constant pool. */
  124. if (compiler->last_label && compiler->last_label->size == compiler->size)
  125. compiler->last_label->size += compiler->cpool_fill + (CONST_POOL_ALIGNMENT - 1) + 1;
  126. SLJIT_ASSERT(compiler->cpool_fill > 0 && compiler->cpool_fill <= CPOOL_SIZE);
  127. inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
  128. FAIL_IF(!inst);
  129. compiler->size++;
  130. *inst = 0xff000000 | compiler->cpool_fill;
  131. for (i = 0; i < CONST_POOL_ALIGNMENT - 1; i++) {
  132. inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
  133. FAIL_IF(!inst);
  134. compiler->size++;
  135. *inst = 0;
  136. }
  137. cpool_ptr = compiler->cpool;
  138. cpool_end = cpool_ptr + compiler->cpool_fill;
  139. while (cpool_ptr < cpool_end) {
  140. inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
  141. FAIL_IF(!inst);
  142. compiler->size++;
  143. *inst = *cpool_ptr++;
  144. }
  145. compiler->cpool_diff = CONST_POOL_EMPTY;
  146. compiler->cpool_fill = 0;
  147. return SLJIT_SUCCESS;
  148. }
  149. static sljit_si push_inst(struct sljit_compiler *compiler, sljit_uw inst)
  150. {
  151. sljit_uw* ptr;
  152. if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)))
  153. FAIL_IF(push_cpool(compiler));
  154. ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
  155. FAIL_IF(!ptr);
  156. compiler->size++;
  157. *ptr = inst;
  158. return SLJIT_SUCCESS;
  159. }
  160. static sljit_si push_inst_with_literal(struct sljit_compiler *compiler, sljit_uw inst, sljit_uw literal)
  161. {
  162. sljit_uw* ptr;
  163. sljit_uw cpool_index = CPOOL_SIZE;
  164. sljit_uw* cpool_ptr;
  165. sljit_uw* cpool_end;
  166. sljit_ub* cpool_unique_ptr;
  167. if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)))
  168. FAIL_IF(push_cpool(compiler));
  169. else if (compiler->cpool_fill > 0) {
  170. cpool_ptr = compiler->cpool;
  171. cpool_end = cpool_ptr + compiler->cpool_fill;
  172. cpool_unique_ptr = compiler->cpool_unique;
  173. do {
  174. if ((*cpool_ptr == literal) && !(*cpool_unique_ptr)) {
  175. cpool_index = cpool_ptr - compiler->cpool;
  176. break;
  177. }
  178. cpool_ptr++;
  179. cpool_unique_ptr++;
  180. } while (cpool_ptr < cpool_end);
  181. }
  182. if (cpool_index == CPOOL_SIZE) {
  183. /* Must allocate a new entry in the literal pool. */
  184. if (compiler->cpool_fill < CPOOL_SIZE) {
  185. cpool_index = compiler->cpool_fill;
  186. compiler->cpool_fill++;
  187. }
  188. else {
  189. FAIL_IF(push_cpool(compiler));
  190. cpool_index = 0;
  191. compiler->cpool_fill = 1;
  192. }
  193. }
  194. SLJIT_ASSERT((inst & 0xfff) == 0);
  195. ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
  196. FAIL_IF(!ptr);
  197. compiler->size++;
  198. *ptr = inst | cpool_index;
  199. compiler->cpool[cpool_index] = literal;
  200. compiler->cpool_unique[cpool_index] = 0;
  201. if (compiler->cpool_diff == CONST_POOL_EMPTY)
  202. compiler->cpool_diff = compiler->size;
  203. return SLJIT_SUCCESS;
  204. }
  205. static sljit_si push_inst_with_unique_literal(struct sljit_compiler *compiler, sljit_uw inst, sljit_uw literal)
  206. {
  207. sljit_uw* ptr;
  208. if (SLJIT_UNLIKELY((compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)) || compiler->cpool_fill >= CPOOL_SIZE))
  209. FAIL_IF(push_cpool(compiler));
  210. SLJIT_ASSERT(compiler->cpool_fill < CPOOL_SIZE && (inst & 0xfff) == 0);
  211. ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
  212. FAIL_IF(!ptr);
  213. compiler->size++;
  214. *ptr = inst | compiler->cpool_fill;
  215. compiler->cpool[compiler->cpool_fill] = literal;
  216. compiler->cpool_unique[compiler->cpool_fill] = 1;
  217. compiler->cpool_fill++;
  218. if (compiler->cpool_diff == CONST_POOL_EMPTY)
  219. compiler->cpool_diff = compiler->size;
  220. return SLJIT_SUCCESS;
  221. }
  222. static SLJIT_INLINE sljit_si prepare_blx(struct sljit_compiler *compiler)
  223. {
  224. /* Place for at least two instruction (doesn't matter whether the first has a literal). */
  225. if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4088)))
  226. return push_cpool(compiler);
  227. return SLJIT_SUCCESS;
  228. }
  229. static SLJIT_INLINE sljit_si emit_blx(struct sljit_compiler *compiler)
  230. {
  231. /* Must follow tightly the previous instruction (to be able to convert it to bl instruction). */
  232. SLJIT_ASSERT(compiler->cpool_diff == CONST_POOL_EMPTY || compiler->size - compiler->cpool_diff < MAX_DIFFERENCE(4092));
  233. return push_inst(compiler, BLX | RM(TMP_REG1));
  234. }
  235. static sljit_uw patch_pc_relative_loads(sljit_uw *last_pc_patch, sljit_uw *code_ptr, sljit_uw* const_pool, sljit_uw cpool_size)
  236. {
  237. sljit_uw diff;
  238. sljit_uw ind;
  239. sljit_uw counter = 0;
  240. sljit_uw* clear_const_pool = const_pool;
  241. sljit_uw* clear_const_pool_end = const_pool + cpool_size;
  242. SLJIT_ASSERT(const_pool - code_ptr <= CONST_POOL_ALIGNMENT);
  243. /* Set unused flag for all literals in the constant pool.
  244. I.e.: unused literals can belong to branches, which can be encoded as B or BL.
  245. We can "compress" the constant pool by discarding these literals. */
  246. while (clear_const_pool < clear_const_pool_end)
  247. *clear_const_pool++ = (sljit_uw)(-1);
  248. while (last_pc_patch < code_ptr) {
  249. /* Data transfer instruction with Rn == r15. */
  250. if ((*last_pc_patch & 0x0c0f0000) == 0x040f0000) {
  251. diff = const_pool - last_pc_patch;
  252. ind = (*last_pc_patch) & 0xfff;
  253. /* Must be a load instruction with immediate offset. */
  254. SLJIT_ASSERT(ind < cpool_size && !(*last_pc_patch & (1 << 25)) && (*last_pc_patch & (1 << 20)));
  255. if ((sljit_si)const_pool[ind] < 0) {
  256. const_pool[ind] = counter;
  257. ind = counter;
  258. counter++;
  259. }
  260. else
  261. ind = const_pool[ind];
  262. SLJIT_ASSERT(diff >= 1);
  263. if (diff >= 2 || ind > 0) {
  264. diff = (diff + ind - 2) << 2;
  265. SLJIT_ASSERT(diff <= 0xfff);
  266. *last_pc_patch = (*last_pc_patch & ~0xfff) | diff;
  267. }
  268. else
  269. *last_pc_patch = (*last_pc_patch & ~(0xfff | (1 << 23))) | 0x004;
  270. }
  271. last_pc_patch++;
  272. }
  273. return counter;
  274. }
  275. /* On some rare occasions we may need future patches. The probability is close to 0 in practice.
       Each entry saves the content of a constant pool slot before resolve_const_pool_index overwrites it. */
  276. struct future_patch {
  277. struct future_patch* next; /* Next entry of the singly linked list. */
  278. sljit_si index; /* Constant pool index whose previous content was saved. */
  279. sljit_si value; /* The saved content of that slot. */
  280. };
  281. static sljit_si resolve_const_pool_index(struct sljit_compiler *compiler, struct future_patch **first_patch, sljit_uw cpool_current_index, sljit_uw *cpool_start_address, sljit_uw *buf_ptr)
  282. {
  283. sljit_si value;
  284. struct future_patch *curr_patch, *prev_patch;
  285. SLJIT_UNUSED_ARG(compiler);
  286. /* Using the values generated by patch_pc_relative_loads. */
  287. if (!*first_patch)
  288. value = (sljit_si)cpool_start_address[cpool_current_index];
  289. else {
  290. curr_patch = *first_patch;
  291. prev_patch = 0;
  292. while (1) {
  293. if (!curr_patch) {
  294. value = (sljit_si)cpool_start_address[cpool_current_index];
  295. break;
  296. }
  297. if ((sljit_uw)curr_patch->index == cpool_current_index) {
  298. value = curr_patch->value;
  299. if (prev_patch)
  300. prev_patch->next = curr_patch->next;
  301. else
  302. *first_patch = curr_patch->next;
  303. SLJIT_FREE(curr_patch, compiler->allocator_data);
  304. break;
  305. }
  306. prev_patch = curr_patch;
  307. curr_patch = curr_patch->next;
  308. }
  309. }
  310. if (value >= 0) {
  311. if ((sljit_uw)value > cpool_current_index) {
  312. curr_patch = (struct future_patch*)SLJIT_MALLOC(sizeof(struct future_patch), compiler->allocator_data);
  313. if (!curr_patch) {
  314. while (*first_patch) {
  315. curr_patch = *first_patch;
  316. *first_patch = (*first_patch)->next;
  317. SLJIT_FREE(curr_patch, compiler->allocator_data);
  318. }
  319. return SLJIT_ERR_ALLOC_FAILED;
  320. }
  321. curr_patch->next = *first_patch;
  322. curr_patch->index = value;
  323. curr_patch->value = cpool_start_address[value];
  324. *first_patch = curr_patch;
  325. }
  326. cpool_start_address[value] = *buf_ptr;
  327. }
  328. return SLJIT_SUCCESS;
  329. }
  330. #else
  331. static sljit_si push_inst(struct sljit_compiler *compiler, sljit_uw inst)
  332. {
  333. sljit_uw* ptr;
  334. ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
  335. FAIL_IF(!ptr);
  336. compiler->size++;
  337. *ptr = inst;
  338. return SLJIT_SUCCESS;
  339. }
  340. static SLJIT_INLINE sljit_si emit_imm(struct sljit_compiler *compiler, sljit_si reg, sljit_sw imm)
  341. {
  342. FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff)));
  343. return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | ((imm >> 16) & 0xfff));
  344. }
  345. #endif
  346. static SLJIT_INLINE sljit_si detect_jump_type(struct sljit_jump *jump, sljit_uw *code_ptr, sljit_uw *code)
  347. {
  348. sljit_sw diff;
  349. if (jump->flags & SLJIT_REWRITABLE_JUMP)
  350. return 0;
  351. #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
  352. if (jump->flags & IS_BL)
  353. code_ptr--;
  354. if (jump->flags & JUMP_ADDR)
  355. diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2));
  356. else {
  357. SLJIT_ASSERT(jump->flags & JUMP_LABEL);
  358. diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2));
  359. }
  360. /* Branch to Thumb code has not been optimized yet. */
  361. if (diff & 0x3)
  362. return 0;
  363. if (jump->flags & IS_BL) {
  364. if (diff <= 0x01ffffff && diff >= -0x02000000) {
  365. *code_ptr = (BL - CONDITIONAL) | (*(code_ptr + 1) & COND_MASK);
  366. jump->flags |= PATCH_B;
  367. return 1;
  368. }
  369. }
  370. else {
  371. if (diff <= 0x01ffffff && diff >= -0x02000000) {
  372. *code_ptr = (B - CONDITIONAL) | (*code_ptr & COND_MASK);
  373. jump->flags |= PATCH_B;
  374. }
  375. }
  376. #else
  377. if (jump->flags & JUMP_ADDR)
  378. diff = ((sljit_sw)jump->u.target - (sljit_sw)code_ptr);
  379. else {
  380. SLJIT_ASSERT(jump->flags & JUMP_LABEL);
  381. diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)code_ptr);
  382. }
  383. /* Branch to Thumb code has not been optimized yet. */
  384. if (diff & 0x3)
  385. return 0;
  386. if (diff <= 0x01ffffff && diff >= -0x02000000) {
  387. code_ptr -= 2;
  388. *code_ptr = ((jump->flags & IS_BL) ? (BL - CONDITIONAL) : (B - CONDITIONAL)) | (code_ptr[2] & COND_MASK);
  389. jump->flags |= PATCH_B;
  390. return 1;
  391. }
  392. #endif
  393. return 0;
  394. }
  395. static SLJIT_INLINE void inline_set_jump_addr(sljit_uw addr, sljit_uw new_addr, sljit_si flush)
  396. {
  397. #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
  398. sljit_uw *ptr = (sljit_uw*)addr;
  399. sljit_uw *inst = (sljit_uw*)ptr[0];
  400. sljit_uw mov_pc = ptr[1];
  401. sljit_si bl = (mov_pc & 0x0000f000) != RD(TMP_PC);
  402. sljit_sw diff = (sljit_sw)(((sljit_sw)new_addr - (sljit_sw)(inst + 2)) >> 2);
  403. if (diff <= 0x7fffff && diff >= -0x800000) {
  404. /* Turn to branch. */
  405. if (!bl) {
  406. inst[0] = (mov_pc & COND_MASK) | (B - CONDITIONAL) | (diff & 0xffffff);
  407. if (flush) {
  408. SLJIT_CACHE_FLUSH(inst, inst + 1);
  409. }
  410. } else {
  411. inst[0] = (mov_pc & COND_MASK) | (BL - CONDITIONAL) | (diff & 0xffffff);
  412. inst[1] = NOP;
  413. if (flush) {
  414. SLJIT_CACHE_FLUSH(inst, inst + 2);
  415. }
  416. }
  417. } else {
  418. /* Get the position of the constant. */
  419. if (mov_pc & (1 << 23))
  420. ptr = inst + ((mov_pc & 0xfff) >> 2) + 2;
  421. else
  422. ptr = inst + 1;
  423. if (*inst != mov_pc) {
  424. inst[0] = mov_pc;
  425. if (!bl) {
  426. if (flush) {
  427. SLJIT_CACHE_FLUSH(inst, inst + 1);
  428. }
  429. } else {
  430. inst[1] = BLX | RM(TMP_REG1);
  431. if (flush) {
  432. SLJIT_CACHE_FLUSH(inst, inst + 2);
  433. }
  434. }
  435. }
  436. *ptr = new_addr;
  437. }
  438. #else
  439. sljit_uw *inst = (sljit_uw*)addr;
  440. SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT);
  441. inst[0] = MOVW | (inst[0] & 0xf000) | ((new_addr << 4) & 0xf0000) | (new_addr & 0xfff);
  442. inst[1] = MOVT | (inst[1] & 0xf000) | ((new_addr >> 12) & 0xf0000) | ((new_addr >> 16) & 0xfff);
  443. if (flush) {
  444. SLJIT_CACHE_FLUSH(inst, inst + 2);
  445. }
  446. #endif
  447. }
  448. static sljit_uw get_imm(sljit_uw imm);
  449. static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw new_constant, sljit_si flush)
  450. {
  451. #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
  452. sljit_uw *ptr = (sljit_uw*)addr;
  453. sljit_uw *inst = (sljit_uw*)ptr[0];
  454. sljit_uw ldr_literal = ptr[1];
  455. sljit_uw src2;
  456. src2 = get_imm(new_constant);
  457. if (src2) {
  458. *inst = 0xe3a00000 | (ldr_literal & 0xf000) | src2;
  459. if (flush) {
  460. SLJIT_CACHE_FLUSH(inst, inst + 1);
  461. }
  462. return;
  463. }
  464. src2 = get_imm(~new_constant);
  465. if (src2) {
  466. *inst = 0xe3e00000 | (ldr_literal & 0xf000) | src2;
  467. if (flush) {
  468. SLJIT_CACHE_FLUSH(inst, inst + 1);
  469. }
  470. return;
  471. }
  472. if (ldr_literal & (1 << 23))
  473. ptr = inst + ((ldr_literal & 0xfff) >> 2) + 2;
  474. else
  475. ptr = inst + 1;
  476. if (*inst != ldr_literal) {
  477. *inst = ldr_literal;
  478. if (flush) {
  479. SLJIT_CACHE_FLUSH(inst, inst + 1);
  480. }
  481. }
  482. *ptr = new_constant;
  483. #else
  484. sljit_uw *inst = (sljit_uw*)addr;
  485. SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT);
  486. inst[0] = MOVW | (inst[0] & 0xf000) | ((new_constant << 4) & 0xf0000) | (new_constant & 0xfff);
  487. inst[1] = MOVT | (inst[1] & 0xf000) | ((new_constant >> 12) & 0xf0000) | ((new_constant >> 16) & 0xfff);
  488. if (flush) {
  489. SLJIT_CACHE_FLUSH(inst, inst + 2);
  490. }
  491. #endif
  492. }
  493. SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
  494. {
  495. struct sljit_memory_fragment *buf;
  496. sljit_uw *code;
  497. sljit_uw *code_ptr;
  498. sljit_uw *buf_ptr;
  499. sljit_uw *buf_end;
  500. sljit_uw size;
  501. sljit_uw word_count;
  502. #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
  503. sljit_uw cpool_size;
  504. sljit_uw cpool_skip_alignment;
  505. sljit_uw cpool_current_index;
  506. sljit_uw *cpool_start_address;
  507. sljit_uw *last_pc_patch;
  508. struct future_patch *first_patch;
  509. #endif
  510. struct sljit_label *label;
  511. struct sljit_jump *jump;
  512. struct sljit_const *const_;
  513. CHECK_ERROR_PTR();
  514. CHECK_PTR(check_sljit_generate_code(compiler));
  515. reverse_buf(compiler);
  516. /* Second code generation pass. */
  517. #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
  518. size = compiler->size + (compiler->patches << 1);
  519. if (compiler->cpool_fill > 0)
  520. size += compiler->cpool_fill + CONST_POOL_ALIGNMENT - 1;
  521. #else
  522. size = compiler->size;
  523. #endif
  524. code = (sljit_uw*)SLJIT_MALLOC_EXEC(size * sizeof(sljit_uw));
  525. PTR_FAIL_WITH_EXEC_IF(code);
  526. buf = compiler->buf;
  527. #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
  528. cpool_size = 0;
  529. cpool_skip_alignment = 0;
  530. cpool_current_index = 0;
  531. cpool_start_address = NULL;
  532. first_patch = NULL;
  533. last_pc_patch = code;
  534. #endif
  535. code_ptr = code;
  536. word_count = 0;
  537. label = compiler->labels;
  538. jump = compiler->jumps;
  539. const_ = compiler->consts;
  540. if (label && label->size == 0) {
  541. label->addr = (sljit_uw)code;
  542. label->size = 0;
  543. label = label->next;
  544. }
  545. do {
  546. buf_ptr = (sljit_uw*)buf->memory;
  547. buf_end = buf_ptr + (buf->used_size >> 2);
  548. do {
  549. word_count++;
  550. #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
  551. if (cpool_size > 0) {
  552. if (cpool_skip_alignment > 0) {
  553. buf_ptr++;
  554. cpool_skip_alignment--;
  555. }
  556. else {
  557. if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) {
  558. SLJIT_FREE_EXEC(code);
  559. compiler->error = SLJIT_ERR_ALLOC_FAILED;
  560. return NULL;
  561. }
  562. buf_ptr++;
  563. if (++cpool_current_index >= cpool_size) {
  564. SLJIT_ASSERT(!first_patch);
  565. cpool_size = 0;
  566. if (label && label->size == word_count) {
  567. /* Points after the current instruction. */
  568. label->addr = (sljit_uw)code_ptr;
  569. label->size = code_ptr - code;
  570. label = label->next;
  571. }
  572. }
  573. }
  574. }
  575. else if ((*buf_ptr & 0xff000000) != PUSH_POOL) {
  576. #endif
  577. *code_ptr = *buf_ptr++;
  578. /* These structures are ordered by their address. */
  579. SLJIT_ASSERT(!label || label->size >= word_count);
  580. SLJIT_ASSERT(!jump || jump->addr >= word_count);
  581. SLJIT_ASSERT(!const_ || const_->addr >= word_count);
  582. if (jump && jump->addr == word_count) {
  583. #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
  584. if (detect_jump_type(jump, code_ptr, code))
  585. code_ptr--;
  586. jump->addr = (sljit_uw)code_ptr;
  587. #else
  588. jump->addr = (sljit_uw)(code_ptr - 2);
  589. if (detect_jump_type(jump, code_ptr, code))
  590. code_ptr -= 2;
  591. #endif
  592. jump = jump->next;
  593. }
  594. if (label && label->size == word_count) {
  595. /* code_ptr can be affected above. */
  596. label->addr = (sljit_uw)(code_ptr + 1);
  597. label->size = (code_ptr + 1) - code;
  598. label = label->next;
  599. }
  600. if (const_ && const_->addr == word_count) {
  601. #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
  602. const_->addr = (sljit_uw)code_ptr;
  603. #else
  604. const_->addr = (sljit_uw)(code_ptr - 1);
  605. #endif
  606. const_ = const_->next;
  607. }
  608. code_ptr++;
  609. #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
  610. }
  611. else {
  612. /* Fortunately, no need to shift. */
  613. cpool_size = *buf_ptr++ & ~PUSH_POOL;
  614. SLJIT_ASSERT(cpool_size > 0);
  615. cpool_start_address = ALIGN_INSTRUCTION(code_ptr + 1);
  616. cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, cpool_size);
  617. if (cpool_current_index > 0) {
  618. /* Unconditional branch. */
  619. *code_ptr = B | (((cpool_start_address - code_ptr) + cpool_current_index - 2) & ~PUSH_POOL);
  620. code_ptr = cpool_start_address + cpool_current_index;
  621. }
  622. cpool_skip_alignment = CONST_POOL_ALIGNMENT - 1;
  623. cpool_current_index = 0;
  624. last_pc_patch = code_ptr;
  625. }
  626. #endif
  627. } while (buf_ptr < buf_end);
  628. buf = buf->next;
  629. } while (buf);
  630. SLJIT_ASSERT(!label);
  631. SLJIT_ASSERT(!jump);
  632. SLJIT_ASSERT(!const_);
  633. #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
  634. SLJIT_ASSERT(cpool_size == 0);
  635. if (compiler->cpool_fill > 0) {
  636. cpool_start_address = ALIGN_INSTRUCTION(code_ptr);
  637. cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, compiler->cpool_fill);
  638. if (cpool_current_index > 0)
  639. code_ptr = cpool_start_address + cpool_current_index;
  640. buf_ptr = compiler->cpool;
  641. buf_end = buf_ptr + compiler->cpool_fill;
  642. cpool_current_index = 0;
  643. while (buf_ptr < buf_end) {
  644. if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) {
  645. SLJIT_FREE_EXEC(code);
  646. compiler->error = SLJIT_ERR_ALLOC_FAILED;
  647. return NULL;
  648. }
  649. buf_ptr++;
  650. cpool_current_index++;
  651. }
  652. SLJIT_ASSERT(!first_patch);
  653. }
  654. #endif
  655. jump = compiler->jumps;
  656. while (jump) {
  657. buf_ptr = (sljit_uw*)jump->addr;
  658. if (jump->flags & PATCH_B) {
  659. if (!(jump->flags & JUMP_ADDR)) {
  660. SLJIT_ASSERT(jump->flags & JUMP_LABEL);
  661. SLJIT_ASSERT(((sljit_sw)jump->u.label->addr - (sljit_sw)(buf_ptr + 2)) <= 0x01ffffff && ((sljit_sw)jump->u.label->addr - (sljit_sw)(buf_ptr + 2)) >= -0x02000000);
  662. *buf_ptr |= (((sljit_sw)jump->u.label->addr - (sljit_sw)(buf_ptr + 2)) >> 2) & 0x00ffffff;
  663. }
  664. else {
  665. SLJIT_ASSERT(((sljit_sw)jump->u.target - (sljit_sw)(buf_ptr + 2)) <= 0x01ffffff && ((sljit_sw)jump->u.target - (sljit_sw)(buf_ptr + 2)) >= -0x02000000);
  666. *buf_ptr |= (((sljit_sw)jump->u.target - (sljit_sw)(buf_ptr + 2)) >> 2) & 0x00ffffff;
  667. }
  668. }
  669. else if (jump->flags & SLJIT_REWRITABLE_JUMP) {
  670. #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
  671. jump->addr = (sljit_uw)code_ptr;
  672. code_ptr[0] = (sljit_uw)buf_ptr;
  673. code_ptr[1] = *buf_ptr;
  674. inline_set_jump_addr((sljit_uw)code_ptr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
  675. code_ptr += 2;
  676. #else
  677. inline_set_jump_addr((sljit_uw)buf_ptr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
  678. #endif
  679. }
  680. else {
  681. #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
  682. if (jump->flags & IS_BL)
  683. buf_ptr--;
  684. if (*buf_ptr & (1 << 23))
  685. buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2;
  686. else
  687. buf_ptr += 1;
  688. *buf_ptr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
  689. #else
  690. inline_set_jump_addr((sljit_uw)buf_ptr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
  691. #endif
  692. }
  693. jump = jump->next;
  694. }
  695. #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
  696. const_ = compiler->consts;
  697. while (const_) {
  698. buf_ptr = (sljit_uw*)const_->addr;
  699. const_->addr = (sljit_uw)code_ptr;
  700. code_ptr[0] = (sljit_uw)buf_ptr;
  701. code_ptr[1] = *buf_ptr;
  702. if (*buf_ptr & (1 << 23))
  703. buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2;
  704. else
  705. buf_ptr += 1;
  706. /* Set the value again (can be a simple constant). */
  707. inline_set_const((sljit_uw)code_ptr, *buf_ptr, 0);
  708. code_ptr += 2;
  709. const_ = const_->next;
  710. }
  711. #endif
  712. SLJIT_ASSERT(code_ptr - code <= (sljit_si)size);
  713. compiler->error = SLJIT_ERR_COMPILED;
  714. compiler->executable_size = (code_ptr - code) * sizeof(sljit_uw);
  715. SLJIT_CACHE_FLUSH(code, code_ptr);
  716. return code;
  717. }
  718. /* --------------------------------------------------------------------- */
  719. /* Entry, exit */
  720. /* --------------------------------------------------------------------- */
  721. /* emit_op inp_flags.
  722. WRITE_BACK must be the first, since it is a flag. */
  723. #define WRITE_BACK 0x01
  724. #define ALLOW_IMM 0x02
  725. #define ALLOW_INV_IMM 0x04
  726. #define ALLOW_ANY_IMM (ALLOW_IMM | ALLOW_INV_IMM)
  727. #define ARG_TEST 0x08
  728. /* Creates an index in data_transfer_insts array. */
  729. #define WORD_DATA 0x00
  730. #define BYTE_DATA 0x10
  731. #define HALF_DATA 0x20
  732. #define SIGNED_DATA 0x40
  733. #define LOAD_DATA 0x80
  734. /* Condition: AL. */
  735. #define EMIT_DATA_PROCESS_INS(opcode, set_flags, dst, src1, src2) \
  736. (0xe0000000 | ((opcode) << 21) | (set_flags) | RD(dst) | RN(src1) | (src2))
/* Forward declaration: the entry / exit code below calls emit_op before
   its definition. inp_flags takes the emit_op input flags (WRITE_BACK,
   ALLOW_IMM, ...); the destination and both sources are each passed as
   an (argument, word) pair. */
static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si inp_flags,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w);
  741. SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler,
  742. sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
  743. sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
  744. {
  745. sljit_si size, i, tmp;
  746. sljit_uw push;
  747. CHECK_ERROR();
  748. CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
  749. set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
  750. /* Push saved registers, temporary registers
  751. stmdb sp!, {..., lr} */
  752. push = PUSH | (1 << 14);
  753. tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
  754. for (i = SLJIT_S0; i >= tmp; i--)
  755. push |= 1 << reg_map[i];
  756. for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--)
  757. push |= 1 << reg_map[i];
  758. FAIL_IF(push_inst(compiler, push));
  759. /* Stack must be aligned to 8 bytes: */
  760. size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
  761. local_size = ((size + local_size + 7) & ~7) - size;
  762. compiler->local_size = local_size;
  763. if (local_size > 0)
  764. FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size));
  765. if (args >= 1)
  766. FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_S0, SLJIT_UNUSED, RM(SLJIT_R0))));
  767. if (args >= 2)
  768. FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_S1, SLJIT_UNUSED, RM(SLJIT_R1))));
  769. if (args >= 3)
  770. FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_S2, SLJIT_UNUSED, RM(SLJIT_R2))));
  771. return SLJIT_SUCCESS;
  772. }
  773. SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler,
  774. sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
  775. sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
  776. {
  777. sljit_si size;
  778. CHECK_ERROR();
  779. CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
  780. set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
  781. size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
  782. compiler->local_size = ((size + local_size + 7) & ~7) - size;
  783. return SLJIT_SUCCESS;
  784. }
  785. SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
  786. {
  787. sljit_si i, tmp;
  788. sljit_uw pop;
  789. CHECK_ERROR();
  790. CHECK(check_sljit_emit_return(compiler, op, src, srcw));
  791. FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
  792. if (compiler->local_size > 0)
  793. FAIL_IF(emit_op(compiler, SLJIT_ADD, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));
  794. /* Push saved registers, temporary registers
  795. ldmia sp!, {..., pc} */
  796. pop = POP | (1 << 15);
  797. tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
  798. for (i = SLJIT_S0; i >= tmp; i--)
  799. pop |= 1 << reg_map[i];
  800. for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--)
  801. pop |= 1 << reg_map[i];
  802. return push_inst(compiler, pop);
  803. }
  804. /* --------------------------------------------------------------------- */
  805. /* Operators */
  806. /* --------------------------------------------------------------------- */
  807. /* s/l - store/load (1 bit)
  808. u/s - signed/unsigned (1 bit)
  809. w/b/h/N - word/byte/half/NOT allowed (2 bit)
  810. It contans 16 items, but not all are different. */
  811. static sljit_sw data_transfer_insts[16] = {
  812. /* s u w */ 0xe5000000 /* str */,
  813. /* s u b */ 0xe5400000 /* strb */,
  814. /* s u h */ 0xe10000b0 /* strh */,
  815. /* s u N */ 0x00000000 /* not allowed */,
  816. /* s s w */ 0xe5000000 /* str */,
  817. /* s s b */ 0xe5400000 /* strb */,
  818. /* s s h */ 0xe10000b0 /* strh */,
  819. /* s s N */ 0x00000000 /* not allowed */,
  820. /* l u w */ 0xe5100000 /* ldr */,
  821. /* l u b */ 0xe5500000 /* ldrb */,
  822. /* l u h */ 0xe11000b0 /* ldrh */,
  823. /* l u N */ 0x00000000 /* not allowed */,
  824. /* l s w */ 0xe5100000 /* ldr */,
  825. /* l s b */ 0xe11000d0 /* ldrsb */,
  826. /* l s h */ 0xe11000f0 /* ldrsh */,
  827. /* l s N */ 0x00000000 /* not allowed */,
  828. };
  829. #define EMIT_DATA_TRANSFER(type, add, wb, target, base1, base2) \
  830. (data_transfer_insts[(type) >> 4] | ((add) << 23) | ((wb) << 21) | (reg_map[target] << 12) | (reg_map[base1] << 16) | (base2))
  831. /* Normal ldr/str instruction.
  832. Type2: ldrsb, ldrh, ldrsh */
  833. #define IS_TYPE1_TRANSFER(type) \
  834. (data_transfer_insts[(type) >> 4] & 0x04000000)
  835. #define TYPE2_TRANSFER_IMM(imm) \
  836. (((imm) & 0xf) | (((imm) & 0xf0) << 4) | (1 << 22))
  837. /* flags: */
  838. /* Arguments are swapped. */
  839. #define ARGS_SWAPPED 0x01
  840. /* Inverted immediate. */
  841. #define INV_IMM 0x02
  842. /* Source and destination is register. */
  843. #define REG_DEST 0x04
  844. #define REG_SOURCE 0x08
  845. /* One instruction is enough. */
  846. #define FAST_DEST 0x10
  847. /* Multiple instructions are required. */
  848. #define SLOW_DEST 0x20
  849. /* SET_FLAGS must be (1 << 20) as it is also the value of S bit (can be used for optimization). */
  850. #define SET_FLAGS (1 << 20)
  851. /* dst: reg
  852. src1: reg
  853. src2: reg or imm (if allowed)
  854. SRC2_IMM must be (1 << 25) as it is also the value of I bit (can be used for optimization). */
  855. #define SRC2_IMM (1 << 25)
  856. #define EMIT_DATA_PROCESS_INS_AND_RETURN(opcode) \
  857. return push_inst(compiler, EMIT_DATA_PROCESS_INS(opcode, flags & SET_FLAGS, dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)))
  858. #define EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(opcode, dst, src1, src2) \
  859. return push_inst(compiler, EMIT_DATA_PROCESS_INS(opcode, flags & SET_FLAGS, dst, src1, src2))
  860. #define EMIT_SHIFT_INS_AND_RETURN(opcode) \
  861. SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM)); \
  862. if (compiler->shift_imm != 0x20) { \
  863. SLJIT_ASSERT(src1 == TMP_REG1); \
  864. SLJIT_ASSERT(!(flags & ARGS_SWAPPED)); \
  865. if (compiler->shift_imm != 0) \
  866. return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, (compiler->shift_imm << 7) | (opcode << 5) | reg_map[src2])); \
  867. return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, reg_map[src2])); \
  868. } \
  869. return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, (reg_map[(flags & ARGS_SWAPPED) ? src1 : src2] << 8) | (opcode << 5) | 0x10 | ((flags & ARGS_SWAPPED) ? reg_map[src2] : reg_map[src1])));
  870. static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags,
  871. sljit_si dst, sljit_si src1, sljit_si src2)
  872. {
  873. sljit_sw mul_inst;
  874. switch (GET_OPCODE(op)) {
  875. case SLJIT_MOV:
  876. SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
  877. if (dst != src2) {
  878. if (src2 & SRC2_IMM) {
  879. if (flags & INV_IMM)
  880. EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2);
  881. EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2);
  882. }
  883. EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, reg_map[src2]);
  884. }
  885. return SLJIT_SUCCESS;
  886. case SLJIT_MOV_UB:
  887. case SLJIT_MOV_SB:
  888. SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
  889. if ((flags & (REG_DEST | REG_SOURCE)) == (REG_DEST | REG_SOURCE)) {
  890. #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
  891. if (op == SLJIT_MOV_UB)
  892. return push_inst(compiler, EMIT_DATA_PROCESS_INS(AND_DP, 0, dst, src2, SRC2_IMM | 0xff));
  893. FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | reg_map[src2])));
  894. return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | (op == SLJIT_MOV_UB ? 0x20 : 0x40) | reg_map[dst]));
  895. #else
  896. return push_inst(compiler, (op == SLJIT_MOV_UB ? UXTB : SXTB) | RD(dst) | RM(src2));
  897. #endif
  898. }
  899. else if (dst != src2) {
  900. SLJIT_ASSERT(src2 & SRC2_IMM);
  901. if (flags & INV_IMM)
  902. EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2);
  903. EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2);
  904. }
  905. return SLJIT_SUCCESS;
  906. case SLJIT_MOV_UH:
  907. case SLJIT_MOV_SH:
  908. SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
  909. if ((flags & (REG_DEST | REG_SOURCE)) == (REG_DEST | REG_SOURCE)) {
  910. #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
  911. FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | reg_map[src2])));
  912. return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | (op == SLJIT_MOV_UH ? 0x20 : 0x40) | reg_map[dst]));
  913. #else
  914. return push_inst(compiler, (op == SLJIT_MOV_UH ? UXTH : SXTH) | RD(dst) | RM(src2));
  915. #endif
  916. }
  917. else if (dst != src2) {
  918. SLJIT_ASSERT(src2 & SRC2_IMM);
  919. if (flags & INV_IMM)
  920. EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2);
  921. EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2);
  922. }
  923. return SLJIT_SUCCESS;
  924. case SLJIT_NOT:
  925. if (src2 & SRC2_IMM) {
  926. if (flags & INV_IMM)
  927. EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2);
  928. EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2);
  929. }
  930. EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, RM(src2));
  931. case SLJIT_CLZ:
  932. SLJIT_ASSERT(!(flags & INV_IMM));
  933. SLJIT_ASSERT(!(src2 & SRC2_IMM));
  934. FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(src2)));
  935. if (flags & SET_FLAGS)
  936. EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(CMP_DP, SLJIT_UNUSED, dst, SRC2_IMM);
  937. return SLJIT_SUCCESS;
  938. case SLJIT_ADD:
  939. SLJIT_ASSERT(!(flags & INV_IMM));
  940. EMIT_DATA_PROCESS_INS_AND_RETURN(ADD_DP);
  941. case SLJIT_ADDC:
  942. SLJIT_ASSERT(!(flags & INV_IMM));
  943. EMIT_DATA_PROCESS_INS_AND_RETURN(ADC_DP);
  944. case SLJIT_SUB:
  945. SLJIT_ASSERT(!(flags & INV_IMM));
  946. if (!(flags & ARGS_SWAPPED))
  947. EMIT_DATA_PROCESS_INS_AND_RETURN(SUB_DP);
  948. EMIT_DATA_PROCESS_INS_AND_RETURN(RSB_DP);
  949. case SLJIT_SUBC:
  950. SLJIT_ASSERT(!(flags & INV_IMM));
  951. if (!(flags & ARGS_SWAPPED))
  952. EMIT_DATA_PROCESS_INS_AND_RETURN(SBC_DP);
  953. EMIT_DATA_PROCESS_INS_AND_RETURN(RSC_DP);
  954. case SLJIT_MUL:
  955. SLJIT_ASSERT(!(flags & INV_IMM));
  956. SLJIT_ASSERT(!(src2 & SRC2_IMM));
  957. if (SLJIT_UNLIKELY(op & SLJIT_SET_O))
  958. mul_inst = SMULL | (reg_map[TMP_REG3] << 16) | (reg_map[dst] << 12);
  959. else
  960. mul_inst = MUL | (reg_map[dst] << 16);
  961. if (dst != src2)
  962. FAIL_IF(push_inst(compiler, mul_inst | (reg_map[src1] << 8) | reg_map[src2]));
  963. else if (dst != src1)
  964. FAIL_IF(push_inst(compiler, mul_inst | (reg_map[src2] << 8) | reg_map[src1]));
  965. else {
  966. /* Rm and Rd must not be the same register. */
  967. SLJIT_ASSERT(dst != TMP_REG1);
  968. FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, reg_map[src2])));
  969. FAIL_IF(push_inst(compiler, mul_inst | (reg_map[src2] << 8) | reg_map[TMP_REG1]));
  970. }
  971. if (!(op & SLJIT_SET_O))
  972. return SLJIT_SUCCESS;
  973. /* We need to use TMP_REG3. */
  974. compiler->cache_arg = 0;
  975. compiler->cache_argw = 0;
  976. /* cmp TMP_REG2, dst asr #31. */
  977. return push_inst(compiler, EMIT_DATA_PROCESS_INS(CMP_DP, SET_FLAGS, SLJIT_UNUSED, TMP_REG3, RM(dst) | 0xfc0));
  978. case SLJIT_AND:
  979. if (!(flags & INV_IMM))
  980. EMIT_DATA_PROCESS_INS_AND_RETURN(AND_DP);
  981. EMIT_DATA_PROCESS_INS_AND_RETURN(BIC_DP);
  982. case SLJIT_OR:
  983. SLJIT_ASSERT(!(flags & INV_IMM));
  984. EMIT_DATA_PROCESS_INS_AND_RETURN(ORR_DP);
  985. case SLJIT_XOR:
  986. SLJIT_ASSERT(!(flags & INV_IMM));
  987. EMIT_DATA_PROCESS_INS_AND_RETURN(EOR_DP);
  988. case SLJIT_SHL:
  989. EMIT_SHIFT_INS_AND_RETURN(0);
  990. case SLJIT_LSHR:
  991. EMIT_SHIFT_INS_AND_RETURN(1);
  992. case SLJIT_ASHR:
  993. EMIT_SHIFT_INS_AND_RETURN(2);
  994. }
  995. SLJIT_ASSERT_STOP();
  996. return SLJIT_SUCCESS;
  997. }
  998. #undef EMIT_DATA_PROCESS_INS_AND_RETURN
  999. #undef EMIT_FULL_DATA_PROCESS_INS_AND_RETURN
  1000. #undef EMIT_SHIFT_INS_AND_RETURN
  1001. /* Tests whether the immediate can be stored in the 12 bit imm field.
  1002. Returns with 0 if not possible. */
  1003. static sljit_uw get_imm(sljit_uw imm)
  1004. {
  1005. sljit_si rol;
  1006. if (imm <= 0xff)
  1007. return SRC2_IMM | imm;
  1008. if (!(imm & 0xff000000)) {
  1009. imm <<= 8;
  1010. rol = 8;
  1011. }
  1012. else {
  1013. imm = (imm << 24) | (imm >> 8);
  1014. rol = 0;
  1015. }
  1016. if (!(imm & 0xff000000)) {
  1017. imm <<= 8;
  1018. rol += 4;
  1019. }
  1020. if (!(imm & 0xf0000000)) {
  1021. imm <<= 4;
  1022. rol += 2;
  1023. }
  1024. if (!(imm & 0xc0000000)) {
  1025. imm <<= 2;
  1026. rol += 1;
  1027. }
  1028. if (!(imm & 0x00ffffff))
  1029. return SRC2_IMM | (imm >> 24) | (rol << 8);
  1030. else
  1031. return 0;
  1032. }
  1033. #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
  1034. static sljit_si generate_int(struct sljit_compiler *compiler, sljit_si reg, sljit_uw imm, sljit_si positive)
  1035. {
  1036. sljit_uw mask;
  1037. sljit_uw imm1;
  1038. sljit_uw imm2;
  1039. sljit_si rol;
  1040. /* Step1: Search a zero byte (8 continous zero bit). */
  1041. mask = 0xff000000;
  1042. rol = 8;
  1043. while(1) {
  1044. if (!(imm & mask)) {
  1045. /* Rol imm by rol. */
  1046. imm = (imm << rol) | (imm >> (32 - rol));
  1047. /* Calculate arm rol. */
  1048. rol = 4 + (rol >> 1);
  1049. break;
  1050. }
  1051. rol += 2;
  1052. mask >>= 2;
  1053. if (mask & 0x3) {
  1054. /* rol by 8. */
  1055. imm = (imm << 8) | (imm >> 24);
  1056. mask = 0xff00;
  1057. rol = 24;
  1058. while (1) {
  1059. if (!(imm & mask)) {
  1060. /* Rol imm by rol. */
  1061. imm = (imm << rol) | (imm >> (32 - rol));
  1062. /* Calculate arm rol. */
  1063. rol = (rol >> 1) - 8;
  1064. break;
  1065. }
  1066. rol += 2;
  1067. mask >>= 2;
  1068. if (mask & 0x3)
  1069. return 0;
  1070. }
  1071. break;
  1072. }
  1073. }
  1074. /* The low 8 bit must be zero. */
  1075. SLJIT_ASSERT(!(imm & 0xff));
  1076. if (!(imm & 0xff000000)) {
  1077. imm1 = SRC2_IMM | ((imm >> 16) & 0xff) | (((rol + 4) & 0xf) << 8);
  1078. imm2 = SRC2_IMM | ((imm >> 8) & 0xff) | (((rol + 8) & 0xf) << 8);
  1079. }
  1080. else if (imm & 0xc0000000) {
  1081. imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8);
  1082. imm <<= 8;
  1083. rol += 4;
  1084. if (!(imm & 0xff000000)) {
  1085. imm <<= 8;
  1086. rol += 4;
  1087. }
  1088. if (!(imm & 0xf0000000)) {
  1089. imm <<= 4;
  1090. rol += 2;
  1091. }
  1092. if (!(imm & 0xc0000000)) {
  1093. imm <<= 2;
  1094. rol += 1;
  1095. }
  1096. if (!(imm & 0x00ffffff))
  1097. imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8);
  1098. else
  1099. return 0;
  1100. }
  1101. else {
  1102. if (!(imm & 0xf0000000)) {
  1103. imm <<= 4;
  1104. rol += 2;
  1105. }
  1106. if (!(imm & 0xc0000000)) {
  1107. imm <<= 2;
  1108. rol += 1;
  1109. }
  1110. imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8);
  1111. imm <<= 8;
  1112. rol += 4;
  1113. if (!(imm & 0xf0000000)) {
  1114. imm <<= 4;
  1115. rol += 2;
  1116. }
  1117. if (!(imm & 0xc0000000)) {
  1118. imm <<= 2;
  1119. rol += 1;
  1120. }
  1121. if (!(imm & 0x00ffffff))
  1122. imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8);
  1123. else
  1124. return 0;
  1125. }
  1126. FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(positive ? MOV_DP : MVN_DP, 0, reg, SLJIT_UNUSED, imm1)));
  1127. FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(positive ? ORR_DP : BIC_DP, 0, reg, reg, imm2)));
  1128. return 1;
  1129. }
  1130. #endif
  1131. static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si reg, sljit_uw imm)
  1132. {
  1133. sljit_uw tmp;
  1134. #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
  1135. if (!(imm & ~0xffff))
  1136. return push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff));
  1137. #endif
  1138. /* Create imm by 1 inst. */
  1139. tmp = get_imm(imm);
  1140. if (tmp)
  1141. return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, tmp));
  1142. tmp = get_imm(~imm);
  1143. if (tmp)
  1144. return push_inst(compiler, EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, tmp));
  1145. #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
  1146. /* Create imm by 2 inst. */
  1147. FAIL_IF(generate_int(compiler, reg, imm, 1));
  1148. FAIL_IF(generate_int(compiler, reg, ~imm, 0));
  1149. /* Load integer. */
  1150. return push_inst_with_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, reg, TMP_PC, 0), imm);
  1151. #else
  1152. return emit_imm(compiler, reg, imm);
  1153. #endif
  1154. }
  1155. /* Helper function. Dst should be reg + value, using at most 1 instruction, flags does not set. */
  1156. static sljit_si emit_set_delta(struct sljit_compiler *compiler, sljit_si dst, sljit_si reg, sljit_sw value)
  1157. {
  1158. if (value >= 0) {
  1159. value = get_imm(value);
  1160. if (value)
  1161. return push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, dst, reg, value));
  1162. }
  1163. else {
  1164. value = get_imm(-value);
  1165. if (value)
  1166. return push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, dst, reg, value));
  1167. }
  1168. return SLJIT_ERR_UNSUPPORTED;
  1169. }
  1170. /* Can perform an operation using at most 1 instruction. */
  1171. static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si inp_flags, sljit_si reg, sljit_si arg, sljit_sw argw)
  1172. {
  1173. sljit_uw imm;
  1174. if (arg & SLJIT_IMM) {
  1175. imm = get_imm(argw);
  1176. if (imm) {
  1177. if (inp_flags & ARG_TEST)
  1178. return 1;
  1179. FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, imm)));
  1180. return -1;
  1181. }
  1182. imm = get_imm(~argw);
  1183. if (imm) {
  1184. if (inp_flags & ARG_TEST)
  1185. return 1;
  1186. FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, imm)));
  1187. return -1;
  1188. }
  1189. return 0;
  1190. }
  1191. SLJIT_ASSERT(arg & SLJIT_MEM);
  1192. /* Fast loads/stores. */
  1193. if (!(arg & REG_MASK))
  1194. return 0;
  1195. if (arg & OFFS_REG_MASK) {
  1196. if ((argw & 0x3) != 0 && !IS_TYPE1_TRANSFER(inp_flags))
  1197. return 0;
  1198. if (inp_flags & ARG_TEST)
  1199. return 1;
  1200. FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK,
  1201. RM(OFFS_REG(arg)) | (IS_TYPE1_TRANSFER(inp_flags) ? SRC2_IMM : 0) | ((argw & 0x3) << 7))));
  1202. return -1;
  1203. }
  1204. if (IS_TYPE1_TRANSFER(inp_flags)) {
  1205. if (argw >= 0 && argw <= 0xfff) {
  1206. if (inp_flags & ARG_TEST)
  1207. return 1;
  1208. FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, argw)));
  1209. return -1;
  1210. }
  1211. if (argw < 0 && argw >= -0xfff) {
  1212. if (inp_flags & ARG_TEST)
  1213. return 1;
  1214. FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & REG_MASK, -argw)));
  1215. return -1;
  1216. }
  1217. }
  1218. else {
  1219. if (argw >= 0 && argw <= 0xff) {
  1220. if (inp_flags & ARG_TEST)
  1221. return 1;
  1222. FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, TYPE2_TRANSFER_IMM(argw))));
  1223. return -1;
  1224. }
  1225. if (argw < 0 && argw >= -0xff) {
  1226. if (inp_flags & ARG_TEST)
  1227. return 1;
  1228. argw = -argw;
  1229. FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & REG_MASK, TYPE2_TRANSFER_IMM(argw))));
  1230. return -1;
  1231. }
  1232. }
  1233. return 0;
  1234. }
  1235. /* See getput_arg below.
  1236. Note: can_cache is called only for binary operators. Those
  1237. operators always uses word arguments without write back. */
  1238. static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
  1239. {
  1240. /* Immediate caching is not supported as it would be an operation on constant arguments. */
  1241. if (arg & SLJIT_IMM)
  1242. return 0;
  1243. /* Always a simple operation. */
  1244. if (arg & OFFS_REG_MASK)
  1245. return 0;
  1246. if (!(arg & REG_MASK)) {
  1247. /* Immediate access. */
  1248. if ((next_arg & SLJIT_MEM) && ((sljit_uw)argw - (sljit_uw)next_argw <= 0xfff || (sljit_uw)next_argw - (sljit_uw)argw <= 0xfff))
  1249. return 1;
  1250. return 0;
  1251. }
  1252. if (argw <= 0xfffff && argw >= -0xfffff)
  1253. return 0;
  1254. if (argw == next_argw && (next_arg & SLJIT_MEM))
  1255. return 1;
  1256. if (arg == next_arg && ((sljit_uw)argw - (sljit_uw)next_argw <= 0xfff || (sljit_uw)next_argw - (sljit_uw)argw <= 0xfff))
  1257. return 1;
  1258. return 0;
  1259. }
  1260. #define GETPUT_ARG_DATA_TRANSFER(add, wb, target, base, imm) \
  1261. if (max_delta & 0xf00) \
  1262. FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, add, wb, target, base, imm))); \
  1263. else \
  1264. FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, add, wb, target, base, TYPE2_TRANSFER_IMM(imm))));
  1265. #define TEST_WRITE_BACK() \
  1266. if (inp_flags & WRITE_BACK) { \
  1267. tmp_r = arg & REG_MASK; \
  1268. if (reg == tmp_r) { \
  1269. /* This can only happen for stores */ \
  1270. /* since ldr reg, [reg, ...]! has no meaning */ \
  1271. SLJIT_ASSERT(!(inp_flags & LOAD_DATA)); \
  1272. FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(reg)))); \
  1273. reg = TMP_REG3; \
  1274. } \
  1275. }
  1276. /* Emit the necessary instructions. See can_cache above. */
  1277. static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags, sljit_si reg, sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
  1278. {
  1279. sljit_si tmp_r;
  1280. sljit_sw max_delta;
  1281. sljit_sw sign;
  1282. sljit_uw imm;
  1283. if (arg & SLJIT_IMM) {
  1284. SLJIT_ASSERT(inp_flags & LOAD_DATA);
  1285. return load_immediate(compiler, reg, argw);
  1286. }
  1287. SLJIT_ASSERT(arg & SLJIT_MEM);
  1288. tmp_r = (inp_flags & LOAD_DATA) ? reg : TMP_REG3;
  1289. max_delta = IS_TYPE1_TRANSFER(inp_flags) ? 0xfff : 0xff;
  1290. if ((arg & REG_MASK) == SLJIT_UNUSED) {
  1291. /* Write back is not used. */
  1292. imm = (sljit_uw)(argw - compiler->cache_argw);
  1293. if ((compiler->cache_arg & SLJIT_IMM) && (imm <= (sljit_uw)max_delta || imm >= (sljit_uw)-max_delta)) {
  1294. if (imm <= (sljit_uw)max_delta) {
  1295. sign = 1;
  1296. argw = argw - compiler->cache_argw;
  1297. }
  1298. else {
  1299. sign = 0;
  1300. argw = compiler->cache_argw - argw;
  1301. }
  1302. GETPUT_ARG_DATA_TRANSFER(sign, 0, reg, TMP_REG3, argw);
  1303. return SLJIT_SUCCESS;
  1304. }
  1305. /* With write back, we can create some sophisticated loads, but
  1306. it is hard to decide whether we should convert downward (0s) or upward (1s). */
  1307. imm = (sljit_uw)(argw - next_argw);
  1308. if ((next_arg & SLJIT_MEM) && (imm <= (sljit_uw)max_delta || imm >= (sljit_uw)-max_delta)) {
  1309. SLJIT_ASSERT(inp_flags & LOAD_DATA);
  1310. compiler->cache_arg = SLJIT_IMM;
  1311. compiler->cache_argw = argw;
  1312. tmp_r = TMP_REG3;
  1313. }
  1314. FAIL_IF(load_immediate(compiler, tmp_r, argw));
  1315. GETPUT_ARG_DATA_TRANSFER(1, 0, reg, tmp_r, 0);
  1316. return SLJIT_SUCCESS;
  1317. }
  1318. if (arg & OFFS_REG_MASK) {
  1319. SLJIT_ASSERT((argw & 0x3) && !(max_delta & 0xf00));
  1320. if (inp_flags & WRITE_BACK)
  1321. tmp_r = arg & REG_MASK;
  1322. FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_r, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7))));
  1323. return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, 0, reg, tmp_r, TYPE2_TRANSFER_IMM(0)));
  1324. }
  1325. imm = (sljit_uw)(argw - compiler->cache_argw);
  1326. if (compiler->cache_arg == arg && imm <= (sljit_uw)max_delta) {
  1327. SLJIT_ASSERT(!(inp_flags & WRITE_BACK));
  1328. GETPUT_ARG_DATA_TRANSFER(1, 0, reg, TMP_REG3, imm);
  1329. return SLJIT_SUCCESS;
  1330. }
  1331. if (compiler->cache_arg == arg && imm >= (sljit_uw)-max_delta) {
  1332. SLJIT_ASSERT(!(inp_flags & WRITE_BACK));
  1333. imm = (sljit_uw)-(sljit_sw)imm;
  1334. GETPUT_ARG_DATA_TRANSFER(0, 0, reg, TMP_REG3, imm);
  1335. return SLJIT_SUCCESS;
  1336. }
  1337. imm = get_imm(argw & ~max_delta);
  1338. if (imm) {
  1339. TEST_WRITE_BACK();
  1340. FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_r, arg & REG_MASK, imm)));
  1341. GETPUT_ARG_DATA_TRANSFER(1, inp_flags & WRITE_BACK, reg, tmp_r, argw & max_delta);
  1342. return SLJIT_SUCCESS;
  1343. }
  1344. imm = get_imm(-argw & ~max_delta);
  1345. if (imm) {
  1346. argw = -argw;
  1347. TEST_WRITE_BACK();
  1348. FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, tmp_r, arg & REG_MASK, imm)));
  1349. GETPUT_ARG_DATA_TRANSFER(0, inp_flags & WRITE_BACK, reg, tmp_r, argw & max_delta);
  1350. return SLJIT_SUCCESS;
  1351. }
  1352. if ((compiler->cache_arg & SLJIT_IMM) && compiler->cache_argw == argw) {
  1353. TEST_WRITE_BACK();
  1354. return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0)));
  1355. }
  1356. if (argw == next_argw && (next_arg & SLJIT_MEM)) {
  1357. SLJIT_ASSERT(inp_flags & LOAD_DATA);
  1358. FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
  1359. compiler->cache_arg = SLJIT_IMM;
  1360. compiler->cache_argw = argw;
  1361. TEST_WRITE_BACK();
  1362. return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0)));
  1363. }
  1364. imm = (sljit_uw)(argw - next_argw);
  1365. if (arg == next_arg && !(inp_flags & WRITE_BACK) && (imm <= (sljit_uw)max_delta || imm >= (sljit_uw)-max_delta)) {
  1366. SLJIT_ASSERT(inp_flags & LOAD_DATA);
  1367. FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
  1368. FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, TMP_REG3, reg_map[arg & REG_MASK])));
  1369. compiler->cache_arg = arg;
  1370. compiler->cache_argw = argw;
  1371. GETPUT_ARG_DATA_TRANSFER(1, 0, reg, TMP_REG3, 0);
  1372. return SLJIT_SUCCESS;
  1373. }
  1374. if ((arg & REG_MASK) == tmp_r) {
  1375. compiler->cache_arg = SLJIT_IMM;
  1376. compiler->cache_argw = argw;
  1377. tmp_r = TMP_REG3;
  1378. }
  1379. FAIL_IF(load_immediate(compiler, tmp_r, argw));
  1380. return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, reg_map[tmp_r] | (max_delta & 0xf00 ? SRC2_IMM : 0)));
  1381. }
  1382. static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw)
  1383. {
  1384. if (getput_arg_fast(compiler, flags, reg, arg, argw))
  1385. return compiler->error;
  1386. compiler->cache_arg = 0;
  1387. compiler->cache_argw = 0;
  1388. return getput_arg(compiler, flags, reg, arg, argw, 0, 0);
  1389. }
  1390. static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w)
  1391. {
  1392. if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
  1393. return compiler->error;
  1394. return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
  1395. }
  /* Common driver for the integer operations emitted by sljit_emit_op1/op2.
     It decides, for dst, src1 and src2, whether each one is a register, an
     encodable immediate or a memory operand, loads the memory sources into
     temporaries, calls emit_single_op() and finally stores the result when
     the destination is a memory operand.

     compiler       - compiler state; its address cache is reset on entry
     op             - opcode including flag bits; may be rewritten ADD <-> SUB
                      below when the negated immediate is encodable
     inp_flags      - transfer/immediate flags (ALLOW_ANY_IMM, ALLOW_INV_IMM,
                      WRITE_BACK, data size bits, ...) forwarded to getput_arg*
     dst, dstw      - destination operand
     src1, src1w    - first source operand
     src2, src2w    - second source operand

     Returns SLJIT_SUCCESS, or leaves through FAIL_IF when an emitter fails. */
  1396. static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si inp_flags,
  1397. sljit_si dst, sljit_sw dstw,
  1398. sljit_si src1, sljit_sw src1w,
  1399. sljit_si src2, sljit_sw src2w)
  1400. {
  1401. /* arg1 goes to TMP_REG1 or src reg
  1402. arg2 goes to TMP_REG2, imm or src reg
  1403. TMP_REG3 can be used for caching
  1404. result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */
  1405. /* We prefer registers and simple constants. */
  1406. sljit_si dst_r;
  1407. sljit_si src1_r;
  1408. sljit_si src2_r = 0;
  1409. sljit_si sugg_src2_r = TMP_REG2;
  1410. sljit_si flags = GET_FLAGS(op) ? SET_FLAGS : 0;
  1411. compiler->cache_arg = 0;
  1412. compiler->cache_argw = 0;
  1413. /* Destination check. */
  1414. if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
  /* A move-range op into an unused destination emits nothing unless src2 is a memory operand. */
  1415. if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM))
  1416. return SLJIT_SUCCESS;
  1417. dst_r = TMP_REG2;
  1418. }
  1419. else if (FAST_IS_REG(dst)) {
  1420. dst_r = dst;
  1421. flags |= REG_DEST;
  /* For move-range ops the second source is loaded straight into the destination register. */
  1422. if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI)
  1423. sugg_src2_r = dst_r;
  1424. }
  1425. else {
  1426. SLJIT_ASSERT(dst & SLJIT_MEM);
  /* Called with ARG_TEST added to the flags: only the return value is used here to
     classify the destination; the store itself is issued at the end of the function. */
  1427. if (getput_arg_fast(compiler, inp_flags | ARG_TEST, TMP_REG2, dst, dstw)) {
  1428. flags |= FAST_DEST;
  1429. dst_r = TMP_REG2;
  1430. }
  1431. else {
  1432. flags |= SLOW_DEST;
  /* dst_r == 0 marks the destination as unprocessed (see the cache planning below). */
  1433. dst_r = 0;
  1434. }
  1435. }
  1436. /* Source 1. */
  1437. if (FAST_IS_REG(src1))
  1438. src1_r = src1;
  1439. else if (FAST_IS_REG(src2)) {
  /* Only src2 is a register: exchange the operands and record that in ARGS_SWAPPED. */
  1440. flags |= ARGS_SWAPPED;
  1441. src1_r = src2;
  1442. src2 = src1;
  1443. src2w = src1w;
  1444. }
  1445. else do { /* do { } while(0) is used because of breaks. */
  1446. src1_r = 0;
  1447. if ((inp_flags & ALLOW_ANY_IMM) && (src1 & SLJIT_IMM)) {
  1448. /* The second check will generate a hit. */
  /* An encodable src1 immediate is stored in src2_r and the old src2 becomes src1. */
  1449. src2_r = get_imm(src1w);
  1450. if (src2_r) {
  1451. flags |= ARGS_SWAPPED;
  1452. src1 = src2;
  1453. src1w = src2w;
  1454. break;
  1455. }
  1456. if (inp_flags & ALLOW_INV_IMM) {
  1457. src2_r = get_imm(~src1w);
  1458. if (src2_r) {
  1459. flags |= ARGS_SWAPPED | INV_IMM;
  1460. src1 = src2;
  1461. src1w = src2w;
  1462. break;
  1463. }
  1464. }
  1465. if (GET_OPCODE(op) == SLJIT_ADD) {
  /* ADD with an immediate whose negation is encodable is rewritten as SUB. */
  1466. src2_r = get_imm(-src1w);
  1467. if (src2_r) {
  1468. /* Note: ARGS_SWAPPED is intentionally not applied! */
  1469. src1 = src2;
  1470. src1w = src2w;
  1471. op = SLJIT_SUB | GET_ALL_FLAGS(op);
  1472. break;
  1473. }
  1474. }
  1475. }
  1476. if (getput_arg_fast(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w)) {
  1477. FAIL_IF(compiler->error);
  1478. src1_r = TMP_REG1;
  1479. }
  1480. } while (0);
  1481. /* Source 2. */
  1482. if (src2_r == 0) {
  1483. if (FAST_IS_REG(src2)) {
  1484. src2_r = src2;
  1485. flags |= REG_SOURCE;
  /* Move-range op from a register into a non-register destination: use the source register as the result register. */
  1486. if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_SI)
  1487. dst_r = src2_r;
  1488. }
  1489. else do { /* do { } while(0) is used because of breaks. */
  1490. if ((inp_flags & ALLOW_ANY_IMM) && (src2 & SLJIT_IMM)) {
  1491. src2_r = get_imm(src2w);
  1492. if (src2_r)
  1493. break;
  1494. if (inp_flags & ALLOW_INV_IMM) {
  1495. src2_r = get_imm(~src2w);
  1496. if (src2_r) {
  1497. flags |= INV_IMM;
  1498. break;
  1499. }
  1500. }
  1501. if (GET_OPCODE(op) == SLJIT_ADD) {
  1502. src2_r = get_imm(-src2w);
  1503. if (src2_r) {
  1504. op = SLJIT_SUB | GET_ALL_FLAGS(op);
  1505. flags &= ~ARGS_SWAPPED;
  1506. break;
  1507. }
  1508. }
  /* The SUB -> ADD rewrite is only attempted while the operands are still in their original order. */
  1509. if (GET_OPCODE(op) == SLJIT_SUB && !(flags & ARGS_SWAPPED)) {
  1510. src2_r = get_imm(-src2w);
  1511. if (src2_r) {
  1512. op = SLJIT_ADD | GET_ALL_FLAGS(op);
  1513. flags &= ~ARGS_SWAPPED;
  1514. break;
  1515. }
  1516. }
  1517. }
  1518. /* src2_r is 0. */
  1519. if (getput_arg_fast(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w)) {
  1520. FAIL_IF(compiler->error);
  1521. src2_r = sugg_src2_r;
  1522. }
  1523. } while (0);
  1524. }
  1525. /* src1_r, src2_r and dst_r can be zero (=unprocessed) or non-zero.
  1526. If they are zero, they must not be registers. */
  /* Each getput_arg call below also receives the operand that will be processed
     next; the branches only differ in which pair of unprocessed operands they chain. */
  1527. if (src1_r == 0 && src2_r == 0 && dst_r == 0) {
  1528. if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
  /* src1 cannot be cached for src2 but can be for dst: load src2 first so that
     the src1 access is the one immediately followed by dst. */
  1529. SLJIT_ASSERT(!(flags & ARGS_SWAPPED));
  1530. flags |= ARGS_SWAPPED;
  1531. FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src2, src2w, src1, src1w));
  1532. FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG2, src1, src1w, dst, dstw));
  1533. }
  1534. else {
  1535. FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
  1536. FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw));
  1537. }
  1538. src1_r = TMP_REG1;
  1539. src2_r = TMP_REG2;
  1540. }
  1541. else if (src1_r == 0 && src2_r == 0) {
  1542. FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
  1543. src1_r = TMP_REG1;
  1544. }
  1545. else if (src1_r == 0 && dst_r == 0) {
  1546. FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
  1547. src1_r = TMP_REG1;
  1548. }
  1549. else if (src2_r == 0 && dst_r == 0) {
  1550. FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw));
  1551. src2_r = sugg_src2_r;
  1552. }
  /* Anything still unprocessed is handled without a follow-up operand. */
  1553. if (dst_r == 0)
  1554. dst_r = TMP_REG2;
  1555. if (src1_r == 0) {
  1556. FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, 0, 0));
  1557. src1_r = TMP_REG1;
  1558. }
  1559. if (src2_r == 0) {
  1560. FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w, 0, 0));
  1561. src2_r = sugg_src2_r;
  1562. }
  1563. FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));
  /* Memory destination: write the result back with the encoder chosen during the destination check. */
  1564. if (flags & (FAST_DEST | SLOW_DEST)) {
  1565. if (flags & FAST_DEST)
  1566. FAIL_IF(getput_arg_fast(compiler, inp_flags, dst_r, dst, dstw));
  1567. else
  1568. FAIL_IF(getput_arg(compiler, inp_flags, dst_r, dst, dstw, 0, 0));
  1569. }
  1570. return SLJIT_SUCCESS;
  1571. }
  /* Runtime division helpers called by sljit_emit_op0 for the DIVMOD / DIVI
     opcodes. They are declared only for GNU-compatible compilers; any other
     compiler stops with an error. */
  #if !defined(__GNUC__)
  #error "Software divmod functions are needed"
  #endif

  #if defined(__cplusplus)
  extern "C" {
  #endif

  extern unsigned int __aeabi_uidivmod(unsigned int numerator, unsigned int denominator);
  extern int __aeabi_idivmod(int numerator, int denominator);

  #if defined(__cplusplus)
  }
  #endif
  1584. SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
  1585. {
  1586. CHECK_ERROR();
  1587. CHECK(check_sljit_emit_op0(compiler, op));
  1588. op = GET_OPCODE(op);
  1589. switch (op) {
  1590. case SLJIT_BREAKPOINT:
  1591. FAIL_IF(push_inst(compiler, BKPT));
  1592. break;
  1593. case SLJIT_NOP:
  1594. FAIL_IF(push_inst(compiler, NOP));
  1595. break;
  1596. case SLJIT_LUMUL:
  1597. case SLJIT_LSMUL:
  1598. #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
  1599. return push_inst(compiler, (op == SLJIT_LUMUL ? UMULL : SMULL)
  1600. | (reg_map[SLJIT_R1] << 16)
  1601. | (reg_map[SLJIT_R0] << 12)
  1602. | (reg_map[SLJIT_R0] << 8)
  1603. | reg_map[SLJIT_R1]);
  1604. #else
  1605. FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, RM(SLJIT_R1))));
  1606. return push_inst(compiler, (op == SLJIT_LUMUL ? UMULL : SMULL)
  1607. | (reg_map[SLJIT_R1] << 16)
  1608. | (reg_map[SLJIT_R0] << 12)
  1609. | (reg_map[SLJIT_R0] << 8)
  1610. | reg_map[TMP_REG1]);
  1611. #endif
  1612. case SLJIT_UDIVMOD:
  1613. case SLJIT_SDIVMOD:
  1614. case SLJIT_UDIVI:
  1615. case SLJIT_SDIVI:
  1616. SLJIT_COMPILE_ASSERT((SLJIT_UDIVMOD & 0x2) == 0 && SLJIT_UDIVI - 0x2 == SLJIT_UDIVMOD, bad_div_opcode_assignments);
  1617. SLJIT_COMPILE_ASSERT(reg_map[2] == 1 && reg_map[3] == 2, bad_register_mapping);
  1618. if ((op >= SLJIT_UDIVI) && (compiler->scratches >= 3)) {
  1619. FAIL_IF(push_inst(compiler, 0xe52d2008 /* str r2, [sp, #-8]! */));
  1620. FAIL_IF(push_inst(compiler, 0xe58d1004 /* str r1, [sp, #4] */));
  1621. }
  1622. else if ((op >= SLJIT_UDIVI) || (compiler->scratches >= 3))
  1623. FAIL_IF(push_inst(compiler, 0xe52d0008 | (op >= SLJIT_UDIVI ? 0x1000 : 0x2000) /* str r1/r2, [sp, #-8]! */));
  1624. #if defined(__GNUC__)
  1625. FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
  1626. ((op | 0x2) == SLJIT_UDIVI ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod))));
  1627. #else
  1628. #error "Software divmod functions are needed"
  1629. #endif
  1630. if ((op >= SLJIT_UDIVI) && (compiler->scratches >= 3)) {
  1631. FAIL_IF(push_inst(compiler, 0xe59d1004 /* ldr r1, [sp, #4] */));
  1632. FAIL_IF(push_inst(compiler, 0xe49d2008 /* ldr r2, [sp], #8 */));
  1633. }
  1634. else if ((op >= SLJIT_UDIVI) || (compiler->scratches >= 3))
  1635. return push_inst(compiler, 0xe49d0008 | (op >= SLJIT_UDIVI ? 0x1000 : 0x2000) /* ldr r1/r2, [sp], #8 */);
  1636. return SLJIT_SUCCESS;
  1637. }
  1638. return SLJIT_SUCCESS;
  1639. }
  1640. SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
  1641. sljit_si dst, sljit_sw dstw,
  1642. sljit_si src, sljit_sw srcw)
  1643. {
  1644. CHECK_ERROR();
  1645. CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
  1646. ADJUST_LOCAL_OFFSET(dst, dstw);
  1647. ADJUST_LOCAL_OFFSET(src, srcw);
  1648. switch (GET_OPCODE(op)) {
  1649. case SLJIT_MOV:
  1650. case SLJIT_MOV_UI:
  1651. case SLJIT_MOV_SI:
  1652. case SLJIT_MOV_P:
  1653. return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw);
  1654. case SLJIT_MOV_UB:
  1655. return emit_op(compiler, SLJIT_MOV_UB, ALLOW_ANY_IMM | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub)srcw : srcw);
  1656. case SLJIT_MOV_SB:
  1657. return emit_op(compiler, SLJIT_MOV_SB, ALLOW_ANY_IMM | SIGNED_DATA | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb)srcw : srcw);
  1658. case SLJIT_MOV_UH:
  1659. return emit_op(compiler, SLJIT_MOV_UH, ALLOW_ANY_IMM | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh)srcw : srcw);
  1660. case SLJIT_MOV_SH:
  1661. return emit_op(compiler, SLJIT_MOV_SH, ALLOW_ANY_IMM | SIGNED_DATA | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh)srcw : srcw);
  1662. case SLJIT_MOVU:
  1663. case SLJIT_MOVU_UI:
  1664. case SLJIT_MOVU_SI:
  1665. case SLJIT_MOVU_P:
  1666. return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
  1667. case SLJIT_MOVU_UB:
  1668. return emit_op(compiler, SLJIT_MOV_UB, ALLOW_ANY_IMM | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub)srcw : srcw);
  1669. case SLJIT_MOVU_SB:
  1670. return emit_op(compiler, SLJIT_MOV_SB, ALLOW_ANY_IMM | SIGNED_DATA | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb)srcw : srcw);
  1671. case SLJIT_MOVU_UH:
  1672. return emit_op(compiler, SLJIT_MOV_UH, ALLOW_ANY_IMM | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh)srcw : srcw);
  1673. case SLJIT_MOVU_SH:
  1674. return emit_op(compiler, SLJIT_MOV_SH, ALLOW_ANY_IMM | SIGNED_DATA | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh)srcw : srcw);
  1675. case SLJIT_NOT:
  1676. return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw);
  1677. case SLJIT_NEG:
  1678. #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
  1679. || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
  1680. compiler->skip_checks = 1;
  1681. #endif
  1682. return sljit_emit_op2(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), dst, dstw, SLJIT_IMM, 0, src, srcw);
  1683. case SLJIT_CLZ:
  1684. return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw);
  1685. }
  1686. return SLJIT_SUCCESS;
  1687. }
  1688. SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
  1689. sljit_si dst, sljit_sw dstw,
  1690. sljit_si src1, sljit_sw src1w,
  1691. sljit_si src2, sljit_sw src2w)
  1692. {
  1693. CHECK_ERROR();
  1694. CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
  1695. ADJUST_LOCAL_OFFSET(dst, dstw);
  1696. ADJUST_LOCAL_OFFSET(src1, src1w);
  1697. ADJUST_LOCAL_OFFSET(src2, src2w);
  1698. switch (GET_OPCODE(op)) {
  1699. case SLJIT_ADD:
  1700. case SLJIT_ADDC:
  1701. case SLJIT_SUB:
  1702. case SLJIT_SUBC:
  1703. case SLJIT_OR:
  1704. case SLJIT_XOR:
  1705. return emit_op(compiler, op, ALLOW_IMM, dst, dstw, src1, src1w, src2, src2w);
  1706. case SLJIT_MUL:
  1707. return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w);
  1708. case SLJIT_AND:
  1709. return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, src1, src1w, src2, src2w);
  1710. case SLJIT_SHL:
  1711. case SLJIT_LSHR:
  1712. case SLJIT_ASHR:
  1713. if (src2 & SLJIT_IMM) {
  1714. compiler->shift_imm = src2w & 0x1f;
  1715. return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src1, src1w);
  1716. }
  1717. else {
  1718. compiler->shift_imm = 0x20;
  1719. return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w);
  1720. }
  1721. }
  1722. return SLJIT_SUCCESS;
  1723. }
  1724. SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
  1725. {
  1726. CHECK_REG_INDEX(check_sljit_get_register_index(reg));
  1727. return reg_map[reg];
  1728. }
  1729. SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
  1730. {
  1731. CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
  1732. return reg << 1;
  1733. }
  1734. SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
  1735. void *instruction, sljit_si size)
  1736. {
  1737. CHECK_ERROR();
  1738. CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
  1739. return push_inst(compiler, *(sljit_uw*)instruction);
  1740. }
  1741. /* --------------------------------------------------------------------- */
  1742. /* Floating point operators */
  1743. /* --------------------------------------------------------------------- */
  1744. #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
  1745. /* 0 - no fpu
  1746. 1 - vfp */
  1747. static sljit_si arm_fpu_type = -1;
  1748. static void init_compiler(void)
  1749. {
  1750. if (arm_fpu_type != -1)
  1751. return;
  1752. /* TODO: Only the OS can help to determine the correct fpu type. */
  1753. arm_fpu_type = 1;
  1754. }
  1755. SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
  1756. {
  1757. #ifdef SLJIT_IS_FPU_AVAILABLE
  1758. return SLJIT_IS_FPU_AVAILABLE;
  1759. #else
  1760. if (arm_fpu_type == -1)
  1761. init_compiler();
  1762. return arm_fpu_type;
  1763. #endif
  1764. }
  1765. #else
  1766. #define arm_fpu_type 1
  1767. SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
  1768. {
  1769. /* Always available. */
  1770. return 1;
  1771. }
  1772. #endif
  /* Flag bit OR-ed into a VFP transfer opcode to turn the store into a load. */
  #define FPU_LOAD (1 << 20)

  /* Builds a VFP load/store instruction word.
     inst - base opcode (already combined with FPU_LOAD / precision bits)
     add  - 1 to add the offset to the base register, 0 to subtract it
     base - sljit register used as base address (translated through reg_map)
     freg - floating point register number
     offs - offset field, OR-ed into the low bits as given
     Every argument is parenthesised so that compound expressions such as
     `a | b` are encoded as a whole. */
  #define EMIT_FPU_DATA_TRANSFER(inst, add, base, freg, offs) \
      ((inst) | ((add) << 23) | (reg_map[(base)] << 16) | ((freg) << 12) | (offs))

  /* Builds a VFP data-processing instruction word: dst goes to bits 12 and up,
     src1 to the low bits, src2 to bits 16 and up; mode is OR-ed in unchanged. */
  #define EMIT_FPU_OPERATION(opcode, mode, dst, src1, src2) \
      ((opcode) | (mode) | ((dst) << 12) | (src1) | ((src2) << 16))
  1778. static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw)
  1779. {
  1780. sljit_sw tmp;
  1781. sljit_uw imm;
  1782. sljit_sw inst = VSTR_F32 | (flags & (SLJIT_SINGLE_OP | FPU_LOAD));
  1783. SLJIT_ASSERT(arg & SLJIT_MEM);
  1784. if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
  1785. FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7))));
  1786. arg = SLJIT_MEM | TMP_REG1;
  1787. argw = 0;
  1788. }
  1789. /* Fast loads and stores. */
  1790. if ((arg & REG_MASK)) {
  1791. if (!(argw & ~0x3fc))
  1792. return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, arg & REG_MASK, reg, argw >> 2));
  1793. if (!(-argw & ~0x3fc))
  1794. return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, arg & REG_MASK, reg, (-argw) >> 2));
  1795. }
  1796. if (compiler->cache_arg == arg) {
  1797. tmp = argw - compiler->cache_argw;
  1798. if (!(tmp & ~0x3fc))
  1799. return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG3, reg, tmp >> 2));
  1800. if (!(-tmp & ~0x3fc))
  1801. return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG3, reg, -tmp >> 2));
  1802. if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, tmp) != SLJIT_ERR_UNSUPPORTED) {
  1803. FAIL_IF(compiler->error);
  1804. compiler->cache_argw = argw;
  1805. return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG3, reg, 0));
  1806. }
  1807. }
  1808. if (arg & REG_MASK) {
  1809. if (emit_set_delta(compiler, TMP_REG1, arg & REG_MASK, argw) != SLJIT_ERR_UNSUPPORTED) {
  1810. FAIL_IF(compiler->error);
  1811. return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG1, reg, 0));
  1812. }
  1813. imm = get_imm(argw & ~0x3fc);
  1814. if (imm) {
  1815. FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & REG_MASK, imm)));
  1816. return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG1, reg, (argw & 0x3fc) >> 2));
  1817. }
  1818. imm = get_imm(-argw & ~0x3fc);
  1819. if (imm) {
  1820. argw = -argw;
  1821. FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, TMP_REG1, arg & REG_MASK, imm)));
  1822. return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG1, reg, (argw & 0x3fc) >> 2));
  1823. }
  1824. }
  1825. compiler->cache_arg = arg;
  1826. compiler->cache_argw = argw;
  1827. if (arg & REG_MASK) {
  1828. FAIL_IF(load_immediate(compiler, TMP_REG1, argw));
  1829. FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, arg & REG_MASK, reg_map[TMP_REG1])));
  1830. }
  1831. else
  1832. FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
  1833. return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG3, reg, 0));
  1834. }
  1835. static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
  1836. sljit_si dst, sljit_sw dstw,
  1837. sljit_si src, sljit_sw srcw)
  1838. {
  1839. if (src & SLJIT_MEM) {
  1840. FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src, srcw));
  1841. src = TMP_FREG1;
  1842. }
  1843. FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_S32_F32, op & SLJIT_SINGLE_OP, TMP_FREG1, src, 0)));
  1844. if (dst == SLJIT_UNUSED)
  1845. return SLJIT_SUCCESS;
  1846. if (FAST_IS_REG(dst))
  1847. return push_inst(compiler, VMOV | (1 << 20) | RD(dst) | (TMP_FREG1 << 16));
  1848. /* Store the integer value from a VFP register. */
  1849. return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw);
  1850. }
  1851. static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
  1852. sljit_si dst, sljit_sw dstw,
  1853. sljit_si src, sljit_sw srcw)
  1854. {
  1855. sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
  1856. if (FAST_IS_REG(src))
  1857. FAIL_IF(push_inst(compiler, VMOV | RD(src) | (TMP_FREG1 << 16)));
  1858. else if (src & SLJIT_MEM) {
  1859. /* Load the integer value into a VFP register. */
  1860. FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw));
  1861. }
  1862. else {
  1863. FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
  1864. FAIL_IF(push_inst(compiler, VMOV | RD(TMP_REG1) | (TMP_FREG1 << 16)));
  1865. }
  1866. FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F32_S32, op & SLJIT_SINGLE_OP, dst_r, TMP_FREG1, 0)));
  1867. if (dst & SLJIT_MEM)
  1868. return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), TMP_FREG1, dst, dstw);
  1869. return SLJIT_SUCCESS;
  1870. }
  1871. static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
  1872. sljit_si src1, sljit_sw src1w,
  1873. sljit_si src2, sljit_sw src2w)
  1874. {
  1875. if (src1 & SLJIT_MEM) {
  1876. FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src1, src1w));
  1877. src1 = TMP_FREG1;
  1878. }
  1879. if (src2 & SLJIT_MEM) {
  1880. FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src2, src2w));
  1881. src2 = TMP_FREG2;
  1882. }
  1883. FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCMP_F32, op & SLJIT_SINGLE_OP, src1, src2, 0)));
  1884. return push_inst(compiler, VMRS);
  1885. }
  1886. SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
  1887. sljit_si dst, sljit_sw dstw,
  1888. sljit_si src, sljit_sw srcw)
  1889. {
  1890. sljit_si dst_r;
  1891. CHECK_ERROR();
  1892. compiler->cache_arg = 0;
  1893. compiler->cache_argw = 0;
  1894. if (GET_OPCODE(op) != SLJIT_CONVD_FROMS)
  1895. op ^= SLJIT_SINGLE_OP;
  1896. SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100), float_transfer_bit_error);
  1897. SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
  1898. dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
  1899. if (src & SLJIT_MEM) {
  1900. FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, dst_r, src, srcw));
  1901. src = dst_r;
  1902. }
  1903. switch (GET_OPCODE(op)) {
  1904. case SLJIT_DMOV:
  1905. if (src != dst_r) {
  1906. if (dst_r != TMP_FREG1)
  1907. FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, op & SLJIT_SINGLE_OP, dst_r, src, 0)));
  1908. else
  1909. dst_r = src;
  1910. }
  1911. break;
  1912. case SLJIT_DNEG:
  1913. FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VNEG_F32, op & SLJIT_SINGLE_OP, dst_r, src, 0)));
  1914. break;
  1915. case SLJIT_DABS:
  1916. FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_SINGLE_OP, dst_r, src, 0)));
  1917. break;
  1918. case SLJIT_CONVD_FROMS:
  1919. FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F64_F32, op & SLJIT_SINGLE_OP, dst_r, src, 0)));
  1920. op ^= SLJIT_SINGLE_OP;
  1921. break;
  1922. }
  1923. if (dst & SLJIT_MEM)
  1924. return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), dst_r, dst, dstw);
  1925. return SLJIT_SUCCESS;
  1926. }
  1927. SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
  1928. sljit_si dst, sljit_sw dstw,
  1929. sljit_si src1, sljit_sw src1w,
  1930. sljit_si src2, sljit_sw src2w)
  1931. {
  1932. sljit_si dst_r;
  1933. CHECK_ERROR();
  1934. CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
  1935. ADJUST_LOCAL_OFFSET(dst, dstw);
  1936. ADJUST_LOCAL_OFFSET(src1, src1w);
  1937. ADJUST_LOCAL_OFFSET(src2, src2w);
  1938. compiler->cache_arg = 0;
  1939. compiler->cache_argw = 0;
  1940. op ^= SLJIT_SINGLE_OP;
  1941. dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
  1942. if (src2 & SLJIT_MEM) {
  1943. FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src2, src2w));
  1944. src2 = TMP_FREG2;
  1945. }
  1946. if (src1 & SLJIT_MEM) {
  1947. FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src1, src1w));
  1948. src1 = TMP_FREG1;
  1949. }
  1950. switch (GET_OPCODE(op)) {
  1951. case SLJIT_DADD:
  1952. FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VADD_F32, op & SLJIT_SINGLE_OP, dst_r, src2, src1)));
  1953. break;
  1954. case SLJIT_DSUB:
  1955. FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VSUB_F32, op & SLJIT_SINGLE_OP, dst_r, src2, src1)));
  1956. break;
  1957. case SLJIT_DMUL:
  1958. FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMUL_F32, op & SLJIT_SINGLE_OP, dst_r, src2, src1)));
  1959. break;
  1960. case SLJIT_DDIV:
  1961. FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VDIV_F32, op & SLJIT_SINGLE_OP, dst_r, src2, src1)));
  1962. break;
  1963. }
  1964. if (dst_r == TMP_FREG1)
  1965. FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), TMP_FREG1, dst, dstw));
  1966. return SLJIT_SUCCESS;
  1967. }
  /* Remove the FPU encoding helper macros so they are not visible to the code that follows. */
  1968. #undef FPU_LOAD
  1969. #undef EMIT_FPU_DATA_TRANSFER
  1970. #undef EMIT_FPU_OPERATION
  1971. /* --------------------------------------------------------------------- */
  1972. /* Other instructions */
  1973. /* --------------------------------------------------------------------- */
  1974. SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw)
  1975. {
  1976. CHECK_ERROR();
  1977. CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
  1978. ADJUST_LOCAL_OFFSET(dst, dstw);
  1979. /* For UNUSED dst. Uncommon, but possible. */
  1980. if (dst == SLJIT_UNUSED)
  1981. return SLJIT_SUCCESS;
  1982. if (FAST_IS_REG(dst))
  1983. return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, RM(TMP_REG3)));
  1984. /* Memory. */
  1985. if (getput_arg_fast(compiler, WORD_DATA, TMP_REG3, dst, dstw))
  1986. return compiler->error;
  1987. /* TMP_REG3 is used for caching. */
  1988. FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG2, SLJIT_UNUSED, RM(TMP_REG3))));
  1989. compiler->cache_arg = 0;
  1990. compiler->cache_argw = 0;
  1991. return getput_arg(compiler, WORD_DATA, TMP_REG2, dst, dstw, 0, 0);
  1992. }
  1993. SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw)
  1994. {
  1995. CHECK_ERROR();
  1996. CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
  1997. ADJUST_LOCAL_OFFSET(src, srcw);
  1998. if (FAST_IS_REG(src))
  1999. FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(src))));
  2000. else if (src & SLJIT_MEM) {
  2001. if (getput_arg_fast(compiler, WORD_DATA | LOAD_DATA, TMP_REG3, src, srcw))
  2002. FAIL_IF(compiler->error);
  2003. else {
  2004. compiler->cache_arg = 0;
  2005. compiler->cache_argw = 0;
  2006. FAIL_IF(getput_arg(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw, 0, 0));
  2007. FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(TMP_REG2))));
  2008. }
  2009. }
  2010. else if (src & SLJIT_IMM)
  2011. FAIL_IF(load_immediate(compiler, TMP_REG3, srcw));
  2012. return push_inst(compiler, BLX | RM(TMP_REG3));
  2013. }
  2014. /* --------------------------------------------------------------------- */
  2015. /* Conditional instructions */
  2016. /* --------------------------------------------------------------------- */
  2017. static sljit_uw get_cc(sljit_si type)
  2018. {
  2019. switch (type) {
  2020. case SLJIT_EQUAL:
  2021. case SLJIT_MUL_NOT_OVERFLOW:
  2022. case SLJIT_D_EQUAL:
  2023. return 0x00000000;
  2024. case SLJIT_NOT_EQUAL:
  2025. case SLJIT_MUL_OVERFLOW:
  2026. case SLJIT_D_NOT_EQUAL:
  2027. return 0x10000000;
  2028. case SLJIT_LESS:
  2029. case SLJIT_D_LESS:
  2030. return 0x30000000;
  2031. case SLJIT_GREATER_EQUAL:
  2032. case SLJIT_D_GREATER_EQUAL:
  2033. return 0x20000000;
  2034. case SLJIT_GREATER:
  2035. case SLJIT_D_GREATER:
  2036. return 0x80000000;
  2037. case SLJIT_LESS_EQUAL:
  2038. case SLJIT_D_LESS_EQUAL:
  2039. return 0x90000000;
  2040. case SLJIT_SIG_LESS:
  2041. return 0xb0000000;
  2042. case SLJIT_SIG_GREATER_EQUAL:
  2043. return 0xa0000000;
  2044. case SLJIT_SIG_GREATER:
  2045. return 0xc0000000;
  2046. case SLJIT_SIG_LESS_EQUAL:
  2047. return 0xd0000000;
  2048. case SLJIT_OVERFLOW:
  2049. case SLJIT_D_UNORDERED:
  2050. return 0x60000000;
  2051. case SLJIT_NOT_OVERFLOW:
  2052. case SLJIT_D_ORDERED:
  2053. return 0x70000000;
  2054. default:
  2055. SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL3);
  2056. return 0xe0000000;
  2057. }
  2058. }
  2059. SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
  2060. {
  2061. struct sljit_label *label;
  2062. CHECK_ERROR_PTR();
  2063. CHECK_PTR(check_sljit_emit_label(compiler));
  2064. if (compiler->last_label && compiler->last_label->size == compiler->size)
  2065. return compiler->last_label;
  2066. label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
  2067. PTR_FAIL_IF(!label);
  2068. set_label(label, compiler);
  2069. return label;
  2070. }
  2071. SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
  2072. {
  2073. struct sljit_jump *jump;
  2074. CHECK_ERROR_PTR();
  2075. CHECK_PTR(check_sljit_emit_jump(compiler, type));
  2076. jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
  2077. PTR_FAIL_IF(!jump);
  2078. set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
  2079. type &= 0xff;
  2080. /* In ARM, we don't need to touch the arguments. */
  2081. #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
  2082. if (type >= SLJIT_FAST_CALL)
  2083. PTR_FAIL_IF(prepare_blx(compiler));
  2084. PTR_FAIL_IF(push_inst_with_unique_literal(compiler, ((EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0,
  2085. type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0)) & ~COND_MASK) | get_cc(type), 0));
  2086. if (jump->flags & SLJIT_REWRITABLE_JUMP) {
  2087. jump->addr = compiler->size;
  2088. compiler->patches++;
  2089. }
  2090. if (type >= SLJIT_FAST_CALL) {
  2091. jump->flags |= IS_BL;
  2092. PTR_FAIL_IF(emit_blx(compiler));
  2093. }
  2094. if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
  2095. jump->addr = compiler->size;
  2096. #else
  2097. if (type >= SLJIT_FAST_CALL)
  2098. jump->flags |= IS_BL;
  2099. PTR_FAIL_IF(emit_imm(compiler, TMP_REG1, 0));
  2100. PTR_FAIL_IF(push_inst(compiler, (((type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)) & ~COND_MASK) | get_cc(type)));
  2101. jump->addr = compiler->size;
  2102. #endif
  2103. return jump;
  2104. }
  2105. SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
  2106. {
  2107. struct sljit_jump *jump;
  2108. CHECK_ERROR();
  2109. CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
  2110. ADJUST_LOCAL_OFFSET(src, srcw);
  2111. /* In ARM, we don't need to touch the arguments. */
  2112. if (!(src & SLJIT_IMM)) {
  2113. if (FAST_IS_REG(src))
  2114. return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(src));
  2115. SLJIT_ASSERT(src & SLJIT_MEM);
  2116. FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw));
  2117. return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG2));
  2118. }
  2119. jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
  2120. FAIL_IF(!jump);
  2121. set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0));
  2122. jump->u.target = srcw;
  2123. #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
  2124. if (type >= SLJIT_FAST_CALL)
  2125. FAIL_IF(prepare_blx(compiler));
  2126. FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0), 0));
  2127. if (type >= SLJIT_FAST_CALL)
  2128. FAIL_IF(emit_blx(compiler));
  2129. #else
  2130. FAIL_IF(emit_imm(compiler, TMP_REG1, 0));
  2131. FAIL_IF(push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)));
  2132. #endif
  2133. jump->addr = compiler->size;
  2134. return SLJIT_SUCCESS;
  2135. }
  2136. SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
  2137. sljit_si dst, sljit_sw dstw,
  2138. sljit_si src, sljit_sw srcw,
  2139. sljit_si type)
  2140. {
  2141. sljit_si dst_r, flags = GET_ALL_FLAGS(op);
  2142. sljit_uw cc, ins;
  2143. CHECK_ERROR();
  2144. CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
  2145. ADJUST_LOCAL_OFFSET(dst, dstw);
  2146. ADJUST_LOCAL_OFFSET(src, srcw);
  2147. if (dst == SLJIT_UNUSED)
  2148. return SLJIT_SUCCESS;
  2149. op = GET_OPCODE(op);
  2150. cc = get_cc(type & 0xff);
  2151. dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
  2152. if (op < SLJIT_ADD) {
  2153. FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 0)));
  2154. FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 1) & ~COND_MASK) | cc));
  2155. return (dst_r == TMP_REG2) ? emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw) : SLJIT_SUCCESS;
  2156. }
  2157. ins = (op == SLJIT_AND ? AND_DP : (op == SLJIT_OR ? ORR_DP : EOR_DP));
  2158. if ((op == SLJIT_OR || op == SLJIT_XOR) && FAST_IS_REG(dst) && dst == src) {
  2159. FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst, dst, SRC2_IMM | 1) & ~COND_MASK) | cc));
  2160. /* The condition must always be set, even if the ORR/EOR is not executed above. */
  2161. return (flags & SLJIT_SET_E) ? push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG1, SLJIT_UNUSED, RM(dst))) : SLJIT_SUCCESS;
  2162. }
  2163. compiler->cache_arg = 0;
  2164. compiler->cache_argw = 0;
  2165. if (src & SLJIT_MEM) {
  2166. FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
  2167. src = TMP_REG1;
  2168. srcw = 0;
  2169. } else if (src & SLJIT_IMM) {
  2170. FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
  2171. src = TMP_REG1;
  2172. srcw = 0;
  2173. }
  2174. FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_r, src, SRC2_IMM | 1) & ~COND_MASK) | cc));
  2175. FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_r, src, SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000)));
  2176. if (dst_r == TMP_REG2)
  2177. FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG2, dst, dstw, 0, 0));
  2178. return (flags & SLJIT_SET_E) ? push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG1, SLJIT_UNUSED, RM(dst_r))) : SLJIT_SUCCESS;
  2179. }
  2180. SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
  2181. {
  2182. struct sljit_const *const_;
  2183. sljit_si reg;
  2184. CHECK_ERROR_PTR();
  2185. CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
  2186. ADJUST_LOCAL_OFFSET(dst, dstw);
  2187. const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
  2188. PTR_FAIL_IF(!const_);
  2189. reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;
  2190. #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
  2191. PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, reg, TMP_PC, 0), init_value));
  2192. compiler->patches++;
  2193. #else
  2194. PTR_FAIL_IF(emit_imm(compiler, reg, init_value));
  2195. #endif
  2196. set_const(const_, compiler);
  2197. if (dst & SLJIT_MEM)
  2198. PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw));
  2199. return const_;
  2200. }
  2201. SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
  2202. {
  2203. inline_set_jump_addr(addr, new_addr, 1);
  2204. }
  2205. SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
  2206. {
  2207. inline_set_const(addr, new_constant, 1);
  2208. }