decode.c 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266
  1. /* udis86 - libudis86/decode.c
  2. *
  3. * Copyright (c) 2002-2009 Vivek Thampi
  4. * All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without modification,
  7. * are permitted provided that the following conditions are met:
  8. *
  9. * * Redistributions of source code must retain the above copyright notice,
  10. * this list of conditions and the following disclaimer.
  11. * * Redistributions in binary form must reproduce the above copyright notice,
  12. * this list of conditions and the following disclaimer in the documentation
  13. * and/or other materials provided with the distribution.
  14. *
  15. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  16. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  17. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  18. * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
  19. * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  20. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  21. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
  22. * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  23. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  24. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. #include "udint.h"
  27. #include "types.h"
  28. #include "extern.h"
  29. #include "decode.h"
  30. #ifndef __UD_STANDALONE__
  31. # include <string.h>
  32. #endif /* __UD_STANDALONE__ */
/* The max number of prefixes to an instruction */
#define MAX_PREFIXES 15

/* rex prefix bits: W (64-bit operand), R (modrm.reg ext),
 * X (sib.index ext), B (modrm.rm / sib.base ext) */
#define REX_W(r) ( ( 0xF & ( r ) ) >> 3 )
#define REX_R(r) ( ( 0x7 & ( r ) ) >> 2 )
#define REX_X(r) ( ( 0x3 & ( r ) ) >> 1 )
#define REX_B(r) ( ( 0x1 & ( r ) ) >> 0 )
/* build a w/r/x/b mask of the rex bits an opcode-table entry allows */
#define REX_PFX_MASK(n) ( ( P_REXW(n) << 3 ) | \
                          ( P_REXR(n) << 2 ) | \
                          ( P_REXX(n) << 1 ) | \
                          ( P_REXB(n) << 0 ) )

/* scale-index-base bits */
#define SIB_S(b) ( ( b ) >> 6 )
#define SIB_I(b) ( ( ( b ) >> 3 ) & 7 )
#define SIB_B(b) ( ( b ) & 7 )

/* modrm bits (NNN is the opcode-extension view of the reg field) */
#define MODRM_REG(b) ( ( ( b ) >> 3 ) & 7 )
#define MODRM_NNN(b) ( ( ( b ) >> 3 ) & 7 )
#define MODRM_MOD(b) ( ( ( b ) >> 6 ) & 3 )
#define MODRM_RM(b) ( ( b ) & 7 )

static int decode_ext(struct ud *u, uint16_t ptr);
static int decode_opcode(struct ud *u);

enum reg_class { /* register classes */
  REGCLASS_GPR,
  REGCLASS_MMX,
  REGCLASS_CR,
  REGCLASS_DB,
  REGCLASS_SEG,
  REGCLASS_XMM
};
/*
 * inp_start
 *    Should be called before each de-code operation; resets the
 *    per-instruction byte counter.
 */
static void
inp_start(struct ud *u)
{
  u->inp_ctr = 0;
}
/*
 * inp_peek
 *    Return the next input byte without consuming it. In buffered mode
 *    the byte at the current buffer index is returned; in hook mode a
 *    previously peeked byte is reused, otherwise one byte is pulled from
 *    the hook and cached in u->inp_peek. On end-of-input, sets inp_end,
 *    raises a decode error and returns 0.
 */
static uint8_t
inp_peek(struct ud *u)
{
  if (u->inp_end == 0) {
    if (u->inp_buf != NULL) {
      if (u->inp_buf_index < u->inp_buf_size) {
        return u->inp_buf[u->inp_buf_index];
      }
    } else if (u->inp_peek != UD_EOI) {
      /* a byte was already peeked and cached; reuse it */
      return u->inp_peek;
    } else {
      int c;
      if ((c = u->inp_hook(u)) != UD_EOI) {
        /* cache the byte so the following inp_next() consumes it */
        u->inp_peek = c;
        return u->inp_peek;
      }
    }
  }
  /* all paths that fall through here mean no more input */
  u->inp_end = 1;
  UDERR(u, "byte expected, eoi received\n");
  return 0;
}
/*
 * inp_next
 *    Consume and return the next input byte, updating inp_curr and the
 *    per-instruction counter inp_ctr. In hook mode a cached peek byte is
 *    consumed first, and the byte is also recorded in the inp_sess
 *    session buffer. On end-of-input, sets inp_end, raises a decode
 *    error and returns 0.
 */
static uint8_t
inp_next(struct ud *u)
{
  if (u->inp_end == 0) {
    if (u->inp_buf != NULL) {
      if (u->inp_buf_index < u->inp_buf_size) {
        u->inp_ctr++;
        return (u->inp_curr = u->inp_buf[u->inp_buf_index++]);
      }
    } else {
      /* use the cached peek byte if valid, else pull from the hook */
      int c = u->inp_peek;
      if (c != UD_EOI || (c = u->inp_hook(u)) != UD_EOI) {
        u->inp_peek = UD_EOI; /* peek cache now consumed */
        u->inp_curr = c;
        u->inp_sess[u->inp_ctr++] = u->inp_curr;
        return u->inp_curr;
      }
    }
  }
  u->inp_end = 1;
  UDERR(u, "byte expected, eoi received\n");
  return 0;
}
/*
 * inp_curr
 *    Return the most recently consumed input byte (does not advance).
 */
static uint8_t
inp_curr(struct ud *u)
{
  return u->inp_curr;
}
/*
 * inp_uint8
 * int_uint16
 * int_uint32
 * int_uint64
 *    Load little-endian values from input
 */
static uint8_t
inp_uint8(struct ud* u)
{
  return inp_next(u);
}
  134. static uint16_t
  135. inp_uint16(struct ud* u)
  136. {
  137. uint16_t r, ret;
  138. ret = inp_next(u);
  139. r = inp_next(u);
  140. return ret | (r << 8);
  141. }
  142. static uint32_t
  143. inp_uint32(struct ud* u)
  144. {
  145. uint32_t r, ret;
  146. ret = inp_next(u);
  147. r = inp_next(u);
  148. ret = ret | (r << 8);
  149. r = inp_next(u);
  150. ret = ret | (r << 16);
  151. r = inp_next(u);
  152. return ret | (r << 24);
  153. }
  154. static uint64_t
  155. inp_uint64(struct ud* u)
  156. {
  157. uint64_t r, ret;
  158. ret = inp_next(u);
  159. r = inp_next(u);
  160. ret = ret | (r << 8);
  161. r = inp_next(u);
  162. ret = ret | (r << 16);
  163. r = inp_next(u);
  164. ret = ret | (r << 24);
  165. r = inp_next(u);
  166. ret = ret | (r << 32);
  167. r = inp_next(u);
  168. ret = ret | (r << 40);
  169. r = inp_next(u);
  170. ret = ret | (r << 48);
  171. r = inp_next(u);
  172. return ret | (r << 56);
  173. }
  174. static UD_INLINE int
  175. eff_opr_mode(int dis_mode, int rex_w, int pfx_opr)
  176. {
  177. if (dis_mode == 64) {
  178. return rex_w ? 64 : (pfx_opr ? 16 : 32);
  179. } else if (dis_mode == 32) {
  180. return pfx_opr ? 16 : 32;
  181. } else {
  182. UD_ASSERT(dis_mode == 16);
  183. return pfx_opr ? 32 : 16;
  184. }
  185. }
  186. static UD_INLINE int
  187. eff_adr_mode(int dis_mode, int pfx_adr)
  188. {
  189. if (dis_mode == 64) {
  190. return pfx_adr ? 32 : 64;
  191. } else if (dis_mode == 32) {
  192. return pfx_adr ? 16 : 32;
  193. } else {
  194. UD_ASSERT(dis_mode == 16);
  195. return pfx_adr ? 32 : 16;
  196. }
  197. }
/*
 * decode_prefixes
 *
 *    Extracts instruction prefixes. Consumes bytes until a non-prefix
 *    byte is reached; that final byte (the opcode, or its first byte)
 *    is left as the current byte (inp_curr). In 64-bit mode, rex bytes
 *    (0x40-0x4F) are consumed as prefixes, but only one that is the
 *    immediately-last prefix takes effect.
 */
static int
decode_prefixes(struct ud *u)
{
  int done = 0;
  uint8_t curr = 0, last = 0;
  UD_RETURN_ON_ERROR(u);
  do {
    last = curr;
    curr = inp_next(u);
    UD_RETURN_ON_ERROR(u);
    /* bail out if we exceed the architectural instruction limit */
    if (u->inp_ctr == MAX_INSN_LENGTH) {
      UD_RETURN_WITH_ERROR(u, "max instruction length");
    }
    switch (curr)
    {
    case 0x2E:
      u->pfx_seg = UD_R_CS;
      break;
    case 0x36:
      u->pfx_seg = UD_R_SS;
      break;
    case 0x3E:
      u->pfx_seg = UD_R_DS;
      break;
    case 0x26:
      u->pfx_seg = UD_R_ES;
      break;
    case 0x64:
      u->pfx_seg = UD_R_FS;
      break;
    case 0x65:
      u->pfx_seg = UD_R_GS;
      break;
    case 0x67: /* address-size override prefix */
      u->pfx_adr = 0x67;
      break;
    case 0xF0:
      u->pfx_lock = 0xF0;
      break;
    case 0x66: /* operand-size override prefix */
      u->pfx_opr = 0x66;
      break;
    case 0xF2: /* string prefix; repne/repnz, resolved later */
      u->pfx_str = 0xf2;
      break;
    case 0xF3: /* string prefix; rep/repe/repz, resolved later */
      u->pfx_str = 0xf3;
      break;
    default:
      /* consume if rex */
      done = (u->dis_mode == 64 && (curr & 0xF0) == 0x40) ? 0 : 1;
      break;
    }
  } while (!done);
  /* rex prefixes in 64bit mode, must be the last prefix */
  if (u->dis_mode == 64 && (last & 0xF0) == 0x40) {
    u->pfx_rex = last;
  }
  return 0;
}
/*
 * vex_l, vex_w
 *    Return the vex.L and vex.W bits
 */
static UD_INLINE uint8_t
vex_l(const struct ud *u)
{
  UD_ASSERT(u->vex_op != 0);
  /* vex.L lives in the last vex byte: b2 for 3-byte (c4), b1 for 2-byte (c5) */
  return ((u->vex_op == 0xc4 ? u->vex_b2 : u->vex_b1) >> 2) & 1;
}
static UD_INLINE uint8_t
vex_w(const struct ud *u)
{
  UD_ASSERT(u->vex_op != 0);
  /* only the 3-byte (c4) form carries vex.W; 2-byte form implies 0 */
  return u->vex_op == 0xc4 ? ((u->vex_b2 >> 7) & 1) : 0;
}
/*
 * modrm
 *    Return the modrm byte, fetching it from input on first use and
 *    caching it (with its offset into the instruction) thereafter.
 */
static UD_INLINE uint8_t
modrm(struct ud * u)
{
  if ( !u->have_modrm ) {
    u->modrm = inp_next( u );
    u->modrm_offset = (uint8_t) (u->inp_ctr - 1);
    u->have_modrm = 1;
  }
  return u->modrm;
}
  289. static unsigned int
  290. resolve_operand_size(const struct ud* u, ud_operand_size_t osize)
  291. {
  292. switch (osize) {
  293. case SZ_V:
  294. return u->opr_mode;
  295. case SZ_Z:
  296. return u->opr_mode == 16 ? 16 : 32;
  297. case SZ_Y:
  298. return u->opr_mode == 16 ? 32 : u->opr_mode;
  299. case SZ_RDQ:
  300. return u->dis_mode == 64 ? 64 : 32;
  301. case SZ_X:
  302. UD_ASSERT(u->vex_op != 0);
  303. return (P_VEXL(u->itab_entry->prefix) && vex_l(u)) ? SZ_QQ : SZ_DQ;
  304. default:
  305. return osize;
  306. }
  307. }
  308. static int resolve_mnemonic( struct ud* u )
  309. {
  310. /* resolve 3dnow weirdness. */
  311. if ( u->mnemonic == UD_I3dnow ) {
  312. u->mnemonic = ud_itab[ u->le->table[ inp_curr( u ) ] ].mnemonic;
  313. }
  314. /* SWAPGS is only valid in 64bits mode */
  315. if ( u->mnemonic == UD_Iswapgs && u->dis_mode != 64 ) {
  316. UDERR(u, "swapgs invalid in 64bits mode\n");
  317. return -1;
  318. }
  319. if (u->mnemonic == UD_Ixchg) {
  320. if ((u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_AX &&
  321. u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_AX) ||
  322. (u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_EAX &&
  323. u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_EAX)) {
  324. u->operand[0].type = UD_NONE;
  325. u->operand[1].type = UD_NONE;
  326. u->mnemonic = UD_Inop;
  327. }
  328. }
  329. if (u->mnemonic == UD_Inop && u->pfx_repe) {
  330. u->pfx_repe = 0;
  331. u->mnemonic = UD_Ipause;
  332. }
  333. return 0;
  334. }
  335. /* -----------------------------------------------------------------------------
  336. * decode_a()- Decodes operands of the type seg:offset
  337. * -----------------------------------------------------------------------------
  338. */
  339. static void
  340. decode_a(struct ud* u, struct ud_operand *op)
  341. {
  342. if (u->opr_mode == 16) {
  343. /* seg16:off16 */
  344. op->type = UD_OP_PTR;
  345. op->size = 32;
  346. op->lval.ptr.off = inp_uint16(u);
  347. op->lval.ptr.seg = inp_uint16(u);
  348. } else {
  349. /* seg16:off32 */
  350. op->type = UD_OP_PTR;
  351. op->size = 48;
  352. op->lval.ptr.off = inp_uint32(u);
  353. op->lval.ptr.seg = inp_uint16(u);
  354. }
  355. }
  356. /* -----------------------------------------------------------------------------
  357. * decode_gpr() - Returns decoded General Purpose Register
  358. * -----------------------------------------------------------------------------
  359. */
  360. static enum ud_type
  361. decode_gpr(register struct ud* u, unsigned int s, unsigned char rm)
  362. {
  363. switch (s) {
  364. case 64:
  365. return UD_R_RAX + rm;
  366. case 32:
  367. return UD_R_EAX + rm;
  368. case 16:
  369. return UD_R_AX + rm;
  370. case 8:
  371. if (u->dis_mode == 64 && u->pfx_rex) {
  372. if (rm >= 4)
  373. return UD_R_SPL + (rm-4);
  374. return UD_R_AL + rm;
  375. } else return UD_R_AL + rm;
  376. case 0:
  377. /* invalid size in case of a decode error */
  378. UD_ASSERT(u->error);
  379. return UD_NONE;
  380. default:
  381. UD_ASSERT(!"invalid operand size");
  382. return UD_NONE;
  383. }
  384. }
/*
 * decode_reg
 *    Fill opr with a register operand of the given class (reg_class),
 *    register number and symbolic size. Raises a decode error for
 *    segment register numbers above 5.
 */
static void
decode_reg(struct ud *u,
           struct ud_operand *opr,
           int type,
           int num,
           int size)
{
  int reg;
  size = resolve_operand_size(u, size);
  switch (type) {
    case REGCLASS_GPR : reg = decode_gpr(u, size, num); break;
    case REGCLASS_MMX : reg = UD_R_MM0 + (num & 7); break;
    case REGCLASS_XMM :
      /* ymm bank when the resolved size is a double-quadword pair */
      reg = num + (size == SZ_QQ ? UD_R_YMM0 : UD_R_XMM0);
      break;
    case REGCLASS_CR : reg = UD_R_CR0 + num; break;
    case REGCLASS_DB : reg = UD_R_DR0 + num; break;
    case REGCLASS_SEG : {
      /*
       * Only 6 segment registers, anything else is an error.
       */
      if ((num & 7) > 5) {
        UDERR(u, "invalid segment register value\n");
        return;
      } else {
        reg = UD_R_ES + (num & 7);
      }
      break;
    }
    default:
      UD_ASSERT(!"invalid register type");
      return;
  }
  opr->type = UD_OP_REG;
  opr->base = reg;
  opr->size = size;
}
/*
 * decode_imm
 *
 *    Decode Immediate values. The symbolic size is resolved first;
 *    a byte immediate is stored signed (sbyte), wider ones unsigned.
 *    Unknown sizes leave lval untouched.
 */
static void
decode_imm(struct ud* u, unsigned int size, struct ud_operand *op)
{
  op->size = resolve_operand_size(u, size);
  op->type = UD_OP_IMM;
  switch (op->size) {
  case  8: op->lval.sbyte  = inp_uint8(u);  break;
  case 16: op->lval.uword  = inp_uint16(u); break;
  case 32: op->lval.udword = inp_uint32(u); break;
  case 64: op->lval.uqword = inp_uint64(u); break;
  default: return;
  }
}
  440. /*
  441. * decode_mem_disp
  442. *
  443. * Decode mem address displacement.
  444. */
  445. static void
  446. decode_mem_disp(struct ud* u, unsigned int size, struct ud_operand *op)
  447. {
  448. switch (size) {
  449. case 8:
  450. op->offset = 8;
  451. op->lval.ubyte = inp_uint8(u);
  452. break;
  453. case 16:
  454. op->offset = 16;
  455. op->lval.uword = inp_uint16(u);
  456. break;
  457. case 32:
  458. op->offset = 32;
  459. op->lval.udword = inp_uint32(u);
  460. break;
  461. case 64:
  462. op->offset = 64;
  463. op->lval.uqword = inp_uint64(u);
  464. break;
  465. default:
  466. return;
  467. }
  468. }
/*
 * decode_modrm_reg
 *
 *    Decodes reg field of mod/rm byte, extended to 4 bits by rex.r.
 *
 */
static UD_INLINE void
decode_modrm_reg(struct ud         *u,
                 struct ud_operand *operand,
                 unsigned int       type,
                 unsigned int       size)
{
  uint8_t reg = (REX_R(u->_rex) << 3) | MODRM_REG(modrm(u));
  decode_reg(u, operand, type, reg, size);
}
/*
 * decode_modrm_rm
 *
 *    Decodes rm field of mod/rm byte. mod == 3 selects a register
 *    operand; anything else builds a memory operand (base, index,
 *    scale, displacement) according to the effective address size.
 *
 */
static void
decode_modrm_rm(struct ud         *u,
                struct ud_operand *op,
                unsigned char      type,    /* register type */
                unsigned int       size)    /* operand size */
{
  size_t offset = 0;
  unsigned char mod, rm;

  /* get mod, r/m and reg fields */
  mod = MODRM_MOD(modrm(u));
  rm  = (REX_B(u->_rex) << 3) | MODRM_RM(modrm(u));

  /*
   * If mod is 11b, then the modrm.rm specifies a register.
   *
   */
  if (mod == 3) {
    decode_reg(u, op, type, rm, size);
    return;
  }

  /*
   * !11b => Memory Address
   */
  op->type = UD_OP_MEM;
  op->size = resolve_operand_size(u, size);

  if (u->adr_mode == 64) {
    op->base = UD_R_RAX + rm;
    if (mod == 1) {
      offset = 8;
    } else if (mod == 2) {
      offset = 32;
    } else if (mod == 0 && (rm & 7) == 5) {
      /* mod=00 rm=101 is rip-relative with disp32 */
      op->base = UD_R_RIP;
      offset = 32;
    } else {
      offset = 0;
    }
    /*
     * Scale-Index-Base (SIB)
     */
    if ((rm & 7) == 4) {
      inp_next(u);
      op->base  = UD_R_RAX + (SIB_B(inp_curr(u)) | (REX_B(u->_rex) << 3));
      op->index = UD_R_RAX + (SIB_I(inp_curr(u)) | (REX_X(u->_rex) << 3));
      /* special conditions for base reference */
      if (op->index == UD_R_RSP) {
        /* index 100b means "no index" */
        op->index = UD_NONE;
        op->scale = UD_NONE;
      } else {
        /* "& ~1" turns a scale factor of 1 into 0 (i.e. unscaled) */
        op->scale = (1 << SIB_S(inp_curr(u))) & ~1;
      }
      if (op->base == UD_R_RBP || op->base == UD_R_R13) {
        /* base 101b: no base with disp32 when mod=00, else disp8/32 */
        if (mod == 0) {
          op->base = UD_NONE;
        }
        if (mod == 1) {
          offset = 8;
        } else {
          offset = 32;
        }
      }
    } else {
      op->scale = UD_NONE;
      op->index = UD_NONE;
    }
  } else if (u->adr_mode == 32) {
    op->base = UD_R_EAX + rm;
    if (mod == 1) {
      offset = 8;
    } else if (mod == 2) {
      offset = 32;
    } else if (mod == 0 && rm == 5) {
      /* mod=00 rm=101 is absolute disp32, no base */
      op->base = UD_NONE;
      offset = 32;
    } else {
      offset = 0;
    }

    /* Scale-Index-Base (SIB) */
    if ((rm & 7) == 4) {
      inp_next(u);
      op->scale = (1 << SIB_S(inp_curr(u))) & ~1;
      /* NOTE(review): this path extends with u->pfx_rex while the
       * 64-bit path above uses u->_rex — presumably intentional since
       * vex-derived rex bits only apply in 64-bit mode; confirm. */
      op->index = UD_R_EAX + (SIB_I(inp_curr(u)) | (REX_X(u->pfx_rex) << 3));
      op->base  = UD_R_EAX + (SIB_B(inp_curr(u)) | (REX_B(u->pfx_rex) << 3));

      if (op->index == UD_R_ESP) {
        /* index 100b means "no index" */
        op->index = UD_NONE;
        op->scale = UD_NONE;
      }

      /* special condition for base reference */
      if (op->base == UD_R_EBP) {
        if (mod == 0) {
          op->base = UD_NONE;
        }
        if (mod == 1) {
          offset = 8;
        } else {
          offset = 32;
        }
      }
    } else {
      op->scale = UD_NONE;
      op->index = UD_NONE;
    }
  } else {
    /* 16-bit addressing: fixed base/index pairs selected by rm */
    const unsigned int bases[]   = { UD_R_BX, UD_R_BX, UD_R_BP, UD_R_BP,
                                     UD_R_SI, UD_R_DI, UD_R_BP, UD_R_BX };
    const unsigned int indices[] = { UD_R_SI, UD_R_DI, UD_R_SI, UD_R_DI,
                                     UD_NONE, UD_NONE, UD_NONE, UD_NONE };
    op->base  = bases[rm & 7];
    op->index = indices[rm & 7];
    op->scale = UD_NONE;
    if (mod == 0 && rm == 6) {
      /* mod=00 rm=110 is absolute disp16, no base */
      offset = 16;
      op->base = UD_NONE;
    } else if (mod == 1) {
      offset = 8;
    } else if (mod == 2) {
      offset = 16;
    }
  }

  if (offset) {
    decode_mem_disp(u, offset, op);
  } else {
    op->offset = 0;
  }
}
/*
 * decode_moffset
 *    Decode offset-only memory operand (e.g. mov moffs forms): no
 *    base, index or scale; the displacement width is the effective
 *    address size.
 */
static void
decode_moffset(struct ud *u, unsigned int size, struct ud_operand *opr)
{
  opr->type  = UD_OP_MEM;
  opr->base  = UD_NONE;
  opr->index = UD_NONE;
  opr->scale = UD_NONE;
  opr->size  = resolve_operand_size(u, size);
  decode_mem_disp(u, u->adr_mode, opr);
}
/*
 * decode_vex_vvvv
 *    Decode the register operand encoded in the vex.vvvv field
 *    (stored in one's complement in the last vex byte).
 */
static void
decode_vex_vvvv(struct ud *u, struct ud_operand *opr, unsigned size)
{
  uint8_t vvvv;
  UD_ASSERT(u->vex_op != 0);
  vvvv = ((u->vex_op == 0xc4 ? u->vex_b2 : u->vex_b1) >> 3) & 0xf;
  /* undo the one's complement encoding */
  decode_reg(u, opr, REGCLASS_XMM, (0xf & ~vvvv), size);
}
/*
 * decode_vex_immreg
 *    Decode source operand encoded in immediate byte [7:4].
 *    Outside 64-bit mode only registers 0-7 are addressable, hence
 *    the narrower mask.
 */
static int
decode_vex_immreg(struct ud *u, struct ud_operand *opr, unsigned size)
{
  uint8_t imm  = inp_next(u);
  uint8_t mask = u->dis_mode == 64 ? 0xf : 0x7;
  UD_RETURN_ON_ERROR(u);
  UD_ASSERT(u->vex_op != 0);
  decode_reg(u, opr, REGCLASS_XMM, mask & (imm >> 4), size);
  return 0;
}
/*
 * decode_operand
 *
 *    Decodes a single operand, dispatching on the operand-code from
 *    the opcode table to the specialized decoders above.
 *    Returns the type of the operand (UD_NONE if none).
 */
static int
decode_operand(struct ud           *u,
               struct ud_operand   *operand,
               enum ud_operand_code type,
               unsigned int         size)
{
  operand->type = UD_NONE;
  operand->_oprcode = type;

  switch (type) {
    case OP_A :
      /* direct seg:offset pointer */
      decode_a(u, operand);
      break;
    case OP_MR:
      /* gpr/memory, with distinct sizes for the two cases */
      decode_modrm_rm(u, operand, REGCLASS_GPR,
                      MODRM_MOD(modrm(u)) == 3 ?
                        Mx_reg_size(size) : Mx_mem_size(size));
      break;
    case OP_F:
      u->br_far = 1;
      /* intended fall through */
    case OP_M:
      /* memory-only form; mod == 3 is a decode error (but decoding
       * continues with the error flag set) */
      if (MODRM_MOD(modrm(u)) == 3) {
        UDERR(u, "expected modrm.mod != 3\n");
      }
      /* intended fall through */
    case OP_E:
      /* gpr or memory via modrm.rm */
      decode_modrm_rm(u, operand, REGCLASS_GPR, size);
      break;
    case OP_G:
      /* gpr via modrm.reg */
      decode_modrm_reg(u, operand, REGCLASS_GPR, size);
      break;
    case OP_sI:
    case OP_I:
      decode_imm(u, size, operand);
      break;
    case OP_I1:
      /* implicit constant 1 (shift/rotate forms) */
      operand->type = UD_OP_CONST;
      operand->lval.udword = 1;
      break;
    case OP_N:
      /* mmx register-only form */
      if (MODRM_MOD(modrm(u)) != 3) {
        UDERR(u, "expected modrm.mod == 3\n");
      }
      /* intended fall through */
    case OP_Q:
      decode_modrm_rm(u, operand, REGCLASS_MMX, size);
      break;
    case OP_P:
      decode_modrm_reg(u, operand, REGCLASS_MMX, size);
      break;
    case OP_U:
      /* xmm register-only form */
      if (MODRM_MOD(modrm(u)) != 3) {
        UDERR(u, "expected modrm.mod == 3\n");
      }
      /* intended fall through */
    case OP_W:
      decode_modrm_rm(u, operand, REGCLASS_XMM, size);
      break;
    case OP_V:
      decode_modrm_reg(u, operand, REGCLASS_XMM, size);
      break;
    case OP_H:
      /* operand encoded in vex.vvvv */
      decode_vex_vvvv(u, operand, size);
      break;
    case OP_MU:
      decode_modrm_rm(u, operand, REGCLASS_XMM,
                      MODRM_MOD(modrm(u)) == 3 ?
                        Mx_reg_size(size) : Mx_mem_size(size));
      break;
    case OP_S:
      /* segment register via modrm.reg */
      decode_modrm_reg(u, operand, REGCLASS_SEG, size);
      break;
    case OP_O:
      /* offset-only memory operand */
      decode_moffset(u, size, operand);
      break;
    case OP_R0:
    case OP_R1:
    case OP_R2:
    case OP_R3:
    case OP_R4:
    case OP_R5:
    case OP_R6:
    case OP_R7:
      /* register encoded in the opcode byte, extended by rex.b */
      decode_reg(u, operand, REGCLASS_GPR,
                 (REX_B(u->_rex) << 3) | (type - OP_R0), size);
      break;
    case OP_AL:
    case OP_AX:
    case OP_eAX:
    case OP_rAX:
      /* implicit accumulator */
      decode_reg(u, operand, REGCLASS_GPR, 0, size);
      break;
    case OP_CL:
    case OP_CX:
    case OP_eCX:
      decode_reg(u, operand, REGCLASS_GPR, 1, size);
      break;
    case OP_DL:
    case OP_DX:
    case OP_eDX:
      decode_reg(u, operand, REGCLASS_GPR, 2, size);
      break;
    case OP_ES:
    case OP_CS:
    case OP_DS:
    case OP_SS:
    case OP_FS:
    case OP_GS:
      /* in 64bits mode, only fs and gs are allowed */
      if (u->dis_mode == 64) {
        if (type != OP_FS && type != OP_GS) {
          UDERR(u, "invalid segment register in 64bits\n");
        }
      }
      operand->type = UD_OP_REG;
      operand->base = (type - OP_ES) + UD_R_ES;
      operand->size = 16;
      break;
    case OP_J :
      /* relative branch target: decoded as immediate, retyped */
      decode_imm(u, size, operand);
      operand->type = UD_OP_JIMM;
      break ;
    case OP_R :
      /* gpr register-only form */
      if (MODRM_MOD(modrm(u)) != 3) {
        UDERR(u, "expected modrm.mod == 3\n");
      }
      decode_modrm_rm(u, operand, REGCLASS_GPR, size);
      break;
    case OP_C:
      /* control register */
      decode_modrm_reg(u, operand, REGCLASS_CR, size);
      break;
    case OP_D:
      /* debug register */
      decode_modrm_reg(u, operand, REGCLASS_DB, size);
      break;
    case OP_I3 :
      /* implicit constant 3 (int3) */
      operand->type = UD_OP_CONST;
      operand->lval.sbyte = 3;
      break;
    case OP_ST0:
    case OP_ST1:
    case OP_ST2:
    case OP_ST3:
    case OP_ST4:
    case OP_ST5:
    case OP_ST6:
    case OP_ST7:
      /* x87 stack register, always 80-bit */
      operand->type = UD_OP_REG;
      operand->base = (type - OP_ST0) + UD_R_ST0;
      operand->size = 80;
      break;
    case OP_L:
      /* register encoded in immediate byte [7:4] */
      decode_vex_immreg(u, operand, size);
      break;
    default :
      operand->type = UD_NONE;
      break;
  }
  return operand->type;
}
/*
 * decode_operands
 *
 *    Disassemble up to 3 operands of the current instruction being
 *    disassembled. By the end of the function, the operand fields
 *    of the ud structure will have been filled. Decoding stops at
 *    the first operand whose type resolves to UD_NONE.
 */
static int
decode_operands(struct ud* u)
{
  decode_operand(u, &u->operand[0],
                 u->itab_entry->operand1.type,
                 u->itab_entry->operand1.size);
  if (u->operand[0].type != UD_NONE) {
    decode_operand(u, &u->operand[1],
                   u->itab_entry->operand2.type,
                   u->itab_entry->operand2.size);
  }
  if (u->operand[1].type != UD_NONE) {
    decode_operand(u, &u->operand[2],
                   u->itab_entry->operand3.type,
                   u->itab_entry->operand3.size);
  }
  if (u->operand[2].type != UD_NONE) {
    decode_operand(u, &u->operand[3],
                   u->itab_entry->operand4.type,
                   u->itab_entry->operand4.size);
  }
  return 0;
}
  845. /* -----------------------------------------------------------------------------
  846. * clear_insn() - clear instruction structure
  847. * -----------------------------------------------------------------------------
  848. */
  849. static void
  850. clear_insn(register struct ud* u)
  851. {
  852. u->error = 0;
  853. u->pfx_seg = 0;
  854. u->pfx_opr = 0;
  855. u->pfx_adr = 0;
  856. u->pfx_lock = 0;
  857. u->pfx_repne = 0;
  858. u->pfx_rep = 0;
  859. u->pfx_repe = 0;
  860. u->pfx_rex = 0;
  861. u->pfx_str = 0;
  862. u->mnemonic = UD_Inone;
  863. u->itab_entry = NULL;
  864. u->have_modrm = 0;
  865. u->br_far = 0;
  866. u->vex_op = 0;
  867. u->_rex = 0;
  868. u->operand[0].type = UD_NONE;
  869. u->operand[1].type = UD_NONE;
  870. u->operand[2].type = UD_NONE;
  871. u->operand[3].type = UD_NONE;
  872. }
  873. static UD_INLINE int
  874. resolve_pfx_str(struct ud* u)
  875. {
  876. if (u->pfx_str == 0xf3) {
  877. if (P_STR(u->itab_entry->prefix)) {
  878. u->pfx_rep = 0xf3;
  879. } else {
  880. u->pfx_repe = 0xf3;
  881. }
  882. } else if (u->pfx_str == 0xf2) {
  883. u->pfx_repne = 0xf3;
  884. }
  885. return 0;
  886. }
/*
 * resolve_mode
 *    Compute the effective rex byte (_rex), operand mode and address
 *    mode for the current instruction from the disassembly mode, the
 *    collected prefixes and the opcode-table entry.
 *    Returns 0 on success, -1 on error.
 */
static int
resolve_mode( struct ud* u )
{
  int default64;
  /* if in error state, bail out */
  if ( u->error ) return -1;

  /* propagate prefix effects */
  if ( u->dis_mode == 64 ) {  /* set 64bit-mode flags */
    /* Check validity of instruction m64 */
    if ( P_INV64( u->itab_entry->prefix ) ) {
      UDERR(u, "instruction invalid in 64bits\n");
      return -1;
    }

    /* compute effective rex based on,
     *  - vex prefix (if any)
     *  - rex prefix (if any, and not vex)
     *  - allowed prefixes specified by the opcode map
     */
    if (u->vex_op == 0xc4) {
      /* vex has rex.rxb in 1's complement */
      u->_rex = ((~(u->vex_b1 >> 5) & 0x7) /* rex.0rxb */ |
                 ((u->vex_b2 >> 4) & 0x8)  /* rex.w000 */);
    } else if (u->vex_op == 0xc5) {
      /* vex has rex.r in 1's complement */
      u->_rex = (~(u->vex_b1 >> 5)) & 4;
    } else {
      UD_ASSERT(u->vex_op == 0);
      u->_rex = u->pfx_rex;
    }
    /* mask off rex bits the opcode-table entry does not honor */
    u->_rex &= REX_PFX_MASK(u->itab_entry->prefix);

    /* whether this instruction has a default operand size of
     * 64bit, also hardcoded into the opcode map.
     */
    default64 = P_DEF64( u->itab_entry->prefix );

    /* calculate effective operand size */
    if (REX_W(u->_rex)) {
      u->opr_mode = 64;
    } else if ( u->pfx_opr ) {
      u->opr_mode = 16;
    } else {
      /* unless the default opr size of instruction is 64,
       * the effective operand size in the absence of rex.w
       * prefix is 32.
       */
      u->opr_mode = default64 ? 64 : 32;
    }

    /* calculate effective address size */
    u->adr_mode = (u->pfx_adr) ? 32 : 64;
  } else if ( u->dis_mode == 32 ) { /* set 32bit-mode flags */
    u->opr_mode = ( u->pfx_opr ) ? 16 : 32;
    u->adr_mode = ( u->pfx_adr ) ? 16 : 32;
  } else if ( u->dis_mode == 16 ) { /* set 16bit-mode flags */
    u->opr_mode = ( u->pfx_opr ) ? 32 : 16;
    u->adr_mode = ( u->pfx_adr ) ? 32 : 16;
  }

  return 0;
}
/*
 * decode_insn
 *    Finish decoding from a resolved opcode-table index: bind the
 *    table entry and mnemonic, then run the prefix/mode/operand/
 *    mnemonic resolution pipeline. Returns 0 on success, -1 on error.
 */
static UD_INLINE int
decode_insn(struct ud *u, uint16_t ptr)
{
  /* the high bit marks a sub-table pointer, which is invalid here */
  UD_ASSERT((ptr & 0x8000) == 0);
  u->itab_entry = &ud_itab[ ptr ];
  u->mnemonic = u->itab_entry->mnemonic;
  return (resolve_pfx_str(u)  == 0 &&
          resolve_mode(u)     == 0 &&
          decode_operands(u)  == 0 &&
          resolve_mnemonic(u) == 0) ? 0 : -1;
}
  955. /*
  956. * decode_3dnow()
  957. *
  958. * Decoding 3dnow is a little tricky because of its strange opcode
  959. * structure. The final opcode disambiguation depends on the last
  960. * byte that comes after the operands have been decoded. Fortunately,
  961. * all 3dnow instructions have the same set of operand types. So we
  962. * go ahead and decode the instruction by picking an arbitrarily chosen
  963. * valid entry in the table, decode the operands, and read the final
964. byte to resolve the mnemonic.
  965. */
static UD_INLINE int
decode_3dnow(struct ud* u)
{
  uint16_t ptr;
  UD_ASSERT(u->le->type == UD_TAB__OPC_3DNOW);
  UD_ASSERT(u->le->table[0xc] != 0);
  /* decode using a stand-in table entry (slot 0xc, asserted valid above);
   * per the header comment, all 3dnow instructions share the same operand
   * types, so operand decoding is correct even though the mnemonic is
   * provisional. Errors are caught via u->error below. */
  decode_insn(u, u->le->table[0xc]);
  /* the disambiguating opcode byte follows the operand bytes */
  inp_next(u);
  if (u->error) {
    return -1;
  }
  /* resolve the real mnemonic from the trailing opcode byte */
  ptr = u->le->table[inp_curr(u)];
  UD_ASSERT((ptr & 0x8000) == 0);
  u->mnemonic = ud_itab[ptr].mnemonic;
  return 0;
}
  982. static int
  983. decode_ssepfx(struct ud *u)
  984. {
  985. uint8_t idx;
  986. uint8_t pfx;
  987. /*
  988. * String prefixes (f2, f3) take precedence over operand
  989. * size prefix (66).
  990. */
  991. pfx = u->pfx_str;
  992. if (pfx == 0) {
  993. pfx = u->pfx_opr;
  994. }
  995. idx = ((pfx & 0xf) + 1) / 2;
  996. if (u->le->table[idx] == 0) {
  997. idx = 0;
  998. }
  999. if (idx && u->le->table[idx] != 0) {
  1000. /*
  1001. * "Consume" the prefix as a part of the opcode, so it is no
  1002. * longer exported as an instruction prefix.
  1003. */
  1004. u->pfx_str = 0;
  1005. if (pfx == 0x66) {
  1006. /*
  1007. * consume "66" only if it was used for decoding, leaving
  1008. * it to be used as an operands size override for some
  1009. * simd instructions.
  1010. */
  1011. u->pfx_opr = 0;
  1012. }
  1013. }
  1014. return decode_ext(u, u->le->table[idx]);
  1015. }
/*
 * decode_vex()
 *
 * Resolve a c4/c5-prefixed opcode: either a VEX-encoded instruction
 * or (outside 64-bit mode, when modrm.mod != 3) the legacy LES/LDS
 * interpretation selected by table slot 0.
 */
static int
decode_vex(struct ud *u)
{
  uint8_t index;
  if (u->dis_mode != 64 && MODRM_MOD(inp_peek(u)) != 0x3) {
    /* 16/32-bit mode and next byte's mod field != 11b: this is not a
     * VEX prefix; slot 0 holds the non-VEX (LES/LDS) entry. */
    index = 0;
  } else {
    u->vex_op = inp_curr(u);  /* the c4/c5 byte itself */
    u->vex_b1 = inp_next(u);  /* first VEX payload byte */
    if (u->vex_op == 0xc4) {
      uint8_t pp, m;
      /* 3-byte vex */
      u->vex_b2 = inp_next(u);
      UD_RETURN_ON_ERROR(u);
      /* vex.m-mmmm selects the opcode map; only 1..3 are defined */
      m = u->vex_b1 & 0x1f;
      if (m == 0 || m > 3) {
        UD_RETURN_WITH_ERROR(u, "reserved vex.m-mmmm value");
      }
      /* vex.pp encodes the implied simd prefix (none/66/f3/f2) */
      pp = u->vex_b2 & 0x3;
      /* table index packs pp in the high bits, map in the low bits */
      index = (pp << 2) | m;
    } else {
      /* 2-byte vex: opcode map is implicitly 1; pp is in byte 1 */
      UD_ASSERT(u->vex_op == 0xc5);
      index = 0x1 | ((u->vex_b1 & 0x3) << 2);
    }
  }
  return decode_ext(u, u->le->table[index]);
}
  1044. /*
  1045. * decode_ext()
  1046. *
  1047. * Decode opcode extensions (if any)
  1048. */
static int
decode_ext(struct ud *u, uint16_t ptr)
{
  uint8_t idx = 0;
  /* bit 15 clear: ptr is a final instruction-table index, decode it */
  if ((ptr & 0x8000) == 0) {
    return decode_insn(u, ptr);
  }
  /* bit 15 set: the remaining bits index another lookup table; recurse
   * through it, picking the sub-entry by this table's discriminator. */
  u->le = &ud_lookup_table_list[(~0x8000 & ptr)];
  if (u->le->type == UD_TAB__OPC_3DNOW) {
    return decode_3dnow(u);
  }

  switch (u->le->type) {
  case UD_TAB__OPC_MOD:
    /* !11 = 0, 11 = 1 */
    idx = (MODRM_MOD(modrm(u)) + 1) / 4;
    break;
  /* disassembly mode/operand size/address size based tables.
   * 16 = 0, 32 = 1, 64 = 2
   */
  case UD_TAB__OPC_MODE:
    /* mode tables have only two slots: !64-bit = 0, 64-bit = 1 */
    idx = u->dis_mode != 64 ? 0 : 1;
    break;
  case UD_TAB__OPC_OSIZE:
    idx = eff_opr_mode(u->dis_mode, REX_W(u->pfx_rex), u->pfx_opr) / 32;
    break;
  case UD_TAB__OPC_ASIZE:
    idx = eff_adr_mode(u->dis_mode, u->pfx_adr) / 32;
    break;
  case UD_TAB__OPC_X87:
    /* x87 tables are indexed by the modrm byte offset from 0xc0 */
    idx = modrm(u) - 0xC0;
    break;
  case UD_TAB__OPC_VENDOR:
    /* slot 0 = amd, slot 1 = intel */
    if (u->vendor == UD_VENDOR_ANY) {
      /* choose a valid entry */
      idx = (u->le->table[idx] != 0) ? 0 : 1;
    } else if (u->vendor == UD_VENDOR_AMD) {
      idx = 0;
    } else {
      idx = 1;
    }
    break;
  case UD_TAB__OPC_RM:
    idx = MODRM_RM(modrm(u));
    break;
  case UD_TAB__OPC_REG:
    idx = MODRM_REG(modrm(u));
    break;
  case UD_TAB__OPC_SSE:
    return decode_ssepfx(u);
  case UD_TAB__OPC_VEX:
    return decode_vex(u);
  case UD_TAB__OPC_VEX_W:
    idx = vex_w(u);
    break;
  case UD_TAB__OPC_VEX_L:
    idx = vex_l(u);
    break;
  case UD_TAB__OPC_TABLE:
    /* plain opcode continuation: advance to the next input byte */
    inp_next(u);
    return decode_opcode(u);
  default:
    UD_ASSERT(!"not reached");
    break;
  }

  return decode_ext(u, u->le->table[idx]);
}
  1115. static int
  1116. decode_opcode(struct ud *u)
  1117. {
  1118. uint16_t ptr;
  1119. UD_ASSERT(u->le->type == UD_TAB__OPC_TABLE);
  1120. UD_RETURN_ON_ERROR(u);
  1121. ptr = u->le->table[inp_curr(u)];
  1122. return decode_ext(u, ptr);
  1123. }
  1124. /* =============================================================================
  1125. * ud_decode() - Instruction decoder. Returns the number of bytes decoded.
  1126. * =============================================================================
  1127. */
unsigned int
ud_decode(struct ud *u)
{
  inp_start(u);
  clear_insn(u);
  u->le = &ud_lookup_table_list[0]; /* start at the root opcode table */
  /* decode prefixes, then the opcode; fold any error flagged during
   * either stage (or already latched in u->error) into one flag */
  u->error = decode_prefixes(u) == -1 ||
             decode_opcode(u)   == -1 ||
             u->error;
  /* Handle decode error. */
  if (u->error) {
    /* clear out the decode data. */
    clear_insn(u);
    /* mark the sequence of bytes as invalid. */
    u->itab_entry = &ud_itab[0]; /* entry 0 is invalid */
    u->mnemonic = u->itab_entry->mnemonic;
  }

  /* maybe this stray segment override byte
   * should be spewed out?
   */
  if ( !P_SEG( u->itab_entry->prefix ) &&
       u->operand[0].type != UD_OP_MEM &&
       u->operand[1].type != UD_OP_MEM )
    /* no memory operand can consume it: drop the segment override */
    u->pfx_seg = 0;

  u->insn_offset = u->pc; /* set offset of instruction */
  u->asm_buf_fill = 0;    /* set translation buffer index to 0 */
  u->pc += u->inp_ctr;    /* move program counter by bytes decoded */

  /* return number of bytes disassembled. */
  return u->inp_ctr;
}
  1158. /*
  1159. vim: set ts=2 sw=2 expandtab
  1160. */