base64.c 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834
  1. /*
  2. +----------------------------------------------------------------------+
  3. | PHP Version 7 |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 1997-2018 The PHP Group |
  6. +----------------------------------------------------------------------+
  7. | This source file is subject to version 3.01 of the PHP license, |
  8. | that is bundled with this package in the file LICENSE, and is |
  9. | available through the world-wide-web at the following url: |
  10. | http://www.php.net/license/3_01.txt |
  11. | If you did not receive a copy of the PHP license and are unable to |
  12. | obtain it through the world-wide-web, please send a note to |
  13. | license@php.net so we can mail you a copy immediately. |
  14. +----------------------------------------------------------------------+
  15. | Author: Jim Winstead <jimw@php.net> |
  16. | Xinchen Hui <laruence@php.net> |
  17. +----------------------------------------------------------------------+
  18. */
  19. #include <string.h>
  20. #include "php.h"
  21. #include "base64.h"
  /* {{{ base64 tables */

  /* Forward alphabet: 6-bit value (0..63) -> ASCII character. The string
   * literal contributes the terminating NUL, so the array holds 65 bytes. */
  static const char base64_table[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "abcdefghijklmnopqrstuvwxyz"
      "0123456789+/";

  /* Padding character used to fill the last output group. */
  static const char base64_pad = '=';

  /* One 16-entry row in which every byte is outside the alphabet. */
  #define BASE64_INVALID_ROW \
      -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2

  /* Reverse alphabet: input byte -> 6-bit value.
   *   0..63  value of an alphabet character
   *   -1     whitespace (TAB, LF, CR, SPACE)
   *   -2     any other byte, including '=' */
  static const short base64_reverse_table[256] = {
      /* 0x00 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1, -2, -2, -1, -2, -2,
      /* 0x10 */ BASE64_INVALID_ROW,
      /* 0x20 */ -1, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, 62, -2, -2, -2, 63,
      /* 0x30 */ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -2, -2, -2, -2, -2, -2,
      /* 0x40 */ -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
      /* 0x50 */ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -2, -2, -2, -2, -2,
      /* 0x60 */ -2, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
      /* 0x70 */ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -2, -2, -2, -2, -2,
      /* 0x80 */ BASE64_INVALID_ROW,
      /* 0x90 */ BASE64_INVALID_ROW,
      /* 0xa0 */ BASE64_INVALID_ROW,
      /* 0xb0 */ BASE64_INVALID_ROW,
      /* 0xc0 */ BASE64_INVALID_ROW,
      /* 0xd0 */ BASE64_INVALID_ROW,
      /* 0xe0 */ BASE64_INVALID_ROW,
      /* 0xf0 */ BASE64_INVALID_ROW
  };

  #undef BASE64_INVALID_ROW
  /* }}} */
  50. static zend_always_inline unsigned char *php_base64_encode_impl(const unsigned char *in, size_t inl, unsigned char *out) /* {{{ */
  51. {
  52. while (inl > 2) { /* keep going until we have less than 24 bits */
  53. *out++ = base64_table[in[0] >> 2];
  54. *out++ = base64_table[((in[0] & 0x03) << 4) + (in[1] >> 4)];
  55. *out++ = base64_table[((in[1] & 0x0f) << 2) + (in[2] >> 6)];
  56. *out++ = base64_table[in[2] & 0x3f];
  57. in += 3;
  58. inl -= 3; /* we just handle 3 octets of data */
  59. }
  60. /* now deal with the tail end of things */
  61. if (inl != 0) {
  62. *out++ = base64_table[in[0] >> 2];
  63. if (inl > 1) {
  64. *out++ = base64_table[((in[0] & 0x03) << 4) + (in[1] >> 4)];
  65. *out++ = base64_table[(in[1] & 0x0f) << 2];
  66. *out++ = base64_pad;
  67. } else {
  68. *out++ = base64_table[(in[0] & 0x03) << 4];
  69. *out++ = base64_pad;
  70. *out++ = base64_pad;
  71. }
  72. }
  73. *out = '\0';
  74. return out;
  75. }
  76. /* }}} */
  77. static zend_always_inline int php_base64_decode_impl(const unsigned char *in, size_t inl, unsigned char *out, size_t *outl, zend_bool strict) /* {{{ */
  78. {
  79. int ch;
  80. size_t i = 0, padding = 0, j = *outl;
  81. /* run through the whole string, converting as we go */
  82. while (inl-- > 0) {
  83. ch = *in++;
  84. if (ch == base64_pad) {
  85. padding++;
  86. continue;
  87. }
  88. ch = base64_reverse_table[ch];
  89. if (!strict) {
  90. /* skip unknown characters and whitespace */
  91. if (ch < 0) {
  92. continue;
  93. }
  94. } else {
  95. /* skip whitespace */
  96. if (ch == -1) {
  97. continue;
  98. }
  99. /* fail on bad characters or if any data follows padding */
  100. if (ch == -2 || padding) {
  101. goto fail;
  102. }
  103. }
  104. switch (i % 4) {
  105. case 0:
  106. out[j] = ch << 2;
  107. break;
  108. case 1:
  109. out[j++] |= ch >> 4;
  110. out[j] = (ch & 0x0f) << 4;
  111. break;
  112. case 2:
  113. out[j++] |= ch >>2;
  114. out[j] = (ch & 0x03) << 6;
  115. break;
  116. case 3:
  117. out[j++] |= ch;
  118. break;
  119. }
  120. i++;
  121. }
  122. /* fail if the input is truncated (only one char in last group) */
  123. if (strict && i % 4 == 1) {
  124. goto fail;
  125. }
  126. /* fail if the padding length is wrong (not VV==, VVV=), but accept zero padding
  127. * RFC 4648: "In some circumstances, the use of padding [--] is not required" */
  128. if (strict && padding && (padding > 2 || (i + padding) % 4 != 0)) {
  129. goto fail;
  130. }
  131. *outl = j;
  132. out[j] = '\0';
  133. return 1;
  134. fail:
  135. return 0;
  136. }
  137. /* }}} */
  138. /* {{{ php_base64_encode */
  /* Reconcile the SSSE3 configuration with the AVX2 one:
   *  - AVX2 native: every SSSE3 variant is dropped.
   *  - AVX2 selected at run time (FUNC_PROTO or FUNC_PTR) while SSSE3 is
   *    native: SSSE3 is demoted from "native" to "resolver" using the same
   *    dispatch mechanism as AVX2, and ZEND_INTRIN_SSSE3_FUNC_DECL is
   *    redefined, adding target("ssse3") when HAVE_FUNC_ATTRIBUTE_TARGET
   *    is defined. */
  #if ZEND_INTRIN_AVX2_NATIVE
  #  undef ZEND_INTRIN_SSSE3_NATIVE
  #  undef ZEND_INTRIN_SSSE3_RESOLVER
  #  undef ZEND_INTRIN_SSSE3_FUNC_PROTO
  #  undef ZEND_INTRIN_SSSE3_FUNC_PTR
  #elif ZEND_INTRIN_AVX2_FUNC_PROTO && ZEND_INTRIN_SSSE3_NATIVE
  #  undef ZEND_INTRIN_SSSE3_NATIVE
  #  undef ZEND_INTRIN_SSSE3_RESOLVER
  #  define ZEND_INTRIN_SSSE3_RESOLVER 1
  #  define ZEND_INTRIN_SSSE3_FUNC_PROTO 1
  #  undef ZEND_INTRIN_SSSE3_FUNC_DECL
  #  ifdef HAVE_FUNC_ATTRIBUTE_TARGET
  #    define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func __attribute__((target("ssse3")))
  #  else
  #    define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func
  #  endif
  #elif ZEND_INTRIN_AVX2_FUNC_PTR && ZEND_INTRIN_SSSE3_NATIVE
  #  undef ZEND_INTRIN_SSSE3_NATIVE
  #  undef ZEND_INTRIN_SSSE3_RESOLVER
  #  define ZEND_INTRIN_SSSE3_RESOLVER 1
  #  define ZEND_INTRIN_SSSE3_FUNC_PTR 1
  #  undef ZEND_INTRIN_SSSE3_FUNC_DECL
  #  ifdef HAVE_FUNC_ATTRIBUTE_TARGET
  #    define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func __attribute__((target("ssse3")))
  #  else
  #    define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func
  #  endif
  #endif
  167. #if ZEND_INTRIN_AVX2_NATIVE
  168. # include <immintrin.h>
  169. #elif ZEND_INTRIN_SSSE3_NATIVE
  170. # include <tmmintrin.h>
  171. #elif (ZEND_INTRIN_SSSE3_RESOLVER || ZEND_INTRIN_AVX2_RESOLVER)
  172. # if ZEND_INTRIN_AVX2_RESOLVER
  173. # include <immintrin.h>
  174. # else
  175. # include <tmmintrin.h>
  176. # endif /* ZEND_INTRIN_AVX2_RESOLVER */
  177. # include "Zend/zend_cpuinfo.h"
  178. # if ZEND_INTRIN_AVX2_RESOLVER
  179. ZEND_INTRIN_AVX2_FUNC_DECL(zend_string *php_base64_encode_avx2(const unsigned char *str, size_t length));
  180. ZEND_INTRIN_AVX2_FUNC_DECL(zend_string *php_base64_decode_ex_avx2(const unsigned char *str, size_t length, zend_bool strict));
  181. # endif
  182. # if ZEND_INTRIN_SSSE3_RESOLVER
  183. ZEND_INTRIN_SSSE3_FUNC_DECL(zend_string *php_base64_encode_ssse3(const unsigned char *str, size_t length));
  184. ZEND_INTRIN_SSSE3_FUNC_DECL(zend_string *php_base64_decode_ex_ssse3(const unsigned char *str, size_t length, zend_bool strict));
  185. # endif
  186. zend_string *php_base64_encode_default(const unsigned char *str, size_t length);
  187. zend_string *php_base64_decode_ex_default(const unsigned char *str, size_t length, zend_bool strict);
  188. # if (ZEND_INTRIN_AVX2_FUNC_PROTO || ZEND_INTRIN_SSSE3_FUNC_PROTO)
  189. PHPAPI zend_string *php_base64_encode(const unsigned char *str, size_t length) __attribute__((ifunc("resolve_base64_encode")));
  190. PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, zend_bool strict) __attribute__((ifunc("resolve_base64_decode")));
  191. ZEND_NO_SANITIZE_ADDRESS
  192. static void *resolve_base64_encode() {
  193. # if ZEND_INTRIN_AVX2_FUNC_PROTO
  194. if (zend_cpu_supports_avx2()) {
  195. return php_base64_encode_avx2;
  196. } else
  197. # endif
  198. #if ZEND_INTRIN_SSSE3_FUNC_PROTO
  199. if (zend_cpu_supports_ssse3()) {
  200. return php_base64_encode_ssse3;
  201. }
  202. #endif
  203. return php_base64_encode_default;
  204. }
  205. ZEND_NO_SANITIZE_ADDRESS
  206. static void *resolve_base64_decode() {
  207. # if ZEND_INTRIN_AVX2_FUNC_PROTO
  208. if (zend_cpu_supports_avx2()) {
  209. return php_base64_decode_ex_avx2;
  210. } else
  211. # endif
  212. #if ZEND_INTRIN_SSSE3_FUNC_PROTO
  213. if (zend_cpu_supports_ssse3()) {
  214. return php_base64_decode_ex_ssse3;
  215. }
  216. #endif
  217. return php_base64_decode_ex_default;
  218. }
  219. # else /* (ZEND_INTRIN_AVX2_FUNC_PROTO || ZEND_INTRIN_SSSE3_FUNC_PROTO) */
  220. PHPAPI zend_string *(*php_base64_encode)(const unsigned char *str, size_t length) = NULL;
  221. PHPAPI zend_string *(*php_base64_decode_ex)(const unsigned char *str, size_t length, zend_bool strict) = NULL;
  222. PHP_MINIT_FUNCTION(base64_intrin)
  223. {
  224. # if ZEND_INTRIN_AVX2_FUNC_PTR
  225. if (zend_cpu_supports_avx2()) {
  226. php_base64_encode = php_base64_encode_avx2;
  227. php_base64_decode_ex = php_base64_decode_ex_avx2;
  228. } else
  229. # endif
  230. #if ZEND_INTRIN_SSSE3_FUNC_PTR
  231. if (zend_cpu_supports_ssse3()) {
  232. php_base64_encode = php_base64_encode_ssse3;
  233. php_base64_decode_ex = php_base64_decode_ex_ssse3;
  234. } else
  235. #endif
  236. {
  237. php_base64_encode = php_base64_encode_default;
  238. php_base64_decode_ex = php_base64_decode_ex_default;
  239. }
  240. return SUCCESS;
  241. }
  242. # endif /* (ZEND_INTRIN_AVX2_FUNC_PROTO || ZEND_INTRIN_SSSE3_FUNC_PROTO) */
  243. #endif /* ZEND_INTRIN_AVX2_NATIVE */
  244. #if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER
  245. # if ZEND_INTRIN_AVX2_RESOLVER && defined(HAVE_FUNC_ATTRIBUTE_TARGET)
  246. static __m256i php_base64_encode_avx2_reshuffle(__m256i in) __attribute__((target("avx2")));
  247. static __m256i php_base64_encode_avx2_translate(__m256i in) __attribute__((target("avx2")));
  248. # endif
  /* Spreads 24 input bytes into 32 bytes that each hold one 6-bit value.
   *
   * This one works with shifted (4 bytes) input in order to be able to work
   * efficiently in the 2 128-bit lanes. Input, bytes MSB to LSB:
   *    0 0 0 0 x w v u t s r q p o n m
   *    l k j i h g f e d c b a 0 0 0 0 */
  static __m256i php_base64_encode_avx2_reshuffle(__m256i in)
  {
      /* byte selection placing each 3-byte group into its own 32-bit element */
      const __m256i spread = _mm256_set_epi8(
          10, 11, 9, 10,
          7, 8, 6, 7,
          4, 5, 3, 4,
          1, 2, 0, 1,
          14, 15, 13, 14,
          11, 12, 10, 11,
          8, 9, 7, 8,
          5, 6, 4, 5);
      __m256i shifted_down, shifted_up;

      in = _mm256_shuffle_epi8(in, spread);

      /* two of the four fields move right: multiply-high by 0x0040 / 0x0400
       * is a per-16-bit shift right by 10 / 6 */
      shifted_down = _mm256_mulhi_epu16(
          _mm256_and_si256(in, _mm256_set1_epi32(0x0fc0fc00)),
          _mm256_set1_epi32(0x04000040));

      /* the other two move left: multiply-low by 0x0010 / 0x0100 is a
       * per-16-bit shift left by 4 / 8 */
      shifted_up = _mm256_mullo_epi16(
          _mm256_and_si256(in, _mm256_set1_epi32(0x003f03f0)),
          _mm256_set1_epi32(0x01000010));

      /* output (upper case are MSB, lower case are LSB):
       * 00xxxxxx 00wwwwXX 00vvWWWW 00VVVVVV
       * 00uuuuuu 00ttttUU 00ssTTTT 00SSSSSS
       * 00rrrrrr 00qqqqRR 00ppQQQQ 00PPPPPP
       * 00oooooo 00nnnnOO 00mmNNNN 00MMMMMM
       * 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ
       * 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG
       * 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD
       * 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA */
      return _mm256_or_si256(shifted_down, shifted_up);
  }
  /* Maps 32 bytes holding values 0..63 to base64 alphabet characters by
   * adding a per-range offset fetched from a 16-entry table (replicated in
   * both 128-bit lanes):
   *   index 0      +65   values 0..25   'A'..'Z'
   *   index 1      +71   values 26..51  'a'..'z'
   *   index 2..11   -4   values 52..61  '0'..'9'
   *   index 12     -19   value 62       '+'
   *   index 13     -16   value 63       '/' */
  static __m256i php_base64_encode_avx2_translate(__m256i in)
  {
      const __m256i offsets = _mm256_setr_epi8(
          65, 71, -4, -4, -4, -4, -4, -4,
          -4, -4, -4, -4, -19, -16, 0, 0,
          65, 71, -4, -4, -4, -4, -4, -4,
          -4, -4, -4, -4, -19, -16, 0, 0);
      /* saturating subtract: 0 for values up to 51, value - 51 above */
      __m256i slot = _mm256_subs_epu8(in, _mm256_set1_epi8(51));
      /* 0xFF (-1) where the value exceeds 25, else 0x00 */
      const __m256i above_25 = _mm256_cmpgt_epi8(in, _mm256_set1_epi8(25));

      /* subtracting -1 bumps the slot by one for every range but the first */
      slot = _mm256_sub_epi8(slot, above_25);

      return _mm256_add_epi8(in, _mm256_shuffle_epi8(offsets, slot));
  }
  293. #endif /* ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER */
  294. #if ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER
  295. # if ZEND_INTRIN_SSSE3_RESOLVER && defined(HAVE_FUNC_ATTRIBUTE_TARGET)
  296. static __m128i php_base64_encode_ssse3_reshuffle(__m128i in) __attribute__((target("ssse3")));
  297. static __m128i php_base64_encode_ssse3_translate(__m128i in) __attribute__((target("ssse3")));
  298. # endif
  /* Spreads the low 12 input bytes into 16 bytes that each hold one 6-bit
   * value. Input, bytes MSB to LSB:
   *    0 0 0 0 l k j i h g f e d c b a */
  static __m128i php_base64_encode_ssse3_reshuffle(__m128i in)
  {
      /* byte selection placing each 3-byte group into its own 32-bit element */
      const __m128i spread = _mm_set_epi8(
          10, 11, 9, 10,
          7, 8, 6, 7,
          4, 5, 3, 4,
          1, 2, 0, 1);
      __m128i shifted_down, shifted_up;

      in = _mm_shuffle_epi8(in, spread);

      /* two of the four fields move right: multiply-high by 0x0040 / 0x0400
       * is a per-16-bit shift right by 10 / 6 */
      shifted_down = _mm_mulhi_epu16(
          _mm_and_si128(in, _mm_set1_epi32(0x0fc0fc00)),
          _mm_set1_epi32(0x04000040));

      /* the other two move left: multiply-low by 0x0010 / 0x0100 is a
       * per-16-bit shift left by 4 / 8 */
      shifted_up = _mm_mullo_epi16(
          _mm_and_si128(in, _mm_set1_epi32(0x003f03f0)),
          _mm_set1_epi32(0x01000010));

      /* output (upper case are MSB, lower case are LSB):
       * 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ
       * 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG
       * 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD
       * 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA */
      return _mm_or_si128(shifted_down, shifted_up);
  }
  /* Translate values 0..63 to the Base64 alphabet. There are five sets:
   * #  From      To         Abs   Index    Characters
   * 0  [0..25]   [65..90]   +65   0        ABCDEFGHIJKLMNOPQRSTUVWXYZ
   * 1  [26..51]  [97..122]  +71   1        abcdefghijklmnopqrstuvwxyz
   * 2  [52..61]  [48..57]    -4   [2..11]  0123456789
   * 3  [62]      [43]       -19   12       +
   * 4  [63]      [47]       -16   13       / */
  static __m128i php_base64_encode_ssse3_translate(__m128i in)
  {
      /* per-set offsets, addressed by the index column above */
      const __m128i offsets = _mm_setr_epi8(
          65, 71, -4, -4,
          -4, -4, -4, -4,
          -4, -4, -4, -4,
          -19, -16, 0, 0
      );
      /* saturating subtract: right for set #0, one too small for the rest */
      __m128i slot = _mm_subs_epu8(in, _mm_set1_epi8(51));
      /* 0xFF (-1) for sets #1..#4, 0x00 for set #0 */
      const __m128i above_25 = _mm_cmpgt_epi8(in, _mm_set1_epi8(25));

      /* subtracting -1 adds the missing 1 for sets #1..#4 */
      slot = _mm_sub_epi8(slot, above_25);

      /* add the selected offset to every input value */
      return _mm_add_epi8(in, _mm_shuffle_epi8(offsets, slot));
  }
  /* SSSE3 bulk encode loop. Expects the locals `c` (input cursor), `o`
   * (output cursor) and `length` (input bytes left) in the expanding scope.
   * Each pass loads 16 input bytes, encodes the first 12 of them into 16
   * output characters and advances the cursors; it stops once fewer than
   * 16 input bytes remain, leaving the tail to the scalar encoder.
   * Wrapped in do { } while (0) so the macro expands to a single statement;
   * the load pointer keeps the const qualifier of `c`. */
  #define PHP_BASE64_ENCODE_SSSE3_LOOP \
      do { \
          while (length > 15) { \
              __m128i s = _mm_loadu_si128((const __m128i *)c); \
              \
              s = php_base64_encode_ssse3_reshuffle(s); \
              \
              s = php_base64_encode_ssse3_translate(s); \
              \
              _mm_storeu_si128((__m128i *)o, s); \
              c += 12; \
              o += 16; \
              length -= 12; \
          } \
      } while (0)
  359. #endif /* ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER */
  360. #if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER
  361. # if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_SSSE3_NATIVE
  362. PHPAPI zend_string *php_base64_encode(const unsigned char *str, size_t length)
  363. # elif ZEND_INTRIN_AVX2_RESOLVER
  364. zend_string *php_base64_encode_avx2(const unsigned char *str, size_t length)
  365. # else /* ZEND_INTRIN_SSSE3_RESOLVER */
  366. zend_string *php_base64_encode_ssse3(const unsigned char *str, size_t length)
  367. # endif
  368. {
  369. const unsigned char *c = str;
  370. unsigned char *o;
  371. zend_string *result;
  372. result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0);
  373. o = (unsigned char *)ZSTR_VAL(result);
  374. # if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER
  375. if (length > 31) {
  376. __m256i s = _mm256_loadu_si256((__m256i *)c);
  377. s = _mm256_permutevar8x32_epi32(s, _mm256_setr_epi32(0, 0, 1, 2, 3, 4, 5, 6));
  378. for (;;) {
  379. s = php_base64_encode_avx2_reshuffle(s);
  380. s = php_base64_encode_avx2_translate(s);
  381. _mm256_storeu_si256((__m256i *)o, s);
  382. c += 24;
  383. o += 32;
  384. length -= 24;
  385. if (length < 28) {
  386. break;
  387. }
  388. s = _mm256_loadu_si256((__m256i *)(c - 4));
  389. }
  390. }
  391. # else
  392. PHP_BASE64_ENCODE_SSSE3_LOOP;
  393. # endif
  394. o = php_base64_encode_impl(c, length, o);
  395. ZSTR_LEN(result) = (o - (unsigned char *)ZSTR_VAL(result));
  396. return result;
  397. }
  398. # if ZEND_INTRIN_SSSE3_RESOLVER && ZEND_INTRIN_AVX2_RESOLVER
  399. zend_string *php_base64_encode_ssse3(const unsigned char *str, size_t length)
  400. {
  401. const unsigned char *c = str;
  402. unsigned char *o;
  403. zend_string *result;
  404. result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0);
  405. o = (unsigned char *)ZSTR_VAL(result);
  406. PHP_BASE64_ENCODE_SSSE3_LOOP;
  407. o = php_base64_encode_impl(c, length, o);
  408. ZSTR_LEN(result) = (o - (unsigned char *)ZSTR_VAL(result));
  409. return result;
  410. }
  411. # endif
  412. #endif /* ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER */
  413. /* }}} */
  414. /* {{{ php_base64_decode_ex */
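  415. /* Disabled generator for base64_reverse_table, kept for reference only (do not set index 0 to 64). It marks every non-alphabet byte as -1, so it does not reproduce the -2 (invalid) / -1 (whitespace) split used by the table above.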
  416. static unsigned short base64_reverse_table[256];
  417. #define rt base64_reverse_table
  418. void php_base64_init(void)
  419. {
  420. char *s = emalloc(10240), *sp;
  421. char *chp;
  422. short idx;
  423. for(ch = 0; ch < 256; ch++) {
  424. chp = strchr(base64_table, ch);
  425. if(ch && chp) {
  426. idx = chp - base64_table;
  427. if (idx >= 64) idx = -1;
  428. rt[ch] = idx;
  429. } else {
  430. rt[ch] = -1;
  431. }
  432. }
  433. sp = s;
  434. sprintf(sp, "static const short base64_reverse_table[256] = {\n");
  435. for(ch =0; ch < 256;) {
  436. sp = s+strlen(s);
  437. sprintf(sp, "\t% 3d,% 3d,% 3d,% 3d,% 3d,% 3d,% 3d,% 3d,% 3d,% 3d,% 3d,% 3d,% 3d,% 3d,% 3d,% 3d,\n", rt[ch+0], rt[ch+1], rt[ch+2], rt[ch+3], rt[ch+4], rt[ch+5], rt[ch+6], rt[ch+7], rt[ch+8], rt[ch+9], rt[ch+10], rt[ch+11], rt[ch+12], rt[ch+13], rt[ch+14], rt[ch+15]);
  438. ch += 16;
  439. }
  440. sprintf(sp, "};");
  441. php_error_docref(NULL, E_NOTICE, "Reverse_table:\n%s", s);
  442. efree(s);
  443. }
  444. */
  445. #if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER
  446. # if ZEND_INTRIN_AVX2_RESOLVER && defined(HAVE_FUNC_ATTRIBUTE_TARGET)
  447. static __m256i php_base64_decode_avx2_reshuffle(__m256i in) __attribute__((target("avx2")));
  448. # endif
  /* Packs 32 bytes that each carry one 6-bit value into 24 contiguous
   * output bytes in the low part of the result. */
  static __m256i php_base64_decode_avx2_reshuffle(__m256i in)
  {
      /* per-lane byte order: reverse each 3-byte group, drop the 4th byte */
      const __m256i pack_bytes = _mm256_setr_epi8(
          2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1,
          2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1);
      /* moves the three used 32-bit elements of each lane next to each other */
      const __m256i pack_dwords = _mm256_setr_epi32(0, 1, 2, 4, 5, 6, -1, -1);
      __m256i pairs, quads;

      /* merge adjacent 6-bit values into 12-bit values ... */
      pairs = _mm256_maddubs_epi16(in, _mm256_set1_epi32(0x01400140));
      /* ... then adjacent 12-bit values into 24-bit values */
      quads = _mm256_madd_epi16(pairs, _mm256_set1_epi32(0x00011000));

      quads = _mm256_shuffle_epi8(quads, pack_bytes);
      return _mm256_permutevar8x32_epi32(quads, pack_dwords);
  }
  459. #endif
  460. #if ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER
  461. # if ZEND_INTRIN_SSSE3_RESOLVER && defined(HAVE_FUNC_ATTRIBUTE_TARGET)
  462. static __m128i php_base64_decode_ssse3_reshuffle(__m128i in) __attribute__((target("ssse3")));
  463. # endif
  /* Packs 16 bytes that each carry one 6-bit value into 12 contiguous
   * output bytes in the low part of the result; the top 4 bytes are zero. */
  static __m128i php_base64_decode_ssse3_reshuffle(__m128i in)
  {
      /* reverse each 3-byte group and drop the empty 4th byte */
      const __m128i pack_bytes = _mm_setr_epi8(
          2, 1, 0,
          6, 5, 4,
          10, 9, 8,
          14, 13, 12,
          -1, -1, -1, -1);
      __m128i pairs, quads;

      pairs = _mm_maddubs_epi16(in, _mm_set1_epi32(0x01400140));
      /* 0000kkkk LLllllll 0000JJJJ JJjjKKKK
       * 0000hhhh IIiiiiii 0000GGGG GGggHHHH
       * 0000eeee FFffffff 0000DDDD DDddEEEE
       * 0000bbbb CCcccccc 0000AAAA AAaaBBBB */

      quads = _mm_madd_epi16(pairs, _mm_set1_epi32(0x00011000));
      /* 00000000 JJJJJJjj KKKKkkkk LLllllll
       * 00000000 GGGGGGgg HHHHhhhh IIiiiiii
       * 00000000 DDDDDDdd EEEEeeee FFffffff
       * 00000000 AAAAAAaa BBBBbbbb CCcccccc */

      /* result:
       * 00000000 00000000 00000000 00000000
       * LLllllll KKKKkkkk JJJJJJjj IIiiiiii
       * HHHHhhhh GGGGGGgg FFffffff EEEEeeee
       * DDDDDDdd CCcccccc BBBBbbbb AAAAAAaa */
      return _mm_shuffle_epi8(quads, pack_bytes);
  }
  /* SSSE3 bulk decode loop. Expects the locals `c` (input cursor), `o`
   * (output cursor), `outl` (bytes produced) and `length` (input bytes
   * left) in the expanding scope.
   *
   * Each pass loads 16 input characters and classifies them with two
   * nibble-indexed lookup tables. If the AND of both lookups is non-zero
   * for any byte, the block contains something the vector path does not
   * handle and the loop stops, leaving the rest to the scalar decoder.
   * Otherwise a per-byte offset (lut_roll) turns ASCII into 6-bit values,
   * which are packed into 12 output bytes. The store writes 16 bytes, of
   * which only the first 12 count.
   *
   * The lookup tables are loop-invariant and built once. The body is
   * wrapped in do { } while (0) so the macro expands to a single statement. */
  #define PHP_BASE64_DECODE_SSSE3_LOOP \
      do { \
          const __m128i lut_lo = _mm_setr_epi8( \
              0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, \
              0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A); \
          const __m128i lut_hi = _mm_setr_epi8( \
              0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, \
              0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10); \
          const __m128i lut_roll = _mm_setr_epi8( \
              0, 16, 19, 4, -65, -65, -71, -71, \
              0, 0, 0, 0, 0, 0, 0, 0); \
          \
          while (length > 15 + 6 + 2) { \
              __m128i hi_nibbles, lo_nibbles, hi, lo; \
              __m128i s = _mm_loadu_si128((const __m128i *)c); \
              \
              hi_nibbles = _mm_and_si128( \
                  _mm_srli_epi32(s, 4), _mm_set1_epi8(0x2f)); \
              lo_nibbles = _mm_and_si128(s, _mm_set1_epi8(0x2f)); \
              hi = _mm_shuffle_epi8(lut_hi, hi_nibbles); \
              lo = _mm_shuffle_epi8(lut_lo, lo_nibbles); \
              \
              if (UNEXPECTED( \
                  _mm_movemask_epi8( \
                      _mm_cmpgt_epi8( \
                          _mm_and_si128(lo, hi), _mm_set1_epi8(0))))) { \
                  break; \
              } else { \
                  __m128i eq_2f, roll; \
                  \
                  eq_2f = _mm_cmpeq_epi8(s, _mm_set1_epi8(0x2f)); \
                  roll = _mm_shuffle_epi8( \
                      lut_roll, _mm_add_epi8(eq_2f, hi_nibbles)); \
                  \
                  s = _mm_add_epi8(s, roll); \
                  s = php_base64_decode_ssse3_reshuffle(s); \
                  \
                  _mm_storeu_si128((__m128i *)o, s); \
                  \
                  c += 16; \
                  o += 12; \
                  outl += 12; \
                  length -= 16; \
              } \
          } \
      } while (0)
  534. #endif
  535. #if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER
  536. # if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_SSSE3_NATIVE
  537. PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, zend_bool strict)
  538. # elif ZEND_INTRIN_AVX2_RESOLVER
  539. zend_string *php_base64_decode_ex_avx2(const unsigned char *str, size_t length, zend_bool strict)
  540. # else
  541. zend_string *php_base64_decode_ex_ssse3(const unsigned char *str, size_t length, zend_bool strict)
  542. # endif
  543. {
  544. const unsigned char *c = str;
  545. unsigned char *o;
  546. size_t outl = 0;
  547. zend_string *result;
  548. result = zend_string_alloc(length, 0);
  549. o = (unsigned char *)ZSTR_VAL(result);
  550. /* See: "Faster Base64 Encoding and Decoding using AVX2 Instructions"
  551. * https://arxiv.org/pdf/1704.00605.pdf */
  552. # if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER
  553. while (length > 31 + 11 + 2) {
  554. __m256i lut_lo, lut_hi, lut_roll;
  555. __m256i hi_nibbles, lo_nibbles, hi, lo;
  556. __m256i str = _mm256_loadu_si256((__m256i *)c);
  557. lut_lo = _mm256_setr_epi8(
  558. 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
  559. 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A,
  560. 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
  561. 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A);
  562. lut_hi = _mm256_setr_epi8(
  563. 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
  564. 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
  565. 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
  566. 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10);
  567. lut_roll = _mm256_setr_epi8(
  568. 0, 16, 19, 4, -65, -65, -71, -71,
  569. 0, 0, 0, 0, 0, 0, 0, 0,
  570. 0, 16, 19, 4, -65, -65, -71, -71,
  571. 0, 0, 0, 0, 0, 0, 0, 0);
  572. hi_nibbles = _mm256_and_si256(_mm256_srli_epi32(str, 4), _mm256_set1_epi8(0x2f));
  573. lo_nibbles = _mm256_and_si256(str, _mm256_set1_epi8(0x2f));
  574. hi = _mm256_shuffle_epi8(lut_hi, hi_nibbles);
  575. lo = _mm256_shuffle_epi8(lut_lo, lo_nibbles);
  576. if (!_mm256_testz_si256(lo, hi)) {
  577. break;
  578. } else {
  579. __m256i eq_2f, roll;
  580. eq_2f = _mm256_cmpeq_epi8(str, _mm256_set1_epi8(0x2f));
  581. roll = _mm256_shuffle_epi8(lut_roll, _mm256_add_epi8(eq_2f, hi_nibbles));
  582. str = _mm256_add_epi8(str, roll);
  583. str = php_base64_decode_avx2_reshuffle(str);
  584. _mm256_storeu_si256((__m256i *)o, str);
  585. c += 32;
  586. o += 24;
  587. outl += 24;
  588. length -= 32;
  589. }
  590. }
  591. # else
  592. PHP_BASE64_DECODE_SSSE3_LOOP;
  593. # endif
  594. if (!php_base64_decode_impl(c, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) {
  595. zend_string_efree(result);
  596. return NULL;
  597. }
  598. ZSTR_LEN(result) = outl;
  599. return result;
  600. }
  601. # if ZEND_INTRIN_SSSE3_RESOLVER && ZEND_INTRIN_AVX2_RESOLVER
  602. zend_string *php_base64_decode_ex_ssse3(const unsigned char *str, size_t length, zend_bool strict)
  603. {
  604. const unsigned char *c = str;
  605. unsigned char *o;
  606. size_t outl = 0;
  607. zend_string *result;
  608. result = zend_string_alloc(length, 0);
  609. o = (unsigned char *)ZSTR_VAL(result);
  610. PHP_BASE64_DECODE_SSSE3_LOOP;
  611. if (!php_base64_decode_impl(c, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) {
  612. zend_string_efree(result);
  613. return NULL;
  614. }
  615. ZSTR_LEN(result) = outl;
  616. return result;
  617. }
  618. # endif
  619. #endif /* ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER */
  620. #if !ZEND_INTRIN_AVX2_NATIVE && !ZEND_INTRIN_SSSE3_NATIVE
  621. #if ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_RESOLVER
  622. zend_string *php_base64_encode_default(const unsigned char *str, size_t length)
  623. #else
  624. PHPAPI zend_string *php_base64_encode(const unsigned char *str, size_t length)
  625. #endif
  626. {
  627. unsigned char *p;
  628. zend_string *result;
  629. result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0);
  630. p = (unsigned char *)ZSTR_VAL(result);
  631. p = php_base64_encode_impl(str, length, p);
  632. ZSTR_LEN(result) = (p - (unsigned char *)ZSTR_VAL(result));
  633. return result;
  634. }
  635. #endif
  636. #if !ZEND_INTRIN_AVX2_NATIVE && !ZEND_INTRIN_SSSE3_NATIVE
  637. #if ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_RESOLVER
  638. zend_string *php_base64_decode_ex_default(const unsigned char *str, size_t length, zend_bool strict)
  639. #else
  640. PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, zend_bool strict)
  641. #endif
  642. {
  643. zend_string *result;
  644. size_t outl = 0;
  645. result = zend_string_alloc(length, 0);
  646. if (!php_base64_decode_impl(str, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) {
  647. zend_string_efree(result);
  648. return NULL;
  649. }
  650. ZSTR_LEN(result) = outl;
  651. return result;
  652. }
  653. #endif
  654. /* }}} */
  655. /* {{{ proto string base64_encode(string str)
  656. Encodes string using MIME base64 algorithm */
  657. PHP_FUNCTION(base64_encode)
  658. {
  659. char *str;
  660. size_t str_len;
  661. zend_string *result;
  662. ZEND_PARSE_PARAMETERS_START(1, 1)
  663. Z_PARAM_STRING(str, str_len)
  664. ZEND_PARSE_PARAMETERS_END();
  665. result = php_base64_encode((unsigned char*)str, str_len);
  666. if (result != NULL) {
  667. RETURN_STR(result);
  668. } else {
  669. RETURN_FALSE;
  670. }
  671. }
  672. /* }}} */
  673. /* {{{ proto string base64_decode(string str[, bool strict])
  674. Decodes string using MIME base64 algorithm */
  675. PHP_FUNCTION(base64_decode)
  676. {
  677. char *str;
  678. zend_bool strict = 0;
  679. size_t str_len;
  680. zend_string *result;
  681. ZEND_PARSE_PARAMETERS_START(1, 2)
  682. Z_PARAM_STRING(str, str_len)
  683. Z_PARAM_OPTIONAL
  684. Z_PARAM_BOOL(strict)
  685. ZEND_PARSE_PARAMETERS_END();
  686. result = php_base64_decode_ex((unsigned char*)str, str_len, strict);
  687. if (result != NULL) {
  688. RETURN_STR(result);
  689. } else {
  690. RETURN_FALSE;
  691. }
  692. }
  693. /* }}} */
  694. /*
  695. * Local variables:
  696. * tab-width: 4
  697. * c-basic-offset: 4
  698. * End:
  699. * vim600: sw=4 ts=4 fdm=marker
  700. * vim<600: sw=4 ts=4
  701. */