12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965 |
- /*
- +----------------------------------------------------------------------+
- | Copyright (c) The PHP Group |
- +----------------------------------------------------------------------+
- | This source file is subject to version 3.01 of the PHP license, |
- | that is bundled with this package in the file LICENSE, and is |
- | available through the world-wide-web at the following url: |
- | https://www.php.net/license/3_01.txt |
- | If you did not receive a copy of the PHP license and are unable to |
- | obtain it through the world-wide-web, please send a note to |
- | license@php.net so we can mail you a copy immediately. |
- +----------------------------------------------------------------------+
- | Author: Jim Winstead <jimw@php.net> |
- | Xinchen Hui <laruence@php.net> |
- +----------------------------------------------------------------------+
- */
- #include <string.h>
- #include "php.h"
- #include "base64.h"
- /* {{{ base64 tables */
/* Base64 alphabet: the symbol for 6-bit value v is base64_table[v].
 * The string literal's implicit NUL supplies the trailing '\0' entry,
 * so the array still holds 65 bytes. */
static const char base64_table[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789+/";

/* Character appended to fill up an incomplete final group of output. */
static const char base64_pad = '=';
/* Inverse of base64_table, indexed by byte value:
 *   0..63  the 6-bit value of an alphabet character
 *   -1     whitespace (TAB, LF, CR, SPACE)
 *   -2     any other byte (including '=') */
static const short base64_reverse_table[256] = {
	/* 0x00 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1, -2, -2, -1, -2, -2,
	/* 0x10 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
	/* 0x20 */ -1, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, 62, -2, -2, -2, 63,
	/* 0x30 */ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -2, -2, -2, -2, -2, -2,
	/* 0x40 */ -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
	/* 0x50 */ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -2, -2, -2, -2, -2,
	/* 0x60 */ -2, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
	/* 0x70 */ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -2, -2, -2, -2, -2,
	/* 0x80 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
	/* 0x90 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
	/* 0xa0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
	/* 0xb0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
	/* 0xc0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
	/* 0xd0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
	/* 0xe0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
	/* 0xf0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2
};
- /* }}} */
- #ifdef __aarch64__
- #include <arm_neon.h>
- static zend_always_inline uint8x16_t encode_toascii(const uint8x16_t input, const uint8x16x2_t shift_LUT)
- {
- /* reduce 0..51 -> 0
- 52..61 -> 1 .. 10
- 62 -> 11
- 63 -> 12 */
- uint8x16_t result = vqsubq_u8(input, vdupq_n_u8(51));
- /* distinguish between ranges 0..25 and 26..51:
- 0 .. 25 -> remains 0
- 26 .. 51 -> becomes 13 */
- const uint8x16_t less = vcgtq_u8(vdupq_n_u8(26), input);
- result = vorrq_u8(result, vandq_u8(less, vdupq_n_u8(13)));
- /* read shift */
- result = vqtbl2q_u8(shift_LUT, result);
- return vaddq_u8(result, input);
- }
- static zend_always_inline unsigned char *neon_base64_encode(const unsigned char *in, size_t inl, unsigned char *out, size_t *left)
- {
- const uint8_t shift_LUT_[32] = {'a' - 26, '0' - 52, '0' - 52, '0' - 52,
- '0' - 52, '0' - 52, '0' - 52, '0' - 52,
- '0' - 52, '0' - 52, '0' - 52, '+' - 62,
- '/' - 63, 'A', 0, 0,
- 'a' - 26, '0' - 52, '0' - 52, '0' - 52,
- '0' - 52, '0' - 52, '0' - 52, '0' - 52,
- '0' - 52, '0' - 52, '0' - 52, '+' - 62,
- '/' - 63, 'A', 0, 0};
- const uint8x16x2_t shift_LUT = *((const uint8x16x2_t *)shift_LUT_);
- do {
- /* [ccdddddd | bbbbcccc | aaaaaabb]
- x.val[2] | x.val[1] | x.val[0] */
- const uint8x16x3_t x = vld3q_u8((const uint8_t *)(in));
- /* [00aa_aaaa] */
- const uint8x16_t field_a = vshrq_n_u8(x.val[0], 2);
- const uint8x16_t field_b = /* [00bb_bbbb] */
- vbslq_u8(vdupq_n_u8(0x30), /* [0011_0000] */
- vshlq_n_u8(x.val[0], 4), /* [aabb_0000] */
- vshrq_n_u8(x.val[1], 4)); /* [0000_bbbb] */
- const uint8x16_t field_c = /* [00cc_cccc] */
- vbslq_u8(vdupq_n_u8(0x3c), /* [0011_1100] */
- vshlq_n_u8(x.val[1], 2), /* [bbcc_cc00] */
- vshrq_n_u8(x.val[2], 6)); /* [0000_00cc] */
- /* [00dd_dddd] */
- const uint8x16_t field_d = vandq_u8(x.val[2], vdupq_n_u8(0x3f));
- uint8x16x4_t result;
- result.val[0] = encode_toascii(field_a, shift_LUT);
- result.val[1] = encode_toascii(field_b, shift_LUT);
- result.val[2] = encode_toascii(field_c, shift_LUT);
- result.val[3] = encode_toascii(field_d, shift_LUT);
- vst4q_u8((uint8_t *)out, result);
- out += 64;
- in += 16 * 3;
- inl -= 16 * 3;
- } while (inl >= 16 * 3);
- *left = inl;
- return out;
- }
- #endif /* __aarch64__ */
- static zend_always_inline unsigned char *php_base64_encode_impl(const unsigned char *in, size_t inl, unsigned char *out) /* {{{ */
- {
- #ifdef __aarch64__
- if (inl >= 16 * 3) {
- size_t left = 0;
- out = neon_base64_encode(in, inl, out, &left);
- in += inl - left;
- inl = left;
- }
- #endif
- while (inl > 2) { /* keep going until we have less than 24 bits */
- *out++ = base64_table[in[0] >> 2];
- *out++ = base64_table[((in[0] & 0x03) << 4) + (in[1] >> 4)];
- *out++ = base64_table[((in[1] & 0x0f) << 2) + (in[2] >> 6)];
- *out++ = base64_table[in[2] & 0x3f];
- in += 3;
- inl -= 3; /* we just handle 3 octets of data */
- }
- /* now deal with the tail end of things */
- if (inl != 0) {
- *out++ = base64_table[in[0] >> 2];
- if (inl > 1) {
- *out++ = base64_table[((in[0] & 0x03) << 4) + (in[1] >> 4)];
- *out++ = base64_table[(in[1] & 0x0f) << 2];
- *out++ = base64_pad;
- } else {
- *out++ = base64_table[(in[0] & 0x03) << 4];
- *out++ = base64_pad;
- *out++ = base64_pad;
- }
- }
- *out = '\0';
- return out;
- }
- /* }}} */
- #ifdef __aarch64__
- static zend_always_inline uint8x16_t decode_fromascii(const uint8x16_t input, uint8x16_t *error, const uint8x16x2_t shiftLUT, const uint8x16x2_t maskLUT, const uint8x16x2_t bitposLUT) {
- const uint8x16_t higher_nibble = vshrq_n_u8(input, 4);
- const uint8x16_t lower_nibble = vandq_u8(input, vdupq_n_u8(0x0f));
- const uint8x16_t sh = vqtbl2q_u8(shiftLUT, higher_nibble);
- const uint8x16_t eq_2f = vceqq_u8(input, vdupq_n_u8(0x2f));
- const uint8x16_t shift = vbslq_u8(eq_2f, vdupq_n_u8(16), sh);
- const uint8x16_t M = vqtbl2q_u8(maskLUT, lower_nibble);
- const uint8x16_t bit = vqtbl2q_u8(bitposLUT, higher_nibble);
- *error = vceqq_u8(vandq_u8(M, bit), vdupq_n_u8(0));
- return vaddq_u8(input, shift);
- }
- static zend_always_inline size_t neon_base64_decode(const unsigned char *in, size_t inl, unsigned char *out, size_t *left) {
- unsigned char *out_orig = out;
- const uint8_t shiftLUT_[32] = {
- 0, 0, 19, 4, (uint8_t)-65, (uint8_t)-65, (uint8_t)-71, (uint8_t)-71,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 19, 4, (uint8_t)-65, (uint8_t)-65, (uint8_t)-71, (uint8_t)-71,
- 0, 0, 0, 0, 0, 0, 0, 0};
- const uint8_t maskLUT_[32] = {
- /* 0 : 0b1010_1000*/ 0xa8,
- /* 1 .. 9 : 0b1111_1000*/ 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
- /* 10 : 0b1111_0000*/ 0xf0,
- /* 11 : 0b0101_0100*/ 0x54,
- /* 12 .. 14 : 0b0101_0000*/ 0x50, 0x50, 0x50,
- /* 15 : 0b0101_0100*/ 0x54,
- /* 0 : 0b1010_1000*/ 0xa8,
- /* 1 .. 9 : 0b1111_1000*/ 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
- /* 10 : 0b1111_0000*/ 0xf0,
- /* 11 : 0b0101_0100*/ 0x54,
- /* 12 .. 14 : 0b0101_0000*/ 0x50, 0x50, 0x50,
- /* 15 : 0b0101_0100*/ 0x54
- };
- const uint8_t bitposLUT_[32] = {
- 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
- };
- const uint8x16x2_t shiftLUT = *((const uint8x16x2_t *)shiftLUT_);
- const uint8x16x2_t maskLUT = *((const uint8x16x2_t *)maskLUT_);
- const uint8x16x2_t bitposLUT = *((const uint8x16x2_t *)bitposLUT_);;
- do {
- const uint8x16x4_t x = vld4q_u8((const unsigned char *)in);
- uint8x16_t error_a;
- uint8x16_t error_b;
- uint8x16_t error_c;
- uint8x16_t error_d;
- uint8x16_t field_a = decode_fromascii(x.val[0], &error_a, shiftLUT, maskLUT, bitposLUT);
- uint8x16_t field_b = decode_fromascii(x.val[1], &error_b, shiftLUT, maskLUT, bitposLUT);
- uint8x16_t field_c = decode_fromascii(x.val[2], &error_c, shiftLUT, maskLUT, bitposLUT);
- uint8x16_t field_d = decode_fromascii(x.val[3], &error_d, shiftLUT, maskLUT, bitposLUT);
- const uint8x16_t err = vorrq_u8(vorrq_u8(error_a, error_b), vorrq_u8(error_c, error_d));
- union {uint8_t mem[16]; uint64_t dw[2]; } error;
- vst1q_u8(error.mem, err);
- /* Check that the input only contains bytes belonging to the alphabet of
- Base64. If there are errors, decode the rest of the string with the
- scalar decoder. */
- if (error.dw[0] | error.dw[1])
- break;
- uint8x16x3_t result;
- result.val[0] = vorrq_u8(vshrq_n_u8(field_b, 4), vshlq_n_u8(field_a, 2));
- result.val[1] = vorrq_u8(vshrq_n_u8(field_c, 2), vshlq_n_u8(field_b, 4));
- result.val[2] = vorrq_u8(field_d, vshlq_n_u8(field_c, 6));
- vst3q_u8((unsigned char *)out, result);
- out += 16 * 3;
- in += 16 * 4;
- inl -= 16 * 4;
- } while (inl >= 16 * 4);
- *left = inl;
- return out - out_orig;
- }
- #endif /* __aarch64__ */
- static zend_always_inline int php_base64_decode_impl(const unsigned char *in, size_t inl, unsigned char *out, size_t *outl, bool strict) /* {{{ */
- {
- int ch;
- size_t i = 0, padding = 0, j = *outl;
- #ifdef __aarch64__
- if (inl >= 16 * 4) {
- size_t left = 0;
- j += neon_base64_decode(in, inl, out, &left);
- i = inl - left;
- in += i;
- inl = left;
- }
- #endif
- /* run through the whole string, converting as we go */
- while (inl-- > 0) {
- ch = *in++;
- if (ch == base64_pad) {
- padding++;
- continue;
- }
- ch = base64_reverse_table[ch];
- if (!strict) {
- /* skip unknown characters and whitespace */
- if (ch < 0) {
- continue;
- }
- } else {
- /* skip whitespace */
- if (ch == -1) {
- continue;
- }
- /* fail on bad characters or if any data follows padding */
- if (ch == -2 || padding) {
- goto fail;
- }
- }
- switch (i % 4) {
- case 0:
- out[j] = ch << 2;
- break;
- case 1:
- out[j++] |= ch >> 4;
- out[j] = (ch & 0x0f) << 4;
- break;
- case 2:
- out[j++] |= ch >>2;
- out[j] = (ch & 0x03) << 6;
- break;
- case 3:
- out[j++] |= ch;
- break;
- }
- i++;
- }
- /* fail if the input is truncated (only one char in last group) */
- if (strict && i % 4 == 1) {
- goto fail;
- }
- /* fail if the padding length is wrong (not VV==, VVV=), but accept zero padding
- * RFC 4648: "In some circumstances, the use of padding [--] is not required" */
- if (strict && padding && (padding > 2 || (i + padding) % 4 != 0)) {
- goto fail;
- }
- *outl = j;
- out[j] = '\0';
- return 1;
- fail:
- return 0;
- }
- /* }}} */
- /* {{{ php_base64_encode */
- #if ZEND_INTRIN_AVX2_NATIVE
- # undef ZEND_INTRIN_SSSE3_NATIVE
- # undef ZEND_INTRIN_SSSE3_RESOLVER
- # undef ZEND_INTRIN_SSSE3_FUNC_PROTO
- # undef ZEND_INTRIN_SSSE3_FUNC_PTR
- #elif ZEND_INTRIN_AVX2_FUNC_PROTO && ZEND_INTRIN_SSSE3_NATIVE
- # undef ZEND_INTRIN_SSSE3_NATIVE
- # undef ZEND_INTRIN_SSSE3_RESOLVER
- # define ZEND_INTRIN_SSSE3_RESOLVER 1
- # define ZEND_INTRIN_SSSE3_FUNC_PROTO 1
- # undef ZEND_INTRIN_SSSE3_FUNC_DECL
- # ifdef HAVE_FUNC_ATTRIBUTE_TARGET
- # define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func __attribute__((target("ssse3")))
- # else
- # define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func
- # endif
- #elif ZEND_INTRIN_AVX2_FUNC_PTR && ZEND_INTRIN_SSSE3_NATIVE
- # undef ZEND_INTRIN_SSSE3_NATIVE
- # undef ZEND_INTRIN_SSSE3_RESOLVER
- # define ZEND_INTRIN_SSSE3_RESOLVER 1
- # define ZEND_INTRIN_SSSE3_FUNC_PTR 1
- # undef ZEND_INTRIN_SSSE3_FUNC_DECL
- # ifdef HAVE_FUNC_ATTRIBUTE_TARGET
- # define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func __attribute__((target("ssse3")))
- # else
- # define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func
- # endif
- #endif
- #if ZEND_INTRIN_AVX2_NATIVE
- # include <immintrin.h>
- #elif ZEND_INTRIN_SSSE3_NATIVE
- # include <tmmintrin.h>
- #elif (ZEND_INTRIN_SSSE3_RESOLVER || ZEND_INTRIN_AVX2_RESOLVER)
- # if ZEND_INTRIN_AVX2_RESOLVER
- # include <immintrin.h>
- # else
- # include <tmmintrin.h>
- # endif /* ZEND_INTRIN_AVX2_RESOLVER */
- # include "Zend/zend_cpuinfo.h"
- # if ZEND_INTRIN_AVX2_RESOLVER
- ZEND_INTRIN_AVX2_FUNC_DECL(zend_string *php_base64_encode_avx2(const unsigned char *str, size_t length));
- ZEND_INTRIN_AVX2_FUNC_DECL(zend_string *php_base64_decode_ex_avx2(const unsigned char *str, size_t length, bool strict));
- # endif
- # if ZEND_INTRIN_SSSE3_RESOLVER
- ZEND_INTRIN_SSSE3_FUNC_DECL(zend_string *php_base64_encode_ssse3(const unsigned char *str, size_t length));
- ZEND_INTRIN_SSSE3_FUNC_DECL(zend_string *php_base64_decode_ex_ssse3(const unsigned char *str, size_t length, bool strict));
- # endif
- zend_string *php_base64_encode_default(const unsigned char *str, size_t length);
- zend_string *php_base64_decode_ex_default(const unsigned char *str, size_t length, bool strict);
- # if (ZEND_INTRIN_AVX2_FUNC_PROTO || ZEND_INTRIN_SSSE3_FUNC_PROTO)
- PHPAPI zend_string *php_base64_encode(const unsigned char *str, size_t length) __attribute__((ifunc("resolve_base64_encode")));
- PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, bool strict) __attribute__((ifunc("resolve_base64_decode")));
- typedef zend_string *(*base64_encode_func_t)(const unsigned char *, size_t);
- typedef zend_string *(*base64_decode_func_t)(const unsigned char *, size_t, bool);
- ZEND_NO_SANITIZE_ADDRESS
- ZEND_ATTRIBUTE_UNUSED /* clang mistakenly warns about this */
- static base64_encode_func_t resolve_base64_encode(void) {
- # if ZEND_INTRIN_AVX2_FUNC_PROTO
- if (zend_cpu_supports_avx2()) {
- return php_base64_encode_avx2;
- } else
- # endif
- #if ZEND_INTRIN_SSSE3_FUNC_PROTO
- if (zend_cpu_supports_ssse3()) {
- return php_base64_encode_ssse3;
- }
- #endif
- return php_base64_encode_default;
- }
- ZEND_NO_SANITIZE_ADDRESS
- ZEND_ATTRIBUTE_UNUSED /* clang mistakenly warns about this */
- static base64_decode_func_t resolve_base64_decode(void) {
- # if ZEND_INTRIN_AVX2_FUNC_PROTO
- if (zend_cpu_supports_avx2()) {
- return php_base64_decode_ex_avx2;
- } else
- # endif
- #if ZEND_INTRIN_SSSE3_FUNC_PROTO
- if (zend_cpu_supports_ssse3()) {
- return php_base64_decode_ex_ssse3;
- }
- #endif
- return php_base64_decode_ex_default;
- }
- # else /* (ZEND_INTRIN_AVX2_FUNC_PROTO || ZEND_INTRIN_SSSE3_FUNC_PROTO) */
- PHPAPI zend_string *(*php_base64_encode_ptr)(const unsigned char *str, size_t length) = NULL;
- PHPAPI zend_string *(*php_base64_decode_ex_ptr)(const unsigned char *str, size_t length, bool strict) = NULL;
- PHPAPI zend_string *php_base64_encode(const unsigned char *str, size_t length) {
- return php_base64_encode_ptr(str, length);
- }
- PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, bool strict) {
- return php_base64_decode_ex_ptr(str, length, strict);
- }
- PHP_MINIT_FUNCTION(base64_intrin)
- {
- # if ZEND_INTRIN_AVX2_FUNC_PTR
- if (zend_cpu_supports_avx2()) {
- php_base64_encode_ptr = php_base64_encode_avx2;
- php_base64_decode_ex_ptr = php_base64_decode_ex_avx2;
- } else
- # endif
- #if ZEND_INTRIN_SSSE3_FUNC_PTR
- if (zend_cpu_supports_ssse3()) {
- php_base64_encode_ptr = php_base64_encode_ssse3;
- php_base64_decode_ex_ptr = php_base64_decode_ex_ssse3;
- } else
- #endif
- {
- php_base64_encode_ptr = php_base64_encode_default;
- php_base64_decode_ex_ptr = php_base64_decode_ex_default;
- }
- return SUCCESS;
- }
- # endif /* (ZEND_INTRIN_AVX2_FUNC_PROTO || ZEND_INTRIN_SSSE3_FUNC_PROTO) */
- #endif /* ZEND_INTRIN_AVX2_NATIVE */
- #if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER
- # if ZEND_INTRIN_AVX2_RESOLVER && defined(HAVE_FUNC_ATTRIBUTE_TARGET)
- static __m256i php_base64_encode_avx2_reshuffle(__m256i in) __attribute__((target("avx2")));
- static __m256i php_base64_encode_avx2_translate(__m256i in) __attribute__((target("avx2")));
- # endif
- static __m256i php_base64_encode_avx2_reshuffle(__m256i in)
- {
- /* This one works with shifted (4 bytes) input in order to
- * be able to work efficiently in the 2 128-bit lanes */
- __m256i t0, t1, t2, t3;
- /* input, bytes MSB to LSB:
- * 0 0 0 0 x w v u t s r q p o n m
- * l k j i h g f e d c b a 0 0 0 0 */
- in = _mm256_shuffle_epi8(in, _mm256_set_epi8(
- 10, 11, 9, 10,
- 7, 8, 6, 7,
- 4, 5, 3, 4,
- 1, 2, 0, 1,
- 14, 15, 13, 14,
- 11, 12, 10, 11,
- 8, 9, 7, 8,
- 5, 6, 4, 5));
- t0 = _mm256_and_si256(in, _mm256_set1_epi32(0x0fc0fc00));
- t1 = _mm256_mulhi_epu16(t0, _mm256_set1_epi32(0x04000040));
- t2 = _mm256_and_si256(in, _mm256_set1_epi32(0x003f03f0));
- t3 = _mm256_mullo_epi16(t2, _mm256_set1_epi32(0x01000010));
- return _mm256_or_si256(t1, t3);
- /* 00xxxxxx 00wwwwXX 00vvWWWW 00VVVVVV
- * 00uuuuuu 00ttttUU 00ssTTTT 00SSSSSS
- * 00rrrrrr 00qqqqRR 00ppQQQQ 00PPPPPP
- * 00oooooo 00nnnnOO 00mmNNNN 00MMMMMM
- * 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ
- * 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG
- * 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD
- * 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA */
- }
- static __m256i php_base64_encode_avx2_translate(__m256i in)
- {
- __m256i lut, indices, mask;
- lut = _mm256_setr_epi8(
- 65, 71, -4, -4, -4, -4, -4, -4,
- -4, -4, -4, -4, -19, -16, 0, 0,
- 65, 71, -4, -4, -4, -4, -4, -4,
- -4, -4, -4, -4, -19, -16, 0, 0);
- indices = _mm256_subs_epu8(in, _mm256_set1_epi8(51));
- mask = _mm256_cmpgt_epi8(in, _mm256_set1_epi8(25));
- indices = _mm256_sub_epi8(indices, mask);
- return _mm256_add_epi8(in, _mm256_shuffle_epi8(lut, indices));
- }
- #endif /* ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER */
- #if ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER
- # if ZEND_INTRIN_SSSE3_RESOLVER && defined(HAVE_FUNC_ATTRIBUTE_TARGET)
- static __m128i php_base64_encode_ssse3_reshuffle(__m128i in) __attribute__((target("ssse3")));
- static __m128i php_base64_encode_ssse3_translate(__m128i in) __attribute__((target("ssse3")));
- # endif
- static __m128i php_base64_encode_ssse3_reshuffle(__m128i in)
- {
- __m128i t0, t1, t2, t3;
- /* input, bytes MSB to LSB:
- * 0 0 0 0 l k j i h g f e d c b a */
- in = _mm_shuffle_epi8(in, _mm_set_epi8(
- 10, 11, 9, 10,
- 7, 8, 6, 7,
- 4, 5, 3, 4,
- 1, 2, 0, 1));
- t0 = _mm_and_si128(in, _mm_set1_epi32(0x0fc0fc00));
- t1 = _mm_mulhi_epu16(t0, _mm_set1_epi32(0x04000040));
- t2 = _mm_and_si128(in, _mm_set1_epi32(0x003f03f0));
- t3 = _mm_mullo_epi16(t2, _mm_set1_epi32(0x01000010));
- /* output (upper case are MSB, lower case are LSB):
- * 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ
- * 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG
- * 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD
- * 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA */
- return _mm_or_si128(t1, t3);
- }
- static __m128i php_base64_encode_ssse3_translate(__m128i in)
- {
- __m128i mask, indices;
- __m128i lut = _mm_setr_epi8(
- 65, 71, -4, -4,
- -4, -4, -4, -4,
- -4, -4, -4, -4,
- -19, -16, 0, 0
- );
- /* Translate values 0..63 to the Base64 alphabet. There are five sets:
- * # From To Abs Index Characters
- * 0 [0..25] [65..90] +65 0 ABCDEFGHIJKLMNOPQRSTUVWXYZ
- * 1 [26..51] [97..122] +71 1 abcdefghijklmnopqrstuvwxyz
- * 2 [52..61] [48..57] -4 [2..11] 0123456789
- * 3 [62] [43] -19 12 +
- * 4 [63] [47] -16 13 / */
- /* Create LUT indices from input:
- * the index for range #0 is right, others are 1 less than expected: */
- indices = _mm_subs_epu8(in, _mm_set1_epi8(51));
- /* mask is 0xFF (-1) for range #[1..4] and 0x00 for range #0: */
- mask = _mm_cmpgt_epi8(in, _mm_set1_epi8(25));
- /* subtract -1, so add 1 to indices for range #[1..4], All indices are now correct: */
- indices = _mm_sub_epi8(indices, mask);
- /* Add offsets to input values: */
- return _mm_add_epi8(in, _mm_shuffle_epi8(lut, indices));
- }
/* SSSE3 bulk encode loop. Expects `c` (input cursor), `o` (output cursor)
 * and `length` (remaining input bytes) in the expanding scope. Each pass
 * loads 16 bytes, encodes the first 12 of them into 16 characters and
 * advances the cursors; it runs while more than 15 input bytes remain. */
#define PHP_BASE64_ENCODE_SSSE3_LOOP \
	while (length > 15) { \
		__m128i chunk = _mm_loadu_si128((__m128i *)c); \
		\
		chunk = php_base64_encode_ssse3_reshuffle(chunk); \
		chunk = php_base64_encode_ssse3_translate(chunk); \
		_mm_storeu_si128((__m128i *)o, chunk); \
		\
		c += 12; \
		o += 16; \
		length -= 12; \
	}
- #endif /* ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER */
- #if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER
- # if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_SSSE3_NATIVE
- PHPAPI zend_string *php_base64_encode(const unsigned char *str, size_t length)
- # elif ZEND_INTRIN_AVX2_RESOLVER
- zend_string *php_base64_encode_avx2(const unsigned char *str, size_t length)
- # else /* ZEND_INTRIN_SSSE3_RESOLVER */
- zend_string *php_base64_encode_ssse3(const unsigned char *str, size_t length)
- # endif
- {
- const unsigned char *c = str;
- unsigned char *o;
- zend_string *result;
- result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0);
- o = (unsigned char *)ZSTR_VAL(result);
- # if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER
- if (length > 31) {
- __m256i s = _mm256_loadu_si256((__m256i *)c);
- s = _mm256_permutevar8x32_epi32(s, _mm256_setr_epi32(0, 0, 1, 2, 3, 4, 5, 6));
- for (;;) {
- s = php_base64_encode_avx2_reshuffle(s);
- s = php_base64_encode_avx2_translate(s);
- _mm256_storeu_si256((__m256i *)o, s);
- c += 24;
- o += 32;
- length -= 24;
- if (length < 28) {
- break;
- }
- s = _mm256_loadu_si256((__m256i *)(c - 4));
- }
- }
- # else
- PHP_BASE64_ENCODE_SSSE3_LOOP;
- # endif
- o = php_base64_encode_impl(c, length, o);
- ZSTR_LEN(result) = (o - (unsigned char *)ZSTR_VAL(result));
- return result;
- }
- # if ZEND_INTRIN_SSSE3_RESOLVER && ZEND_INTRIN_AVX2_RESOLVER
- zend_string *php_base64_encode_ssse3(const unsigned char *str, size_t length)
- {
- const unsigned char *c = str;
- unsigned char *o;
- zend_string *result;
- result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0);
- o = (unsigned char *)ZSTR_VAL(result);
- PHP_BASE64_ENCODE_SSSE3_LOOP;
- o = php_base64_encode_impl(c, length, o);
- ZSTR_LEN(result) = (o - (unsigned char *)ZSTR_VAL(result));
- return result;
- }
- # endif
- #endif /* ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER */
- /* }}} */
- #if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER
- # if ZEND_INTRIN_AVX2_RESOLVER && defined(HAVE_FUNC_ATTRIBUTE_TARGET)
- static __m256i php_base64_decode_avx2_reshuffle(__m256i in) __attribute__((target("avx2")));
- # endif
- static __m256i php_base64_decode_avx2_reshuffle(__m256i in)
- {
- __m256i merge_ab_and_bc, out;
- merge_ab_and_bc = _mm256_maddubs_epi16(in, _mm256_set1_epi32(0x01400140));
- out = _mm256_madd_epi16(merge_ab_and_bc, _mm256_set1_epi32(0x00011000));
- out = _mm256_shuffle_epi8(out, _mm256_setr_epi8(
- 2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1,
- 2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1));
- return _mm256_permutevar8x32_epi32(out, _mm256_setr_epi32(0, 1, 2, 4, 5, 6, -1, -1));
- }
- #endif
- #if ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER
- # if ZEND_INTRIN_SSSE3_RESOLVER && defined(HAVE_FUNC_ATTRIBUTE_TARGET)
- static __m128i php_base64_decode_ssse3_reshuffle(__m128i in) __attribute__((target("ssse3")));
- # endif
- static __m128i php_base64_decode_ssse3_reshuffle(__m128i in)
- {
- __m128i merge_ab_and_bc, out;
- merge_ab_and_bc = _mm_maddubs_epi16(in, _mm_set1_epi32(0x01400140));
- /* 0000kkkk LLllllll 0000JJJJ JJjjKKKK
- * 0000hhhh IIiiiiii 0000GGGG GGggHHHH
- * 0000eeee FFffffff 0000DDDD DDddEEEE
- * 0000bbbb CCcccccc 0000AAAA AAaaBBBB */
- out = _mm_madd_epi16(merge_ab_and_bc, _mm_set1_epi32(0x00011000));
- /* 00000000 JJJJJJjj KKKKkkkk LLllllll
- * 00000000 GGGGGGgg HHHHhhhh IIiiiiii
- * 00000000 DDDDDDdd EEEEeeee FFffffff
- * 00000000 AAAAAAaa BBBBbbbb CCcccccc */
- return _mm_shuffle_epi8(out, _mm_setr_epi8(
- 2, 1, 0,
- 6, 5, 4,
- 10, 9, 8,
- 14, 13, 12,
- -1, -1, -1, -1));
- /* 00000000 00000000 00000000 00000000
- * LLllllll KKKKkkkk JJJJJJjj IIiiiiii
- * HHHHhhhh GGGGGGgg FFffffff EEEEeeee
- * DDDDDDdd CCcccccc BBBBbbbb AAAAAAaa */
- }
/* SSSE3 bulk decode loop. Expects `c` (input cursor), `o` (output cursor),
 * `outl` (bytes produced so far) and `length` (remaining input) in the
 * expanding scope. Each pass validates 16 characters with two nibble
 * lookups and, if all of them are accepted, converts them to 12 bytes.
 * The loop stops at the first chunk containing a flagged byte, leaving it
 * for the scalar decoder.
 * NOTE(review): the `15 + 6 + 2` bound presumably keeps the 16-byte store
 * inside the output buffer -- confirm before changing it. */
#define PHP_BASE64_DECODE_SSSE3_LOOP \
	while (length > 15 + 6 + 2) { \
		const __m128i lut_lo = _mm_setr_epi8( \
				0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, \
				0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A); \
		const __m128i lut_hi = _mm_setr_epi8( \
				0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, \
				0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10); \
		const __m128i lut_roll = _mm_setr_epi8( \
				0, 16, 19, 4, -65, -65, -71, -71, \
				0, 0, 0, 0, 0, 0, 0, 0); \
		__m128i s = _mm_loadu_si128((__m128i *)c); \
		const __m128i hi_nibbles = _mm_and_si128( \
				_mm_srli_epi32(s, 4), _mm_set1_epi8(0x2f)); \
		const __m128i lo_nibbles = _mm_and_si128(s, _mm_set1_epi8(0x2f)); \
		const __m128i hi = _mm_shuffle_epi8(lut_hi, hi_nibbles); \
		const __m128i lo = _mm_shuffle_epi8(lut_lo, lo_nibbles); \
		\
		if (UNEXPECTED( \
			_mm_movemask_epi8( \
				_mm_cmpgt_epi8( \
					_mm_and_si128(lo, hi), _mm_set1_epi8(0))))) { \
			break; \
		} \
		\
		{ \
			const __m128i eq_2f = _mm_cmpeq_epi8(s, _mm_set1_epi8(0x2f)); \
			const __m128i roll = _mm_shuffle_epi8( \
					lut_roll, _mm_add_epi8(eq_2f, hi_nibbles)); \
			\
			s = _mm_add_epi8(s, roll); \
			s = php_base64_decode_ssse3_reshuffle(s); \
			\
			_mm_storeu_si128((__m128i *)o, s); \
			\
			c += 16; \
			o += 12; \
			outl += 12; \
			length -= 16; \
		} \
	}
- #endif
- #if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER
- # if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_SSSE3_NATIVE
- PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, bool strict)
- # elif ZEND_INTRIN_AVX2_RESOLVER
- zend_string *php_base64_decode_ex_avx2(const unsigned char *str, size_t length, bool strict)
- # else
- zend_string *php_base64_decode_ex_ssse3(const unsigned char *str, size_t length, bool strict)
- # endif
- {
- const unsigned char *c = str;
- unsigned char *o;
- size_t outl = 0;
- zend_string *result;
- result = zend_string_alloc(length, 0);
- o = (unsigned char *)ZSTR_VAL(result);
- /* See: "Faster Base64 Encoding and Decoding using AVX2 Instructions"
- * https://arxiv.org/pdf/1704.00605.pdf */
- # if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER
- while (length > 31 + 11 + 2) {
- __m256i lut_lo, lut_hi, lut_roll;
- __m256i hi_nibbles, lo_nibbles, hi, lo;
- __m256i str = _mm256_loadu_si256((__m256i *)c);
- lut_lo = _mm256_setr_epi8(
- 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
- 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A,
- 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
- 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A);
- lut_hi = _mm256_setr_epi8(
- 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
- 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
- 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
- 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10);
- lut_roll = _mm256_setr_epi8(
- 0, 16, 19, 4, -65, -65, -71, -71,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 16, 19, 4, -65, -65, -71, -71,
- 0, 0, 0, 0, 0, 0, 0, 0);
- hi_nibbles = _mm256_and_si256(_mm256_srli_epi32(str, 4), _mm256_set1_epi8(0x2f));
- lo_nibbles = _mm256_and_si256(str, _mm256_set1_epi8(0x2f));
- hi = _mm256_shuffle_epi8(lut_hi, hi_nibbles);
- lo = _mm256_shuffle_epi8(lut_lo, lo_nibbles);
- if (!_mm256_testz_si256(lo, hi)) {
- break;
- } else {
- __m256i eq_2f, roll;
- eq_2f = _mm256_cmpeq_epi8(str, _mm256_set1_epi8(0x2f));
- roll = _mm256_shuffle_epi8(lut_roll, _mm256_add_epi8(eq_2f, hi_nibbles));
- str = _mm256_add_epi8(str, roll);
- str = php_base64_decode_avx2_reshuffle(str);
- _mm256_storeu_si256((__m256i *)o, str);
- c += 32;
- o += 24;
- outl += 24;
- length -= 32;
- }
- }
- # else
- PHP_BASE64_DECODE_SSSE3_LOOP;
- # endif
- if (!php_base64_decode_impl(c, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) {
- zend_string_efree(result);
- return NULL;
- }
- ZSTR_LEN(result) = outl;
- return result;
- }
- # if ZEND_INTRIN_SSSE3_RESOLVER && ZEND_INTRIN_AVX2_RESOLVER
- zend_string *php_base64_decode_ex_ssse3(const unsigned char *str, size_t length, bool strict)
- {
- const unsigned char *c = str;
- unsigned char *o;
- size_t outl = 0;
- zend_string *result;
- result = zend_string_alloc(length, 0);
- o = (unsigned char *)ZSTR_VAL(result);
- PHP_BASE64_DECODE_SSSE3_LOOP;
- if (!php_base64_decode_impl(c, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) {
- zend_string_efree(result);
- return NULL;
- }
- ZSTR_LEN(result) = outl;
- return result;
- }
- # endif
- #endif /* ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER */
- #if !ZEND_INTRIN_AVX2_NATIVE && !ZEND_INTRIN_SSSE3_NATIVE
- #if ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_RESOLVER
- zend_string *php_base64_encode_default(const unsigned char *str, size_t length)
- #else
- PHPAPI zend_string *php_base64_encode(const unsigned char *str, size_t length)
- #endif
- {
- unsigned char *p;
- zend_string *result;
- result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0);
- p = (unsigned char *)ZSTR_VAL(result);
- p = php_base64_encode_impl(str, length, p);
- ZSTR_LEN(result) = (p - (unsigned char *)ZSTR_VAL(result));
- return result;
- }
- #endif
- #if !ZEND_INTRIN_AVX2_NATIVE && !ZEND_INTRIN_SSSE3_NATIVE
- #if ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_RESOLVER
- zend_string *php_base64_decode_ex_default(const unsigned char *str, size_t length, bool strict)
- #else
- PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, bool strict)
- #endif
- {
- zend_string *result;
- size_t outl = 0;
- result = zend_string_alloc(length, 0);
- if (!php_base64_decode_impl(str, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) {
- zend_string_efree(result);
- return NULL;
- }
- ZSTR_LEN(result) = outl;
- return result;
- }
- #endif
- /* }}} */
- /* {{{ Encodes string using MIME base64 algorithm */
- PHP_FUNCTION(base64_encode)
- {
- char *str;
- size_t str_len;
- zend_string *result;
- ZEND_PARSE_PARAMETERS_START(1, 1)
- Z_PARAM_STRING(str, str_len)
- ZEND_PARSE_PARAMETERS_END();
- result = php_base64_encode((unsigned char*)str, str_len);
- RETURN_STR(result);
- }
- /* }}} */
- /* {{{ Decodes string using MIME base64 algorithm */
- PHP_FUNCTION(base64_decode)
- {
- char *str;
- bool strict = 0;
- size_t str_len;
- zend_string *result;
- ZEND_PARSE_PARAMETERS_START(1, 2)
- Z_PARAM_STRING(str, str_len)
- Z_PARAM_OPTIONAL
- Z_PARAM_BOOL(strict)
- ZEND_PARSE_PARAMETERS_END();
- result = php_base64_decode_ex((unsigned char*)str, str_len, strict);
- if (result != NULL) {
- RETURN_STR(result);
- } else {
- RETURN_FALSE;
- }
- }
- /* }}} */
|