zend_string.h 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468
  1. /*
  2. +----------------------------------------------------------------------+
  3. | Zend Engine |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 1998-2018 Zend Technologies Ltd. (http://www.zend.com) |
  6. +----------------------------------------------------------------------+
  7. | This source file is subject to version 2.00 of the Zend license, |
  8. | that is bundled with this package in the file LICENSE, and is |
  9. | available through the world-wide-web at the following url: |
  10. | http://www.zend.com/license/2_00.txt. |
  11. | If you did not receive a copy of the Zend license and are unable to |
  12. | obtain it through the world-wide-web, please send a note to |
  13. | license@zend.com so we can mail you a copy immediately. |
  14. +----------------------------------------------------------------------+
  15. | Authors: Dmitry Stogov <dmitry@php.net> |
  16. +----------------------------------------------------------------------+
  17. */
  18. #ifndef ZEND_STRING_H
  19. #define ZEND_STRING_H
  20. #include "zend.h"
  21. BEGIN_EXTERN_C()
  22. typedef void (*zend_string_copy_storage_func_t)(void);
  23. typedef zend_string *(ZEND_FASTCALL *zend_new_interned_string_func_t)(zend_string *str);
  24. typedef zend_string *(ZEND_FASTCALL *zend_string_init_interned_func_t)(const char *str, size_t size, int permanent);
  25. ZEND_API extern zend_new_interned_string_func_t zend_new_interned_string;
  26. ZEND_API extern zend_string_init_interned_func_t zend_string_init_interned;
  27. ZEND_API zend_ulong ZEND_FASTCALL zend_string_hash_func(zend_string *str);
  28. ZEND_API zend_ulong ZEND_FASTCALL zend_hash_func(const char *str, size_t len);
  29. ZEND_API zend_string* ZEND_FASTCALL zend_interned_string_find_permanent(zend_string *str);
  30. ZEND_API void zend_interned_strings_init(void);
  31. ZEND_API void zend_interned_strings_dtor(void);
  32. ZEND_API void zend_interned_strings_activate(void);
  33. ZEND_API void zend_interned_strings_deactivate(void);
  34. ZEND_API void zend_interned_strings_set_request_storage_handlers(zend_new_interned_string_func_t handler, zend_string_init_interned_func_t init_handler);
  35. ZEND_API void zend_interned_strings_set_permanent_storage_copy_handlers(zend_string_copy_storage_func_t copy_handler, zend_string_copy_storage_func_t restore_handler);
  36. ZEND_API void zend_interned_strings_switch_storage(zend_bool request);
  37. ZEND_API extern zend_string *zend_empty_string;
  38. ZEND_API extern zend_string *zend_one_char_string[256];
  39. ZEND_API extern zend_string **zend_known_strings;
  40. END_EXTERN_C()
  /* Shortcuts: field accessors for a zend_string pointer (all usable as lvalues). */
  #define ZSTR_VAL(zstr)  ((zstr)->val)
  #define ZSTR_LEN(zstr)  ((zstr)->len)
  #define ZSTR_H(zstr)    ((zstr)->h)
  /* Unlike ZSTR_H, this goes through zend_string_hash_val() instead of reading the field directly. */
  #define ZSTR_HASH(zstr) (zend_string_hash_val(zstr))
  /* Compatibility macros: older spellings that forward unchanged to their ZSTR_* counterparts. */
  #define IS_INTERNED(zs)                         ZSTR_IS_INTERNED(zs)
  #define STR_EMPTY_ALLOC()                       ZSTR_EMPTY_ALLOC()
  #define _STR_HEADER_SIZE                        _ZSTR_HEADER_SIZE
  #define STR_ALLOCA_ALLOC(zs, n, heap_flag)      ZSTR_ALLOCA_ALLOC(zs, n, heap_flag)
  #define STR_ALLOCA_INIT(zs, src, n, heap_flag)  ZSTR_ALLOCA_INIT(zs, src, n, heap_flag)
  #define STR_ALLOCA_FREE(zs, heap_flag)          ZSTR_ALLOCA_FREE(zs, heap_flag)
  53. /*---*/
  /* Non-zero when the IS_STR_INTERNED bit is set in the string's GC flags. */
  #define ZSTR_IS_INTERNED(s) (GC_FLAGS(s) & IS_STR_INTERNED)
  /* Evaluates to the shared zend_empty_string object. */
  #define ZSTR_EMPTY_ALLOC() zend_empty_string
  /* Entry c of zend_one_char_string; the table is declared with 256 entries, so c must be in 0..255. */
  #define ZSTR_CHAR(c) zend_one_char_string[(c)]
  /* Entry idx of the zend_known_strings table. */
  #define ZSTR_KNOWN(idx) zend_known_strings[(idx)]
  /* Offset of the character payload inside zend_string. */
  #define _ZSTR_HEADER_SIZE XtOffsetOf(zend_string, val)
  /*
   * Bytes needed for a string of len characters: header + payload + one
   * extra byte (used for the terminating NUL by the helpers in this file).
   * len is parenthesised so that compound arguments such as `a ? b : c`
   * are added as a whole instead of being re-associated by precedence.
   */
  #define _ZSTR_STRUCT_SIZE(len) (_ZSTR_HEADER_SIZE + (len) + 1)
  /*
   * Obtain storage for a string of _len characters through do_alloca() and
   * initialise its header: refcount 1, type info IS_STRING, hash cleared,
   * length set to _len. The payload itself is left uninitialised.
   * use_heap is forwarded to do_alloca(); pass the same variable to
   * ZSTR_ALLOCA_FREE() afterwards.
   * _len is expanded twice, so it must not have side effects; it is
   * parenthesised everywhere so compound expressions keep their meaning.
   */
  #define ZSTR_ALLOCA_ALLOC(str, _len, use_heap) do { \
      (str) = (zend_string *)do_alloca(ZEND_MM_ALIGNED_SIZE_EX(_ZSTR_STRUCT_SIZE((_len)), 8), (use_heap)); \
      GC_SET_REFCOUNT(str, 1); \
      GC_TYPE_INFO(str) = IS_STRING; \
      zend_string_forget_hash_val(str); \
      ZSTR_LEN(str) = (_len); \
  } while (0)
  /*
   * Same as ZSTR_ALLOCA_ALLOC, then copies len bytes from s into the new
   * string and appends a terminating NUL.
   */
  #define ZSTR_ALLOCA_INIT(str, s, len, use_heap) do { \
      ZSTR_ALLOCA_ALLOC(str, (len), use_heap); \
      memcpy(ZSTR_VAL(str), (s), (len)); \
      ZSTR_VAL(str)[(len)] = '\0'; \
  } while (0)
  /* Release a string obtained with ZSTR_ALLOCA_ALLOC / ZSTR_ALLOCA_INIT. */
  #define ZSTR_ALLOCA_FREE(str, use_heap) free_alloca((str), (use_heap))
  73. /*---*/
  74. static zend_always_inline zend_ulong zend_string_hash_val(zend_string *s)
  75. {
  76. return ZSTR_H(s) ? ZSTR_H(s) : zend_string_hash_func(s);
  77. }
  78. static zend_always_inline void zend_string_forget_hash_val(zend_string *s)
  79. {
  80. ZSTR_H(s) = 0;
  81. }
  82. static zend_always_inline uint32_t zend_string_refcount(const zend_string *s)
  83. {
  84. if (!ZSTR_IS_INTERNED(s)) {
  85. return GC_REFCOUNT(s);
  86. }
  87. return 1;
  88. }
  89. static zend_always_inline uint32_t zend_string_addref(zend_string *s)
  90. {
  91. if (!ZSTR_IS_INTERNED(s)) {
  92. return GC_ADDREF(s);
  93. }
  94. return 1;
  95. }
  96. static zend_always_inline uint32_t zend_string_delref(zend_string *s)
  97. {
  98. if (!ZSTR_IS_INTERNED(s)) {
  99. return GC_DELREF(s);
  100. }
  101. return 1;
  102. }
  103. static zend_always_inline zend_string *zend_string_alloc(size_t len, int persistent)
  104. {
  105. zend_string *ret = (zend_string *)pemalloc(ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(len)), persistent);
  106. GC_SET_REFCOUNT(ret, 1);
  107. GC_TYPE_INFO(ret) = IS_STRING | ((persistent ? IS_STR_PERSISTENT : 0) << GC_FLAGS_SHIFT);
  108. zend_string_forget_hash_val(ret);
  109. ZSTR_LEN(ret) = len;
  110. return ret;
  111. }
  112. static zend_always_inline zend_string *zend_string_safe_alloc(size_t n, size_t m, size_t l, int persistent)
  113. {
  114. zend_string *ret = (zend_string *)safe_pemalloc(n, m, ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(l)), persistent);
  115. GC_SET_REFCOUNT(ret, 1);
  116. GC_TYPE_INFO(ret) = IS_STRING | ((persistent ? IS_STR_PERSISTENT : 0) << GC_FLAGS_SHIFT);
  117. zend_string_forget_hash_val(ret);
  118. ZSTR_LEN(ret) = (n * m) + l;
  119. return ret;
  120. }
  121. static zend_always_inline zend_string *zend_string_init(const char *str, size_t len, int persistent)
  122. {
  123. zend_string *ret = zend_string_alloc(len, persistent);
  124. memcpy(ZSTR_VAL(ret), str, len);
  125. ZSTR_VAL(ret)[len] = '\0';
  126. return ret;
  127. }
  128. static zend_always_inline zend_string *zend_string_copy(zend_string *s)
  129. {
  130. if (!ZSTR_IS_INTERNED(s)) {
  131. GC_ADDREF(s);
  132. }
  133. return s;
  134. }
  135. static zend_always_inline zend_string *zend_string_dup(zend_string *s, int persistent)
  136. {
  137. if (ZSTR_IS_INTERNED(s)) {
  138. return s;
  139. } else {
  140. return zend_string_init(ZSTR_VAL(s), ZSTR_LEN(s), persistent);
  141. }
  142. }
  143. static zend_always_inline zend_string *zend_string_realloc(zend_string *s, size_t len, int persistent)
  144. {
  145. zend_string *ret;
  146. if (!ZSTR_IS_INTERNED(s)) {
  147. if (EXPECTED(GC_REFCOUNT(s) == 1)) {
  148. ret = (zend_string *)perealloc(s, ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(len)), persistent);
  149. ZSTR_LEN(ret) = len;
  150. zend_string_forget_hash_val(ret);
  151. return ret;
  152. }
  153. }
  154. ret = zend_string_alloc(len, persistent);
  155. memcpy(ZSTR_VAL(ret), ZSTR_VAL(s), MIN(len, ZSTR_LEN(s)) + 1);
  156. if (!ZSTR_IS_INTERNED(s)) {
  157. GC_DELREF(s);
  158. }
  159. return ret;
  160. }
  161. static zend_always_inline zend_string *zend_string_extend(zend_string *s, size_t len, int persistent)
  162. {
  163. zend_string *ret;
  164. ZEND_ASSERT(len >= ZSTR_LEN(s));
  165. if (!ZSTR_IS_INTERNED(s)) {
  166. if (EXPECTED(GC_REFCOUNT(s) == 1)) {
  167. ret = (zend_string *)perealloc(s, ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(len)), persistent);
  168. ZSTR_LEN(ret) = len;
  169. zend_string_forget_hash_val(ret);
  170. return ret;
  171. }
  172. }
  173. ret = zend_string_alloc(len, persistent);
  174. memcpy(ZSTR_VAL(ret), ZSTR_VAL(s), ZSTR_LEN(s) + 1);
  175. if (!ZSTR_IS_INTERNED(s)) {
  176. GC_DELREF(s);
  177. }
  178. return ret;
  179. }
  180. static zend_always_inline zend_string *zend_string_truncate(zend_string *s, size_t len, int persistent)
  181. {
  182. zend_string *ret;
  183. ZEND_ASSERT(len <= ZSTR_LEN(s));
  184. if (!ZSTR_IS_INTERNED(s)) {
  185. if (EXPECTED(GC_REFCOUNT(s) == 1)) {
  186. ret = (zend_string *)perealloc(s, ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(len)), persistent);
  187. ZSTR_LEN(ret) = len;
  188. zend_string_forget_hash_val(ret);
  189. return ret;
  190. }
  191. }
  192. ret = zend_string_alloc(len, persistent);
  193. memcpy(ZSTR_VAL(ret), ZSTR_VAL(s), len + 1);
  194. if (!ZSTR_IS_INTERNED(s)) {
  195. GC_DELREF(s);
  196. }
  197. return ret;
  198. }
  199. static zend_always_inline zend_string *zend_string_safe_realloc(zend_string *s, size_t n, size_t m, size_t l, int persistent)
  200. {
  201. zend_string *ret;
  202. if (!ZSTR_IS_INTERNED(s)) {
  203. if (GC_REFCOUNT(s) == 1) {
  204. ret = (zend_string *)safe_perealloc(s, n, m, ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(l)), persistent);
  205. ZSTR_LEN(ret) = (n * m) + l;
  206. zend_string_forget_hash_val(ret);
  207. return ret;
  208. }
  209. }
  210. ret = zend_string_safe_alloc(n, m, l, persistent);
  211. memcpy(ZSTR_VAL(ret), ZSTR_VAL(s), MIN((n * m) + l, ZSTR_LEN(s)) + 1);
  212. if (!ZSTR_IS_INTERNED(s)) {
  213. GC_DELREF(s);
  214. }
  215. return ret;
  216. }
  217. static zend_always_inline void zend_string_free(zend_string *s)
  218. {
  219. if (!ZSTR_IS_INTERNED(s)) {
  220. ZEND_ASSERT(GC_REFCOUNT(s) <= 1);
  221. pefree(s, GC_FLAGS(s) & IS_STR_PERSISTENT);
  222. }
  223. }
  224. static zend_always_inline void zend_string_efree(zend_string *s)
  225. {
  226. ZEND_ASSERT(!ZSTR_IS_INTERNED(s));
  227. ZEND_ASSERT(GC_REFCOUNT(s) <= 1);
  228. ZEND_ASSERT(!(GC_FLAGS(s) & IS_STR_PERSISTENT));
  229. efree(s);
  230. }
  231. static zend_always_inline void zend_string_release(zend_string *s)
  232. {
  233. if (!ZSTR_IS_INTERNED(s)) {
  234. if (GC_DELREF(s) == 0) {
  235. pefree(s, GC_FLAGS(s) & IS_STR_PERSISTENT);
  236. }
  237. }
  238. }
  239. static zend_always_inline void zend_string_release_ex(zend_string *s, int persistent)
  240. {
  241. if (!ZSTR_IS_INTERNED(s)) {
  242. if (GC_DELREF(s) == 0) {
  243. if (persistent) {
  244. ZEND_ASSERT(GC_FLAGS(s) & IS_STR_PERSISTENT);
  245. free(s);
  246. } else {
  247. ZEND_ASSERT(!(GC_FLAGS(s) & IS_STR_PERSISTENT));
  248. efree(s);
  249. }
  250. }
  251. }
  252. }
  253. #if defined(__GNUC__) && (defined(__i386__) || (defined(__x86_64__) && !defined(__ILP32__)))
  254. BEGIN_EXTERN_C()
  255. ZEND_API zend_bool ZEND_FASTCALL zend_string_equal_val(zend_string *s1, zend_string *s2);
  256. END_EXTERN_C()
  257. #else
  258. static zend_always_inline zend_bool zend_string_equal_val(zend_string *s1, zend_string *s2)
  259. {
  260. return !memcmp(ZSTR_VAL(s1), ZSTR_VAL(s2), ZSTR_LEN(s1));
  261. }
  262. #endif
  263. static zend_always_inline zend_bool zend_string_equal_content(zend_string *s1, zend_string *s2)
  264. {
  265. return ZSTR_LEN(s1) == ZSTR_LEN(s2) && zend_string_equal_val(s1, s2);
  266. }
  267. static zend_always_inline zend_bool zend_string_equals(zend_string *s1, zend_string *s2)
  268. {
  269. return s1 == s2 || zend_string_equal_content(s1, s2);
  270. }
  /*
   * Equal lengths and zend_binary_strcasecmp() == 0 for two zend_strings.
   * Arguments are expanded more than once; avoid side effects.
   */
  #define zend_string_equals_ci(s1, s2) \
      ((ZSTR_LEN(s1) == ZSTR_LEN(s2)) && (zend_binary_strcasecmp(ZSTR_VAL(s1), ZSTR_LEN(s1), ZSTR_VAL(s2), ZSTR_LEN(s2)) == 0))

  /*
   * Same comparison against a character array c; its length is taken as
   * sizeof(c) - 1, so c must be an array (typically a string literal), not a pointer.
   */
  #define zend_string_equals_literal_ci(str, c) \
      ((ZSTR_LEN(str) == sizeof(c) - 1) && (zend_binary_strcasecmp(ZSTR_VAL(str), ZSTR_LEN(str), (c), sizeof(c) - 1) == 0))

  /* Byte-exact (memcmp) comparison against a character array; same sizeof-based length rule. */
  #define zend_string_equals_literal(str, literal) \
      ((ZSTR_LEN(str) == sizeof(literal) - 1) && (memcmp(ZSTR_VAL(str), (literal), sizeof(literal) - 1) == 0))
  277. /*
  278. * DJBX33A (Daniel J. Bernstein, Times 33 with Addition)
  279. *
  280. * This is Daniel J. Bernstein's popular `times 33' hash function as
  281. * posted by him years ago on comp.lang.c. It basically uses a function
  282. * like ``hash(i) = hash(i-1) * 33 + str[i]''. This is one of the best
  283. * known hash functions for strings. Because it is both computed very
  284. * fast and distributes very well.
  285. *
  286. * The magic of number 33, i.e. why it works better than many other
  287. * constants, prime or not, has never been adequately explained by
  288. * anyone. So I try an explanation: if one experimentally tests all
  289. * multipliers between 1 and 256 (as RSE did now) one detects that even
  290. * numbers are not useable at all. The remaining 128 odd numbers
  291. * (except for the number 1) work more or less all equally well. They
  292. * all distribute in an acceptable way and this way fill a hash table
  293. * with an average percent of approx. 86%.
  294. *
  295. * If one compares the Chi^2 values of the variants, the number 33 not
  296. * even has the best value. But the number 33 and a few other equally
  297. * good numbers like 17, 31, 63, 127 and 129 have nevertheless a great
  298. * advantage to the remaining numbers in the large set of possible
  299. * multipliers: their multiply operation can be replaced by a faster
  300. * operation based on just one shift plus either a single addition
  301. * or subtraction operation. And because a hash function has to both
  302. * distribute good _and_ has to be very fast to compute, those few
  303. * numbers should be preferred and seems to be the reason why Daniel J.
  304. * Bernstein also preferred it.
  305. *
  306. *
  307. * -- Ralf S. Engelschall <rse@engelschall.com>
  308. */
  309. static zend_always_inline zend_ulong zend_inline_hash_func(const char *str, size_t len)
  310. {
  311. zend_ulong hash = Z_UL(5381);
  312. /* variant with the hash unrolled eight times */
  313. for (; len >= 8; len -= 8) {
  314. hash = ((hash << 5) + hash) + *str++;
  315. hash = ((hash << 5) + hash) + *str++;
  316. hash = ((hash << 5) + hash) + *str++;
  317. hash = ((hash << 5) + hash) + *str++;
  318. hash = ((hash << 5) + hash) + *str++;
  319. hash = ((hash << 5) + hash) + *str++;
  320. hash = ((hash << 5) + hash) + *str++;
  321. hash = ((hash << 5) + hash) + *str++;
  322. }
  323. switch (len) {
  324. case 7: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
  325. case 6: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
  326. case 5: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
  327. case 4: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
  328. case 3: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
  329. case 2: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
  330. case 1: hash = ((hash << 5) + hash) + *str++; break;
  331. case 0: break;
  332. EMPTY_SWITCH_DEFAULT_CASE()
  333. }
  334. /* Hash value can't be zero, so we always set the high bit */
  335. #if SIZEOF_ZEND_LONG == 8
  336. return hash | Z_UL(0x8000000000000000);
  337. #elif SIZEOF_ZEND_LONG == 4
  338. return hash | Z_UL(0x80000000);
  339. #else
  340. # error "Unknown SIZEOF_ZEND_LONG"
  341. #endif
  342. }
  /*
   * X-macro list of well-known strings: each entry pairs an identifier with
   * its string literal. Invoke as ZEND_KNOWN_STRINGS(M) with a two-argument
   * macro M(id, str) to generate one item per entry.
   *
   * The final entry deliberately has no trailing backslash, so the macro
   * ends there regardless of what follows on the next line.
   */
  #define ZEND_KNOWN_STRINGS(_) \
      _(ZEND_STR_FILE,                   "file") \
      _(ZEND_STR_LINE,                   "line") \
      _(ZEND_STR_FUNCTION,               "function") \
      _(ZEND_STR_CLASS,                  "class") \
      _(ZEND_STR_OBJECT,                 "object") \
      _(ZEND_STR_TYPE,                   "type") \
      _(ZEND_STR_OBJECT_OPERATOR,        "->") \
      _(ZEND_STR_PAAMAYIM_NEKUDOTAYIM,   "::") \
      _(ZEND_STR_ARGS,                   "args") \
      _(ZEND_STR_UNKNOWN,                "unknown") \
      _(ZEND_STR_EVAL,                   "eval") \
      _(ZEND_STR_INCLUDE,                "include") \
      _(ZEND_STR_REQUIRE,                "require") \
      _(ZEND_STR_INCLUDE_ONCE,           "include_once") \
      _(ZEND_STR_REQUIRE_ONCE,           "require_once") \
      _(ZEND_STR_SCALAR,                 "scalar") \
      _(ZEND_STR_ERROR_REPORTING,        "error_reporting") \
      _(ZEND_STR_STATIC,                 "static") \
      _(ZEND_STR_THIS,                   "this") \
      _(ZEND_STR_VALUE,                  "value") \
      _(ZEND_STR_KEY,                    "key") \
      _(ZEND_STR_MAGIC_AUTOLOAD,         "__autoload") \
      _(ZEND_STR_MAGIC_INVOKE,           "__invoke") \
      _(ZEND_STR_PREVIOUS,               "previous") \
      _(ZEND_STR_CODE,                   "code") \
      _(ZEND_STR_MESSAGE,                "message") \
      _(ZEND_STR_SEVERITY,               "severity") \
      _(ZEND_STR_STRING,                 "string") \
      _(ZEND_STR_TRACE,                  "trace") \
      _(ZEND_STR_SCHEME,                 "scheme") \
      _(ZEND_STR_HOST,                   "host") \
      _(ZEND_STR_PORT,                   "port") \
      _(ZEND_STR_USER,                   "user") \
      _(ZEND_STR_PASS,                   "pass") \
      _(ZEND_STR_PATH,                   "path") \
      _(ZEND_STR_QUERY,                  "query") \
      _(ZEND_STR_FRAGMENT,               "fragment") \
      _(ZEND_STR_NULL,                   "NULL") \
      _(ZEND_STR_BOOLEAN,                "boolean") \
      _(ZEND_STR_INTEGER,                "integer") \
      _(ZEND_STR_DOUBLE,                 "double") \
      _(ZEND_STR_ARRAY,                  "array") \
      _(ZEND_STR_RESOURCE,               "resource") \
      _(ZEND_STR_CLOSED_RESOURCE,        "resource (closed)") \
      _(ZEND_STR_NAME,                   "name") \
      _(ZEND_STR_ARGV,                   "argv") \
      _(ZEND_STR_ARGC,                   "argc")

  /*
   * One enumerator per ZEND_KNOWN_STRINGS entry, in list order starting at
   * 0, followed by ZEND_STR_LAST_KNOWN, which therefore equals the number
   * of entries.
   */
  typedef enum _zend_known_string_id {
  #define _ZEND_STR_ID(id, str) id,
      ZEND_KNOWN_STRINGS(_ZEND_STR_ID)
  #undef _ZEND_STR_ID
      ZEND_STR_LAST_KNOWN
  } zend_known_string_id;
  397. #endif /* ZEND_STRING_H */
  398. /*
  399. * Local variables:
  400. * tab-width: 4
  401. * c-basic-offset: 4
  402. * indent-tabs-mode: t
  403. * End:
  404. * vim600: sw=4 ts=4 fdm=marker
  405. * vim<600: sw=4 ts=4
  406. */