locale_methods.c 46 KB


  1. /*
  2. +----------------------------------------------------------------------+
  3. | PHP Version 5 |
  4. +----------------------------------------------------------------------+
  5. | This source file is subject to version 3.01 of the PHP license, |
  6. | that is bundled with this package in the file LICENSE, and is |
  7. | available through the world-wide-web at the following url: |
  8. | http://www.php.net/license/3_01.txt |
  9. | If you did not receive a copy of the PHP license and are unable to |
  10. | obtain it through the world-wide-web, please send a note to |
  11. | license@php.net so we can mail you a copy immediately. |
  12. +----------------------------------------------------------------------+
  13. | Authors: Kirti Velankar <kirtig@yahoo-inc.com> |
  14. +----------------------------------------------------------------------+
  15. */
  16. /* $Id$ */
  17. #ifdef HAVE_CONFIG_H
  18. #include "config.h"
  19. #endif
  20. #include <unicode/ustring.h>
  21. #include <unicode/udata.h>
  22. #include <unicode/putil.h>
  23. #include <unicode/ures.h>
  24. #include "php_intl.h"
  25. #include "locale.h"
  26. #include "locale_class.h"
  27. #include "locale_methods.h"
  28. #include "intl_convert.h"
  29. #include "intl_data.h"
  30. #include <zend_API.h>
  31. #include <zend.h>
  32. #include <php.h>
  33. #include "main/php_ini.h"
  34. #include "ext/standard/php_smart_str.h"
  35. ZEND_EXTERN_MODULE_GLOBALS( intl )
  /* Separator strings for locale subtags. SEPARATOR is the one appended
   * between subtags by the compose helpers in this file. */
  #define SEPARATOR "_"
  #define SEPARATOR1 "-"
  #define DELIMITER "-_"
  /* Singleton prefixes. PRIVATE_PREFIX is emitted by add_prefix() in front of
   * private-use subtags; EXTLANG_PREFIX is presumably its extlang counterpart
   * (not referenced in the visible part of the file -- TODO confirm). */
  #define EXTLANG_PREFIX "a"
  #define PRIVATE_PREFIX "x"
  /* Pseudo tag name that selects the full display name in
   * get_icu_disp_value_src_php(). */
  #define DISP_NAME "name"
  /* Number of numbered array keys (e.g. "variant0" .. "variant14") probed by
   * append_multiple_key_values() for each multi-valued subtag. */
  #define MAX_NO_VARIANT 15
  #define MAX_NO_EXTLANG 3
  #define MAX_NO_PRIVATE 15
  #define MAX_NO_LOOKUP_LANG_TAG 100
  /* Returned by append_key_value() when the requested key is not in the array. */
  #define LOC_NOT_FOUND 1
  /* Sizes required for the strings "variant15" , "extlang3", "private12" etc. */
  #define VARIANT_KEYNAME_LEN 11
  #define EXTLANG_KEYNAME_LEN 10
  #define PRIVATE_KEYNAME_LEN 11
  /* Grandfathered language tags, based on the IANA registry at the time this
   * code was written. The list is NULL-terminated so it can be scanned with
   * findOffset(). The leading entries line up index-for-index with
   * LOC_PREFERRED_GRANDFATHERED; the order must therefore not change.
   */
  static const char * const LOC_GRANDFATHERED[] = {
      /* tags that have a preferred replacement */
      "art-lojban",
      "i-klingon",
      "i-lux",
      "i-navajo",
      "no-bok",
      "no-nyn",
      /* tags without a preferred replacement */
      "cel-gaulish",
      "en-GB-oed",
      "i-ami",
      "i-bnn",
      "i-default",
      "i-enochian",
      "i-mingo",
      "i-pwn",
      "i-tao",
      "i-tay",
      "i-tsu",
      "sgn-BE-fr",
      "sgn-BE-nl",
      "sgn-CH-de",
      "zh-cmn",
      "zh-cmn-Hans",
      "zh-cmn-Hant",
      "zh-gan",
      "zh-guoyu",
      "zh-hakka",
      "zh-min",
      "zh-min-nan",
      "zh-wuu",
      "zh-xiang",
      "zh-yue",
      NULL
  };
  /* Preferred replacements for grandfathered tags (IANA registry at the time
   * this code was written). Entry N is the preferred value for
   * LOC_GRANDFATHERED[N]; only the first LOC_PREFERRED_GRANDFATHERED_LEN
   * grandfathered tags have one. The table is NULL-terminated.
   */
  static const int LOC_PREFERRED_GRANDFATHERED_LEN = 6;
  static const char * const LOC_PREFERRED_GRANDFATHERED[] = {
      "jbo",  /* art-lojban */
      "tlh",  /* i-klingon  */
      "lb",   /* i-lux      */
      "nv",   /* i-navajo   */
      "nb",   /* no-bok     */
      "nn",   /* no-nyn     */
      NULL
  };
  /* Character / prefix classification helpers for locale IDs.
   * Every macro argument is parenthesized so that expression arguments
   * (e.g. `cond ? a : b`, `ptr + n`) expand with the intended precedence.
   * Note that the arguments are still evaluated more than once, so they must
   * not have side effects.
   */
  /* TRUE if a is an ID separator ('_' or '-'), FALSE otherwise */
  #define isIDSeparator(a) ((a) == '_' || (a) == '-')
  /* TRUE if a starts the keyword section of a locale ID */
  #define isKeywordSeparator(a) ((a) == '@')
  /* TRUE if a is the string terminator */
  #define isEndOfTag(a) ((a) == '\0')
  #define isPrefixLetter(a) (((a) == 'x') || ((a) == 'X') || ((a) == 'i') || ((a) == 'I'))
  /* TRUE if one of the special prefixes is here (s=string): 'x-' or 'i-' */
  #define isIDPrefix(s) (isPrefixLetter((s)[0]) && isIDSeparator((s)[1]))
  #define isKeywordPrefix(s) (isKeywordSeparator((s)[0]))
  /* Dot terminates it because of POSIX form where dot precedes the codepage
   * except for variant */
  #define isTerminator(a) (((a) == 0) || ((a) == '.') || ((a) == '@'))
  /* {{{ Linear search of the NULL-terminated string table 'list'.
   * Returns the zero-based index of the first entry equal to 'key',
   * or -1 when no entry matches. */
  static int16_t findOffset(const char* const* list, const char* key)
  {
      int idx;

      for (idx = 0; list[idx] != NULL; idx++) {
          if (strcmp(key, list[idx]) == 0) {
              return (int16_t)idx;
          }
      }
      return -1;
  }
  /*}}}*/
  104. static char* getPreferredTag(const char* gf_tag)
  105. {
  106. char* result = NULL;
  107. int grOffset = 0;
  108. grOffset = findOffset( LOC_GRANDFATHERED ,gf_tag);
  109. if(grOffset < 0) {
  110. return NULL;
  111. }
  112. if( grOffset < LOC_PREFERRED_GRANDFATHERED_LEN ){
  113. /* return preferred tag */
  114. result = estrdup( LOC_PREFERRED_GRANDFATHERED[grOffset] );
  115. } else {
  116. /* Return correct grandfathered language tag */
  117. result = estrdup( LOC_GRANDFATHERED[grOffset] );
  118. }
  119. return result;
  120. }
  /* {{{
   * Reverse tokenizer used for lookup: scans 'str' leftwards starting just
   * before 'savedPos' for the nearest ID separator.
   * If that separator is preceded by a singleton (another separator two
   * characters earlier), the position of the earlier separator is reported
   * so the singleton is dropped together with its subtag.
   * Returns the separator position, or -1 when there is no usable token
   * (no separator found, or the position would be 0).
   */
  static int getStrrtokenPos(char* str, int savedPos)
  {
      int pos = savedPos - 1;

      while (pos >= 0 && !isIDSeparator(str[pos])) {
          pos--;
      }
      if (pos < 0) {
          /* ran off the front without meeting a separator */
          return -1;
      }
      if (pos >= 2 && isIDSeparator(str[pos - 2])) {
          /* "-x-": step back over the singleton */
          pos -= 2;
      }
      /* position 0 means an invalid locale such as '-x-xyz' or '-sl_Latn' */
      return (pos < 1) ? -1 : pos;
  }
  /* }}} */
  /* {{{
   * Forward scan for the first singleton (one-character subtag) in 'str'.
   *
   * Returns:
   *   0      when the string itself starts with a singleton ("x-avy", "a-prv1"),
   *   i + 1  (the index of the singleton character) when a separator at index i
   *          is followed by exactly one character and another separator,
   *   -1     when str is NULL, empty, or contains no singleton.
   *
   * The look-ahead at i + 2 is bounds-checked: previously a separator in the
   * last position made the scan read one byte past the terminating NUL.
   * The separator set ('_' and '-') is the same one isIDSeparator() accepts.
   */
  static int getSingletonPos(const char* str)
  {
      size_t len;
      size_t i;

      if (str == NULL) {
          return -1;
      }
      len = strlen(str);

      for (i = 0; i < len; i++) {
          if (str[i] != '_' && str[i] != '-') {
              continue;
          }
          if (i == 1) {
              /* string is of the form x-avy or a-prv1 */
              return 0;
          }
          /* separator found; is it followed by "<one char><separator>"? */
          if (i + 2 < len && (str[i + 2] == '_' || str[i + 2] == '-')) {
              return (int)(i + 1);
          }
      }
      return -1;
  }
  /* }}} */
  /* {{{ proto static string Locale::getDefault( )
   *     proto static string locale_get_default( )
   * Get default locale.
   * Takes no arguments; returns the value of intl_locale_get_default() as a
   * PHP string (the TRUE flag is RETURN_STRING's "duplicate" argument).
   */
  PHP_NAMED_FUNCTION(zif_locale_get_default)
  {
      RETURN_STRING( intl_locale_get_default( TSRMLS_C ), TRUE );
  }
  /* }}} */
  190. /* {{{ proto static string Locale::setDefault( string $locale )
  191. Set default locale */
  192. /* }}} */
  193. /* {{{ proto static string locale_set_default( string $locale )
  194. Set default locale */
  195. PHP_NAMED_FUNCTION(zif_locale_set_default)
  196. {
  197. char* locale_name = NULL;
  198. int len=0;
  199. if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
  200. &locale_name ,&len ) == FAILURE)
  201. {
  202. intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
  203. "locale_set_default: unable to parse input params", 0 TSRMLS_CC );
  204. RETURN_FALSE;
  205. }
  206. if(len == 0) {
  207. locale_name = (char *)uloc_getDefault() ;
  208. len = strlen(locale_name);
  209. }
  210. zend_alter_ini_entry(LOCALE_INI_NAME, sizeof(LOCALE_INI_NAME), locale_name, len, PHP_INI_USER, PHP_INI_STAGE_RUNTIME);
  211. RETURN_TRUE;
  212. }
  213. /* }}} */
  214. /* {{{
  215. * Gets the value from ICU
  216. * common code shared by get_primary_language,get_script or get_region or get_variant
  217. * result = 0 if error, 1 if successful , -1 if no value
  218. */
  219. static char* get_icu_value_internal( const char* loc_name , char* tag_name, int* result , int fromParseLocale)
  220. {
  221. char* tag_value = NULL;
  222. int32_t tag_value_len = 512;
  223. int singletonPos = 0;
  224. char* mod_loc_name = NULL;
  225. int grOffset = 0;
  226. int32_t buflen = 512;
  227. UErrorCode status = U_ZERO_ERROR;
  228. if (strlen(loc_name) > INTL_MAX_LOCALE_LEN) {
  229. return NULL;
  230. }
  231. if( strcmp(tag_name, LOC_CANONICALIZE_TAG) != 0 ){
  232. /* Handle grandfathered languages */
  233. grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
  234. if( grOffset >= 0 ){
  235. if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
  236. return estrdup(loc_name);
  237. } else {
  238. /* Since Grandfathered , no value , do nothing , retutn NULL */
  239. return NULL;
  240. }
  241. }
  242. if( fromParseLocale==1 ){
  243. /* Handle singletons */
  244. if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
  245. if( strlen(loc_name)>1 && (isIDPrefix(loc_name) == 1) ){
  246. return estrdup(loc_name);
  247. }
  248. }
  249. singletonPos = getSingletonPos( loc_name );
  250. if( singletonPos == 0){
  251. /* singleton at start of script, region , variant etc.
  252. * or invalid singleton at start of language */
  253. return NULL;
  254. } else if( singletonPos > 0 ){
  255. /* singleton at some position except at start
  256. * strip off the singleton and rest of the loc_name */
  257. mod_loc_name = estrndup ( loc_name , singletonPos-1);
  258. }
  259. } /* end of if fromParse */
  260. } /* end of if != LOC_CANONICAL_TAG */
  261. if( mod_loc_name == NULL){
  262. mod_loc_name = estrdup(loc_name );
  263. }
  264. /* Proceed to ICU */
  265. do{
  266. tag_value = erealloc( tag_value , buflen );
  267. tag_value_len = buflen;
  268. if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
  269. buflen = uloc_getScript ( mod_loc_name ,tag_value , tag_value_len , &status);
  270. }
  271. if( strcmp(tag_name , LOC_LANG_TAG )==0 ){
  272. buflen = uloc_getLanguage ( mod_loc_name ,tag_value , tag_value_len , &status);
  273. }
  274. if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
  275. buflen = uloc_getCountry ( mod_loc_name ,tag_value , tag_value_len , &status);
  276. }
  277. if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
  278. buflen = uloc_getVariant ( mod_loc_name ,tag_value , tag_value_len , &status);
  279. }
  280. if( strcmp(tag_name , LOC_CANONICALIZE_TAG)==0 ){
  281. buflen = uloc_canonicalize ( mod_loc_name ,tag_value , tag_value_len , &status);
  282. }
  283. if( U_FAILURE( status ) ) {
  284. if( status == U_BUFFER_OVERFLOW_ERROR ) {
  285. status = U_ZERO_ERROR;
  286. buflen++; /* add space for \0 */
  287. continue;
  288. }
  289. /* Error in retriving data */
  290. *result = 0;
  291. if( tag_value ){
  292. efree( tag_value );
  293. }
  294. if( mod_loc_name ){
  295. efree( mod_loc_name);
  296. }
  297. return NULL;
  298. }
  299. } while( buflen > tag_value_len );
  300. if( buflen ==0 ){
  301. /* No value found */
  302. *result = -1;
  303. if( tag_value ){
  304. efree( tag_value );
  305. }
  306. if( mod_loc_name ){
  307. efree( mod_loc_name);
  308. }
  309. return NULL;
  310. } else {
  311. *result = 1;
  312. }
  313. if( mod_loc_name ){
  314. efree( mod_loc_name);
  315. }
  316. return tag_value;
  317. }
  318. /* }}} */
  319. /* {{{
  320. * Gets the value from ICU , called when PHP userspace function is called
  321. * common code shared by get_primary_language,get_script or get_region or get_variant
  322. */
  323. static void get_icu_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
  324. {
  325. const char* loc_name = NULL;
  326. int loc_name_len = 0;
  327. char* tag_value = NULL;
  328. char* empty_result = "";
  329. int result = 0;
  330. char* msg = NULL;
  331. UErrorCode status = U_ZERO_ERROR;
  332. intl_error_reset( NULL TSRMLS_CC );
  333. if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
  334. &loc_name ,&loc_name_len ) == FAILURE) {
  335. spprintf(&msg , 0, "locale_get_%s : unable to parse input params", tag_name );
  336. intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, msg , 1 TSRMLS_CC );
  337. efree(msg);
  338. RETURN_FALSE;
  339. }
  340. if(loc_name_len == 0) {
  341. loc_name = intl_locale_get_default(TSRMLS_C);
  342. }
  343. INTL_CHECK_LOCALE_LEN(strlen(loc_name));
  344. /* Call ICU get */
  345. tag_value = get_icu_value_internal( loc_name , tag_name , &result ,0);
  346. /* No value found */
  347. if( result == -1 ) {
  348. if( tag_value){
  349. efree( tag_value);
  350. }
  351. RETURN_STRING( empty_result , TRUE);
  352. }
  353. /* value found */
  354. if( tag_value){
  355. RETURN_STRING( tag_value , FALSE);
  356. }
  357. /* Error encountered while fetching the value */
  358. if( result ==0) {
  359. spprintf(&msg , 0, "locale_get_%s : unable to get locale %s", tag_name , tag_name );
  360. intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
  361. efree(msg);
  362. RETURN_NULL();
  363. }
  364. }
  365. /* }}} */
  /* {{{ proto static string Locale::getScript($locale)
   *     proto static string locale_get_script($locale)
   * Gets the script for the $locale.
   * Thin wrapper: forwards the call to get_icu_value_src_php() with
   * LOC_SCRIPT_TAG, which parses the argument and sets the return value.
   */
  PHP_FUNCTION( locale_get_script )
  {
      get_icu_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
  }
  /* }}} */
  /* {{{ proto static string Locale::getRegion($locale)
   *     proto static string locale_get_region($locale)
   * Gets the region for the $locale.
   * Thin wrapper: forwards the call to get_icu_value_src_php() with
   * LOC_REGION_TAG, which parses the argument and sets the return value.
   */
  PHP_FUNCTION( locale_get_region )
  {
      get_icu_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
  }
  /* }}} */
  /* {{{ proto static string Locale::getPrimaryLanguage($locale)
   *     proto static string locale_get_primary_language($locale)
   * Gets the primary language for the $locale.
   * Thin wrapper: forwards the call to get_icu_value_src_php() with
   * LOC_LANG_TAG, which parses the argument and sets the return value.
   */
  PHP_FUNCTION(locale_get_primary_language )
  {
      get_icu_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
  }
  /* }}} */
  399. /* {{{
  400. * common code shared by display_xyz functions to get the value from ICU
  401. }}} */
  402. static void get_icu_disp_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
  403. {
  404. const char* loc_name = NULL;
  405. int loc_name_len = 0;
  406. const char* disp_loc_name = NULL;
  407. int disp_loc_name_len = 0;
  408. int free_loc_name = 0;
  409. UChar* disp_name = NULL;
  410. int32_t disp_name_len = 0;
  411. char* mod_loc_name = NULL;
  412. int32_t buflen = 512;
  413. UErrorCode status = U_ZERO_ERROR;
  414. char* utf8value = NULL;
  415. int utf8value_len = 0;
  416. char* msg = NULL;
  417. int grOffset = 0;
  418. intl_error_reset( NULL TSRMLS_CC );
  419. if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s|s",
  420. &loc_name, &loc_name_len ,
  421. &disp_loc_name ,&disp_loc_name_len ) == FAILURE)
  422. {
  423. spprintf(&msg , 0, "locale_get_display_%s : unable to parse input params", tag_name );
  424. intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, msg , 1 TSRMLS_CC );
  425. efree(msg);
  426. RETURN_FALSE;
  427. }
  428. if(loc_name_len > ULOC_FULLNAME_CAPACITY) {
  429. /* See bug 67397: overlong locale names cause trouble in uloc_getDisplayName */
  430. spprintf(&msg , 0, "locale_get_display_%s : name too long", tag_name );
  431. intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, msg , 1 TSRMLS_CC );
  432. efree(msg);
  433. RETURN_FALSE;
  434. }
  435. if(loc_name_len == 0) {
  436. loc_name = intl_locale_get_default(TSRMLS_C);
  437. }
  438. if( strcmp(tag_name, DISP_NAME) != 0 ){
  439. /* Handle grandfathered languages */
  440. grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
  441. if( grOffset >= 0 ){
  442. if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
  443. mod_loc_name = getPreferredTag( loc_name );
  444. } else {
  445. /* Since Grandfathered, no value, do nothing, retutn NULL */
  446. RETURN_FALSE;
  447. }
  448. }
  449. } /* end of if != LOC_CANONICAL_TAG */
  450. if( mod_loc_name==NULL ){
  451. mod_loc_name = estrdup( loc_name );
  452. }
  453. /* Check if disp_loc_name passed , if not use default locale */
  454. if( !disp_loc_name){
  455. disp_loc_name = estrdup(intl_locale_get_default(TSRMLS_C));
  456. free_loc_name = 1;
  457. }
  458. /* Get the disp_value for the given locale */
  459. do{
  460. disp_name = erealloc( disp_name , buflen * sizeof(UChar) );
  461. disp_name_len = buflen;
  462. if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
  463. buflen = uloc_getDisplayLanguage ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
  464. } else if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
  465. buflen = uloc_getDisplayScript ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
  466. } else if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
  467. buflen = uloc_getDisplayCountry ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
  468. } else if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
  469. buflen = uloc_getDisplayVariant ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
  470. } else if( strcmp(tag_name , DISP_NAME)==0 ){
  471. buflen = uloc_getDisplayName ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
  472. }
  473. /* U_STRING_NOT_TERMINATED_WARNING is admissible here; don't look for it */
  474. if( U_FAILURE( status ) )
  475. {
  476. if( status == U_BUFFER_OVERFLOW_ERROR )
  477. {
  478. status = U_ZERO_ERROR;
  479. continue;
  480. }
  481. spprintf(&msg, 0, "locale_get_display_%s : unable to get locale %s", tag_name , tag_name );
  482. intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
  483. efree(msg);
  484. if( disp_name){
  485. efree( disp_name );
  486. }
  487. if( mod_loc_name){
  488. efree( mod_loc_name );
  489. }
  490. if (free_loc_name) {
  491. efree((void *)disp_loc_name);
  492. disp_loc_name = NULL;
  493. }
  494. RETURN_FALSE;
  495. }
  496. } while( buflen > disp_name_len );
  497. if( mod_loc_name){
  498. efree( mod_loc_name );
  499. }
  500. if (free_loc_name) {
  501. efree((void *)disp_loc_name);
  502. disp_loc_name = NULL;
  503. }
  504. /* Convert display locale name from UTF-16 to UTF-8. */
  505. intl_convert_utf16_to_utf8( &utf8value, &utf8value_len, disp_name, buflen, &status );
  506. efree( disp_name );
  507. if( U_FAILURE( status ) )
  508. {
  509. spprintf(&msg, 0, "locale_get_display_%s :error converting display name for %s to UTF-8", tag_name , tag_name );
  510. intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
  511. efree(msg);
  512. RETURN_FALSE;
  513. }
  514. RETVAL_STRINGL( utf8value, utf8value_len , FALSE);
  515. }
  516. /* }}} */
  /* {{{ proto static string Locale::getDisplayName($locale[, $in_locale = null])
   *     proto static string locale_get_display_name($locale[, $in_locale = null])
   * Gets the name for the $locale in $in_locale or default_locale.
   * Thin wrapper: forwards to get_icu_disp_value_src_php() with DISP_NAME.
   */
  PHP_FUNCTION(locale_get_display_name)
  {
      get_icu_disp_value_src_php( DISP_NAME , INTERNAL_FUNCTION_PARAM_PASSTHRU );
  }
  /* }}} */
  /* {{{ proto static string Locale::getDisplayLanguage($locale[, $in_locale = null])
   *     proto static string locale_get_display_language($locale[, $in_locale = null])
   * Gets the language for the $locale in $in_locale or default_locale.
   * Thin wrapper: forwards to get_icu_disp_value_src_php() with LOC_LANG_TAG.
   */
  PHP_FUNCTION(locale_get_display_language)
  {
      get_icu_disp_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
  }
  /* }}} */
  /* {{{ proto static string Locale::getDisplayScript($locale, $in_locale = null)
   *     proto static string locale_get_display_script($locale, $in_locale = null)
   * Gets the script for the $locale in $in_locale or default_locale.
   * Thin wrapper: forwards to get_icu_disp_value_src_php() with LOC_SCRIPT_TAG.
   */
  PHP_FUNCTION(locale_get_display_script)
  {
      get_icu_disp_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
  }
  /* }}} */
  /* {{{ proto static string Locale::getDisplayRegion($locale, $in_locale = null)
   *     proto static string locale_get_display_region($locale, $in_locale = null)
   * Gets the region for the $locale in $in_locale or default_locale.
   * Thin wrapper: forwards to get_icu_disp_value_src_php() with LOC_REGION_TAG.
   */
  PHP_FUNCTION(locale_get_display_region)
  {
      get_icu_disp_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
  }
  /* }}} */
  /* {{{
   * proto static string Locale::getDisplayVariant($locale, $in_locale = null)
   * proto static string locale_get_display_variant($locale, $in_locale = null)
   * Gets the variant for the $locale in $in_locale or default_locale.
   * Thin wrapper: forwards to get_icu_disp_value_src_php() with LOC_VARIANT_TAG.
   */
  PHP_FUNCTION(locale_get_display_variant)
  {
      get_icu_disp_value_src_php( LOC_VARIANT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
  }
  /* }}} */
  574. /* {{{ proto static array getKeywords(string $locale) {
  575. * return an associative array containing keyword-value
  576. * pairs for this locale. The keys are keys to the array (doh!)
  577. * }}}*/
  578. /* {{{ proto static array locale_get_keywords(string $locale) {
  579. * return an associative array containing keyword-value
  580. * pairs for this locale. The keys are keys to the array (doh!)
  581. */
  582. PHP_FUNCTION( locale_get_keywords )
  583. {
  584. UEnumeration* e = NULL;
  585. UErrorCode status = U_ZERO_ERROR;
  586. const char* kw_key = NULL;
  587. int32_t kw_key_len = 0;
  588. const char* loc_name = NULL;
  589. int loc_name_len = 0;
  590. /*
  591. ICU expects the buffer to be allocated before calling the function
  592. and so the buffer size has been explicitly specified
  593. ICU uloc.h #define ULOC_KEYWORD_AND_VALUES_CAPACITY 100
  594. hence the kw_value buffer size is 100
  595. */
  596. char* kw_value = NULL;
  597. int32_t kw_value_len = 100;
  598. intl_error_reset( NULL TSRMLS_CC );
  599. if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
  600. &loc_name, &loc_name_len ) == FAILURE)
  601. {
  602. intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
  603. "locale_get_keywords: unable to parse input params", 0 TSRMLS_CC );
  604. RETURN_FALSE;
  605. }
  606. INTL_CHECK_LOCALE_LEN(strlen(loc_name));
  607. if(loc_name_len == 0) {
  608. loc_name = intl_locale_get_default(TSRMLS_C);
  609. }
  610. /* Get the keywords */
  611. e = uloc_openKeywords( loc_name, &status );
  612. if( e != NULL )
  613. {
  614. /* Traverse it, filling the return array. */
  615. array_init( return_value );
  616. while( ( kw_key = uenum_next( e, &kw_key_len, &status ) ) != NULL ){
  617. kw_value = ecalloc( 1 , kw_value_len );
  618. /* Get the keyword value for each keyword */
  619. kw_value_len=uloc_getKeywordValue( loc_name,kw_key, kw_value, kw_value_len , &status );
  620. if (status == U_BUFFER_OVERFLOW_ERROR) {
  621. status = U_ZERO_ERROR;
  622. kw_value = erealloc( kw_value , kw_value_len+1);
  623. kw_value_len=uloc_getKeywordValue( loc_name,kw_key, kw_value, kw_value_len+1 , &status );
  624. } else if(!U_FAILURE(status)) {
  625. kw_value = erealloc( kw_value , kw_value_len+1);
  626. }
  627. if (U_FAILURE(status)) {
  628. intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "locale_get_keywords: Error encountered while getting the keyword value for the keyword", 0 TSRMLS_CC );
  629. if( kw_value){
  630. efree( kw_value );
  631. }
  632. zval_dtor(return_value);
  633. RETURN_FALSE;
  634. }
  635. add_assoc_stringl( return_value, (char *)kw_key, kw_value , kw_value_len, 0);
  636. } /* end of while */
  637. } /* end of if e!=NULL */
  638. uenum_close( e );
  639. }
  640. /* }}} */
  /* {{{ proto static string Locale::canonicalize($locale)
   *     proto static string locale_canonicalize(string $locale)
   * @param string $locale The locale string to canonicalize
   * @return string the canonicalized locale
   * Thin wrapper: forwards to get_icu_value_src_php() with
   * LOC_CANONICALIZE_TAG; that helper parses a single string argument.
   */
  PHP_FUNCTION(locale_canonicalize)
  {
      get_icu_value_src_php( LOC_CANONICALIZE_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
  }
  /* }}} */
  652. /* {{{ append_key_value
  653. * Internal function which is called from locale_compose
  654. * gets the value for the key_name and appends to the loc_name
  655. * returns 1 if successful , -1 if not found ,
  656. * 0 if array element is not a string , -2 if buffer-overflow
  657. */
  658. static int append_key_value(smart_str* loc_name, HashTable* hash_arr, char* key_name)
  659. {
  660. zval** ele_value = NULL;
  661. if(zend_hash_find(hash_arr , key_name , strlen(key_name) + 1 ,(void **)&ele_value ) == SUCCESS ) {
  662. if(Z_TYPE_PP(ele_value)!= IS_STRING ){
  663. /* element value is not a string */
  664. return FAILURE;
  665. }
  666. if(strcmp(key_name, LOC_LANG_TAG) != 0 &&
  667. strcmp(key_name, LOC_GRANDFATHERED_LANG_TAG)!=0 ) {
  668. /* not lang or grandfathered tag */
  669. smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
  670. }
  671. smart_str_appendl(loc_name, Z_STRVAL_PP(ele_value) , Z_STRLEN_PP(ele_value));
  672. return SUCCESS;
  673. }
  674. return LOC_NOT_FOUND;
  675. }
  676. /* }}} */
  677. /* {{{ append_prefix , appends the prefix needed
  678. * e.g. private adds 'x'
  679. */
  680. static void add_prefix(smart_str* loc_name, char* key_name)
  681. {
  682. if( strncmp(key_name , LOC_PRIVATE_TAG , 7) == 0 ){
  683. smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
  684. smart_str_appendl(loc_name, PRIVATE_PREFIX , sizeof(PRIVATE_PREFIX)-1);
  685. }
  686. }
  687. /* }}} */
  688. /* {{{ append_multiple_key_values
  689. * Internal function which is called from locale_compose
  690. * gets the multiple values for the key_name and appends to the loc_name
  691. * used for 'variant','extlang','private'
  692. * returns 1 if successful , -1 if not found ,
  693. * 0 if array element is not a string , -2 if buffer-overflow
  694. */
  695. static int append_multiple_key_values(smart_str* loc_name, HashTable* hash_arr, char* key_name TSRMLS_DC)
  696. {
  697. zval** ele_value = NULL;
  698. int i = 0;
  699. int isFirstSubtag = 0;
  700. int max_value = 0;
  701. /* Variant/ Extlang/Private etc. */
  702. if( zend_hash_find( hash_arr , key_name , strlen(key_name) + 1 ,(void **)&ele_value ) == SUCCESS ) {
  703. if( Z_TYPE_PP(ele_value) == IS_STRING ){
  704. add_prefix( loc_name , key_name);
  705. smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
  706. smart_str_appendl(loc_name, Z_STRVAL_PP(ele_value) , Z_STRLEN_PP(ele_value));
  707. return SUCCESS;
  708. } else if(Z_TYPE_PP(ele_value) == IS_ARRAY ) {
  709. HashPosition pos;
  710. HashTable *arr = HASH_OF(*ele_value);
  711. zval **data = NULL;
  712. zend_hash_internal_pointer_reset_ex(arr, &pos);
  713. while(zend_hash_get_current_data_ex(arr, (void **)&data, &pos) != FAILURE) {
  714. if(Z_TYPE_PP(data) != IS_STRING) {
  715. return FAILURE;
  716. }
  717. if (isFirstSubtag++ == 0){
  718. add_prefix(loc_name , key_name);
  719. }
  720. smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
  721. smart_str_appendl(loc_name, Z_STRVAL_PP(data) , Z_STRLEN_PP(data));
  722. zend_hash_move_forward_ex(arr, &pos);
  723. }
  724. return SUCCESS;
  725. } else {
  726. return FAILURE;
  727. }
  728. } else {
  729. char cur_key_name[31];
  730. /* Decide the max_value: the max. no. of elements allowed */
  731. if( strcmp(key_name , LOC_VARIANT_TAG) ==0 ){
  732. max_value = MAX_NO_VARIANT;
  733. }
  734. if( strcmp(key_name , LOC_EXTLANG_TAG) ==0 ){
  735. max_value = MAX_NO_EXTLANG;
  736. }
  737. if( strcmp(key_name , LOC_PRIVATE_TAG) ==0 ){
  738. max_value = MAX_NO_PRIVATE;
  739. }
  740. /* Multiple variant values as variant0, variant1 ,variant2 */
  741. isFirstSubtag = 0;
  742. for( i=0 ; i< max_value; i++ ){
  743. snprintf( cur_key_name , 30, "%s%d", key_name , i);
  744. if( zend_hash_find( hash_arr , cur_key_name , strlen(cur_key_name) + 1,(void **)&ele_value ) == SUCCESS ){
  745. if( Z_TYPE_PP(ele_value)!= IS_STRING ){
  746. /* variant is not a string */
  747. return FAILURE;
  748. }
  749. /* Add the contents */
  750. if (isFirstSubtag++ == 0){
  751. add_prefix(loc_name , cur_key_name);
  752. }
  753. smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
  754. smart_str_appendl(loc_name, Z_STRVAL_PP(ele_value) , Z_STRLEN_PP(ele_value));
  755. }
  756. } /* end of for */
  757. } /* end of else */
  758. return SUCCESS;
  759. }
  760. /* }}} */
  761. /*{{{
  762. * If applicable sets error message and aborts locale_compose gracefully
  763. * returns 0 if locale_compose needs to be aborted
  764. * otherwise returns 1
  765. */
  766. static int handleAppendResult( int result, smart_str* loc_name TSRMLS_DC)
  767. {
  768. intl_error_reset( NULL TSRMLS_CC );
  769. if( result == FAILURE) {
  770. intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
  771. "locale_compose: parameter array element is not a string", 0 TSRMLS_CC );
  772. smart_str_free(loc_name);
  773. return 0;
  774. }
  775. return 1;
  776. }
  777. /* }}} */
  /* NUL-terminates the smart_str 's' and returns its buffer as the PHP string
   * result without copying it (the trailing 0 is RETURN_STRINGL's duplicate
   * flag). Wrapped in do { } while (0) so that both statements stay together
   * when the macro is the body of an unbraced if/else. */
  #define RETURN_SMART_STR(s) do { smart_str_0((s)); RETURN_STRINGL((s)->c, (s)->len, 0); } while (0)
  779. /* {{{ proto static string Locale::composeLocale($array)
  780. * Creates a locale by combining the parts of locale-ID passed
  781. * }}} */
  782. /* {{{ proto static string compose_locale($array)
  783. * Creates a locale by combining the parts of locale-ID passed
  784. * }}} */
  785. PHP_FUNCTION(locale_compose)
  786. {
  787. smart_str loc_name_s = {0};
  788. smart_str *loc_name = &loc_name_s;
  789. zval* arr = NULL;
  790. HashTable* hash_arr = NULL;
  791. int result = 0;
  792. intl_error_reset( NULL TSRMLS_CC );
  793. if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "a",
  794. &arr) == FAILURE)
  795. {
  796. intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
  797. "locale_compose: unable to parse input params", 0 TSRMLS_CC );
  798. RETURN_FALSE;
  799. }
  800. hash_arr = HASH_OF( arr );
  801. if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 )
  802. RETURN_FALSE;
  803. /* Check for grandfathered first */
  804. result = append_key_value(loc_name, hash_arr, LOC_GRANDFATHERED_LANG_TAG);
  805. if( result == SUCCESS){
  806. RETURN_SMART_STR(loc_name);
  807. }
  808. if( !handleAppendResult( result, loc_name TSRMLS_CC)){
  809. RETURN_FALSE;
  810. }
  811. /* Not grandfathered */
  812. result = append_key_value(loc_name, hash_arr , LOC_LANG_TAG);
  813. if( result == LOC_NOT_FOUND ){
  814. intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
  815. "locale_compose: parameter array does not contain 'language' tag.", 0 TSRMLS_CC );
  816. smart_str_free(loc_name);
  817. RETURN_FALSE;
  818. }
  819. if( !handleAppendResult( result, loc_name TSRMLS_CC)){
  820. RETURN_FALSE;
  821. }
  822. /* Extlang */
  823. result = append_multiple_key_values(loc_name, hash_arr , LOC_EXTLANG_TAG TSRMLS_CC);
  824. if( !handleAppendResult( result, loc_name TSRMLS_CC)){
  825. RETURN_FALSE;
  826. }
  827. /* Script */
  828. result = append_key_value(loc_name, hash_arr , LOC_SCRIPT_TAG);
  829. if( !handleAppendResult( result, loc_name TSRMLS_CC)){
  830. RETURN_FALSE;
  831. }
  832. /* Region */
  833. result = append_key_value( loc_name, hash_arr , LOC_REGION_TAG);
  834. if( !handleAppendResult( result, loc_name TSRMLS_CC)){
  835. RETURN_FALSE;
  836. }
  837. /* Variant */
  838. result = append_multiple_key_values( loc_name, hash_arr , LOC_VARIANT_TAG TSRMLS_CC);
  839. if( !handleAppendResult( result, loc_name TSRMLS_CC)){
  840. RETURN_FALSE;
  841. }
  842. /* Private */
  843. result = append_multiple_key_values( loc_name, hash_arr , LOC_PRIVATE_TAG TSRMLS_CC);
  844. if( !handleAppendResult( result, loc_name TSRMLS_CC)){
  845. RETURN_FALSE;
  846. }
  847. RETURN_SMART_STR(loc_name);
  848. }
  849. /* }}} */
  /*{{{
   * Extracts the private-use part of a locale name.
   *
   * Repeatedly asks getSingletonPos() for the next singleton; as soon as that
   * singleton is 'x' or 'X', everything located two characters after it (the
   * singleton itself plus the character that follows, presumably the
   * separator) is copied and returned.
   * e.g. for locale='en_US-x-prv1-prv2-prv3' the result is 'prv1-prv2-prv3'.
   *
   * Returns a string allocated with estrndup() that the caller must efree(),
   * or NULL when loc_name is NULL or empty, when no private singleton is
   * found, or when nothing follows the private singleton.
   */
  static char* get_private_subtags(const char* loc_name)
  {
      const char* cursor = loc_name;
      int singletonPos = 0;
      int len = 0;

      if( !loc_name || loc_name[0] == '\0' ){
          return NULL;
      }

      len = strlen(cursor);
      while( (singletonPos = getSingletonPos(cursor)) != -1 ){
          if( cursor[singletonPos] == 'x' || cursor[singletonPos] == 'X' ){
              /* Private singleton found. Nothing to copy when the name stops
               * right after it (e.g. it ends with '-x-'); ">=" also keeps the
               * copy length from ever going negative. */
              if( singletonPos + 2 >= len ){
                  return NULL;
              }
              return estrndup(cursor + singletonPos + 2, len - (singletonPos + 2));
          }

          if( singletonPos + 1 >= len ){
              /* String end */
              break;
          }

          /* Singleton found but it is not the private one: keep scanning the
           * remainder of the string. */
          cursor += singletonPos + 1;
          len = strlen(cursor);
      }

      return NULL;
  }
  893. /* }}} */
  894. /* {{{ code used by locale_parse
  895. */
  896. static int add_array_entry(const char* loc_name, zval* hash_arr, char* key_name TSRMLS_DC)
  897. {
  898. char* key_value = NULL;
  899. char* cur_key_name = NULL;
  900. char* token = NULL;
  901. char* last_ptr = NULL;
  902. int result = 0;
  903. int cur_result = 0;
  904. int cnt = 0;
  905. if( strcmp(key_name , LOC_PRIVATE_TAG)==0 ){
  906. key_value = get_private_subtags( loc_name );
  907. result = 1;
  908. } else {
  909. key_value = get_icu_value_internal( loc_name , key_name , &result,1 );
  910. }
  911. if( (strcmp(key_name , LOC_PRIVATE_TAG)==0) ||
  912. ( strcmp(key_name , LOC_VARIANT_TAG)==0) ){
  913. if( result > 0 && key_value){
  914. /* Tokenize on the "_" or "-" */
  915. token = php_strtok_r( key_value , DELIMITER ,&last_ptr);
  916. if( cur_key_name ){
  917. efree( cur_key_name);
  918. }
  919. cur_key_name = (char*)ecalloc( 25, 25);
  920. sprintf( cur_key_name , "%s%d", key_name , cnt++);
  921. add_assoc_string( hash_arr, cur_key_name , token ,TRUE );
  922. /* tokenize on the "_" or "-" and stop at singleton if any */
  923. while( (token = php_strtok_r(NULL , DELIMITER , &last_ptr)) && (strlen(token)>1) ){
  924. sprintf( cur_key_name , "%s%d", key_name , cnt++);
  925. add_assoc_string( hash_arr, cur_key_name , token , TRUE );
  926. }
  927. /*
  928. if( strcmp(key_name, LOC_PRIVATE_TAG) == 0 ){
  929. }
  930. */
  931. }
  932. } else {
  933. if( result == 1 ){
  934. add_assoc_string( hash_arr, key_name , key_value , TRUE );
  935. cur_result = 1;
  936. }
  937. }
  938. if( cur_key_name ){
  939. efree( cur_key_name);
  940. }
  941. /*if( key_name != LOC_PRIVATE_TAG && key_value){*/
  942. if( key_value){
  943. efree(key_value);
  944. }
  945. return cur_result;
  946. }
  947. /* }}} */
  948. /* {{{ proto static array Locale::parseLocale($locale)
  949. * parses a locale-id into an array the different parts of it
  950. }}} */
  951. /* {{{ proto static array parse_locale($locale)
  952. * parses a locale-id into an array the different parts of it
  953. */
  954. PHP_FUNCTION(locale_parse)
  955. {
  956. const char* loc_name = NULL;
  957. int loc_name_len = 0;
  958. int grOffset = 0;
  959. intl_error_reset( NULL TSRMLS_CC );
  960. if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
  961. &loc_name, &loc_name_len ) == FAILURE)
  962. {
  963. intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
  964. "locale_parse: unable to parse input params", 0 TSRMLS_CC );
  965. RETURN_FALSE;
  966. }
  967. INTL_CHECK_LOCALE_LEN(strlen(loc_name));
  968. if(loc_name_len == 0) {
  969. loc_name = intl_locale_get_default(TSRMLS_C);
  970. }
  971. array_init( return_value );
  972. grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
  973. if( grOffset >= 0 ){
  974. add_assoc_string( return_value , LOC_GRANDFATHERED_LANG_TAG , estrdup(loc_name) ,FALSE );
  975. }
  976. else{
  977. /* Not grandfathered */
  978. add_array_entry( loc_name , return_value , LOC_LANG_TAG TSRMLS_CC);
  979. add_array_entry( loc_name , return_value , LOC_SCRIPT_TAG TSRMLS_CC);
  980. add_array_entry( loc_name , return_value , LOC_REGION_TAG TSRMLS_CC);
  981. add_array_entry( loc_name , return_value , LOC_VARIANT_TAG TSRMLS_CC);
  982. add_array_entry( loc_name , return_value , LOC_PRIVATE_TAG TSRMLS_CC);
  983. }
  984. }
  985. /* }}} */
  986. /* {{{ proto static array Locale::getAllVariants($locale)
  987. * gets an array containing the list of variants, or null
  988. }}} */
  989. /* {{{ proto static array locale_get_all_variants($locale)
  990. * gets an array containing the list of variants, or null
  991. */
  992. PHP_FUNCTION(locale_get_all_variants)
  993. {
  994. const char* loc_name = NULL;
  995. int loc_name_len = 0;
  996. int result = 0;
  997. char* token = NULL;
  998. char* variant = NULL;
  999. char* saved_ptr = NULL;
  1000. intl_error_reset( NULL TSRMLS_CC );
  1001. if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
  1002. &loc_name, &loc_name_len ) == FAILURE)
  1003. {
  1004. intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
  1005. "locale_parse: unable to parse input params", 0 TSRMLS_CC );
  1006. RETURN_FALSE;
  1007. }
  1008. if(loc_name_len == 0) {
  1009. loc_name = intl_locale_get_default(TSRMLS_C);
  1010. }
  1011. INTL_CHECK_LOCALE_LEN(strlen(loc_name));
  1012. array_init( return_value );
  1013. /* If the locale is grandfathered, stop, no variants */
  1014. if( findOffset( LOC_GRANDFATHERED , loc_name ) >= 0 ){
  1015. /* ("Grandfathered Tag. No variants."); */
  1016. }
  1017. else {
  1018. /* Call ICU variant */
  1019. variant = get_icu_value_internal( loc_name , LOC_VARIANT_TAG , &result ,0);
  1020. if( result > 0 && variant){
  1021. /* Tokenize on the "_" or "-" */
  1022. token = php_strtok_r( variant , DELIMITER , &saved_ptr);
  1023. add_next_index_stringl( return_value, token , strlen(token) ,TRUE );
  1024. /* tokenize on the "_" or "-" and stop at singleton if any */
  1025. while( (token = php_strtok_r(NULL , DELIMITER, &saved_ptr)) && (strlen(token)>1) ){
  1026. add_next_index_stringl( return_value, token , strlen(token) ,TRUE );
  1027. }
  1028. }
  1029. if( variant ){
  1030. efree( variant );
  1031. }
  1032. }
  1033. }
  1034. /* }}} */
  /*{{{
   * Converts to lower case and also replaces all hyphens with the underscore.
   *
   * str    - NUL-terminated input; may be NULL.
   * retstr - output buffer; must have room for strlen(str)+1 bytes.
   *
   * Returns 1 after writing the converted, NUL-terminated copy to retstr.
   * Returns 0 without touching retstr when str is NULL or empty.
   */
  static int strToMatch(const char* str ,char *retstr)
  {
      if( (!str) || str[0] == '\0' ){
          return 0;
      }

      for( ; *str != '\0'; str++, retstr++ ){
          if( *str == '-' ){
              *retstr = '_';
          } else {
              /* tolower() is only defined for unsigned char values and EOF,
               * so bytes >= 0x80 must not reach it as negative chars. */
              *retstr = (char)tolower( (unsigned char)*str );
          }
      }
      *retstr = '\0';

      return 1;
  }
  1064. /* }}} */
  1065. /* {{{ proto static boolean Locale::filterMatches(string $langtag, string $locale[, bool $canonicalize])
  1066. * Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm
  1067. */
  1068. /* }}} */
  1069. /* {{{ proto boolean locale_filter_matches(string $langtag, string $locale[, bool $canonicalize])
  1070. * Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm
  1071. */
  1072. PHP_FUNCTION(locale_filter_matches)
  1073. {
  1074. char* lang_tag = NULL;
  1075. int lang_tag_len = 0;
  1076. const char* loc_range = NULL;
  1077. int loc_range_len = 0;
  1078. int result = 0;
  1079. char* token = 0;
  1080. char* chrcheck = NULL;
  1081. char* can_lang_tag = NULL;
  1082. char* can_loc_range = NULL;
  1083. char* cur_lang_tag = NULL;
  1084. char* cur_loc_range = NULL;
  1085. zend_bool boolCanonical = 0;
  1086. UErrorCode status = U_ZERO_ERROR;
  1087. intl_error_reset( NULL TSRMLS_CC );
  1088. if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "ss|b",
  1089. &lang_tag, &lang_tag_len , &loc_range , &loc_range_len ,
  1090. &boolCanonical) == FAILURE)
  1091. {
  1092. intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
  1093. "locale_filter_matches: unable to parse input params", 0 TSRMLS_CC );
  1094. RETURN_FALSE;
  1095. }
  1096. if(loc_range_len == 0) {
  1097. loc_range = intl_locale_get_default(TSRMLS_C);
  1098. }
  1099. if( strcmp(loc_range,"*")==0){
  1100. RETURN_TRUE;
  1101. }
  1102. INTL_CHECK_LOCALE_LEN(strlen(loc_range));
  1103. INTL_CHECK_LOCALE_LEN(strlen(lang_tag));
  1104. if( boolCanonical ){
  1105. /* canonicalize loc_range */
  1106. can_loc_range=get_icu_value_internal( loc_range , LOC_CANONICALIZE_TAG , &result , 0);
  1107. if( result ==0) {
  1108. intl_error_set( NULL, status,
  1109. "locale_filter_matches : unable to canonicalize loc_range" , 0 TSRMLS_CC );
  1110. RETURN_FALSE;
  1111. }
  1112. /* canonicalize lang_tag */
  1113. can_lang_tag = get_icu_value_internal( lang_tag , LOC_CANONICALIZE_TAG , &result , 0);
  1114. if( result ==0) {
  1115. intl_error_set( NULL, status,
  1116. "locale_filter_matches : unable to canonicalize lang_tag" , 0 TSRMLS_CC );
  1117. RETURN_FALSE;
  1118. }
  1119. /* Convert to lower case for case-insensitive comparison */
  1120. cur_lang_tag = ecalloc( 1, strlen(can_lang_tag) + 1);
  1121. /* Convert to lower case for case-insensitive comparison */
  1122. result = strToMatch( can_lang_tag , cur_lang_tag);
  1123. if( result == 0) {
  1124. efree( cur_lang_tag );
  1125. efree( can_lang_tag );
  1126. RETURN_FALSE;
  1127. }
  1128. cur_loc_range = ecalloc( 1, strlen(can_loc_range) + 1);
  1129. result = strToMatch( can_loc_range , cur_loc_range );
  1130. if( result == 0) {
  1131. efree( cur_lang_tag );
  1132. efree( can_lang_tag );
  1133. efree( cur_loc_range );
  1134. efree( can_loc_range );
  1135. RETURN_FALSE;
  1136. }
  1137. /* check if prefix */
  1138. token = strstr( cur_lang_tag , cur_loc_range );
  1139. if( token && (token==cur_lang_tag) ){
  1140. /* check if the char. after match is SEPARATOR */
  1141. chrcheck = token + (strlen(cur_loc_range));
  1142. if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
  1143. if( cur_lang_tag){
  1144. efree( cur_lang_tag );
  1145. }
  1146. if( cur_loc_range){
  1147. efree( cur_loc_range );
  1148. }
  1149. if( can_lang_tag){
  1150. efree( can_lang_tag );
  1151. }
  1152. if( can_loc_range){
  1153. efree( can_loc_range );
  1154. }
  1155. RETURN_TRUE;
  1156. }
  1157. }
  1158. /* No prefix as loc_range */
  1159. if( cur_lang_tag){
  1160. efree( cur_lang_tag );
  1161. }
  1162. if( cur_loc_range){
  1163. efree( cur_loc_range );
  1164. }
  1165. if( can_lang_tag){
  1166. efree( can_lang_tag );
  1167. }
  1168. if( can_loc_range){
  1169. efree( can_loc_range );
  1170. }
  1171. RETURN_FALSE;
  1172. } /* end of if isCanonical */
  1173. else{
  1174. /* Convert to lower case for case-insensitive comparison */
  1175. cur_lang_tag = ecalloc( 1, strlen(lang_tag ) + 1);
  1176. result = strToMatch( lang_tag , cur_lang_tag);
  1177. if( result == 0) {
  1178. efree( cur_lang_tag );
  1179. RETURN_FALSE;
  1180. }
  1181. cur_loc_range = ecalloc( 1, strlen(loc_range ) + 1);
  1182. result = strToMatch( loc_range , cur_loc_range );
  1183. if( result == 0) {
  1184. efree( cur_lang_tag );
  1185. efree( cur_loc_range );
  1186. RETURN_FALSE;
  1187. }
  1188. /* check if prefix */
  1189. token = strstr( cur_lang_tag , cur_loc_range );
  1190. if( token && (token==cur_lang_tag) ){
  1191. /* check if the char. after match is SEPARATOR */
  1192. chrcheck = token + (strlen(cur_loc_range));
  1193. if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
  1194. if( cur_lang_tag){
  1195. efree( cur_lang_tag );
  1196. }
  1197. if( cur_loc_range){
  1198. efree( cur_loc_range );
  1199. }
  1200. RETURN_TRUE;
  1201. }
  1202. }
  1203. /* No prefix as loc_range */
  1204. if( cur_lang_tag){
  1205. efree( cur_lang_tag );
  1206. }
  1207. if( cur_loc_range){
  1208. efree( cur_loc_range );
  1209. }
  1210. RETURN_FALSE;
  1211. }
  1212. }
  1213. /* }}} */
  /*
   * Releases a lookup table built by lookup_loc_range().
   *
   * arr holds arr_size pairs of slots. Only the first slot of each pair
   * (even index) is freed, and only when it is set; the second slot is left
   * alone. The table itself is freed last.
   */
  static void array_cleanup( char* arr[] , int arr_size)
  {
      int pair = 0;

      while( pair < arr_size ){
          char* owned = arr[pair*2];

          if( owned ){
              efree( owned );
          }
          pair++;
      }

      efree(arr);
  }
  #define LOOKUP_CLEAN_RETURN(value) array_cleanup(cur_arr, cur_arr_len); return (value)
  1225. /* {{{
  1226. * returns the lookup result to lookup_loc_range_src_php
  1227. * internal function
  1228. */
  1229. static char* lookup_loc_range(const char* loc_range, HashTable* hash_arr, int canonicalize TSRMLS_DC)
  1230. {
  1231. int i = 0;
  1232. int cur_arr_len = 0;
  1233. int result = 0;
  1234. char* lang_tag = NULL;
  1235. zval** ele_value = NULL;
  1236. char** cur_arr = NULL;
  1237. char* cur_loc_range = NULL;
  1238. char* can_loc_range = NULL;
  1239. int saved_pos = 0;
  1240. char* return_value = NULL;
  1241. cur_arr = ecalloc(zend_hash_num_elements(hash_arr)*2, sizeof(char *));
  1242. /* convert the array to lowercase , also replace hyphens with the underscore and store it in cur_arr */
  1243. for(zend_hash_internal_pointer_reset(hash_arr);
  1244. zend_hash_has_more_elements(hash_arr) == SUCCESS;
  1245. zend_hash_move_forward(hash_arr)) {
  1246. if (zend_hash_get_current_data(hash_arr, (void**)&ele_value) == FAILURE) {
  1247. /* Should never actually fail since the key is known to exist.*/
  1248. continue;
  1249. }
  1250. if(Z_TYPE_PP(ele_value)!= IS_STRING) {
  1251. /* element value is not a string */
  1252. intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: locale array element is not a string", 0 TSRMLS_CC);
  1253. LOOKUP_CLEAN_RETURN(NULL);
  1254. }
  1255. cur_arr[cur_arr_len*2] = estrndup(Z_STRVAL_PP(ele_value), Z_STRLEN_PP(ele_value));
  1256. result = strToMatch(Z_STRVAL_PP(ele_value), cur_arr[cur_arr_len*2]);
  1257. if(result == 0) {
  1258. intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag", 0 TSRMLS_CC);
  1259. LOOKUP_CLEAN_RETURN(NULL);
  1260. }
  1261. cur_arr[cur_arr_len*2+1] = Z_STRVAL_PP(ele_value);
  1262. cur_arr_len++ ;
  1263. } /* end of for */
  1264. /* Canonicalize array elements */
  1265. if(canonicalize) {
  1266. for(i=0; i<cur_arr_len; i++) {
  1267. lang_tag = get_icu_value_internal(cur_arr[i*2], LOC_CANONICALIZE_TAG, &result, 0);
  1268. if(result != 1 || lang_tag == NULL || !lang_tag[0]) {
  1269. if(lang_tag) {
  1270. efree(lang_tag);
  1271. }
  1272. intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
  1273. LOOKUP_CLEAN_RETURN(NULL);
  1274. }
  1275. cur_arr[i*2] = erealloc(cur_arr[i*2], strlen(lang_tag)+1);
  1276. result = strToMatch(lang_tag, cur_arr[i*2]);
  1277. efree(lang_tag);
  1278. if(result == 0) {
  1279. intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
  1280. LOOKUP_CLEAN_RETURN(NULL);
  1281. }
  1282. }
  1283. }
  1284. if(canonicalize) {
  1285. /* Canonicalize the loc_range */
  1286. can_loc_range = get_icu_value_internal(loc_range, LOC_CANONICALIZE_TAG, &result , 0);
  1287. if( result != 1 || can_loc_range == NULL || !can_loc_range[0]) {
  1288. /* Error */
  1289. intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize loc_range" , 0 TSRMLS_CC );
  1290. if(can_loc_range) {
  1291. efree(can_loc_range);
  1292. }
  1293. LOOKUP_CLEAN_RETURN(NULL);
  1294. } else {
  1295. loc_range = can_loc_range;
  1296. }
  1297. }
  1298. cur_loc_range = ecalloc(1, strlen(loc_range)+1);
  1299. /* convert to lower and replace hyphens */
  1300. result = strToMatch(loc_range, cur_loc_range);
  1301. if(can_loc_range) {
  1302. efree(can_loc_range);
  1303. }
  1304. if(result == 0) {
  1305. intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
  1306. LOOKUP_CLEAN_RETURN(NULL);
  1307. }
  1308. /* Lookup for the lang_tag match */
  1309. saved_pos = strlen(cur_loc_range);
  1310. while(saved_pos > 0) {
  1311. for(i=0; i< cur_arr_len; i++){
  1312. if(cur_arr[i*2] != NULL && strlen(cur_arr[i*2]) == saved_pos && strncmp(cur_loc_range, cur_arr[i*2], saved_pos) == 0) {
  1313. /* Match found */
  1314. return_value = estrdup(canonicalize?cur_arr[i*2]:cur_arr[i*2+1]);
  1315. efree(cur_loc_range);
  1316. LOOKUP_CLEAN_RETURN(return_value);
  1317. }
  1318. }
  1319. saved_pos = getStrrtokenPos(cur_loc_range, saved_pos);
  1320. }
  1321. /* Match not found */
  1322. efree(cur_loc_range);
  1323. LOOKUP_CLEAN_RETURN(NULL);
  1324. }
  1325. /* }}} */
  1326. /* {{{ proto string Locale::lookup(array $langtag, string $locale[, bool $canonicalize[, string $default = null]])
  1327. * Searches the items in $langtag for the best match to the language
  1328. * range
  1329. */
  1330. /* }}} */
  1331. /* {{{ proto string locale_lookup(array $langtag, string $locale[, bool $canonicalize[, string $default = null]])
  1332. * Searchs the items in $langtag for the best match to the language
  1333. * range
  1334. */
  1335. PHP_FUNCTION(locale_lookup)
  1336. {
  1337. char* fallback_loc = NULL;
  1338. int fallback_loc_len = 0;
  1339. const char* loc_range = NULL;
  1340. int loc_range_len = 0;
  1341. zval* arr = NULL;
  1342. HashTable* hash_arr = NULL;
  1343. zend_bool boolCanonical = 0;
  1344. char* result =NULL;
  1345. intl_error_reset( NULL TSRMLS_CC );
  1346. if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "as|bs", &arr, &loc_range, &loc_range_len,
  1347. &boolCanonical, &fallback_loc, &fallback_loc_len) == FAILURE) {
  1348. intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "locale_lookup: unable to parse input params", 0 TSRMLS_CC );
  1349. RETURN_FALSE;
  1350. }
  1351. if(loc_range_len == 0) {
  1352. loc_range = intl_locale_get_default(TSRMLS_C);
  1353. }
  1354. INTL_CHECK_LOCALE_LEN(strlen(loc_range));
  1355. hash_arr = HASH_OF(arr);
  1356. if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 ) {
  1357. RETURN_EMPTY_STRING();
  1358. }
  1359. result = lookup_loc_range(loc_range, hash_arr, boolCanonical TSRMLS_CC);
  1360. if(result == NULL || result[0] == '\0') {
  1361. if( fallback_loc ) {
  1362. result = estrndup(fallback_loc, fallback_loc_len);
  1363. } else {
  1364. RETURN_EMPTY_STRING();
  1365. }
  1366. }
  1367. RETVAL_STRINGL(result, strlen(result), 0);
  1368. }
  1369. /* }}} */
  1370. /* {{{ proto string Locale::acceptFromHttp(string $http_accept)
  1371. * Tries to find out best available locale based on HTTP "Accept-Language" header
  1372. */
  1373. /* }}} */
  1374. /* {{{ proto string locale_accept_from_http(string $http_accept)
  1375. * Tries to find out best available locale based on HTTP �Accept-Language� header
  1376. */
  1377. PHP_FUNCTION(locale_accept_from_http)
  1378. {
  1379. UEnumeration *available;
  1380. char *http_accept = NULL;
  1381. int http_accept_len;
  1382. UErrorCode status = 0;
  1383. int len;
  1384. char resultLocale[INTL_MAX_LOCALE_LEN+1];
  1385. UAcceptResult outResult;
  1386. if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s", &http_accept, &http_accept_len) == FAILURE)
  1387. {
  1388. intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
  1389. "locale_accept_from_http: unable to parse input parameters", 0 TSRMLS_CC );
  1390. RETURN_FALSE;
  1391. }
  1392. if(http_accept_len > ULOC_FULLNAME_CAPACITY) {
  1393. /* check each fragment, if any bigger than capacity, can't do it due to bug #72533 */
  1394. char *start = http_accept;
  1395. char *end;
  1396. size_t len;
  1397. do {
  1398. end = strchr(start, ',');
  1399. len = end ? end-start : http_accept_len-(start-http_accept);
  1400. if(len > ULOC_FULLNAME_CAPACITY) {
  1401. intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
  1402. "locale_accept_from_http: locale string too long", 0 TSRMLS_CC );
  1403. RETURN_FALSE;
  1404. }
  1405. if(end) {
  1406. start = end+1;
  1407. }
  1408. } while(end != NULL);
  1409. }
  1410. available = ures_openAvailableLocales(NULL, &status);
  1411. INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to retrieve locale list");
  1412. len = uloc_acceptLanguageFromHTTP(resultLocale, INTL_MAX_LOCALE_LEN,
  1413. &outResult, http_accept, available, &status);
  1414. uenum_close(available);
  1415. INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to find acceptable locale");
  1416. if (len < 0 || outResult == ULOC_ACCEPT_FAILED) {
  1417. RETURN_FALSE;
  1418. }
  1419. RETURN_STRINGL(resultLocale, len, 1);
  1420. }
  1421. /* }}} */
  1422. /*
  1423. * Local variables:
  1424. * tab-width: 4
  1425. * c-basic-offset: 4
  1426. * End:
  1427. * vim600: noet sw=4 ts=4 fdm=marker
  1428. * vim<600: noet sw=4 ts=4
  1429. *can_loc_len
  1430. */