locale_methods.c 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534
  1. /*
  2. +----------------------------------------------------------------------+
  3. | This source file is subject to version 3.01 of the PHP license, |
  4. | that is bundled with this package in the file LICENSE, and is |
  5. | available through the world-wide-web at the following url: |
  6. | https://www.php.net/license/3_01.txt |
  7. | If you did not receive a copy of the PHP license and are unable to |
  8. | obtain it through the world-wide-web, please send a note to |
  9. | license@php.net so we can mail you a copy immediately. |
  10. +----------------------------------------------------------------------+
  11. | Authors: Kirti Velankar <kirtig@yahoo-inc.com> |
  12. +----------------------------------------------------------------------+
  13. */
  14. #ifdef HAVE_CONFIG_H
  15. #include "config.h"
  16. #endif
  17. #include <unicode/ustring.h>
  18. #include <unicode/udata.h>
  19. #include <unicode/putil.h>
  20. #include <unicode/ures.h>
  21. #include "php_intl.h"
  22. #include "locale.h"
  23. #include "locale_class.h"
  24. #include "intl_convert.h"
  25. #include "intl_data.h"
  26. #include <zend_API.h>
  27. #include <zend.h>
  28. #include <php.h>
  29. #include "main/php_ini.h"
  30. #include "zend_smart_str.h"
  31. ZEND_EXTERN_MODULE_GLOBALS( intl )
  32. /* Sizes required for the strings "variant15" , "extlang11", "private12" etc. */
  33. #define SEPARATOR "_"
  34. #define SEPARATOR1 "-"
  35. #define DELIMITER "-_"
  36. #define EXTLANG_PREFIX "a"
  37. #define PRIVATE_PREFIX "x"
  38. #define DISP_NAME "name"
  39. #define MAX_NO_VARIANT 15
  40. #define MAX_NO_EXTLANG 3
  41. #define MAX_NO_PRIVATE 15
  42. #define MAX_NO_LOOKUP_LANG_TAG 100
  43. #define LOC_NOT_FOUND 1
  44. /* Sizes required for the strings "variant15" , "extlang3", "private12" etc. */
  45. #define VARIANT_KEYNAME_LEN 11
  46. #define EXTLANG_KEYNAME_LEN 10
  47. #define PRIVATE_KEYNAME_LEN 11
  48. /* Based on IANA registry at the time of writing this code
  49. *
  50. */
  51. static const char * const LOC_GRANDFATHERED[] = {
  52. "art-lojban", "i-klingon", "i-lux", "i-navajo", "no-bok", "no-nyn",
  53. "cel-gaulish", "en-GB-oed", "i-ami",
  54. "i-bnn", "i-default", "i-enochian",
  55. "i-mingo", "i-pwn", "i-tao",
  56. "i-tay", "i-tsu", "sgn-BE-fr",
  57. "sgn-BE-nl", "sgn-CH-de", "zh-cmn",
  58. "zh-cmn-Hans", "zh-cmn-Hant", "zh-gan" ,
  59. "zh-guoyu", "zh-hakka", "zh-min",
  60. "zh-min-nan", "zh-wuu", "zh-xiang",
  61. "zh-yue", NULL
  62. };
  63. /* Based on IANA registry at the time of writing this code
  64. * This array lists the preferred values for the grandfathered tags if applicable
  65. * This is in sync with the array LOC_GRANDFATHERED
  66. * e.g. the offsets of the grandfathered tags match the offset of the preferred value
  67. */
  68. static const int LOC_PREFERRED_GRANDFATHERED_LEN = 6;
  69. static const char * const LOC_PREFERRED_GRANDFATHERED[] = {
  70. "jbo", "tlh", "lb",
  71. "nv", "nb", "nn",
  72. NULL
  73. };
  74. /* returns true if a is an ID separator, false otherwise */
  75. #define isIDSeparator(a) (a == '_' || a == '-')
  76. #define isKeywordSeparator(a) (a == '@' )
  77. #define isEndOfTag(a) (a == '\0' )
  78. #define isPrefixLetter(a) ((a=='x')||(a=='X')||(a=='i')||(a=='I'))
  79. /*returns true if one of the special prefixes is here (s=string)
  80. 'x-' or 'i-' */
  81. #define isIDPrefix(s) (isPrefixLetter(s[0])&&isIDSeparator(s[1]))
  82. #define isKeywordPrefix(s) ( isKeywordSeparator(s[0]) )
  83. /* Dot terminates it because of POSIX form where dot precedes the codepage
  84. * except for variant */
  85. #define isTerminator(a) ((a==0)||(a=='.')||(a=='@'))
  86. /* {{{ return the offset of 'key' in the array 'list'.
  87. * returns -1 if not present */
  88. static int16_t findOffset(const char* const* list, const char* key)
  89. {
  90. const char* const* anchor = list;
  91. while (*list != NULL) {
  92. if (strcmp(key, *list) == 0) {
  93. return (int16_t)(list - anchor);
  94. }
  95. list++;
  96. }
  97. return -1;
  98. }
  99. /*}}}*/
  100. static char* getPreferredTag(const char* gf_tag)
  101. {
  102. char* result = NULL;
  103. zend_off_t grOffset = 0;
  104. grOffset = findOffset( LOC_GRANDFATHERED ,gf_tag);
  105. if(grOffset < 0) {
  106. return NULL;
  107. }
  108. if( grOffset < LOC_PREFERRED_GRANDFATHERED_LEN ){
  109. /* return preferred tag */
  110. result = estrdup( LOC_PREFERRED_GRANDFATHERED[grOffset] );
  111. } else {
  112. /* Return correct grandfathered language tag */
  113. result = estrdup( LOC_GRANDFATHERED[grOffset] );
  114. }
  115. return result;
  116. }
  117. /* {{{
  118. * returns the position of next token for lookup
  119. * or -1 if no token
  120. * strtokr equivalent search for token in reverse direction
  121. */
  122. static zend_off_t getStrrtokenPos(char* str, zend_off_t savedPos)
  123. {
  124. zend_off_t result =-1;
  125. zend_off_t i;
  126. for(i=savedPos-1; i>=0; i--) {
  127. if(isIDSeparator(*(str+i)) || isKeywordSeparator(*(str+i))){
  128. /* delimiter found; check for singleton */
  129. if(i>=2 && isIDSeparator(*(str+i-2)) ){
  130. /* a singleton; so send the position of token before the singleton */
  131. result = i-2;
  132. } else {
  133. result = i;
  134. }
  135. break;
  136. }
  137. }
  138. if(result < 1){
  139. /* Just in case inavlid locale e.g. '-x-xyz' or '-sl_Latn' */
  140. result =-1;
  141. }
  142. return result;
  143. }
  144. /* }}} */
  145. /* {{{
  146. * returns the position of a singleton if present
  147. * returns -1 if no singleton
  148. * strtok equivalent search for singleton
  149. */
  150. static zend_off_t getSingletonPos(const char* str)
  151. {
  152. zend_off_t result =-1;
  153. size_t len = 0;
  154. if( str && ((len=strlen(str))>0) ){
  155. zend_off_t i = 0;
  156. for( i=0; (size_t)i < len ; i++){
  157. if( isIDSeparator(*(str+i)) ){
  158. if( i==1){
  159. /* string is of the form x-avy or a-prv1 */
  160. result =0;
  161. break;
  162. } else {
  163. /* delimiter found; check for singleton */
  164. if( isIDSeparator(*(str+i+2)) ){
  165. /* a singleton; so send the position of separator before singleton */
  166. result = i+1;
  167. break;
  168. }
  169. }
  170. }
  171. }/* end of for */
  172. }
  173. return result;
  174. }
  175. /* }}} */
  176. /* {{{ Get default locale */
  177. /* }}} */
  178. /* {{{ Get default locale */
  179. PHP_NAMED_FUNCTION(zif_locale_get_default)
  180. {
  181. if (zend_parse_parameters_none() == FAILURE) {
  182. RETURN_THROWS();
  183. }
  184. RETURN_STRING( intl_locale_get_default( ) );
  185. }
  186. /* }}} */
  187. /* {{{ Set default locale */
  188. /* }}} */
  189. /* {{{ Set default locale */
  190. PHP_NAMED_FUNCTION(zif_locale_set_default)
  191. {
  192. zend_string* locale_name;
  193. zend_string *ini_name;
  194. char *default_locale = NULL;
  195. if(zend_parse_parameters( ZEND_NUM_ARGS(), "S", &locale_name) == FAILURE)
  196. {
  197. RETURN_THROWS();
  198. }
  199. if (ZSTR_LEN(locale_name) == 0) {
  200. default_locale = (char *)uloc_getDefault();
  201. locale_name = zend_string_init(default_locale, strlen(default_locale), 0);
  202. }
  203. ini_name = zend_string_init(LOCALE_INI_NAME, sizeof(LOCALE_INI_NAME) - 1, 0);
  204. zend_alter_ini_entry(ini_name, locale_name, PHP_INI_USER, PHP_INI_STAGE_RUNTIME);
  205. zend_string_release_ex(ini_name, 0);
  206. if (default_locale != NULL) {
  207. zend_string_release_ex(locale_name, 0);
  208. }
  209. RETURN_TRUE;
  210. }
  211. /* }}} */
  212. /* {{{
  213. * Gets the value from ICU
  214. * common code shared by get_primary_language,get_script or get_region or get_variant
  215. * result = 0 if error, 1 if successful , -1 if no value
  216. */
  217. static zend_string* get_icu_value_internal( const char* loc_name , char* tag_name, int* result , int fromParseLocale)
  218. {
  219. zend_string* tag_value = NULL;
  220. int32_t tag_value_len = 512;
  221. char* mod_loc_name = NULL;
  222. int32_t buflen = 512;
  223. UErrorCode status = U_ZERO_ERROR;
  224. if (strlen(loc_name) > INTL_MAX_LOCALE_LEN) {
  225. return NULL;
  226. }
  227. if( strcmp(tag_name, LOC_CANONICALIZE_TAG) != 0 ){
  228. /* Handle grandfathered languages */
  229. zend_off_t grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
  230. if( grOffset >= 0 ){
  231. if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
  232. return zend_string_init(loc_name, strlen(loc_name), 0);
  233. } else {
  234. /* Since Grandfathered , no value , do nothing , retutn NULL */
  235. return NULL;
  236. }
  237. }
  238. if( fromParseLocale==1 ){
  239. zend_off_t singletonPos = 0;
  240. /* Handle singletons */
  241. if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
  242. if( strlen(loc_name)>1 && (isIDPrefix(loc_name) == 1) ){
  243. return zend_string_init(loc_name, strlen(loc_name), 0);
  244. }
  245. }
  246. singletonPos = getSingletonPos( loc_name );
  247. if( singletonPos == 0){
  248. /* singleton at start of script, region , variant etc.
  249. * or invalid singleton at start of language */
  250. return NULL;
  251. } else if( singletonPos > 0 ){
  252. /* singleton at some position except at start
  253. * strip off the singleton and rest of the loc_name */
  254. mod_loc_name = estrndup ( loc_name , singletonPos-1);
  255. }
  256. } /* end of if fromParse */
  257. } /* end of if != LOC_CANONICAL_TAG */
  258. if( mod_loc_name == NULL){
  259. mod_loc_name = estrdup(loc_name );
  260. }
  261. /* Proceed to ICU */
  262. do{
  263. if (tag_value) {
  264. tag_value = zend_string_realloc( tag_value , buflen, 0);
  265. } else {
  266. tag_value = zend_string_alloc( buflen, 0);
  267. }
  268. tag_value_len = buflen;
  269. if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
  270. buflen = uloc_getScript ( mod_loc_name , tag_value->val , tag_value_len , &status);
  271. }
  272. if( strcmp(tag_name , LOC_LANG_TAG )==0 ){
  273. buflen = uloc_getLanguage ( mod_loc_name , tag_value->val , tag_value_len , &status);
  274. }
  275. if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
  276. buflen = uloc_getCountry ( mod_loc_name , tag_value->val , tag_value_len , &status);
  277. }
  278. if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
  279. buflen = uloc_getVariant ( mod_loc_name , tag_value->val , tag_value_len , &status);
  280. }
  281. if( strcmp(tag_name , LOC_CANONICALIZE_TAG)==0 ){
  282. buflen = uloc_canonicalize ( mod_loc_name , tag_value->val , tag_value_len , &status);
  283. }
  284. if( U_FAILURE( status ) ) {
  285. if( status == U_BUFFER_OVERFLOW_ERROR ) {
  286. status = U_ZERO_ERROR;
  287. buflen++; /* add space for \0 */
  288. continue;
  289. }
  290. /* Error in retrieving data */
  291. *result = 0;
  292. if( tag_value ){
  293. zend_string_release_ex( tag_value, 0 );
  294. }
  295. if( mod_loc_name ){
  296. efree( mod_loc_name);
  297. }
  298. return NULL;
  299. }
  300. } while( buflen > tag_value_len );
  301. if( buflen ==0 ){
  302. /* No value found */
  303. *result = -1;
  304. if( tag_value ){
  305. zend_string_release_ex( tag_value, 0 );
  306. }
  307. if( mod_loc_name ){
  308. efree( mod_loc_name);
  309. }
  310. return NULL;
  311. } else {
  312. *result = 1;
  313. }
  314. if( mod_loc_name ){
  315. efree( mod_loc_name);
  316. }
  317. tag_value->len = strlen(tag_value->val);
  318. return tag_value;
  319. }
  320. /* }}} */
  321. /* {{{
  322. * Gets the value from ICU , called when PHP userspace function is called
  323. * common code shared by get_primary_language,get_script or get_region or get_variant
  324. */
  325. static void get_icu_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
  326. {
  327. const char* loc_name = NULL;
  328. size_t loc_name_len = 0;
  329. zend_string* tag_value = NULL;
  330. char* empty_result = "";
  331. int result = 0;
  332. char* msg = NULL;
  333. UErrorCode status = U_ZERO_ERROR;
  334. intl_error_reset( NULL );
  335. if(zend_parse_parameters( ZEND_NUM_ARGS(), "s",
  336. &loc_name ,&loc_name_len ) == FAILURE) {
  337. RETURN_THROWS();
  338. }
  339. if(loc_name_len == 0) {
  340. loc_name = intl_locale_get_default();
  341. loc_name_len = strlen(loc_name);
  342. }
  343. INTL_CHECK_LOCALE_LEN(loc_name_len);
  344. /* Call ICU get */
  345. tag_value = get_icu_value_internal( loc_name , tag_name , &result ,0);
  346. /* No value found */
  347. if( result == -1 ) {
  348. if( tag_value){
  349. zend_string_release_ex( tag_value, 0 );
  350. }
  351. RETURN_STRING( empty_result);
  352. }
  353. /* value found */
  354. if( tag_value){
  355. RETVAL_STR( tag_value );
  356. return;
  357. }
  358. /* Error encountered while fetching the value */
  359. if( result ==0) {
  360. spprintf(&msg , 0, "locale_get_%s : unable to get locale %s", tag_name , tag_name );
  361. intl_error_set( NULL, status, msg , 1 );
  362. efree(msg);
  363. RETURN_NULL();
  364. }
  365. }
  366. /* }}} */
  367. /* {{{ gets the script for the $locale */
  368. PHP_FUNCTION( locale_get_script )
  369. {
  370. get_icu_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
  371. }
  372. /* }}} */
  373. /* {{{ gets the region for the $locale */
  374. PHP_FUNCTION( locale_get_region )
  375. {
  376. get_icu_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
  377. }
  378. /* }}} */
  379. /* {{{ gets the primary language for the $locale */
  380. PHP_FUNCTION(locale_get_primary_language )
  381. {
  382. get_icu_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
  383. }
  384. /* }}} */
  385. /* {{{
  386. * common code shared by display_xyz functions to get the value from ICU
  387. }}} */
  388. static void get_icu_disp_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
  389. {
  390. const char* loc_name = NULL;
  391. size_t loc_name_len = 0;
  392. const char* disp_loc_name = NULL;
  393. size_t disp_loc_name_len = 0;
  394. int free_loc_name = 0;
  395. UChar* disp_name = NULL;
  396. int32_t disp_name_len = 0;
  397. char* mod_loc_name = NULL;
  398. int32_t buflen = 512;
  399. UErrorCode status = U_ZERO_ERROR;
  400. zend_string* u8str;
  401. char* msg = NULL;
  402. intl_error_reset( NULL );
  403. if(zend_parse_parameters( ZEND_NUM_ARGS(), "s|s!",
  404. &loc_name, &loc_name_len ,
  405. &disp_loc_name ,&disp_loc_name_len ) == FAILURE)
  406. {
  407. RETURN_THROWS();
  408. }
  409. if(loc_name_len > ULOC_FULLNAME_CAPACITY) {
  410. /* See bug 67397: overlong locale names cause trouble in uloc_getDisplayName */
  411. spprintf(&msg , 0, "locale_get_display_%s : name too long", tag_name );
  412. intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, msg , 1 );
  413. efree(msg);
  414. RETURN_FALSE;
  415. }
  416. if(loc_name_len == 0) {
  417. loc_name = intl_locale_get_default();
  418. }
  419. if( strcmp(tag_name, DISP_NAME) != 0 ){
  420. /* Handle grandfathered languages */
  421. int grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
  422. if( grOffset >= 0 ){
  423. if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
  424. mod_loc_name = getPreferredTag( loc_name );
  425. } else {
  426. /* Since Grandfathered, no value, do nothing, retutn NULL */
  427. RETURN_FALSE;
  428. }
  429. }
  430. } /* end of if != LOC_CANONICAL_TAG */
  431. if( mod_loc_name==NULL ){
  432. mod_loc_name = estrdup( loc_name );
  433. }
  434. /* Check if disp_loc_name passed , if not use default locale */
  435. if( !disp_loc_name){
  436. disp_loc_name = estrdup(intl_locale_get_default());
  437. free_loc_name = 1;
  438. }
  439. /* Get the disp_value for the given locale */
  440. do{
  441. disp_name = erealloc( disp_name , buflen * sizeof(UChar) );
  442. disp_name_len = buflen;
  443. if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
  444. buflen = uloc_getDisplayLanguage ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
  445. } else if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
  446. buflen = uloc_getDisplayScript ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
  447. } else if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
  448. buflen = uloc_getDisplayCountry ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
  449. } else if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
  450. buflen = uloc_getDisplayVariant ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
  451. } else if( strcmp(tag_name , DISP_NAME)==0 ){
  452. buflen = uloc_getDisplayName ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
  453. }
  454. /* U_STRING_NOT_TERMINATED_WARNING is admissible here; don't look for it */
  455. if( U_FAILURE( status ) )
  456. {
  457. if( status == U_BUFFER_OVERFLOW_ERROR )
  458. {
  459. status = U_ZERO_ERROR;
  460. continue;
  461. }
  462. spprintf(&msg, 0, "locale_get_display_%s : unable to get locale %s", tag_name , tag_name );
  463. intl_error_set( NULL, status, msg , 1 );
  464. efree(msg);
  465. if( disp_name){
  466. efree( disp_name );
  467. }
  468. if( mod_loc_name){
  469. efree( mod_loc_name );
  470. }
  471. if (free_loc_name) {
  472. efree((void *)disp_loc_name);
  473. disp_loc_name = NULL;
  474. }
  475. RETURN_FALSE;
  476. }
  477. } while( buflen > disp_name_len );
  478. if( mod_loc_name){
  479. efree( mod_loc_name );
  480. }
  481. if (free_loc_name) {
  482. efree((void *)disp_loc_name);
  483. disp_loc_name = NULL;
  484. }
  485. /* Convert display locale name from UTF-16 to UTF-8. */
  486. u8str = intl_convert_utf16_to_utf8(disp_name, buflen, &status );
  487. efree( disp_name );
  488. if( !u8str )
  489. {
  490. spprintf(&msg, 0, "locale_get_display_%s :error converting display name for %s to UTF-8", tag_name , tag_name );
  491. intl_error_set( NULL, status, msg , 1 );
  492. efree(msg);
  493. RETURN_FALSE;
  494. }
  495. RETVAL_NEW_STR( u8str );
  496. }
  497. /* }}} */
  498. /* {{{ gets the name for the $locale in $in_locale or default_locale */
  499. PHP_FUNCTION(locale_get_display_name)
  500. {
  501. get_icu_disp_value_src_php( DISP_NAME , INTERNAL_FUNCTION_PARAM_PASSTHRU );
  502. }
  503. /* }}} */
  504. /* {{{ gets the language for the $locale in $in_locale or default_locale */
  505. PHP_FUNCTION(locale_get_display_language)
  506. {
  507. get_icu_disp_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
  508. }
  509. /* }}} */
  510. /* {{{ gets the script for the $locale in $in_locale or default_locale */
  511. PHP_FUNCTION(locale_get_display_script)
  512. {
  513. get_icu_disp_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
  514. }
  515. /* }}} */
  516. /* {{{ gets the region for the $locale in $in_locale or default_locale */
  517. PHP_FUNCTION(locale_get_display_region)
  518. {
  519. get_icu_disp_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
  520. }
  521. /* }}} */
  522. /* {{{
  523. * proto static string Locale::getDisplayVariant($locale, $in_locale = null)
  524. * gets the variant for the $locale in $in_locale or default_locale
  525. }}} */
  526. /* {{{
  527. * proto static string get_display_variant($locale, $in_locale = null)
  528. * gets the variant for the $locale in $in_locale or default_locale
  529. */
  530. PHP_FUNCTION(locale_get_display_variant)
  531. {
  532. get_icu_disp_value_src_php( LOC_VARIANT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
  533. }
  534. /* }}} */
  535. /* {{{ return an associative array containing keyword-value
  536. * pairs for this locale. The keys are keys to the array (doh!)
  537. * }}}*/
  538. /* {{{ return an associative array containing keyword-value
  539. * pairs for this locale. The keys are keys to the array (doh!)
  540. */
  541. PHP_FUNCTION( locale_get_keywords )
  542. {
  543. UEnumeration* e = NULL;
  544. UErrorCode status = U_ZERO_ERROR;
  545. const char* kw_key = NULL;
  546. int32_t kw_key_len = 0;
  547. const char* loc_name = NULL;
  548. size_t loc_name_len = 0;
  549. intl_error_reset( NULL );
  550. if(zend_parse_parameters( ZEND_NUM_ARGS(), "s",
  551. &loc_name, &loc_name_len ) == FAILURE)
  552. {
  553. RETURN_THROWS();
  554. }
  555. INTL_CHECK_LOCALE_LEN(strlen(loc_name));
  556. if(loc_name_len == 0) {
  557. loc_name = intl_locale_get_default();
  558. }
  559. /* Get the keywords */
  560. e = uloc_openKeywords( loc_name, &status );
  561. if( e != NULL ) {
  562. /*
  563. ICU expects the buffer to be allocated before calling the function
  564. and so the buffer size has been explicitly specified
  565. ICU uloc.h #define ULOC_KEYWORD_AND_VALUES_CAPACITY 100
  566. hence the kw_value buffer size is 100
  567. */
  568. /* Traverse it, filling the return array. */
  569. array_init( return_value );
  570. while( ( kw_key = uenum_next( e, &kw_key_len, &status ) ) != NULL ){
  571. int32_t kw_value_len = 100;
  572. zend_string *kw_value_str = zend_string_alloc(kw_value_len, 0);
  573. /* Get the keyword value for each keyword */
  574. kw_value_len=uloc_getKeywordValue( loc_name, kw_key, ZSTR_VAL(kw_value_str), kw_value_len, &status );
  575. if (status == U_BUFFER_OVERFLOW_ERROR) {
  576. status = U_ZERO_ERROR;
  577. kw_value_str = zend_string_extend(kw_value_str, kw_value_len, 0);
  578. kw_value_len=uloc_getKeywordValue( loc_name,kw_key, ZSTR_VAL(kw_value_str), kw_value_len+1, &status );
  579. } else if(!U_FAILURE(status)) {
  580. kw_value_str = zend_string_truncate(kw_value_str, kw_value_len, 0);
  581. }
  582. if (U_FAILURE(status)) {
  583. intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "locale_get_keywords: Error encountered while getting the keyword value for the keyword", 0 );
  584. if( kw_value_str){
  585. zend_string_efree( kw_value_str );
  586. }
  587. zend_array_destroy(Z_ARR_P(return_value));
  588. RETURN_FALSE;
  589. }
  590. add_assoc_str( return_value, (char *)kw_key, kw_value_str);
  591. } /* end of while */
  592. } /* end of if e!=NULL */
  593. uenum_close( e );
  594. }
  595. /* }}} */
  596. /* {{{ @return string the canonicalized locale
  597. * }}} */
  598. /* {{{ @param string $locale The locale string to canonicalize */
  599. PHP_FUNCTION(locale_canonicalize)
  600. {
  601. get_icu_value_src_php( LOC_CANONICALIZE_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
  602. }
  603. /* }}} */
  604. /* {{{ append_key_value
  605. * Internal function which is called from locale_compose
  606. * gets the value for the key_name and appends to the loc_name
  607. * returns 1 if successful , -1 if not found ,
  608. * 0 if array element is not a string , -2 if buffer-overflow
  609. */
  610. static int append_key_value(smart_str* loc_name, HashTable* hash_arr, char* key_name)
  611. {
  612. zval *ele_value;
  613. if ((ele_value = zend_hash_str_find(hash_arr , key_name, strlen(key_name))) != NULL ) {
  614. if(Z_TYPE_P(ele_value)!= IS_STRING ){
  615. /* element value is not a string */
  616. return FAILURE;
  617. }
  618. if(strcmp(key_name, LOC_LANG_TAG) != 0 &&
  619. strcmp(key_name, LOC_GRANDFATHERED_LANG_TAG)!=0 ) {
  620. /* not lang or grandfathered tag */
  621. smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
  622. }
  623. smart_str_appendl(loc_name, Z_STRVAL_P(ele_value) , Z_STRLEN_P(ele_value));
  624. return SUCCESS;
  625. }
  626. return LOC_NOT_FOUND;
  627. }
  628. /* }}} */
  629. /* {{{ append_prefix , appends the prefix needed
  630. * e.g. private adds 'x'
  631. */
  632. static void add_prefix(smart_str* loc_name, char* key_name)
  633. {
  634. if( strncmp(key_name , LOC_PRIVATE_TAG , 7) == 0 ){
  635. smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
  636. smart_str_appendl(loc_name, PRIVATE_PREFIX , sizeof(PRIVATE_PREFIX)-1);
  637. }
  638. }
  639. /* }}} */
  640. /* {{{ append_multiple_key_values
  641. * Internal function which is called from locale_compose
  642. * gets the multiple values for the key_name and appends to the loc_name
  643. * used for 'variant','extlang','private'
  644. * returns 1 if successful , -1 if not found ,
  645. * 0 if array element is not a string , -2 if buffer-overflow
  646. */
  647. static int append_multiple_key_values(smart_str* loc_name, HashTable* hash_arr, char* key_name)
  648. {
  649. zval *ele_value;
  650. int isFirstSubtag = 0;
  651. /* Variant/ Extlang/Private etc. */
  652. if ((ele_value = zend_hash_str_find( hash_arr , key_name , strlen(key_name))) != NULL) {
  653. if( Z_TYPE_P(ele_value) == IS_STRING ){
  654. add_prefix( loc_name , key_name);
  655. smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
  656. smart_str_appendl(loc_name, Z_STRVAL_P(ele_value) , Z_STRLEN_P(ele_value));
  657. return SUCCESS;
  658. } else if(Z_TYPE_P(ele_value) == IS_ARRAY ) {
  659. HashTable *arr = Z_ARRVAL_P(ele_value);
  660. zval *data;
  661. ZEND_HASH_FOREACH_VAL(arr, data) {
  662. if(Z_TYPE_P(data) != IS_STRING) {
  663. return FAILURE;
  664. }
  665. if (isFirstSubtag++ == 0){
  666. add_prefix(loc_name , key_name);
  667. }
  668. smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
  669. smart_str_appendl(loc_name, Z_STRVAL_P(data) , Z_STRLEN_P(data));
  670. } ZEND_HASH_FOREACH_END();
  671. return SUCCESS;
  672. } else {
  673. return FAILURE;
  674. }
  675. } else {
  676. char cur_key_name[31];
  677. int max_value = 0, i;
  678. /* Decide the max_value: the max. no. of elements allowed */
  679. if( strcmp(key_name , LOC_VARIANT_TAG) ==0 ){
  680. max_value = MAX_NO_VARIANT;
  681. }
  682. if( strcmp(key_name , LOC_EXTLANG_TAG) ==0 ){
  683. max_value = MAX_NO_EXTLANG;
  684. }
  685. if( strcmp(key_name , LOC_PRIVATE_TAG) ==0 ){
  686. max_value = MAX_NO_PRIVATE;
  687. }
  688. /* Multiple variant values as variant0, variant1 ,variant2 */
  689. isFirstSubtag = 0;
  690. for( i=0 ; i< max_value; i++ ){
  691. snprintf( cur_key_name , 30, "%s%d", key_name , i);
  692. if ((ele_value = zend_hash_str_find( hash_arr , cur_key_name , strlen(cur_key_name))) != NULL) {
  693. if( Z_TYPE_P(ele_value)!= IS_STRING ){
  694. /* variant is not a string */
  695. return FAILURE;
  696. }
  697. /* Add the contents */
  698. if (isFirstSubtag++ == 0){
  699. add_prefix(loc_name , cur_key_name);
  700. }
  701. smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
  702. smart_str_appendl(loc_name, Z_STRVAL_P(ele_value) , Z_STRLEN_P(ele_value));
  703. }
  704. } /* end of for */
  705. } /* end of else */
  706. return SUCCESS;
  707. }
  708. /* }}} */
  709. /*{{{
  710. * If applicable sets error message and aborts locale_compose gracefully
  711. * returns 0 if locale_compose needs to be aborted
  712. * otherwise returns 1
  713. */
  714. static int handleAppendResult( int result, smart_str* loc_name)
  715. {
  716. intl_error_reset( NULL );
  717. if( result == FAILURE) {
  718. intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
  719. "locale_compose: parameter array element is not a string", 0 );
  720. smart_str_free(loc_name);
  721. return 0;
  722. }
  723. return 1;
  724. }
  725. /* }}} */
  726. #define RETURN_SMART_STR(str) smart_str_0((str)); RETURN_NEW_STR((str)->s)
  727. /* {{{ Creates a locale by combining the parts of locale-ID passed
  728. * }}} */
  729. /* {{{ Creates a locale by combining the parts of locale-ID passed
  730. * }}} */
  731. PHP_FUNCTION(locale_compose)
  732. {
  733. smart_str loc_name_s = {0};
  734. smart_str *loc_name = &loc_name_s;
  735. zval* arr = NULL;
  736. HashTable* hash_arr = NULL;
  737. int result = 0;
  738. intl_error_reset( NULL );
  739. if(zend_parse_parameters( ZEND_NUM_ARGS(), "a",
  740. &arr) == FAILURE)
  741. {
  742. RETURN_THROWS();
  743. }
  744. hash_arr = Z_ARRVAL_P( arr );
  745. if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 )
  746. RETURN_FALSE;
  747. /* Check for grandfathered first */
  748. result = append_key_value(loc_name, hash_arr, LOC_GRANDFATHERED_LANG_TAG);
  749. if( result == SUCCESS){
  750. RETURN_SMART_STR(loc_name);
  751. }
  752. if( !handleAppendResult( result, loc_name)){
  753. RETURN_FALSE;
  754. }
  755. /* Not grandfathered */
  756. result = append_key_value(loc_name, hash_arr , LOC_LANG_TAG);
  757. if( result == LOC_NOT_FOUND ){
  758. zend_argument_value_error(1, "must contain a \"%s\" key", LOC_LANG_TAG);
  759. smart_str_free(loc_name);
  760. RETURN_THROWS();
  761. }
  762. if( !handleAppendResult( result, loc_name)){
  763. RETURN_FALSE;
  764. }
  765. /* Extlang */
  766. result = append_multiple_key_values(loc_name, hash_arr , LOC_EXTLANG_TAG);
  767. if( !handleAppendResult( result, loc_name)){
  768. RETURN_FALSE;
  769. }
  770. /* Script */
  771. result = append_key_value(loc_name, hash_arr , LOC_SCRIPT_TAG);
  772. if( !handleAppendResult( result, loc_name)){
  773. RETURN_FALSE;
  774. }
  775. /* Region */
  776. result = append_key_value( loc_name, hash_arr , LOC_REGION_TAG);
  777. if( !handleAppendResult( result, loc_name)){
  778. RETURN_FALSE;
  779. }
  780. /* Variant */
  781. result = append_multiple_key_values( loc_name, hash_arr , LOC_VARIANT_TAG);
  782. if( !handleAppendResult( result, loc_name)){
  783. RETURN_FALSE;
  784. }
  785. /* Private */
  786. result = append_multiple_key_values( loc_name, hash_arr , LOC_PRIVATE_TAG);
  787. if( !handleAppendResult( result, loc_name)){
  788. RETURN_FALSE;
  789. }
  790. RETURN_SMART_STR(loc_name);
  791. }
  792. /* }}} */
  793. /*{{{
  794. * Parses the locale and returns private subtags if existing
  795. * else returns NULL
  796. * e.g. for locale='en_US-x-prv1-prv2-prv3'
  797. * returns a pointer to the string 'prv1-prv2-prv3'
  798. */
  799. static zend_string* get_private_subtags(const char* loc_name)
  800. {
  801. zend_string* result = NULL;
  802. size_t len = 0;
  803. const char* mod_loc_name =NULL;
  804. if( loc_name && (len = strlen(loc_name)) > 0 ){
  805. zend_off_t singletonPos = 0;
  806. mod_loc_name = loc_name ;
  807. while( (singletonPos = getSingletonPos(mod_loc_name)) > -1){
  808. if( (*(mod_loc_name+singletonPos)=='x') || (*(mod_loc_name+singletonPos)=='X') ){
  809. /* private subtag start found */
  810. if( singletonPos + 2 == len){
  811. /* loc_name ends with '-x-' ; return NULL */
  812. }
  813. else{
  814. /* result = mod_loc_name + singletonPos +2; */
  815. result = zend_string_init(mod_loc_name + singletonPos+2 , (len -( singletonPos +2) ), 0);
  816. }
  817. break;
  818. }
  819. else{
  820. if((size_t)(singletonPos + 1) >= len){
  821. /* String end */
  822. break;
  823. } else {
  824. /* singleton found but not a private subtag , hence check further in the string for the private subtag */
  825. mod_loc_name = mod_loc_name + singletonPos +1;
  826. len = strlen(mod_loc_name);
  827. }
  828. }
  829. } /* end of while */
  830. }
  831. return result;
  832. }
  833. /* }}} */
  834. /* {{{ code used by locale_parse */
  835. static int add_array_entry(const char* loc_name, zval* hash_arr, char* key_name)
  836. {
  837. zend_string* key_value = NULL;
  838. char* cur_key_name = NULL;
  839. char* token = NULL;
  840. char* last_ptr = NULL;
  841. int result = 0;
  842. int cur_result = 0;
  843. if( strcmp(key_name , LOC_PRIVATE_TAG)==0 ){
  844. key_value = get_private_subtags( loc_name );
  845. result = 1;
  846. } else {
  847. key_value = get_icu_value_internal( loc_name , key_name , &result,1 );
  848. }
  849. if( (strcmp(key_name , LOC_PRIVATE_TAG)==0) ||
  850. ( strcmp(key_name , LOC_VARIANT_TAG)==0) ){
  851. if( result > 0 && key_value){
  852. int cnt = 0;
  853. /* Tokenize on the "_" or "-" */
  854. token = php_strtok_r( key_value->val , DELIMITER ,&last_ptr);
  855. if( cur_key_name ){
  856. efree( cur_key_name);
  857. }
  858. cur_key_name = (char*)ecalloc( 25, 25);
  859. sprintf( cur_key_name , "%s%d", key_name , cnt++);
  860. add_assoc_string( hash_arr, cur_key_name , token);
  861. /* tokenize on the "_" or "-" and stop at singleton if any */
  862. while( (token = php_strtok_r(NULL , DELIMITER , &last_ptr)) && (strlen(token)>1) ){
  863. sprintf( cur_key_name , "%s%d", key_name , cnt++);
  864. add_assoc_string( hash_arr, cur_key_name , token);
  865. }
  866. /*
  867. if( strcmp(key_name, LOC_PRIVATE_TAG) == 0 ){
  868. }
  869. */
  870. }
  871. if (key_value) {
  872. zend_string_release_ex(key_value, 0);
  873. }
  874. } else {
  875. if( result == 1 ){
  876. add_assoc_str( hash_arr, key_name , key_value);
  877. cur_result = 1;
  878. } else if (key_value) {
  879. zend_string_release_ex(key_value, 0);
  880. }
  881. }
  882. if( cur_key_name ){
  883. efree( cur_key_name);
  884. }
  885. /*if( key_name != LOC_PRIVATE_TAG && key_value){*/
  886. return cur_result;
  887. }
  888. /* }}} */
  889. /* {{{ parses a locale-id into an array the different parts of it */
  890. PHP_FUNCTION(locale_parse)
  891. {
  892. const char* loc_name = NULL;
  893. size_t loc_name_len = 0;
  894. int grOffset = 0;
  895. intl_error_reset( NULL );
  896. if(zend_parse_parameters( ZEND_NUM_ARGS(), "s",
  897. &loc_name, &loc_name_len ) == FAILURE)
  898. {
  899. RETURN_THROWS();
  900. }
  901. INTL_CHECK_LOCALE_LEN(strlen(loc_name));
  902. if(loc_name_len == 0) {
  903. loc_name = intl_locale_get_default();
  904. }
  905. array_init( return_value );
  906. grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
  907. if( grOffset >= 0 ){
  908. add_assoc_string( return_value , LOC_GRANDFATHERED_LANG_TAG, (char *)loc_name);
  909. }
  910. else{
  911. /* Not grandfathered */
  912. add_array_entry( loc_name , return_value , LOC_LANG_TAG);
  913. add_array_entry( loc_name , return_value , LOC_SCRIPT_TAG);
  914. add_array_entry( loc_name , return_value , LOC_REGION_TAG);
  915. add_array_entry( loc_name , return_value , LOC_VARIANT_TAG);
  916. add_array_entry( loc_name , return_value , LOC_PRIVATE_TAG);
  917. }
  918. }
  919. /* }}} */
  920. /* {{{ gets an array containing the list of variants, or null */
  921. PHP_FUNCTION(locale_get_all_variants)
  922. {
  923. const char* loc_name = NULL;
  924. size_t loc_name_len = 0;
  925. int result = 0;
  926. char* token = NULL;
  927. zend_string* variant = NULL;
  928. char* saved_ptr = NULL;
  929. intl_error_reset( NULL );
  930. if(zend_parse_parameters( ZEND_NUM_ARGS(), "s",
  931. &loc_name, &loc_name_len ) == FAILURE)
  932. {
  933. RETURN_THROWS();
  934. }
  935. if(loc_name_len == 0) {
  936. loc_name = intl_locale_get_default();
  937. loc_name_len = strlen(loc_name);
  938. }
  939. INTL_CHECK_LOCALE_LEN(loc_name_len);
  940. array_init( return_value );
  941. /* If the locale is grandfathered, stop, no variants */
  942. if( findOffset( LOC_GRANDFATHERED , loc_name ) >= 0 ){
  943. /* ("Grandfathered Tag. No variants."); */
  944. }
  945. else {
  946. /* Call ICU variant */
  947. variant = get_icu_value_internal( loc_name , LOC_VARIANT_TAG , &result ,0);
  948. if( result > 0 && variant){
  949. /* Tokenize on the "_" or "-" */
  950. token = php_strtok_r( variant->val , DELIMITER , &saved_ptr);
  951. add_next_index_stringl( return_value, token , strlen(token));
  952. /* tokenize on the "_" or "-" and stop at singleton if any */
  953. while( (token = php_strtok_r(NULL , DELIMITER, &saved_ptr)) && (strlen(token)>1) ){
  954. add_next_index_stringl( return_value, token , strlen(token));
  955. }
  956. }
  957. if( variant ){
  958. zend_string_release_ex( variant, 0 );
  959. }
  960. }
  961. }
  962. /* }}} */
  /* {{{ Converts to lower case and also replaces all hyphens with the underscore
   *
   * str    - NUL-terminated input; may be NULL
   * retstr - output buffer, must hold at least strlen(str) + 1 bytes
   *
   * Returns 1 after writing the converted, NUL-terminated copy to retstr.
   * Returns 0 (leaving retstr untouched) when str is NULL or empty.
   */
  static int strToMatch(const char* str ,char *retstr)
  {
      if( (!str) || str[0] == '\0'){
          return 0;
      }

      for( ; *str != '\0'; str++, retstr++ ){
          if( *str == '-' ){
              *retstr = '_';
          } else {
              /* tolower() is only defined for EOF and unsigned char values;
               * a plain (possibly negative) char must be converted first. */
              *retstr = (char)tolower((unsigned char)*str);
          }
      }
      *retstr = '\0';

      return 1;
  }
  /* }}} */
  991. /* {{{ Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm */
  992. /* }}} */
  993. /* {{{ Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm */
  994. PHP_FUNCTION(locale_filter_matches)
  995. {
  996. char* lang_tag = NULL;
  997. size_t lang_tag_len = 0;
  998. const char* loc_range = NULL;
  999. size_t loc_range_len = 0;
  1000. int result = 0;
  1001. char* token = 0;
  1002. char* chrcheck = NULL;
  1003. zend_string* can_lang_tag = NULL;
  1004. zend_string* can_loc_range = NULL;
  1005. char* cur_lang_tag = NULL;
  1006. char* cur_loc_range = NULL;
  1007. bool boolCanonical = 0;
  1008. UErrorCode status = U_ZERO_ERROR;
  1009. intl_error_reset( NULL );
  1010. if(zend_parse_parameters( ZEND_NUM_ARGS(), "ss|b",
  1011. &lang_tag, &lang_tag_len , &loc_range , &loc_range_len ,
  1012. &boolCanonical) == FAILURE)
  1013. {
  1014. RETURN_THROWS();
  1015. }
  1016. if(loc_range_len == 0) {
  1017. loc_range = intl_locale_get_default();
  1018. loc_range_len = strlen(loc_range);
  1019. }
  1020. if( strcmp(loc_range,"*")==0){
  1021. RETURN_TRUE;
  1022. }
  1023. INTL_CHECK_LOCALE_LEN(loc_range_len);
  1024. INTL_CHECK_LOCALE_LEN(lang_tag_len);
  1025. if( boolCanonical ){
  1026. /* canonicalize loc_range */
  1027. can_loc_range=get_icu_value_internal( loc_range , LOC_CANONICALIZE_TAG , &result , 0);
  1028. if( result <=0) {
  1029. intl_error_set( NULL, status,
  1030. "locale_filter_matches : unable to canonicalize loc_range" , 0 );
  1031. RETURN_FALSE;
  1032. }
  1033. /* canonicalize lang_tag */
  1034. can_lang_tag = get_icu_value_internal( lang_tag , LOC_CANONICALIZE_TAG , &result , 0);
  1035. if( result <=0) {
  1036. intl_error_set( NULL, status,
  1037. "locale_filter_matches : unable to canonicalize lang_tag" , 0 );
  1038. RETURN_FALSE;
  1039. }
  1040. /* Convert to lower case for case-insensitive comparison */
  1041. cur_lang_tag = ecalloc( 1, can_lang_tag->len + 1);
  1042. /* Convert to lower case for case-insensitive comparison */
  1043. result = strToMatch( can_lang_tag->val , cur_lang_tag);
  1044. if( result == 0) {
  1045. efree( cur_lang_tag );
  1046. zend_string_release_ex( can_lang_tag, 0 );
  1047. RETURN_FALSE;
  1048. }
  1049. cur_loc_range = ecalloc( 1, can_loc_range->len + 1);
  1050. result = strToMatch( can_loc_range->val , cur_loc_range );
  1051. if( result == 0) {
  1052. efree( cur_lang_tag );
  1053. zend_string_release_ex( can_lang_tag, 0 );
  1054. efree( cur_loc_range );
  1055. zend_string_release_ex( can_loc_range, 0 );
  1056. RETURN_FALSE;
  1057. }
  1058. /* check if prefix */
  1059. token = strstr( cur_lang_tag , cur_loc_range );
  1060. if( token && (token==cur_lang_tag) ){
  1061. /* check if the char. after match is SEPARATOR */
  1062. chrcheck = token + (strlen(cur_loc_range));
  1063. if( isIDSeparator(*chrcheck) || isKeywordSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
  1064. efree( cur_lang_tag );
  1065. efree( cur_loc_range );
  1066. if( can_lang_tag){
  1067. zend_string_release_ex( can_lang_tag, 0 );
  1068. }
  1069. if( can_loc_range){
  1070. zend_string_release_ex( can_loc_range, 0 );
  1071. }
  1072. RETURN_TRUE;
  1073. }
  1074. }
  1075. /* No prefix as loc_range */
  1076. if( cur_lang_tag){
  1077. efree( cur_lang_tag );
  1078. }
  1079. if( cur_loc_range){
  1080. efree( cur_loc_range );
  1081. }
  1082. if( can_lang_tag){
  1083. zend_string_release_ex( can_lang_tag, 0 );
  1084. }
  1085. if( can_loc_range){
  1086. zend_string_release_ex( can_loc_range, 0 );
  1087. }
  1088. RETURN_FALSE;
  1089. } /* end of if isCanonical */
  1090. else{
  1091. /* Convert to lower case for case-insensitive comparison */
  1092. cur_lang_tag = ecalloc( 1, strlen(lang_tag ) + 1);
  1093. result = strToMatch( lang_tag , cur_lang_tag);
  1094. if( result == 0) {
  1095. efree( cur_lang_tag );
  1096. RETURN_FALSE;
  1097. }
  1098. cur_loc_range = ecalloc( 1, strlen(loc_range ) + 1);
  1099. result = strToMatch( loc_range , cur_loc_range );
  1100. if( result == 0) {
  1101. efree( cur_lang_tag );
  1102. efree( cur_loc_range );
  1103. RETURN_FALSE;
  1104. }
  1105. /* check if prefix */
  1106. token = strstr( cur_lang_tag , cur_loc_range );
  1107. if( token && (token==cur_lang_tag) ){
  1108. /* check if the char. after match is SEPARATOR */
  1109. chrcheck = token + (strlen(cur_loc_range));
  1110. if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
  1111. efree( cur_lang_tag );
  1112. efree( cur_loc_range );
  1113. RETURN_TRUE;
  1114. }
  1115. }
  1116. /* No prefix as loc_range */
  1117. if( cur_lang_tag){
  1118. efree( cur_lang_tag );
  1119. }
  1120. if( cur_loc_range){
  1121. efree( cur_loc_range );
  1122. }
  1123. RETURN_FALSE;
  1124. }
  1125. }
  1126. /* }}} */
  /* Frees the lookup table used by lookup_loc_range: arr holds arr_size pairs
   * of pointers and only the first pointer of each pair (even index) is freed
   * here, followed by the table itself. */
  static void array_cleanup( char* arr[] , int arr_size)
  {
      char** slot = arr;
      int    pairs_left = arr_size;

      while( pairs_left-- > 0 ){
          if( *slot ){
              efree( *slot );
          }
          /* step over the second pointer of the pair, which is not freed */
          slot += 2;
      }

      efree(arr);
  }
  1137. #define LOOKUP_CLEAN_RETURN(value) array_cleanup(cur_arr, cur_arr_len); return (value)
  1138. /* {{{
  1139. * returns the lookup result to lookup_loc_range_src_php
  1140. * internal function
  1141. */
  1142. static zend_string* lookup_loc_range(const char* loc_range, HashTable* hash_arr, int canonicalize )
  1143. {
  1144. int i = 0;
  1145. int cur_arr_len = 0;
  1146. int result = 0;
  1147. zend_string* lang_tag = NULL;
  1148. zval* ele_value = NULL;
  1149. char* cur_loc_range = NULL;
  1150. zend_string* can_loc_range = NULL;
  1151. zend_off_t saved_pos = 0;
  1152. zend_string* return_value = NULL;
  1153. char **cur_arr = ecalloc(zend_hash_num_elements(hash_arr)*2, sizeof(char *));
  1154. ZEND_HASH_FOREACH_VAL(hash_arr, ele_value) {
  1155. /* convert the array to lowercase , also replace hyphens with the underscore and store it in cur_arr */
  1156. if(Z_TYPE_P(ele_value)!= IS_STRING) {
  1157. /* element value is not a string */
  1158. zend_argument_type_error(2, "must only contain string values");
  1159. LOOKUP_CLEAN_RETURN(NULL);
  1160. }
  1161. cur_arr[cur_arr_len*2] = estrndup(Z_STRVAL_P(ele_value), Z_STRLEN_P(ele_value));
  1162. result = strToMatch(Z_STRVAL_P(ele_value), cur_arr[cur_arr_len*2]);
  1163. if(result == 0) {
  1164. intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag", 0);
  1165. LOOKUP_CLEAN_RETURN(NULL);
  1166. }
  1167. cur_arr[cur_arr_len*2+1] = Z_STRVAL_P(ele_value);
  1168. cur_arr_len++ ;
  1169. } ZEND_HASH_FOREACH_END(); /* end of for */
  1170. /* Canonicalize array elements */
  1171. if(canonicalize) {
  1172. for(i=0; i<cur_arr_len; i++) {
  1173. lang_tag = get_icu_value_internal(cur_arr[i*2], LOC_CANONICALIZE_TAG, &result, 0);
  1174. if(result != 1 || lang_tag == NULL || !lang_tag->val[0]) {
  1175. if(lang_tag) {
  1176. zend_string_release_ex(lang_tag, 0);
  1177. }
  1178. intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0);
  1179. LOOKUP_CLEAN_RETURN(NULL);
  1180. }
  1181. cur_arr[i*2] = erealloc(cur_arr[i*2], lang_tag->len+1);
  1182. result = strToMatch(lang_tag->val, cur_arr[i*2]);
  1183. zend_string_release_ex(lang_tag, 0);
  1184. if(result == 0) {
  1185. intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0);
  1186. LOOKUP_CLEAN_RETURN(NULL);
  1187. }
  1188. }
  1189. }
  1190. if(canonicalize) {
  1191. /* Canonicalize the loc_range */
  1192. can_loc_range = get_icu_value_internal(loc_range, LOC_CANONICALIZE_TAG, &result , 0);
  1193. if( result != 1 || can_loc_range == NULL || !can_loc_range->val[0]) {
  1194. /* Error */
  1195. intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize loc_range" , 0 );
  1196. if(can_loc_range) {
  1197. zend_string_release_ex(can_loc_range, 0);
  1198. }
  1199. LOOKUP_CLEAN_RETURN(NULL);
  1200. } else {
  1201. loc_range = can_loc_range->val;
  1202. }
  1203. }
  1204. cur_loc_range = ecalloc(1, strlen(loc_range)+1);
  1205. /* convert to lower and replace hyphens */
  1206. result = strToMatch(loc_range, cur_loc_range);
  1207. if(can_loc_range) {
  1208. zend_string_release_ex(can_loc_range, 0);
  1209. }
  1210. if(result == 0) {
  1211. intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0);
  1212. LOOKUP_CLEAN_RETURN(NULL);
  1213. }
  1214. /* Lookup for the lang_tag match */
  1215. saved_pos = strlen(cur_loc_range);
  1216. while(saved_pos > 0) {
  1217. for(i=0; i< cur_arr_len; i++){
  1218. if(cur_arr[i*2] != NULL && strlen(cur_arr[i*2]) == saved_pos && strncmp(cur_loc_range, cur_arr[i*2], saved_pos) == 0) {
  1219. /* Match found */
  1220. char *str = canonicalize ? cur_arr[i*2] : cur_arr[i*2+1];
  1221. return_value = zend_string_init(str, strlen(str), 0);
  1222. efree(cur_loc_range);
  1223. LOOKUP_CLEAN_RETURN(return_value);
  1224. }
  1225. }
  1226. saved_pos = getStrrtokenPos(cur_loc_range, saved_pos);
  1227. }
  1228. /* Match not found */
  1229. efree(cur_loc_range);
  1230. LOOKUP_CLEAN_RETURN(NULL);
  1231. }
  1232. /* }}} */
  1233. /* {{{ Searches the items in $langtag for the best match to the language
  1234. * range
  1235. */
  1236. /* }}} */
  1237. /* {{{ Searches the items in $langtag for the best match to the language
  1238. * range
  1239. */
  1240. PHP_FUNCTION(locale_lookup)
  1241. {
  1242. zend_string* fallback_loc_str = NULL;
  1243. const char* loc_range = NULL;
  1244. size_t loc_range_len = 0;
  1245. zval* arr = NULL;
  1246. HashTable* hash_arr = NULL;
  1247. bool boolCanonical = 0;
  1248. zend_string* result_str = NULL;
  1249. intl_error_reset( NULL );
  1250. if(zend_parse_parameters( ZEND_NUM_ARGS(), "as|bS!", &arr, &loc_range, &loc_range_len,
  1251. &boolCanonical, &fallback_loc_str) == FAILURE) {
  1252. RETURN_THROWS();
  1253. }
  1254. if(loc_range_len == 0) {
  1255. if(fallback_loc_str) {
  1256. loc_range = ZSTR_VAL(fallback_loc_str);
  1257. loc_range_len = ZSTR_LEN(fallback_loc_str);
  1258. } else {
  1259. loc_range = intl_locale_get_default();
  1260. loc_range_len = strlen(loc_range);
  1261. }
  1262. }
  1263. hash_arr = Z_ARRVAL_P(arr);
  1264. INTL_CHECK_LOCALE_LEN(loc_range_len);
  1265. if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 ) {
  1266. RETURN_EMPTY_STRING();
  1267. }
  1268. result_str = lookup_loc_range(loc_range, hash_arr, boolCanonical);
  1269. if(result_str == NULL || ZSTR_VAL(result_str)[0] == '\0') {
  1270. if( fallback_loc_str ) {
  1271. result_str = zend_string_copy(fallback_loc_str);
  1272. } else {
  1273. RETURN_EMPTY_STRING();
  1274. }
  1275. }
  1276. RETURN_STR(result_str);
  1277. }
  1278. /* }}} */
  1279. /* {{{ Tries to find out best available locale based on HTTP "Accept-Language" header */
  1280. /* }}} */
  1281. /* {{{ Tries to find out best available locale based on HTTP "Accept-Language" header */
  1282. PHP_FUNCTION(locale_accept_from_http)
  1283. {
  1284. UEnumeration *available;
  1285. char *http_accept = NULL;
  1286. size_t http_accept_len;
  1287. UErrorCode status = 0;
  1288. int len;
  1289. char resultLocale[INTL_MAX_LOCALE_LEN+1];
  1290. UAcceptResult outResult;
  1291. if(zend_parse_parameters( ZEND_NUM_ARGS(), "s", &http_accept, &http_accept_len) == FAILURE)
  1292. {
  1293. RETURN_THROWS();
  1294. }
  1295. if(http_accept_len > ULOC_FULLNAME_CAPACITY) {
  1296. /* check each fragment, if any bigger than capacity, can't do it due to bug #72533 */
  1297. char *start = http_accept;
  1298. char *end;
  1299. size_t len;
  1300. do {
  1301. end = strchr(start, ',');
  1302. len = end ? end-start : http_accept_len-(start-http_accept);
  1303. if(len > ULOC_FULLNAME_CAPACITY) {
  1304. intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
  1305. "locale_accept_from_http: locale string too long", 0 );
  1306. RETURN_FALSE;
  1307. }
  1308. if(end) {
  1309. start = end+1;
  1310. }
  1311. } while(end != NULL);
  1312. }
  1313. available = ures_openAvailableLocales(NULL, &status);
  1314. INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to retrieve locale list");
  1315. len = uloc_acceptLanguageFromHTTP(resultLocale, INTL_MAX_LOCALE_LEN,
  1316. &outResult, http_accept, available, &status);
  1317. uenum_close(available);
  1318. INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to find acceptable locale");
  1319. if (len < 0 || outResult == ULOC_ACCEPT_FAILED) {
  1320. RETURN_FALSE;
  1321. }
  1322. RETURN_STRINGL(resultLocale, len);
  1323. }
  1324. /* }}} */