parse_posix.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567
  1. /*
  2. * The MIT License (MIT)
  3. *
  4. * Copyright (c) 2021 MongoDB, Inc.
  5. *
  6. * Permission is hereby granted, free of charge, to any person obtaining a copy
  7. * of this software and associated documentation files (the "Software"), to deal
  8. * in the Software without restriction, including without limitation the rights
  9. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10. * copies of the Software, and to permit persons to whom the Software is
  11. * furnished to do so, subject to the following conditions:
  12. *
  13. * The above copyright notice and this permission notice shall be included in
  14. * all copies or substantial portions of the Software.
  15. *
  16. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22. * THE SOFTWARE.
  23. */
  #include "timelib.h"
  #include "timelib_private.h"

  #include <limits.h>
  26. // This section adds the missing 'strndup' implementation on Windows.
  27. #if TIMELIB_USE_BUILTIN_STRNDUP == 1
  28. # include <stdlib.h>
  29. # include <string.h>
  /**
   * char* timelib_strndup(const char* s, size_t n)
   *
   * Returns a pointer to a copy of at most the first 'n' characters of 's',
   * in memory obtained from 'malloc', or 'NULL' if insufficient memory was
   * available. The result is always NUL-terminated.
   *
   * At most 'n' bytes of 's' are examined, so 's' does not have to be
   * NUL-terminated as long as it holds at least 'n' readable bytes.
   */
  static char* timelib_strndup(const char* s, size_t n)
  {
      /* Bound the scan to 'n' bytes instead of calling strlen() on the
       * whole source string. */
      const char *terminator = memchr(s, '\0', n);
      size_t len = terminator ? (size_t)(terminator - s) : n;
      char *result = malloc(len + 1);

      if (!result) {
          return NULL;
      }

      memcpy(result, s, len);
      result[len] = '\0';

      return result;
  }
  51. #endif
  /* Forward declarations */
  53. static timelib_posix_trans_info *timelib_posix_trans_info_ctor(void);
  54. static void timelib_posix_trans_info_dtor(timelib_posix_trans_info* ts);
  /* "<" [+-]? .+? ">"
   *
   * Reads a bracketed abbreviation such as "<-03>". On entry '*ptr' is at the
   * opening '<'. Returns a newly allocated copy of the text between the
   * brackets, or NULL when the closing '>' is missing or the text is empty.
   * On success '*ptr' is left just past the closing '>'. */
  static char *read_description_numeric_abbr(char **ptr)
  {
      char *cursor = *ptr + 1; // first character after '<'
      const char *name = cursor;
      size_t length;

      while (*cursor != '\0' && *cursor != '>') {
          cursor++;
      }

      // Ran off the end of the string without finding '>'
      if (*cursor == '\0') {
          *ptr = cursor;
          return NULL;
      }

      length = (size_t)(cursor - name);
      *ptr = cursor + 1; // step over '>'

      // Abbreviation may not be empty
      if (length == 0) {
          return NULL;
      }

      return timelib_strndup(name, length);
  }
  /* [A-Za-z]+
   *
   * Reads a run of ASCII letters starting at '*ptr' and returns a newly
   * allocated copy of it, or NULL when there is no letter at '*ptr'.
   * '*ptr' is advanced past the letters that were read. */
  static char *read_description_abbr(char **ptr)
  {
      const char *start = *ptr;
      char *cursor = *ptr;
      size_t length;

      for (;;) {
          int is_upper = (*cursor >= 'A' && *cursor <= 'Z');
          int is_lower = (*cursor >= 'a' && *cursor <= 'z');

          if (!is_upper && !is_lower) {
              break;
          }
          cursor++;
      }
      *ptr = cursor;

      // Abbreviation may not be empty
      length = (size_t)(cursor - start);
      if (length == 0) {
          return NULL;
      }

      return timelib_strndup(start, length);
  }
  /* "<" [+-]? .+? ">" | [A-Za-z]+
   *
   * Reads a zone abbreviation in either the bracketed or the plain
   * alphabetic form, chosen by whether '*ptr' is at a '<'. */
  static char *read_description(char **ptr)
  {
      return (**ptr == '<')
          ? read_description_numeric_abbr(ptr)
          : read_description_abbr(ptr);
  }
  /* [+-]?
   *
   * Consumes an optional sign character at '*ptr'. Returns -1 for '-', and 1
   * for '+' or when no sign is present (in which case '*ptr' is not moved). */
  static int read_sign(char **ptr)
  {
      switch (**ptr) {
          case '-':
              (*ptr)++;
              return -1;

          case '+':
              (*ptr)++;
              return 1;

          default:
              return 1;
      }
  }
  111. /* [0-9]+ */
  112. static timelib_sll read_number(char **ptr)
  113. {
  114. const char *begin = *ptr;
  115. int acc = 0;
  116. // skip leading 0's
  117. while (**ptr == '0') {
  118. (*ptr)++;
  119. }
  120. while (**ptr >= '0' && **ptr <= '9') {
  121. acc = acc * 10;
  122. acc += (**ptr) - '0';
  123. (*ptr)++;
  124. }
  125. if (begin == *ptr) {
  126. return TIMELIB_UNSET;
  127. }
  128. return acc;
  129. }
  130. /* [+-]? [0-9]+ ( ":" [0-9]+ ( ":" [0-9]+ )? )? */
  131. static timelib_sll read_offset(char **ptr)
  132. {
  133. const char *begin;
  134. int bias = read_sign(ptr);
  135. int hours = 0;
  136. int minutes = 0;
  137. int seconds = 0;
  138. begin = *ptr;
  139. // read through to : or non-digit for hours
  140. hours = read_number(ptr);
  141. if (hours == TIMELIB_UNSET) {
  142. return hours;
  143. }
  144. // check for optional minutes
  145. if (**ptr == ':') {
  146. (*ptr)++; // skip ':'
  147. minutes = read_number(ptr);
  148. if (minutes == TIMELIB_UNSET) {
  149. return minutes;
  150. }
  151. }
  152. // check for optional seconds
  153. if (**ptr == ':') {
  154. (*ptr)++; // skip ':'
  155. seconds = read_number(ptr);
  156. if (seconds == TIMELIB_UNSET) {
  157. return seconds;
  158. }
  159. }
  160. if (begin == *ptr) {
  161. return TIMELIB_UNSET;
  162. }
  163. // multiplication with -1, because the offset in the identifier is the
  164. // 'wrong' way around as for example EST5 is UTC-5 (and not +5)
  165. return -1 * bias * (hours * 3600 + minutes * 60 + seconds);
  166. }
  167. // Mw.m.d
  168. static timelib_posix_trans_info* read_trans_spec_mwd(char **ptr)
  169. {
  170. timelib_posix_trans_info *tmp = timelib_posix_trans_info_ctor();
  171. tmp->type = TIMELIB_POSIX_TRANS_TYPE_MWD;
  172. // Skip 'M'
  173. (*ptr)++;
  174. tmp->mwd.month = read_number(ptr);
  175. if (tmp->mwd.month == TIMELIB_UNSET) {
  176. goto fail;
  177. }
  178. // check for '.' and skip it
  179. if (**ptr != '.') {
  180. goto fail;
  181. }
  182. (*ptr)++;
  183. tmp->mwd.week = read_number(ptr);
  184. if (tmp->mwd.week == TIMELIB_UNSET) {
  185. goto fail;
  186. }
  187. // check for '.' and skip it
  188. if (**ptr != '.') {
  189. goto fail;
  190. }
  191. (*ptr)++;
  192. tmp->mwd.dow = read_number(ptr);
  193. if (tmp->mwd.dow == TIMELIB_UNSET) {
  194. goto fail;
  195. }
  196. return tmp;
  197. fail:
  198. timelib_posix_trans_info_dtor(tmp);
  199. return NULL;
  200. }
  201. // (Jn | n | Mw.m.d) ( /time )?
  202. static timelib_posix_trans_info* read_transition_spec(char **ptr)
  203. {
  204. timelib_posix_trans_info *tmp;
  205. if (**ptr == 'M') {
  206. tmp = read_trans_spec_mwd(ptr);
  207. if (!tmp) {
  208. return NULL;
  209. }
  210. } else {
  211. tmp = timelib_posix_trans_info_ctor();
  212. if (**ptr == 'J') {
  213. tmp->type = TIMELIB_POSIX_TRANS_TYPE_JULIAN_NO_FEB29;
  214. (*ptr)++;
  215. }
  216. tmp->days = read_number(ptr);
  217. if (tmp->days == TIMELIB_UNSET) {
  218. goto fail;
  219. }
  220. }
  221. // Check for the optional hour
  222. if (**ptr == '/') {
  223. (*ptr)++;
  224. tmp->hour = read_offset(ptr);
  225. if (tmp->hour == TIMELIB_UNSET) {
  226. goto fail;
  227. }
  228. // as the bias for normal offsets = -1, we need to reverse it here
  229. tmp->hour = -tmp->hour;
  230. }
  231. return tmp;
  232. fail:
  233. timelib_posix_trans_info_dtor(tmp);
  234. return NULL;
  235. }
  236. static timelib_posix_trans_info* timelib_posix_trans_info_ctor(void)
  237. {
  238. timelib_posix_trans_info *tmp;
  239. tmp = timelib_calloc(1, sizeof(timelib_posix_trans_info));
  240. tmp->type = TIMELIB_POSIX_TRANS_TYPE_JULIAN_FEB29;
  241. tmp->hour = 2 * 3600;
  242. return tmp;
  243. }
  244. static void timelib_posix_trans_info_dtor(timelib_posix_trans_info* ts)
  245. {
  246. timelib_free(ts);
  247. }
  248. void timelib_posix_str_dtor(timelib_posix_str *ps)
  249. {
  250. if (ps->std) {
  251. timelib_free(ps->std);
  252. }
  253. if (ps->dst) {
  254. timelib_free(ps->dst);
  255. }
  256. if (ps->dst_begin) {
  257. timelib_posix_trans_info_dtor(ps->dst_begin);
  258. }
  259. if (ps->dst_end) {
  260. timelib_posix_trans_info_dtor(ps->dst_end);
  261. }
  262. timelib_free(ps);
  263. }
  264. timelib_posix_str* timelib_parse_posix_str(const char *posix)
  265. {
  266. timelib_posix_str *tmp = timelib_calloc(1, sizeof(timelib_posix_str));
  267. char *ptr = (char*) posix;
  268. // read standard description (ie. EST or <-03>)
  269. tmp->std = read_description(&ptr);
  270. if (!tmp->std) {
  271. timelib_posix_str_dtor(tmp);
  272. return NULL;
  273. }
  274. // read required offset
  275. tmp->std_offset = read_offset(&ptr);
  276. if (tmp->std_offset == TIMELIB_UNSET) {
  277. timelib_posix_str_dtor(tmp);
  278. return NULL;
  279. }
  280. // if we're at the end return, otherwise we'll continue to try to parse
  281. // the dst abbreviation and spec
  282. if (*ptr == '\0') {
  283. return tmp;
  284. }
  285. // assume dst is there, and initialise offset
  286. tmp->dst_offset = tmp->std_offset + 3600;
  287. tmp->dst = read_description(&ptr);
  288. if (!tmp->dst) {
  289. timelib_posix_str_dtor(tmp);
  290. return NULL;
  291. }
  292. // if we have a "," here, then the dst offset is the standard offset +
  293. // 3600 seconds, otherwise, try to parse the dst offset
  294. if (*ptr != ',' && *ptr != '\0') {
  295. tmp->dst_offset = read_offset(&ptr);
  296. if (tmp->dst_offset == TIMELIB_UNSET) {
  297. timelib_posix_str_dtor(tmp);
  298. return NULL;
  299. }
  300. }
  301. // if we *don't* have a "," here, we're missing the dst transitions
  302. // ,start[/time],end[/time]
  303. if (*ptr != ',') {
  304. timelib_posix_str_dtor(tmp);
  305. return NULL;
  306. }
  307. ptr++; // skip ','
  308. // start[/time]
  309. tmp->dst_begin = read_transition_spec(&ptr);
  310. if (!tmp->dst_begin) {
  311. timelib_posix_str_dtor(tmp);
  312. return NULL;
  313. }
  314. // if we *don't* have a "," here, we're missing the dst end transition
  315. // ,end[/time]
  316. if (*ptr != ',') {
  317. timelib_posix_str_dtor(tmp);
  318. return NULL;
  319. }
  320. ptr++; // skip ','
  321. // end[/time]
  322. tmp->dst_end = read_transition_spec(&ptr);
  323. if (!tmp->dst_end) {
  324. timelib_posix_str_dtor(tmp);
  325. return NULL;
  326. }
  327. // make sure there is no trailing data
  328. if (*ptr != '\0') {
  329. timelib_posix_str_dtor(tmp);
  330. return NULL;
  331. }
  332. return tmp;
  333. }
  334. static const int month_lengths[2][MONTHS_PER_YEAR] = {
  335. { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }, // normal year
  336. { 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 } // leap year
  337. };
  338. /* This function is adapted from the 'localtime.c' function 'transtime' as bundled with the 'tzcode' project
  339. * from IANA, and is public domain licensed. */
  340. static timelib_sll calc_transition(timelib_posix_trans_info *psi, timelib_sll year)
  341. {
  342. int leap_year = timelib_is_leap(year);
  343. switch (psi->type) {
  344. case TIMELIB_POSIX_TRANS_TYPE_JULIAN_NO_FEB29: {
  345. timelib_sll value = (psi->days - 1);
  346. if (leap_year && psi->days >= 60) {
  347. value++;
  348. }
  349. return value * SECS_PER_DAY;
  350. }
  351. case TIMELIB_POSIX_TRANS_TYPE_JULIAN_FEB29: {
  352. return psi->days * SECS_PER_DAY;
  353. }
  354. case TIMELIB_POSIX_TRANS_TYPE_MWD: {
  355. /*
  356. * Mm.n.d - nth "dth day" of month m.
  357. */
  358. int i, d, m1, yy0, yy1, yy2, dow;
  359. timelib_sll value = 0;
  360. /* Use Zeller's Congruence to get day-of-week of first day of
  361. * month. */
  362. m1 = (psi->mwd.month + 9) % 12 + 1;
  363. yy0 = (psi->mwd.month <= 2) ? (year - 1) : year;
  364. yy1 = yy0 / 100;
  365. yy2 = yy0 % 100;
  366. dow = ((26 * m1 - 2) / 10 + 1 + yy2 + yy2 / 4 + yy1 / 4 - 2 * yy1) % 7;
  367. if (dow < 0) {
  368. dow += DAYS_PER_WEEK;
  369. }
  370. /* "dow" is the day-of-week of the first day of the month. Get the
  371. * day-of-month (zero-origin) of the first "dow" day of the month. */
  372. d = psi->mwd.dow - dow;
  373. if (d < 0) {
  374. d += DAYS_PER_WEEK;
  375. }
  376. for (i = 1; i < psi->mwd.week; ++i) {
  377. if (d + DAYS_PER_WEEK >= month_lengths[leap_year][psi->mwd.month - 1]) {
  378. break;
  379. }
  380. d += DAYS_PER_WEEK;
  381. }
  382. /* "d" is the day-of-month (zero-origin) of the day we want. */
  383. value = d * SECS_PER_DAY;
  384. for (i = 0; i < psi->mwd.month - 1; ++i) {
  385. value += month_lengths[leap_year][i] * SECS_PER_DAY;
  386. }
  387. return value;
  388. } break;
  389. }
  390. return 0;
  391. }
  392. static timelib_sll count_leap_years(timelib_sll y)
  393. {
  394. /* Because we want this for Jan 1, the leap day hasn't happend yet, so
  395. * subtract one of year before we calculate */
  396. y--;
  397. return (y/4) - (y/100) + (y/400);
  398. }
  399. timelib_sll timelib_ts_at_start_of_year(timelib_sll year)
  400. {
  401. timelib_sll epoch_leap_years = count_leap_years(1970);
  402. timelib_sll current_leap_years = count_leap_years(year);
  403. return SECS_PER_DAY * (
  404. ((year-1970) * DAYS_PER_YEAR)
  405. + current_leap_years
  406. - epoch_leap_years
  407. );
  408. }
  409. void timelib_get_transitions_for_year(timelib_tzinfo *tz, timelib_sll year, timelib_posix_transitions *transitions)
  410. {
  411. timelib_sll trans_begin; /* Since start of the year */
  412. timelib_sll trans_end;
  413. timelib_sll year_begin_ts = timelib_ts_at_start_of_year(year);
  414. trans_begin = year_begin_ts;
  415. trans_begin += calc_transition(tz->posix_info->dst_begin, year);
  416. trans_begin += tz->posix_info->dst_begin->hour;
  417. trans_begin -= tz->posix_info->std_offset;
  418. trans_end = year_begin_ts;
  419. trans_end += calc_transition(tz->posix_info->dst_end, year);
  420. trans_end += tz->posix_info->dst_end->hour;
  421. trans_end -= tz->posix_info->dst_offset;
  422. if (trans_begin < trans_end) {
  423. transitions->times[transitions->count ] = trans_begin;
  424. transitions->times[transitions->count+1] = trans_end;
  425. transitions->types[transitions->count ] = tz->posix_info->type_index_dst_type;
  426. transitions->types[transitions->count+1] = tz->posix_info->type_index_std_type;
  427. } else {
  428. transitions->times[transitions->count+1] = trans_begin;
  429. transitions->times[transitions->count ] = trans_end;
  430. transitions->types[transitions->count+1] = tz->posix_info->type_index_dst_type;
  431. transitions->types[transitions->count ] = tz->posix_info->type_index_std_type;
  432. }
  433. transitions->count += 2;
  434. }
  435. ttinfo* timelib_fetch_posix_timezone_offset(timelib_tzinfo *tz, timelib_sll ts, timelib_sll *transition_time)
  436. {
  437. timelib_sll year;
  438. timelib_time dummy;
  439. timelib_posix_transitions transitions = { 0 };
  440. size_t i;
  441. /* If there is no second (dst_end) information, the UTC offset is valid for the whole year, so no need to
  442. * do clever logic */
  443. if (!tz->posix_info->dst_end) {
  444. if (transition_time) {
  445. *transition_time = tz->trans[tz->bit64.timecnt - 1];
  446. }
  447. return &(tz->type[tz->posix_info->type_index_std_type]);
  448. }
  449. /* Find 'year' (UTC) for 'ts' */
  450. timelib_unixtime2gmt(&dummy, ts);
  451. year = dummy.y;
  452. /* Calculate transition times for 'year-1', 'year', and 'year+1' */
  453. timelib_get_transitions_for_year(tz, year - 1, &transitions);
  454. timelib_get_transitions_for_year(tz, year, &transitions);
  455. timelib_get_transitions_for_year(tz, year + 1, &transitions);
  456. /* Check where the 'ts' falls in the 4 transitions */
  457. for (i = 1; i < transitions.count; i++) {
  458. if (ts < transitions.times[i]) {
  459. if (transition_time) {
  460. *transition_time = transitions.times[i - 1];
  461. }
  462. return &(tz->type[transitions.types[i - 1]]);
  463. }
  464. }
  465. return NULL;
  466. }