parse_iso_intervals.re 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526
  1. /*
  2. * The MIT License (MIT)
  3. *
  4. * Copyright (c) 2015 Derick Rethans
  5. *
  6. * Permission is hereby granted, free of charge, to any person obtaining a copy
  7. * of this software and associated documentation files (the "Software"), to deal
  8. * in the Software without restriction, including without limitation the rights
  9. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10. * copies of the Software, and to permit persons to whom the Software is
  11. * furnished to do so, subject to the following conditions:
  12. *
  13. * The above copyright notice and this permission notice shall be included in
  14. * all copies or substantial portions of the Software.
  15. *
  16. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22. * THE SOFTWARE.
  23. */
  24. /* $Id$ */
  25. #include "timelib.h"
  26. #include <stdio.h>
  27. #include <ctype.h>
  28. #ifdef HAVE_STDLIB_H
  29. #include <stdlib.h>
  30. #endif
  31. #ifdef HAVE_STRING_H
  32. #include <string.h>
  33. #else
  34. #include <strings.h>
  35. #endif
  /* strtoll() compatibility shim: with MSVC the call maps to _atoi64(), and
   * without HAVE_STRTOLL it maps to atoll() or strtol(). The _atoi64() and
   * atoll() variants drop the end-pointer and base arguments. */
  36. #if defined(_MSC_VER)
  37. # define strtoll(s, f, b) _atoi64(s)
  38. #elif !defined(HAVE_STRTOLL)
  39. # if defined(HAVE_ATOLL)
  40. # define strtoll(s, f, b) atoll(s)
  41. # else
  42. # define strtoll(s, f, b) strtol(s, f, b)
  43. # endif
  44. #endif
  /* Sentinel stored in date/time fields that have not been parsed, and
   * returned by the number readers when no digit is found. */
  45. #define TIMELIB_UNSET -99999
  /* Unit identifiers; not referenced in the visible part of this file. */
  46. #define TIMELIB_SECOND 1
  47. #define TIMELIB_MINUTE 2
  48. #define TIMELIB_HOUR 3
  49. #define TIMELIB_DAY 4
  50. #define TIMELIB_MONTH 5
  51. #define TIMELIB_YEAR 6
  /* Token codes returned by scan(). EOI ends the scan loop in
   * timelib_strtointerval(); TIMELIB_ERROR is not returned by any visible rule. */
  52. #define EOI 257
  53. #define TIMELIB_PERIOD 260
  54. #define TIMELIB_ISO_DATE 261
  55. #define TIMELIB_ERROR 999
  /* Character type used for the scanner buffer pointers. */
  56. typedef unsigned char uchar;
  /* BSIZE is not referenced in the visible part of this file. */
  57. #define BSIZE 8192
  /* Glue between the re2c-generated matcher and the Scanner state: the
   * matcher reads through the local `cursor`, stops at s->lim and keeps its
   * marker in s->ptr. YYFILL does not refill anything; it ends scanning by
   * returning EOI from scan(). */
  58. #define YYCTYPE uchar
  59. #define YYCURSOR cursor
  60. #define YYLIMIT s->lim
  61. #define YYMARKER s->ptr
  62. #define YYFILL(n) return EOI;
  /* RET stores the cursor back into the scanner and returns token code i. */
  63. #define RET(i) {s->cur = cursor; return i;}
  64. #define timelib_string_free timelib_free
  /* TIMELIB_INIT stores the cursor, copies the matched token text into `str`
   * and points `ptr` at that copy; TIMELIB_DEINIT frees the copy again. */
  65. #define TIMELIB_INIT s->cur = cursor; str = timelib_string(s); ptr = str
  66. #define TIMELIB_DEINIT timelib_string_free(str)
  /* Debug tracing: prints rule names and matcher states when DEBUG_PARSER is
   * defined, and expands to nothing otherwise. */
  67. #ifdef DEBUG_PARSER
  68. #define DEBUG_OUTPUT(s) printf("%s\n", s);
  69. #define YYDEBUG(s,c) { if (s != -1) { printf("state: %d ", s); printf("[%c]\n", c); } }
  70. #else
  71. #define DEBUG_OUTPUT(s)
  72. #define YYDEBUG(s,c)
  73. #endif
  74. #include "timelib_structs.h"
  75. typedef struct Scanner {
  76. int fd;
  77. uchar *lim, *str, *ptr, *cur, *tok, *pos;
  78. unsigned int line, len;
  79. struct timelib_error_container *errors;
  80. struct timelib_time *begin;
  81. struct timelib_time *end;
  82. struct timelib_rel_time *period;
  83. int recurrences;
  84. int have_period;
  85. int have_recurrences;
  86. int have_date;
  87. int have_begin_date;
  88. int have_end_date;
  89. } Scanner;
  90. static void add_warning(Scanner *s, char *error)
  91. {
  92. s->errors->warning_count++;
  93. s->errors->warning_messages = timelib_realloc(s->errors->warning_messages, s->errors->warning_count * sizeof(timelib_error_message));
  94. s->errors->warning_messages[s->errors->warning_count - 1].position = s->tok ? s->tok - s->str : 0;
  95. s->errors->warning_messages[s->errors->warning_count - 1].character = s->tok ? *s->tok : 0;
  96. s->errors->warning_messages[s->errors->warning_count - 1].message = timelib_strdup(error);
  97. }
  98. static void add_error(Scanner *s, char *error)
  99. {
  100. s->errors->error_count++;
  101. s->errors->error_messages = timelib_realloc(s->errors->error_messages, s->errors->error_count * sizeof(timelib_error_message));
  102. s->errors->error_messages[s->errors->error_count - 1].position = s->tok ? s->tok - s->str : 0;
  103. s->errors->error_messages[s->errors->error_count - 1].character = s->tok ? *s->tok : 0;
  104. s->errors->error_messages[s->errors->error_count - 1].message = timelib_strdup(error);
  105. }
  106. static char *timelib_string(Scanner *s)
  107. {
  108. char *tmp = timelib_calloc(1, s->cur - s->tok + 1);
  109. memcpy(tmp, s->tok, s->cur - s->tok);
  110. return tmp;
  111. }
  112. static timelib_sll timelib_get_nr(char **ptr, int max_length)
  113. {
  114. char *begin, *end, *str;
  115. timelib_sll tmp_nr = TIMELIB_UNSET;
  116. int len = 0;
  117. while ((**ptr < '0') || (**ptr > '9')) {
  118. if (**ptr == '\0') {
  119. return TIMELIB_UNSET;
  120. }
  121. ++*ptr;
  122. }
  123. begin = *ptr;
  124. while ((**ptr >= '0') && (**ptr <= '9') && len < max_length) {
  125. ++*ptr;
  126. ++len;
  127. }
  128. end = *ptr;
  129. str = timelib_calloc(1, end - begin + 1);
  130. memcpy(str, begin, end - begin);
  131. tmp_nr = strtoll(str, NULL, 10);
  132. timelib_free(str);
  133. return tmp_nr;
  134. }
  135. static timelib_ull timelib_get_unsigned_nr(char **ptr, int max_length)
  136. {
  137. timelib_ull dir = 1;
  138. while (((**ptr < '0') || (**ptr > '9')) && (**ptr != '+') && (**ptr != '-')) {
  139. if (**ptr == '\0') {
  140. return TIMELIB_UNSET;
  141. }
  142. ++*ptr;
  143. }
  144. while (**ptr == '+' || **ptr == '-')
  145. {
  146. if (**ptr == '-') {
  147. dir *= -1;
  148. }
  149. ++*ptr;
  150. }
  151. return dir * timelib_get_nr(ptr, max_length);
  152. }
  /* Advances *ptr past any leading spaces and tabs. */
  static void timelib_eat_spaces(char **ptr)
  {
      char *p;

      for (p = *ptr; *p == ' ' || *p == '\t'; p++) {
          /* nothing to do but step forward */
      }
      *ptr = p;
  }
  /*
   * Advances *ptr to the next separator character (space, tab, one of
   * ".,:;/-" or a digit). The end of the string also stops the scan, because
   * strchr() matches the terminating NUL of the separator list.
   */
  static void timelib_eat_until_separator(char **ptr)
  {
      static const char separators[] = " \t.,:;/-0123456789";
      char *p = *ptr;

      while (!strchr(separators, *p)) {
          p++;
      }
      *ptr = p;
  }
  165. static timelib_long timelib_get_zone(char **ptr, int *dst, timelib_time *t, int *tz_not_found, const timelib_tzdb *tzdb)
  166. {
  167. timelib_long retval = 0;
  168. *tz_not_found = 0;
  169. while (**ptr == ' ' || **ptr == '\t' || **ptr == '(') {
  170. ++*ptr;
  171. }
  172. if ((*ptr)[0] == 'G' && (*ptr)[1] == 'M' && (*ptr)[2] == 'T' && ((*ptr)[3] == '+' || (*ptr)[3] == '-')) {
  173. *ptr += 3;
  174. }
  175. if (**ptr == '+') {
  176. ++*ptr;
  177. t->is_localtime = 1;
  178. t->zone_type = TIMELIB_ZONETYPE_OFFSET;
  179. *tz_not_found = 0;
  180. t->dst = 0;
  181. retval = -1 * timelib_parse_tz_cor(ptr);
  182. } else if (**ptr == '-') {
  183. ++*ptr;
  184. t->is_localtime = 1;
  185. t->zone_type = TIMELIB_ZONETYPE_OFFSET;
  186. *tz_not_found = 0;
  187. t->dst = 0;
  188. retval = timelib_parse_tz_cor(ptr);
  189. }
  190. while (**ptr == ')') {
  191. ++*ptr;
  192. }
  193. return retval;
  194. }
  /*
   * Frees a split result: each of the arg.c strings in arg.v first, then the
   * arg.v array itself when it is non-NULL. `arg` is evaluated several times.
   */
  #define timelib_split_free(arg) { \
      int i = 0; \
      while (i < arg.c) { \
          timelib_free(arg.v[i]); \
          i++; \
      } \
      if (arg.v) { \
          timelib_free(arg.v); \
      } \
  }
  204. /* The date parser's scan function is too large for VC6 - VC7.x;
  205. disabling optimization solves the problem. */
  206. #ifdef PHP_WIN32
  207. #pragma optimize( "", off )
  208. #endif
  209. static int scan(Scanner *s)
  210. {
  211. uchar *cursor = s->cur;
  212. char *str, *ptr = NULL;
  213. std:
  214. s->tok = cursor;
  215. s->len = 0;
  216. /*!re2c
  217. /* */
  218. any = [\000-\377];
  219. number = [0-9]+;
  220. hour24lz = [01][0-9] | "2"[0-4];
  221. minutelz = [0-5][0-9];
  222. monthlz = "0" [1-9] | "1" [0-2];
  223. monthlzz = "0" [0-9] | "1" [0-2];
  224. daylz = "0" [1-9] | [1-2][0-9] | "3" [01];
  225. daylzz = "0" [0-9] | [1-2][0-9] | "3" [01];
  226. secondlz = minutelz;
  227. year4 = [0-9]{4};
  228. weekofyear = "0"[1-9] | [1-4][0-9] | "5"[0-3];
  229. space = [ \t]+;
  230. datetimebasic = year4 monthlz daylz "T" hour24lz minutelz secondlz "Z";
  231. datetimeextended = year4 "-" monthlz "-" daylz "T" hour24lz ':' minutelz ':' secondlz "Z";
  232. period = "P" (number "Y")? (number "M")? (number "W")? (number "D")? ("T" (number "H")? (number "M")? (number "S")?)?;
  233. combinedrep = "P" year4 "-" monthlzz "-" daylzz "T" hour24lz ':' minutelz ':' secondlz;
  234. recurrences = "R" number;
  235. isoweekday = year4 "-"? "W" weekofyear "-"? [0-7];
  236. isoweek = year4 "-"? "W" weekofyear;
  237. */
  238. /*!re2c
  239. /* so that vim highlights correctly */
  240. recurrences
  241. {
  242. DEBUG_OUTPUT("recurrences");
  243. TIMELIB_INIT;
  244. ptr++;
  245. s->recurrences = timelib_get_unsigned_nr((char **) &ptr, 9);
  246. TIMELIB_DEINIT;
  247. s->have_recurrences = 1;
  248. return TIMELIB_PERIOD;
  249. }
  250. datetimebasic| datetimeextended
  251. {
  252. timelib_time *current;
  253. if (s->have_date || s->have_period) {
  254. current = s->end;
  255. s->have_end_date = 1;
  256. } else {
  257. current = s->begin;
  258. s->have_begin_date = 1;
  259. }
  260. DEBUG_OUTPUT("datetimebasic | datetimeextended");
  261. TIMELIB_INIT;
  262. current->y = timelib_get_nr((char **) &ptr, 4);
  263. current->m = timelib_get_nr((char **) &ptr, 2);
  264. current->d = timelib_get_nr((char **) &ptr, 2);
  265. current->h = timelib_get_nr((char **) &ptr, 2);
  266. current->i = timelib_get_nr((char **) &ptr, 2);
  267. current->s = timelib_get_nr((char **) &ptr, 2);
  268. s->have_date = 1;
  269. TIMELIB_DEINIT;
  270. return TIMELIB_ISO_DATE;
  271. }
  272. period
  273. {
  274. timelib_sll nr;
  275. int in_time = 0;
  276. DEBUG_OUTPUT("period");
  277. TIMELIB_INIT;
  278. ptr++;
  279. do {
  280. if ( *ptr == 'T' ) {
  281. in_time = 1;
  282. ptr++;
  283. }
  284. if ( *ptr == '\0' ) {
  285. add_error(s, "Missing expected time part");
  286. break;
  287. }
  288. nr = timelib_get_unsigned_nr((char **) &ptr, 12);
  289. switch (*ptr) {
  290. case 'Y': s->period->y = nr; break;
  291. case 'W': s->period->d = nr * 7; break;
  292. case 'D': s->period->d = nr; break;
  293. case 'H': s->period->h = nr; break;
  294. case 'S': s->period->s = nr; break;
  295. case 'M':
  296. if (in_time) {
  297. s->period->i = nr;
  298. } else {
  299. s->period->m = nr;
  300. }
  301. break;
  302. default:
  303. add_error(s, "Undefined period specifier");
  304. break;
  305. }
  306. ptr++;
  307. } while (!s->errors->error_count && *ptr);
  308. s->have_period = 1;
  309. TIMELIB_DEINIT;
  310. return TIMELIB_PERIOD;
  311. }
  312. combinedrep
  313. {
  314. DEBUG_OUTPUT("combinedrep");
  315. TIMELIB_INIT;
  316. s->period->y = timelib_get_unsigned_nr((char **) &ptr, 4);
  317. ptr++;
  318. s->period->m = timelib_get_unsigned_nr((char **) &ptr, 2);
  319. ptr++;
  320. s->period->d = timelib_get_unsigned_nr((char **) &ptr, 2);
  321. ptr++;
  322. s->period->h = timelib_get_unsigned_nr((char **) &ptr, 2);
  323. ptr++;
  324. s->period->i = timelib_get_unsigned_nr((char **) &ptr, 2);
  325. ptr++;
  326. s->period->s = timelib_get_unsigned_nr((char **) &ptr, 2);
  327. s->have_period = 1;
  328. TIMELIB_DEINIT;
  329. return TIMELIB_PERIOD;
  330. }
  331. [ .,\t/]
  332. {
  333. goto std;
  334. }
  335. "\000"|"\n"
  336. {
  337. s->pos = cursor; s->line++;
  338. goto std;
  339. }
  340. any
  341. {
  342. add_error(s, "Unexpected character");
  343. goto std;
  344. }
  345. */
  346. }
  347. #ifdef PHP_WIN32
  348. #pragma optimize( "", on )
  349. #endif
  350. /*!max:re2c */
  351. void timelib_strtointerval(char *s, size_t len,
  352. timelib_time **begin, timelib_time **end,
  353. timelib_rel_time **period, int *recurrences,
  354. struct timelib_error_container **errors)
  355. {
  356. Scanner in;
  357. int t;
  358. char *e = s + len - 1;
  359. memset(&in, 0, sizeof(in));
  360. in.errors = timelib_malloc(sizeof(struct timelib_error_container));
  361. in.errors->warning_count = 0;
  362. in.errors->warning_messages = NULL;
  363. in.errors->error_count = 0;
  364. in.errors->error_messages = NULL;
  365. if (len > 0) {
  366. while (isspace(*s) && s < e) {
  367. s++;
  368. }
  369. while (isspace(*e) && e > s) {
  370. e--;
  371. }
  372. }
  373. if (e - s < 0) {
  374. add_error(&in, "Empty string");
  375. if (errors) {
  376. *errors = in.errors;
  377. } else {
  378. timelib_error_container_dtor(in.errors);
  379. }
  380. return;
  381. }
  382. e++;
  383. /* init cursor */
  384. in.str = timelib_malloc((e - s) + YYMAXFILL);
  385. memset(in.str, 0, (e - s) + YYMAXFILL);
  386. memcpy(in.str, s, (e - s));
  387. in.lim = in.str + (e - s) + YYMAXFILL;
  388. in.cur = in.str;
  389. /* init value containers */
  390. in.begin = timelib_time_ctor();
  391. in.begin->y = TIMELIB_UNSET;
  392. in.begin->d = TIMELIB_UNSET;
  393. in.begin->m = TIMELIB_UNSET;
  394. in.begin->h = TIMELIB_UNSET;
  395. in.begin->i = TIMELIB_UNSET;
  396. in.begin->s = TIMELIB_UNSET;
  397. in.begin->f = 0;
  398. in.begin->z = 0;
  399. in.begin->dst = 0;
  400. in.begin->is_localtime = 0;
  401. in.begin->zone_type = TIMELIB_ZONETYPE_OFFSET;
  402. in.end = timelib_time_ctor();
  403. in.end->y = TIMELIB_UNSET;
  404. in.end->d = TIMELIB_UNSET;
  405. in.end->m = TIMELIB_UNSET;
  406. in.end->h = TIMELIB_UNSET;
  407. in.end->i = TIMELIB_UNSET;
  408. in.end->s = TIMELIB_UNSET;
  409. in.end->f = 0;
  410. in.end->z = 0;
  411. in.end->dst = 0;
  412. in.end->is_localtime = 0;
  413. in.end->zone_type = TIMELIB_ZONETYPE_OFFSET;
  414. in.period = timelib_rel_time_ctor();
  415. in.period->y = 0;
  416. in.period->d = 0;
  417. in.period->m = 0;
  418. in.period->h = 0;
  419. in.period->i = 0;
  420. in.period->s = 0;
  421. in.period->weekday = 0;
  422. in.period->weekday_behavior = 0;
  423. in.period->first_last_day_of = 0;
  424. in.period->days = TIMELIB_UNSET;
  425. in.recurrences = 1;
  426. do {
  427. t = scan(&in);
  428. #ifdef DEBUG_PARSER
  429. printf("%d\n", t);
  430. #endif
  431. } while(t != EOI);
  432. timelib_free(in.str);
  433. if (errors) {
  434. *errors = in.errors;
  435. } else {
  436. timelib_error_container_dtor(in.errors);
  437. }
  438. if (in.have_begin_date) {
  439. *begin = in.begin;
  440. } else {
  441. timelib_time_dtor(in.begin);
  442. }
  443. if (in.have_end_date) {
  444. *end = in.end;
  445. } else {
  446. timelib_time_dtor(in.end);
  447. }
  448. if (in.have_period) {
  449. *period = in.period;
  450. } else {
  451. timelib_rel_time_dtor(in.period);
  452. }
  453. if (in.have_recurrences) {
  454. *recurrences = in.recurrences;
  455. }
  456. }
  457. /*
  458. * vim: syntax=c
  459. */