/* main.c - test driver for the regex routines: one-shot matching and regression runs */

  #include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <regex.h>
#include <assert.h>
#include <stdlib.h>
#include <unistd.h>

#include "main.ih"
  /* Program-wide state shared by the functions in this file. */
char *progname;			/* argv[0]; used in diagnostics */
int debug = 0;			/* incremented by -x; regress() echoes line numbers when set */
int line = 0;			/* number of the regression input line being processed */
int status = 0;			/* exit status; set to 1 when a check fails */
int copts = REG_EXTENDED;	/* base regcomp() flags; -c adjusts them via options() */
int eopts = 0;			/* base regexec() flags; -e adjusts them via options() */
regoff_t startoff = 0;		/* -S: start offset used with REG_STARTEND */
regoff_t endoff = 0;		/* -E: subtracted from the string length to get the end offset */

/*
 * Helpers defined outside this file.
 * NOTE(review): these prototypes are inferred from the call sites in this
 * file; confirm them against the actual definitions.
 */
extern int split(char *string, char *fields[], int nfields, char *sep);
extern void regprint(regex_t *r, FILE *d);
  18. /*
  19. - main - do the simple case, hand off to regress() for regression
  20. */
  21. int main(argc, argv)
  22. int argc;
  23. char *argv[];
  24. {
  25. regex_t re;
  26. # define NS 10
  27. regmatch_t subs[NS];
  28. char erbuf[100];
  29. int err;
  30. size_t len;
  31. int c;
  32. int errflg = 0;
  33. register int i;
  34. extern int optind;
  35. extern char *optarg;
  36. progname = argv[0];
  37. while ((c = getopt(argc, argv, "c:e:S:E:x")) != EOF)
  38. switch (c) {
  39. case 'c': /* compile options */
  40. copts = options('c', optarg);
  41. break;
  42. case 'e': /* execute options */
  43. eopts = options('e', optarg);
  44. break;
  45. case 'S': /* start offset */
  46. startoff = (regoff_t)atoi(optarg);
  47. break;
  48. case 'E': /* end offset */
  49. endoff = (regoff_t)atoi(optarg);
  50. break;
  51. case 'x': /* Debugging. */
  52. debug++;
  53. break;
  54. case '?':
  55. default:
  56. errflg++;
  57. break;
  58. }
  59. if (errflg) {
  60. fprintf(stderr, "usage: %s ", progname);
  61. fprintf(stderr, "[-c copt][-C][-d] [re]\n");
  62. exit(2);
  63. }
  64. if (optind >= argc) {
  65. regress(stdin);
  66. exit(status);
  67. }
  68. err = regcomp(&re, argv[optind++], copts);
  69. if (err) {
  70. len = regerror(err, &re, erbuf, sizeof(erbuf));
  71. fprintf(stderr, "error %s, %d/%d `%s'\n",
  72. eprint(err), len, sizeof(erbuf), erbuf);
  73. exit(status);
  74. }
  75. regprint(&re, stdout);
  76. if (optind >= argc) {
  77. regfree(&re);
  78. exit(status);
  79. }
  80. if (eopts&REG_STARTEND) {
  81. subs[0].rm_so = startoff;
  82. subs[0].rm_eo = strlen(argv[optind]) - endoff;
  83. }
  84. err = regexec(&re, argv[optind], (size_t)NS, subs, eopts);
  85. if (err) {
  86. len = regerror(err, &re, erbuf, sizeof(erbuf));
  87. fprintf(stderr, "error %s, %d/%d `%s'\n",
  88. eprint(err), len, sizeof(erbuf), erbuf);
  89. exit(status);
  90. }
  91. if (!(copts&REG_NOSUB)) {
  92. len = (int)(subs[0].rm_eo - subs[0].rm_so);
  93. if (subs[0].rm_so != -1) {
  94. if (len != 0)
  95. printf("match `%.*s'\n", (int)len,
  96. argv[optind] + subs[0].rm_so);
  97. else
  98. printf("match `'@%.1s\n",
  99. argv[optind] + subs[0].rm_so);
  100. }
  101. for (i = 1; i < NS; i++)
  102. if (subs[i].rm_so != -1)
  103. printf("(%d) `%.*s'\n", i,
  104. (int)(subs[i].rm_eo - subs[i].rm_so),
  105. argv[optind] + subs[i].rm_so);
  106. }
  107. exit(status);
  108. }
  109. /*
  110. - regress - main loop of regression test
  111. == void regress(FILE *in);
  112. */
  113. void
  114. regress(in)
  115. FILE *in;
  116. {
  117. char inbuf[1000];
  118. # define MAXF 10
  119. char *f[MAXF];
  120. int nf;
  121. int i;
  122. char erbuf[100];
  123. size_t ne;
  124. char *badpat = "invalid regular expression";
  125. # define SHORT 10
  126. char *bpname = "REG_BADPAT";
  127. regex_t re;
  128. while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
  129. line++;
  130. if (inbuf[0] == '#' || inbuf[0] == '\n')
  131. continue; /* NOTE CONTINUE */
  132. inbuf[strlen(inbuf)-1] = '\0'; /* get rid of stupid \n */
  133. if (debug)
  134. fprintf(stdout, "%d:\n", line);
  135. nf = split(inbuf, f, MAXF, "\t\t");
  136. if (nf < 3) {
  137. fprintf(stderr, "bad input, line %d\n", line);
  138. exit(1);
  139. }
  140. for (i = 0; i < nf; i++)
  141. if (strcmp(f[i], "\"\"") == 0)
  142. f[i] = "";
  143. if (nf <= 3)
  144. f[3] = NULL;
  145. if (nf <= 4)
  146. f[4] = NULL;
  147. try(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
  148. if (opt('&', f[1])) /* try with either type of RE */
  149. try(f[0], f[1], f[2], f[3], f[4],
  150. options('c', f[1]) &~ REG_EXTENDED);
  151. }
  152. ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
  153. if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) {
  154. fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n",
  155. erbuf, badpat);
  156. status = 1;
  157. }
  158. ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT);
  159. if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' ||
  160. ne != strlen(badpat)+1) {
  161. fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n",
  162. erbuf, SHORT-1, badpat);
  163. status = 1;
  164. }
  165. ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
  166. if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) {
  167. fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n",
  168. erbuf, bpname);
  169. status = 1;
  170. }
  171. re.re_endp = bpname;
  172. ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf));
  173. if (atoi(erbuf) != (int)REG_BADPAT) {
  174. fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n",
  175. erbuf, (long)REG_BADPAT);
  176. status = 1;
  177. } else if (ne != strlen(erbuf)+1) {
  178. fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n",
  179. erbuf, (long)REG_BADPAT);
  180. status = 1;
  181. }
  182. }
  183. /*
  184. - try - try it, and report on problems
  185. == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
  186. */
  187. void
  188. try(f0, f1, f2, f3, f4, opts)
  189. char *f0;
  190. char *f1;
  191. char *f2;
  192. char *f3;
  193. char *f4;
  194. int opts; /* may not match f1 */
  195. {
  196. regex_t re;
  197. # define NSUBS 10
  198. regmatch_t subs[NSUBS];
  199. # define NSHOULD 15
  200. char *should[NSHOULD];
  201. int nshould;
  202. char erbuf[100];
  203. int err;
  204. int len;
  205. char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE";
  206. register int i;
  207. char *grump;
  208. char f0copy[1000];
  209. char f2copy[1000];
  210. strcpy(f0copy, f0);
  211. re.re_endp = (opts&REG_PEND) ? f0copy + strlen(f0copy) : NULL;
  212. fixstr(f0copy);
  213. err = regcomp(&re, f0copy, opts);
  214. if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
  215. /* unexpected error or wrong error */
  216. len = regerror(err, &re, erbuf, sizeof(erbuf));
  217. fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n",
  218. line, type, eprint(err), len,
  219. sizeof(erbuf), erbuf);
  220. status = 1;
  221. } else if (err == 0 && opt('C', f1)) {
  222. /* unexpected success */
  223. fprintf(stderr, "%d: %s should have given REG_%s\n",
  224. line, type, f2);
  225. status = 1;
  226. err = 1; /* so we won't try regexec */
  227. }
  228. if (err != 0) {
  229. regfree(&re);
  230. return;
  231. }
  232. strcpy(f2copy, f2);
  233. fixstr(f2copy);
  234. if (options('e', f1)&REG_STARTEND) {
  235. if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL)
  236. fprintf(stderr, "%d: bad STARTEND syntax\n", line);
  237. subs[0].rm_so = strchr(f2, '(') - f2 + 1;
  238. subs[0].rm_eo = strchr(f2, ')') - f2;
  239. }
  240. err = regexec(&re, f2copy, NSUBS, subs, options('e', f1));
  241. if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) {
  242. /* unexpected error or wrong error */
  243. len = regerror(err, &re, erbuf, sizeof(erbuf));
  244. fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n",
  245. line, type, eprint(err), len,
  246. sizeof(erbuf), erbuf);
  247. status = 1;
  248. } else if (err != 0) {
  249. /* nothing more to check */
  250. } else if (f3 == NULL) {
  251. /* unexpected success */
  252. fprintf(stderr, "%d: %s exec should have failed\n",
  253. line, type);
  254. status = 1;
  255. err = 1; /* just on principle */
  256. } else if (opts&REG_NOSUB) {
  257. /* nothing more to check */
  258. } else if ((grump = check(f2, subs[0], f3)) != NULL) {
  259. fprintf(stderr, "%d: %s %s\n", line, type, grump);
  260. status = 1;
  261. err = 1;
  262. }
  263. if (err != 0 || f4 == NULL) {
  264. regfree(&re);
  265. return;
  266. }
  267. for (i = 1; i < NSHOULD; i++)
  268. should[i] = NULL;
  269. nshould = split(f4, should+1, NSHOULD-1, ",");
  270. if (nshould == 0) {
  271. nshould = 1;
  272. should[1] = "";
  273. }
  274. for (i = 1; i < NSUBS; i++) {
  275. grump = check(f2, subs[i], should[i]);
  276. if (grump != NULL) {
  277. fprintf(stderr, "%d: %s $%d %s\n", line,
  278. type, i, grump);
  279. status = 1;
  280. err = 1;
  281. }
  282. }
  283. regfree(&re);
  284. }
  285. /*
  286. - options - pick options out of a regression-test string
  287. == int options(int type, char *s);
  288. */
  289. int
  290. options(type, s)
  291. int type; /* 'c' compile, 'e' exec */
  292. char *s;
  293. {
  294. register char *p;
  295. register int o = (type == 'c') ? copts : eopts;
  296. register char *legal = (type == 'c') ? "bisnmp" : "^$#tl";
  297. for (p = s; *p != '\0'; p++)
  298. if (strchr(legal, *p) != NULL)
  299. switch (*p) {
  300. case 'b':
  301. o &= ~REG_EXTENDED;
  302. break;
  303. case 'i':
  304. o |= REG_ICASE;
  305. break;
  306. case 's':
  307. o |= REG_NOSUB;
  308. break;
  309. case 'n':
  310. o |= REG_NEWLINE;
  311. break;
  312. case 'm':
  313. o &= ~REG_EXTENDED;
  314. o |= REG_NOSPEC;
  315. break;
  316. case 'p':
  317. o |= REG_PEND;
  318. break;
  319. case '^':
  320. o |= REG_NOTBOL;
  321. break;
  322. case '$':
  323. o |= REG_NOTEOL;
  324. break;
  325. case '#':
  326. o |= REG_STARTEND;
  327. break;
  328. case 't': /* trace */
  329. o |= REG_TRACE;
  330. break;
  331. case 'l': /* force long representation */
  332. o |= REG_LARGE;
  333. break;
  334. case 'r': /* force backref use */
  335. o |= REG_BACKR;
  336. break;
  337. }
  338. return(o);
  339. }
  /*
 - opt - is a particular option in a regression string?
 == int opt(int c, char *s);
 *
 * Returns 1 if the character c occurs in s, 0 otherwise.  A NULL string
 * and c == '\0' both yield 0: strchr() would otherwise match the string
 * terminator and report a NUL "option" as present.
 */
int				/* predicate */
opt(int c, char *s)
{
	if (s == NULL || c == '\0')
		return(0);
	return(strchr(s, c) != NULL);
}
  /*
 - fixstr - transform magic characters in strings
 == void fixstr(register char *p);
 *
 * Rewrites p in place: 'N' becomes newline, 'T' tab, 'S' space and 'Z' NUL.
 * A NULL pointer is ignored.  The scan stops at the first NUL that was
 * already in the string; a NUL written for 'Z' does not stop it, because
 * the loop test runs on the following character.
 */
void
fixstr(char *p)
{
	if (p == NULL)
		return;

	for (; *p != '\0'; p++) {
		switch (*p) {
		case 'N':
			*p = '\n';
			break;
		case 'T':
			*p = '\t';
			break;
		case 'S':
			*p = ' ';
			break;
		case 'Z':
			*p = '\0';
			break;
		default:
			break;
		}
	}
}
  /*
 - check - check a substring match
 == char *check(char *str, regmatch_t sub, char *should);
 *
 * Compares the substring of str described by sub with the expectation
 * `should'.  NULL or "-" means "should not have matched".  A value that
 * starts with '@' means "should match the null string, immediately
 * followed by the text after the @" (a bare "@" means at end of string).
 * Returns NULL when everything agrees, otherwise a complaint.  The
 * complaint may live in a static buffer that the next call overwrites.
 */
char *				/* NULL or complaint */
check(char *str, regmatch_t sub, char *should)
{
	enum { SHOWMAX = 450 };	/* most matched text quoted in a complaint */
	static char grump[500];
	char *at = NULL;
	char *p;
	int len;
	int shlen;
	int shown;

	if (should != NULL && strcmp(should, "-") == 0)
		should = NULL;
	if (should != NULL && should[0] == '@') {
		at = should + 1;
		should = "";
	}

	/* check rm_so and rm_eo for consistency */
	if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) ||
	    (sub.rm_so != -1 && sub.rm_eo == -1) ||
	    (sub.rm_so != -1 && sub.rm_so < 0) ||
	    (sub.rm_eo != -1 && sub.rm_eo < 0) ) {
		sprintf(grump, "start %ld end %ld", (long)sub.rm_so,
		    (long)sub.rm_eo);
		return(grump);
	}

	/* check for no match */
	if (sub.rm_so == -1 && should == NULL)
		return(NULL);
	if (sub.rm_so == -1)
		return("did not match");

	/* check for in range; rm_eo is known to be non-negative here */
	if ((size_t)sub.rm_eo > strlen(str)) {
		sprintf(grump, "start %ld end %ld, past end of string",
		    (long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	len = (int)(sub.rm_eo - sub.rm_so);
	p = str + sub.rm_so;
	/* cap the quoted text so a long match cannot overflow grump[] */
	shown = (len < SHOWMAX) ? len : SHOWMAX;

	/* check for not supposed to match; must precede any use of `should' */
	if (should == NULL) {
		sprintf(grump, "matched `%.*s'", shown, p);
		return(grump);
	}

	/* check for wrong match */
	shlen = (int)strlen(should);
	if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
		sprintf(grump, "matched `%.*s' instead", shown, p);
		return(grump);
	}
	if (shlen > 0)
		return(NULL);

	/* check null match in right place */
	if (at == NULL)
		return(NULL);
	shlen = (int)strlen(at);
	if (shlen == 0)
		shlen = 1;	/* force check for end-of-string */
	if (strncmp(p, at, (size_t)shlen) != 0) {
		sprintf(grump, "matched null at `%.20s'", p);
		return(grump);
	}
	return(NULL);
}
  439. /*
  440. - eprint - convert error number to name
  441. == static char *eprint(int err);
  442. */
  443. static char *
  444. eprint(err)
  445. int err;
  446. {
  447. static char epbuf[100];
  448. size_t len;
  449. len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf));
  450. assert(len <= sizeof(epbuf));
  451. return(epbuf);
  452. }
  453. /*
  454. - efind - convert error name to number
  455. == static int efind(char *name);
  456. */
  457. static int
  458. efind(name)
  459. char *name;
  460. {
  461. static char efbuf[100];
  462. regex_t re;
  463. sprintf(efbuf, "REG_%s", name);
  464. assert(strlen(efbuf) < sizeof(efbuf));
  465. re.re_endp = efbuf;
  466. (void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf));
  467. return(atoi(efbuf));
  468. }