pcrecpp_unittest.cc 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316
  1. // -*- coding: utf-8 -*-
  2. //
  3. // Copyright (c) 2005 - 2010, Google Inc.
  4. // All rights reserved.
  5. //
  6. // Redistribution and use in source and binary forms, with or without
  7. // modification, are permitted provided that the following conditions are
  8. // met:
  9. //
  10. // * Redistributions of source code must retain the above copyright
  11. // notice, this list of conditions and the following disclaimer.
  12. // * Redistributions in binary form must reproduce the above
  13. // copyright notice, this list of conditions and the following disclaimer
  14. // in the documentation and/or other materials provided with the
  15. // distribution.
  16. // * Neither the name of Google Inc. nor the names of its
  17. // contributors may be used to endorse or promote products derived from
  18. // this software without specific prior written permission.
  19. //
  20. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. //
  32. // Author: Sanjay Ghemawat
  33. //
  34. // TODO: Test extractions for PartialMatch/Consume
  35. #ifdef HAVE_CONFIG_H
  36. #include "config.h"
  37. #endif
  38. #include <stdio.h>
  39. #include <string.h> /* for memset and strcmp */
  40. #include <cassert>
  41. #include <vector>
  42. #include "pcrecpp.h"
  43. using std::string;
  44. using pcrecpp::StringPiece;
  45. using pcrecpp::RE;
  46. using pcrecpp::RE_Options;
  47. using pcrecpp::Hex;
  48. using pcrecpp::Octal;
  49. using pcrecpp::CRadix;
  // When true, the option tests additionally print a description of each
  // match attempt they make.
  static bool VERBOSE_TEST = false;

  // CHECK dies with a fatal error if condition is not true.  It is *not*
  // controlled by NDEBUG, so the check will be executed regardless of
  // compilation mode.  Therefore, it is safe to do things like:
  //    CHECK_EQ(fp->Write(x), 4)
  #define CHECK(condition) do {                                   \
    if (!(condition)) {                                           \
      fprintf(stderr, "%s:%d: Check failed: %s\n",                \
              __FILE__, __LINE__, #condition);                    \
      exit(1);                                                    \
    }                                                             \
  } while (0)

  // CHECK_EQ dies with a fatal error unless a == b.  Both operands are
  // parenthesized so that an argument containing an operator that binds
  // less tightly than ==, such as `x & mask`, is compared as a whole
  // expression instead of being regrouped around the ==.
  #define CHECK_EQ(a, b)  CHECK((a) == (b))
  63. static void Timing1(int num_iters) {
  64. // Same pattern lots of times
  65. RE pattern("ruby:\\d+");
  66. StringPiece p("ruby:1234");
  67. for (int j = num_iters; j > 0; j--) {
  68. CHECK(pattern.FullMatch(p));
  69. }
  70. }
  71. static void Timing2(int num_iters) {
  72. // Same pattern lots of times
  73. RE pattern("ruby:(\\d+)");
  74. int i;
  75. for (int j = num_iters; j > 0; j--) {
  76. CHECK(pattern.FullMatch("ruby:1234", &i));
  77. CHECK_EQ(i, 1234);
  78. }
  79. }
  80. static void Timing3(int num_iters) {
  81. string text_string;
  82. for (int j = num_iters; j > 0; j--) {
  83. text_string += "this is another line\n";
  84. }
  85. RE line_matcher(".*\n");
  86. string line;
  87. StringPiece text(text_string);
  88. int counter = 0;
  89. while (line_matcher.Consume(&text)) {
  90. counter++;
  91. }
  92. printf("Matched %d lines\n", counter);
  93. }
  #if 0  // enable this if you have a way of defining VirtualProcessSize()
  // Constructs 100000 RE objects, each from a distinct pattern, and checks
  // that the value reported by VirtualProcessSize() grows by less than 2%
  // between iteration 50000 and the end of the loop.
  static void LeakTest() {
    // Check for memory leaks
    unsigned long long initial_size = 0;
    for (int i = 0; i < 100000; i++) {
      if (i == 50000) {
        initial_size = VirtualProcessSize();
        printf("Size after 50000: %llu\n", initial_size);
      }
      char buf[100];  // definitely big enough
      snprintf(buf, sizeof(buf), "pat%09d", i);
      RE newre(buf);
    }
    // Same type as initial_size, so that it matches the %llu conversion.
    const unsigned long long final_size = VirtualProcessSize();
    printf("Size after 100000: %llu\n", final_size);
    // Subtract as doubles: an unsigned subtraction would wrap around to a
    // huge value if the size shrank between the two samples.
    const double growth =
        (double(final_size) - double(initial_size)) / final_size;
    printf("Growth: %0.2f%%", growth * 100);
    CHECK(growth < 0.02);    // Allow < 2% growth
  }
  #endif
  114. static void RadixTests() {
  115. printf("Testing hex\n");
  116. #define CHECK_HEX(type, value) \
  117. do { \
  118. type v; \
  119. CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
  120. CHECK_EQ(v, 0x ## value); \
  121. CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
  122. CHECK_EQ(v, 0x ## value); \
  123. } while(0)
  124. CHECK_HEX(short, 2bad);
  125. CHECK_HEX(unsigned short, 2badU);
  126. CHECK_HEX(int, dead);
  127. CHECK_HEX(unsigned int, deadU);
  128. CHECK_HEX(long, 7eadbeefL);
  129. CHECK_HEX(unsigned long, deadbeefUL);
  130. #ifdef HAVE_LONG_LONG
  131. CHECK_HEX(long long, 12345678deadbeefLL);
  132. #endif
  133. #ifdef HAVE_UNSIGNED_LONG_LONG
  134. CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
  135. #endif
  136. #undef CHECK_HEX
  137. printf("Testing octal\n");
  138. #define CHECK_OCTAL(type, value) \
  139. do { \
  140. type v; \
  141. CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
  142. CHECK_EQ(v, 0 ## value); \
  143. CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
  144. CHECK_EQ(v, 0 ## value); \
  145. } while(0)
  146. CHECK_OCTAL(short, 77777);
  147. CHECK_OCTAL(unsigned short, 177777U);
  148. CHECK_OCTAL(int, 17777777777);
  149. CHECK_OCTAL(unsigned int, 37777777777U);
  150. CHECK_OCTAL(long, 17777777777L);
  151. CHECK_OCTAL(unsigned long, 37777777777UL);
  152. #ifdef HAVE_LONG_LONG
  153. CHECK_OCTAL(long long, 777777777777777777777LL);
  154. #endif
  155. #ifdef HAVE_UNSIGNED_LONG_LONG
  156. CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
  157. #endif
  158. #undef CHECK_OCTAL
  159. printf("Testing decimal\n");
  160. #define CHECK_DECIMAL(type, value) \
  161. do { \
  162. type v; \
  163. CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
  164. CHECK_EQ(v, value); \
  165. CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
  166. CHECK_EQ(v, value); \
  167. } while(0)
  168. CHECK_DECIMAL(short, -1);
  169. CHECK_DECIMAL(unsigned short, 9999);
  170. CHECK_DECIMAL(int, -1000);
  171. CHECK_DECIMAL(unsigned int, 12345U);
  172. CHECK_DECIMAL(long, -10000000L);
  173. CHECK_DECIMAL(unsigned long, 3083324652U);
  174. #ifdef HAVE_LONG_LONG
  175. CHECK_DECIMAL(long long, -100000000000000LL);
  176. #endif
  177. #ifdef HAVE_UNSIGNED_LONG_LONG
  178. CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
  179. #endif
  180. #undef CHECK_DECIMAL
  181. }
  182. static void TestReplace() {
  183. printf("Testing Replace\n");
  184. struct ReplaceTest {
  185. const char *regexp;
  186. const char *rewrite;
  187. const char *original;
  188. const char *single;
  189. const char *global;
  190. int global_count; // the expected return value from ReplaceAll
  191. };
  192. static const ReplaceTest tests[] = {
  193. { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
  194. "\\2\\1ay",
  195. "the quick brown fox jumps over the lazy dogs.",
  196. "ethay quick brown fox jumps over the lazy dogs.",
  197. "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
  198. 9 },
  199. { "\\w+",
  200. "\\0-NOSPAM",
  201. "paul.haahr@google.com",
  202. "paul-NOSPAM.haahr@google.com",
  203. "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM",
  204. 4 },
  205. { "^",
  206. "(START)",
  207. "foo",
  208. "(START)foo",
  209. "(START)foo",
  210. 1 },
  211. { "^",
  212. "(START)",
  213. "",
  214. "(START)",
  215. "(START)",
  216. 1 },
  217. { "$",
  218. "(END)",
  219. "",
  220. "(END)",
  221. "(END)",
  222. 1 },
  223. { "b",
  224. "bb",
  225. "ababababab",
  226. "abbabababab",
  227. "abbabbabbabbabb",
  228. 5 },
  229. { "b",
  230. "bb",
  231. "bbbbbb",
  232. "bbbbbbb",
  233. "bbbbbbbbbbbb",
  234. 6 },
  235. { "b+",
  236. "bb",
  237. "bbbbbb",
  238. "bb",
  239. "bb",
  240. 1 },
  241. { "b*",
  242. "bb",
  243. "bbbbbb",
  244. "bb",
  245. "bbbb",
  246. 2 },
  247. { "b*",
  248. "bb",
  249. "aaaaa",
  250. "bbaaaaa",
  251. "bbabbabbabbabbabb",
  252. 6 },
  253. { "b*",
  254. "bb",
  255. "aa\naa\n",
  256. "bbaa\naa\n",
  257. "bbabbabb\nbbabbabb\nbb",
  258. 7 },
  259. { "b*",
  260. "bb",
  261. "aa\raa\r",
  262. "bbaa\raa\r",
  263. "bbabbabb\rbbabbabb\rbb",
  264. 7 },
  265. { "b*",
  266. "bb",
  267. "aa\r\naa\r\n",
  268. "bbaa\r\naa\r\n",
  269. "bbabbabb\r\nbbabbabb\r\nbb",
  270. 7 },
  271. // Check empty-string matching (it's tricky!)
  272. { "aa|b*",
  273. "@",
  274. "aa",
  275. "@",
  276. "@@",
  277. 2 },
  278. { "b*|aa",
  279. "@",
  280. "aa",
  281. "@aa",
  282. "@@@",
  283. 3 },
  284. #ifdef SUPPORT_UTF
  285. { "b*",
  286. "bb",
  287. "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8
  288. "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
  289. "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb",
  290. 5 },
  291. { "b*",
  292. "bb",
  293. "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8
  294. "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
  295. ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
  296. "bb\nbb""\xE3\x81\xB8""bb\r\nbb"),
  297. 9 },
  298. #endif
  299. { "", NULL, NULL, NULL, NULL, 0 }
  300. };
  301. #ifdef SUPPORT_UTF
  302. const bool support_utf8 = true;
  303. #else
  304. const bool support_utf8 = false;
  305. #endif
  306. for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
  307. RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
  308. assert(re.error().empty());
  309. string one(t->original);
  310. CHECK(re.Replace(t->rewrite, &one));
  311. CHECK_EQ(one, t->single);
  312. string all(t->original);
  313. const int replace_count = re.GlobalReplace(t->rewrite, &all);
  314. CHECK_EQ(all, t->global);
  315. CHECK_EQ(replace_count, t->global_count);
  316. }
  317. // One final test: test \r\n replacement when we're not in CRLF mode
  318. {
  319. RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
  320. assert(re.error().empty());
  321. string all("aa\r\naa\r\n");
  322. CHECK_EQ(re.GlobalReplace("bb", &all), 9);
  323. CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
  324. }
  325. {
  326. RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
  327. assert(re.error().empty());
  328. string all("aa\r\naa\r\n");
  329. CHECK_EQ(re.GlobalReplace("bb", &all), 9);
  330. CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
  331. }
  332. // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
  333. // Alas, the answer depends on how pcre was compiled.
  334. }
  335. static void TestExtract() {
  336. printf("Testing Extract\n");
  337. string s;
  338. CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
  339. CHECK_EQ(s, "kremvax!boris");
  340. // check the RE interface as well
  341. CHECK(RE(".*").Extract("'\\0'", "foo", &s));
  342. CHECK_EQ(s, "'foo'");
  343. CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
  344. CHECK_EQ(s, "'foo'");
  345. }
  346. static void TestConsume() {
  347. printf("Testing Consume\n");
  348. string word;
  349. string s(" aaa b!@#$@#$cccc");
  350. StringPiece input(s);
  351. RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
  352. CHECK(r.Consume(&input, &word));
  353. CHECK_EQ(word, "aaa");
  354. CHECK(r.Consume(&input, &word));
  355. CHECK_EQ(word, "b");
  356. CHECK(! r.Consume(&input, &word));
  357. }
  358. static void TestFindAndConsume() {
  359. printf("Testing FindAndConsume\n");
  360. string word;
  361. string s(" aaa b!@#$@#$cccc");
  362. StringPiece input(s);
  363. RE r("(\\w+)"); // matches a word
  364. CHECK(r.FindAndConsume(&input, &word));
  365. CHECK_EQ(word, "aaa");
  366. CHECK(r.FindAndConsume(&input, &word));
  367. CHECK_EQ(word, "b");
  368. CHECK(r.FindAndConsume(&input, &word));
  369. CHECK_EQ(word, "cccc");
  370. CHECK(! r.FindAndConsume(&input, &word));
  371. }
  372. static void TestMatchNumberPeculiarity() {
  373. printf("Testing match-number peculiarity\n");
  374. string word1;
  375. string word2;
  376. string word3;
  377. RE r("(foo)|(bar)|(baz)");
  378. CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
  379. CHECK_EQ(word1, "foo");
  380. CHECK_EQ(word2, "");
  381. CHECK_EQ(word3, "");
  382. CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
  383. CHECK_EQ(word1, "");
  384. CHECK_EQ(word2, "bar");
  385. CHECK_EQ(word3, "");
  386. CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
  387. CHECK_EQ(word1, "");
  388. CHECK_EQ(word2, "");
  389. CHECK_EQ(word3, "baz");
  390. CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
  391. string a;
  392. CHECK(RE("(foo)|hello").FullMatch("hello", &a));
  393. CHECK_EQ(a, "");
  394. }
  395. static void TestRecursion() {
  396. printf("Testing recursion\n");
  397. // Get one string that passes (sometimes), one that never does.
  398. string text_good("abcdefghijk");
  399. string text_bad("acdefghijkl");
  400. // According to pcretest, matching text_good against (\w+)*b
  401. // requires match_limit of at least 8192, and match_recursion_limit
  402. // of at least 37.
  403. RE_Options options_ml;
  404. options_ml.set_match_limit(8192);
  405. RE re("(\\w+)*b", options_ml);
  406. CHECK(re.PartialMatch(text_good) == true);
  407. CHECK(re.PartialMatch(text_bad) == false);
  408. CHECK(re.FullMatch(text_good) == false);
  409. CHECK(re.FullMatch(text_bad) == false);
  410. options_ml.set_match_limit(1024);
  411. RE re2("(\\w+)*b", options_ml);
  412. CHECK(re2.PartialMatch(text_good) == false); // because of match_limit
  413. CHECK(re2.PartialMatch(text_bad) == false);
  414. CHECK(re2.FullMatch(text_good) == false);
  415. CHECK(re2.FullMatch(text_bad) == false);
  416. RE_Options options_mlr;
  417. options_mlr.set_match_limit_recursion(50);
  418. RE re3("(\\w+)*b", options_mlr);
  419. CHECK(re3.PartialMatch(text_good) == true);
  420. CHECK(re3.PartialMatch(text_bad) == false);
  421. CHECK(re3.FullMatch(text_good) == false);
  422. CHECK(re3.FullMatch(text_bad) == false);
  423. options_mlr.set_match_limit_recursion(10);
  424. RE re4("(\\w+)*b", options_mlr);
  425. CHECK(re4.PartialMatch(text_good) == false);
  426. CHECK(re4.PartialMatch(text_bad) == false);
  427. CHECK(re4.FullMatch(text_good) == false);
  428. CHECK(re4.FullMatch(text_bad) == false);
  429. }
  430. // A meta-quoted string, interpreted as a pattern, should always match
  431. // the original unquoted string.
  432. static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
  433. string quoted = RE::QuoteMeta(unquoted);
  434. RE re(quoted, options);
  435. CHECK(re.FullMatch(unquoted));
  436. }
  437. // A string containing meaningful regexp characters, which is then meta-
  438. // quoted, should not generally match a string the unquoted string does.
  439. static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
  440. RE_Options options = RE_Options()) {
  441. string quoted = RE::QuoteMeta(unquoted);
  442. RE re(quoted, options);
  443. CHECK(!re.FullMatch(should_not_match));
  444. }
  445. // Tests that quoted meta characters match their original strings,
  446. // and that a few things that shouldn't match indeed do not.
  447. static void TestQuotaMetaSimple() {
  448. TestQuoteMeta("foo");
  449. TestQuoteMeta("foo.bar");
  450. TestQuoteMeta("foo\\.bar");
  451. TestQuoteMeta("[1-9]");
  452. TestQuoteMeta("1.5-2.0?");
  453. TestQuoteMeta("\\d");
  454. TestQuoteMeta("Who doesn't like ice cream?");
  455. TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
  456. TestQuoteMeta("((?!)xxx).*yyy");
  457. TestQuoteMeta("([");
  458. TestQuoteMeta(string("foo\0bar", 7));
  459. }
  460. static void TestQuoteMetaSimpleNegative() {
  461. NegativeTestQuoteMeta("foo", "bar");
  462. NegativeTestQuoteMeta("...", "bar");
  463. NegativeTestQuoteMeta("\\.", ".");
  464. NegativeTestQuoteMeta("\\.", "..");
  465. NegativeTestQuoteMeta("(a)", "a");
  466. NegativeTestQuoteMeta("(a|b)", "a");
  467. NegativeTestQuoteMeta("(a|b)", "(a)");
  468. NegativeTestQuoteMeta("(a|b)", "a|b");
  469. NegativeTestQuoteMeta("[0-9]", "0");
  470. NegativeTestQuoteMeta("[0-9]", "0-9");
  471. NegativeTestQuoteMeta("[0-9]", "[9]");
  472. NegativeTestQuoteMeta("((?!)xxx)", "xxx");
  473. }
  474. static void TestQuoteMetaLatin1() {
  475. TestQuoteMeta("3\xb2 = 9");
  476. }
  // Quotes strings containing multi-byte UTF-8 sequences.  The body is
  // compiled only when SUPPORT_UTF is defined.
  static void TestQuoteMetaUtf8() {
  #ifdef SUPPORT_UTF
    const RE_Options utf8_mode = pcrecpp::UTF8();

    TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", utf8_mode);
    TestQuoteMeta("xyz", utf8_mode);                 // No fancy utf8
    TestQuoteMeta("\xc2\xb0", utf8_mode);            // 2-byte utf8 (degree symbol)
    TestQuoteMeta("27\xc2\xb0 degrees", utf8_mode);  // As a middle character
    TestQuoteMeta("\xe2\x80\xb3", utf8_mode);        // 3-byte utf8 (double prime)
    TestQuoteMeta("\xf0\x9d\x85\x9f", utf8_mode);    // 4-byte utf8 (music note)
    TestQuoteMeta("27\xc2\xb0");  // Interpreted as Latin-1, but should still work
    NegativeTestQuoteMeta("27\xc2\xb0",              // 2-byte utf (degree symbol)
                          "27\\\xc2\\\xb0",
                          utf8_mode);
  #endif
  }
  491. static void TestQuoteMetaAll() {
  492. printf("Testing QuoteMeta\n");
  493. TestQuotaMetaSimple();
  494. TestQuoteMetaSimpleNegative();
  495. TestQuoteMetaLatin1();
  496. TestQuoteMetaUtf8();
  497. }
  498. //
  499. // Options tests contributed by
  500. // Giuseppe Maxia, CTO, Stardata s.r.l.
  501. // July 2005
  502. //
  503. static void GetOneOptionResult(
  504. const char *option_name,
  505. const char *regex,
  506. const char *str,
  507. RE_Options options,
  508. bool full,
  509. string expected) {
  510. printf("Testing Option <%s>\n", option_name);
  511. if(VERBOSE_TEST)
  512. printf("/%s/ finds \"%s\" within \"%s\" \n",
  513. regex,
  514. expected.c_str(),
  515. str);
  516. string captured("");
  517. if (full)
  518. RE(regex,options).FullMatch(str, &captured);
  519. else
  520. RE(regex,options).PartialMatch(str, &captured);
  521. CHECK_EQ(captured, expected);
  522. }
  523. static void TestOneOption(
  524. const char *option_name,
  525. const char *regex,
  526. const char *str,
  527. RE_Options options,
  528. bool full,
  529. bool assertive = true) {
  530. printf("Testing Option <%s>\n", option_name);
  531. if (VERBOSE_TEST)
  532. printf("'%s' %s /%s/ \n",
  533. str,
  534. (assertive? "matches" : "doesn't match"),
  535. regex);
  536. if (assertive) {
  537. if (full)
  538. CHECK(RE(regex,options).FullMatch(str));
  539. else
  540. CHECK(RE(regex,options).PartialMatch(str));
  541. } else {
  542. if (full)
  543. CHECK(!RE(regex,options).FullMatch(str));
  544. else
  545. CHECK(!RE(regex,options).PartialMatch(str));
  546. }
  547. }
  548. static void Test_CASELESS() {
  549. RE_Options options;
  550. RE_Options options2;
  551. options.set_caseless(true);
  552. TestOneOption("CASELESS (class)", "HELLO", "hello", options, false);
  553. TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false);
  554. TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false);
  555. TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
  556. TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
  557. options.set_caseless(false);
  558. TestOneOption("no CASELESS", "HELLO", "hello", options, false, false);
  559. }
  560. static void Test_MULTILINE() {
  561. RE_Options options;
  562. RE_Options options2;
  563. const char *str = "HELLO\n" "cruel\n" "world\n";
  564. options.set_multiline(true);
  565. TestOneOption("MULTILINE (class)", "^cruel$", str, options, false);
  566. TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false);
  567. TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
  568. options.set_multiline(false);
  569. TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
  570. }
  571. static void Test_DOTALL() {
  572. RE_Options options;
  573. RE_Options options2;
  574. const char *str = "HELLO\n" "cruel\n" "world";
  575. options.set_dotall(true);
  576. TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true);
  577. TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true);
  578. TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true);
  579. options.set_dotall(false);
  580. TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
  581. }
  582. static void Test_DOLLAR_ENDONLY() {
  583. RE_Options options;
  584. RE_Options options2;
  585. const char *str = "HELLO world\n";
  586. TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
  587. options.set_dollar_endonly(true);
  588. TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false);
  589. TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false);
  590. }
  591. static void Test_EXTRA() {
  592. RE_Options options;
  593. const char *str = "HELLO";
  594. options.set_extra(true);
  595. TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
  596. TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
  597. options.set_extra(false);
  598. TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
  599. }
  600. static void Test_EXTENDED() {
  601. RE_Options options;
  602. RE_Options options2;
  603. const char *str = "HELLO world";
  604. options.set_extended(true);
  605. TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false);
  606. TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false);
  607. TestOneOption("EXTENDED (class)",
  608. "^ HE L{2} O "
  609. "\\s+ "
  610. "\\w+ $ ",
  611. str,
  612. options,
  613. false);
  614. TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false);
  615. TestOneOption("EXTENDED (function)",
  616. "^ HE L{2} O "
  617. "\\s+ "
  618. "\\w+ $ ",
  619. str,
  620. pcrecpp::EXTENDED(),
  621. false);
  622. options.set_extended(false);
  623. TestOneOption("no EXTENDED", "HELLO world", str, options, false);
  624. }
  625. static void Test_NO_AUTO_CAPTURE() {
  626. RE_Options options;
  627. const char *str = "HELLO world";
  628. string captured;
  629. printf("Testing Option <no NO_AUTO_CAPTURE>\n");
  630. if (VERBOSE_TEST)
  631. printf("parentheses capture text\n");
  632. RE re("(world|universe)$", options);
  633. CHECK(re.Extract("\\1", str , &captured));
  634. CHECK_EQ(captured, "world");
  635. options.set_no_auto_capture(true);
  636. printf("testing Option <NO_AUTO_CAPTURE>\n");
  637. if (VERBOSE_TEST)
  638. printf("parentheses do not capture text\n");
  639. re.Extract("\\1",str, &captured );
  640. CHECK_EQ(captured, "world");
  641. }
  642. static void Test_UNGREEDY() {
  643. RE_Options options;
  644. const char *str = "HELLO, 'this' is the 'world'";
  645. options.set_ungreedy(true);
  646. GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
  647. GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
  648. GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
  649. options.set_ungreedy(false);
  650. GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
  651. GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
  652. }
  653. static void Test_all_options() {
  654. const char *str = "HELLO\n" "cruel\n" "world";
  655. RE_Options options;
  656. options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
  657. TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
  658. options.set_all_options(0);
  659. TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
  660. options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
  661. TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
  662. TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
  663. " ^ c r u e l $ ",
  664. str,
  665. RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
  666. false);
  667. TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
  668. " ^ c r u e l $ ",
  669. str,
  670. RE_Options()
  671. .set_multiline(true)
  672. .set_extended(true),
  673. false);
  674. options.set_all_options(0);
  675. TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
  676. }
  677. static void TestOptions() {
  678. printf("Testing Options\n");
  679. Test_CASELESS();
  680. Test_MULTILINE();
  681. Test_DOTALL();
  682. Test_DOLLAR_ENDONLY();
  683. Test_EXTENDED();
  684. Test_NO_AUTO_CAPTURE();
  685. Test_UNGREEDY();
  686. Test_EXTRA();
  687. Test_all_options();
  688. }
  689. static void TestConstructors() {
  690. printf("Testing constructors\n");
  691. RE_Options options;
  692. options.set_dotall(true);
  693. const char *str = "HELLO\n" "cruel\n" "world";
  694. RE orig("HELLO.*world", options);
  695. CHECK(orig.FullMatch(str));
  696. RE copy1(orig);
  697. CHECK(copy1.FullMatch(str));
  698. RE copy2("not a match");
  699. CHECK(!copy2.FullMatch(str));
  700. copy2 = copy1;
  701. CHECK(copy2.FullMatch(str));
  702. copy2 = orig;
  703. CHECK(copy2.FullMatch(str));
  704. // Make sure when we assign to ourselves, nothing bad happens
  705. orig = orig;
  706. copy1 = copy1;
  707. copy2 = copy2;
  708. CHECK(orig.FullMatch(str));
  709. CHECK(copy1.FullMatch(str));
  710. CHECK(copy2.FullMatch(str));
  711. }
  712. int main(int argc, char** argv) {
  713. // Treat any flag as --help
  714. if (argc > 1 && argv[1][0] == '-') {
  715. printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
  716. " If 'timingX ###' is specified, run the given timing test\n"
  717. " with the given number of iterations, rather than running\n"
  718. " the default corectness test.\n", argv[0]);
  719. return 0;
  720. }
  721. if (argc > 1) {
  722. if ( argc == 2 || atoi(argv[2]) == 0) {
  723. printf("timing mode needs a num-iters argument\n");
  724. return 1;
  725. }
  726. if (!strcmp(argv[1], "timing1"))
  727. Timing1(atoi(argv[2]));
  728. else if (!strcmp(argv[1], "timing2"))
  729. Timing2(atoi(argv[2]));
  730. else if (!strcmp(argv[1], "timing3"))
  731. Timing3(atoi(argv[2]));
  732. else
  733. printf("Unknown argument '%s'\n", argv[1]);
  734. return 0;
  735. }
  736. printf("PCRE C++ wrapper tests\n");
  737. printf("Testing FullMatch\n");
  738. int i;
  739. string s;
  740. /***** FullMatch with no args *****/
  741. CHECK(RE("h.*o").FullMatch("hello"));
  742. CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front
  743. CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end
  744. CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op
  745. CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op
  746. CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops
  747. /***** FullMatch with args *****/
  748. // Zero-arg
  749. CHECK(RE("\\d+").FullMatch("1001"));
  750. // Single-arg
  751. CHECK(RE("(\\d+)").FullMatch("1001", &i));
  752. CHECK_EQ(i, 1001);
  753. CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
  754. CHECK_EQ(i, -123);
  755. CHECK(!RE("()\\d+").FullMatch("10", &i));
  756. CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
  757. &i));
  758. // Digits surrounding integer-arg
  759. CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
  760. CHECK_EQ(i, 23);
  761. CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
  762. CHECK_EQ(i, 1);
  763. CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
  764. CHECK_EQ(i, -1);
  765. CHECK(RE("(\\d)").PartialMatch("1234", &i));
  766. CHECK_EQ(i, 1);
  767. CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
  768. CHECK_EQ(i, -1);
  769. // String-arg
  770. CHECK(RE("h(.*)o").FullMatch("hello", &s));
  771. CHECK_EQ(s, string("ell"));
  772. // StringPiece-arg
  773. StringPiece sp;
  774. CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
  775. CHECK_EQ(sp.size(), 4);
  776. CHECK(memcmp(sp.data(), "ruby", 4) == 0);
  777. CHECK_EQ(i, 1234);
  778. // Multi-arg
  779. CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
  780. CHECK_EQ(s, string("ruby"));
  781. CHECK_EQ(i, 1234);
  782. // Ignore non-void* NULL arg
  783. CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL));
  784. CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL));
  785. CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL));
  786. CHECK(RE("(.*)").FullMatch("1234", (int*)NULL));
  787. #ifdef HAVE_LONG_LONG
  788. CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL));
  789. #endif
  790. CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL));
  791. CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL));
  792. // Fail on non-void* NULL arg if the match doesn't parse for the given type.
  793. CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL));
  794. CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL));
  795. CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL));
  796. CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL));
  797. CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL));
  798. // Ignored arg
  799. CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
  800. CHECK_EQ(s, string("ruby"));
  801. CHECK_EQ(i, 1234);
  802. // Type tests
  803. {
  804. char c;
  805. CHECK(RE("(H)ello").FullMatch("Hello", &c));
  806. CHECK_EQ(c, 'H');
  807. }
  808. {
  809. unsigned char c;
  810. CHECK(RE("(H)ello").FullMatch("Hello", &c));
  811. CHECK_EQ(c, static_cast<unsigned char>('H'));
  812. }
  813. {
  814. short v;
  815. CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
  816. CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
  817. CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
  818. CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
  819. CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
  820. CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
  821. }
  822. {
  823. unsigned short v;
  824. CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
  825. CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
  826. CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
  827. CHECK(!RE("(\\d+)").FullMatch("65536", &v));
  828. }
  829. {
  830. int v;
  831. static const int max_value = 0x7fffffff;
  832. static const int min_value = -max_value - 1;
  833. CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
  834. CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
  835. CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
  836. CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
  837. CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
  838. CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
  839. }
  840. {
  841. unsigned int v;
  842. static const unsigned int max_value = 0xfffffffful;
  843. CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
  844. CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
  845. CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
  846. }
  847. #ifdef HAVE_LONG_LONG
  848. # if defined(__MINGW__) || defined(__MINGW32__)
  849. # define LLD "%I64d"
  850. # define LLU "%I64u"
  851. # else
  852. # define LLD "%lld"
  853. # define LLU "%llu"
  854. # endif
  855. {
  856. long long v;
  857. static const long long max_value = 0x7fffffffffffffffLL;
  858. static const long long min_value = -max_value - 1;
  859. char buf[32]; // definitely big enough for a long long
  860. CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
  861. CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
  862. sprintf(buf, LLD, max_value);
  863. CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
  864. sprintf(buf, LLD, min_value);
  865. CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
  866. sprintf(buf, LLD, max_value);
  867. assert(buf[strlen(buf)-1] != '9');
  868. buf[strlen(buf)-1]++;
  869. CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
  870. sprintf(buf, LLD, min_value);
  871. assert(buf[strlen(buf)-1] != '9');
  872. buf[strlen(buf)-1]++;
  873. CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
  874. }
  875. #endif
  876. #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
  877. {
  878. unsigned long long v;
  879. long long v2;
  880. static const unsigned long long max_value = 0xffffffffffffffffULL;
  881. char buf[32]; // definitely big enough for a unsigned long long
  882. CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
  883. CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
  884. sprintf(buf, LLU, max_value);
  885. CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
  886. assert(buf[strlen(buf)-1] != '9');
  887. buf[strlen(buf)-1]++;
  888. CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
  889. }
  890. #endif
  891. {
  892. float v;
  893. CHECK(RE("(.*)").FullMatch("100", &v));
  894. CHECK(RE("(.*)").FullMatch("-100.", &v));
  895. CHECK(RE("(.*)").FullMatch("1e23", &v));
  896. }
  897. {
  898. double v;
  899. CHECK(RE("(.*)").FullMatch("100", &v));
  900. CHECK(RE("(.*)").FullMatch("-100.", &v));
  901. CHECK(RE("(.*)").FullMatch("1e23", &v));
  902. }
  903. // Check that matching is fully anchored
  904. CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
  905. CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
  906. CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
  907. CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
  908. // Braces
  909. CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
  910. CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
  911. CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
  912. // Complicated RE
  913. CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
  914. CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
  915. CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
  916. CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
  917. // Check full-match handling (needs '$' tacked on internally)
  918. CHECK(RE("fo|foo").FullMatch("fo"));
  919. CHECK(RE("fo|foo").FullMatch("foo"));
  920. CHECK(RE("fo|foo$").FullMatch("fo"));
  921. CHECK(RE("fo|foo$").FullMatch("foo"));
  922. CHECK(RE("foo$").FullMatch("foo"));
  923. CHECK(!RE("foo\\$").FullMatch("foo$bar"));
  924. CHECK(!RE("fo|bar").FullMatch("fox"));
  925. // Uncomment the following if we change the handling of '$' to
  926. // prevent it from matching a trailing newline
  927. if (false) {
  928. // Check that we don't get bitten by pcre's special handling of a
  929. // '\n' at the end of the string matching '$'
  930. CHECK(!RE("foo$").PartialMatch("foo\n"));
  931. }
  932. // Number of args
  933. int a[16];
  934. CHECK(RE("").FullMatch(""));
  935. memset(a, 0, sizeof(0));
  936. CHECK(RE("(\\d){1}").FullMatch("1",
  937. &a[0]));
  938. CHECK_EQ(a[0], 1);
  939. memset(a, 0, sizeof(0));
  940. CHECK(RE("(\\d)(\\d)").FullMatch("12",
  941. &a[0], &a[1]));
  942. CHECK_EQ(a[0], 1);
  943. CHECK_EQ(a[1], 2);
  944. memset(a, 0, sizeof(0));
  945. CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
  946. &a[0], &a[1], &a[2]));
  947. CHECK_EQ(a[0], 1);
  948. CHECK_EQ(a[1], 2);
  949. CHECK_EQ(a[2], 3);
  950. memset(a, 0, sizeof(0));
  951. CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
  952. &a[0], &a[1], &a[2], &a[3]));
  953. CHECK_EQ(a[0], 1);
  954. CHECK_EQ(a[1], 2);
  955. CHECK_EQ(a[2], 3);
  956. CHECK_EQ(a[3], 4);
  957. memset(a, 0, sizeof(0));
  958. CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
  959. &a[0], &a[1], &a[2],
  960. &a[3], &a[4]));
  961. CHECK_EQ(a[0], 1);
  962. CHECK_EQ(a[1], 2);
  963. CHECK_EQ(a[2], 3);
  964. CHECK_EQ(a[3], 4);
  965. CHECK_EQ(a[4], 5);
  966. memset(a, 0, sizeof(0));
  967. CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
  968. &a[0], &a[1], &a[2],
  969. &a[3], &a[4], &a[5]));
  970. CHECK_EQ(a[0], 1);
  971. CHECK_EQ(a[1], 2);
  972. CHECK_EQ(a[2], 3);
  973. CHECK_EQ(a[3], 4);
  974. CHECK_EQ(a[4], 5);
  975. CHECK_EQ(a[5], 6);
  976. memset(a, 0, sizeof(0));
  977. CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
  978. &a[0], &a[1], &a[2], &a[3],
  979. &a[4], &a[5], &a[6]));
  980. CHECK_EQ(a[0], 1);
  981. CHECK_EQ(a[1], 2);
  982. CHECK_EQ(a[2], 3);
  983. CHECK_EQ(a[3], 4);
  984. CHECK_EQ(a[4], 5);
  985. CHECK_EQ(a[5], 6);
  986. CHECK_EQ(a[6], 7);
  987. memset(a, 0, sizeof(0));
  988. CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
  989. "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
  990. "1234567890123456",
  991. &a[0], &a[1], &a[2], &a[3],
  992. &a[4], &a[5], &a[6], &a[7],
  993. &a[8], &a[9], &a[10], &a[11],
  994. &a[12], &a[13], &a[14], &a[15]));
  995. CHECK_EQ(a[0], 1);
  996. CHECK_EQ(a[1], 2);
  997. CHECK_EQ(a[2], 3);
  998. CHECK_EQ(a[3], 4);
  999. CHECK_EQ(a[4], 5);
  1000. CHECK_EQ(a[5], 6);
  1001. CHECK_EQ(a[6], 7);
  1002. CHECK_EQ(a[7], 8);
  1003. CHECK_EQ(a[8], 9);
  1004. CHECK_EQ(a[9], 0);
  1005. CHECK_EQ(a[10], 1);
  1006. CHECK_EQ(a[11], 2);
  1007. CHECK_EQ(a[12], 3);
  1008. CHECK_EQ(a[13], 4);
  1009. CHECK_EQ(a[14], 5);
  1010. CHECK_EQ(a[15], 6);
  1011. /***** PartialMatch *****/
  1012. printf("Testing PartialMatch\n");
  1013. CHECK(RE("h.*o").PartialMatch("hello"));
  1014. CHECK(RE("h.*o").PartialMatch("othello"));
  1015. CHECK(RE("h.*o").PartialMatch("hello!"));
  1016. CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
  1017. /***** other tests *****/
  1018. RadixTests();
  1019. TestReplace();
  1020. TestExtract();
  1021. TestConsume();
  1022. TestFindAndConsume();
  1023. TestQuoteMetaAll();
  1024. TestMatchNumberPeculiarity();
  1025. // Check the pattern() accessor
  1026. {
  1027. const string kPattern = "http://([^/]+)/.*";
  1028. const RE re(kPattern);
  1029. CHECK_EQ(kPattern, re.pattern());
  1030. }
  1031. // Check RE error field.
  1032. {
  1033. RE re("foo");
  1034. CHECK(re.error().empty()); // Must have no error
  1035. }
  1036. #ifdef SUPPORT_UTF
  1037. // Check UTF-8 handling
  1038. {
  1039. printf("Testing UTF-8 handling\n");
  1040. // Three Japanese characters (nihongo)
  1041. const unsigned char utf8_string[] = {
  1042. 0xe6, 0x97, 0xa5, // 65e5
  1043. 0xe6, 0x9c, 0xac, // 627c
  1044. 0xe8, 0xaa, 0x9e, // 8a9e
  1045. 0
  1046. };
  1047. const unsigned char utf8_pattern[] = {
  1048. '.',
  1049. 0xe6, 0x9c, 0xac, // 627c
  1050. '.',
  1051. 0
  1052. };
  1053. // Both should match in either mode, bytes or UTF-8
  1054. RE re_test1(".........");
  1055. CHECK(re_test1.FullMatch(utf8_string));
  1056. RE re_test2("...", pcrecpp::UTF8());
  1057. CHECK(re_test2.FullMatch(utf8_string));
  1058. // PH added these tests for leading option settings
  1059. RE re_testZ0("(*CR)(*NO_START_OPT).........");
  1060. CHECK(re_testZ0.FullMatch(utf8_string));
  1061. #ifdef SUPPORT_UTF
  1062. RE re_testZ1("(*UTF8)...");
  1063. CHECK(re_testZ1.FullMatch(utf8_string));
  1064. RE re_testZ2("(*UTF)...");
  1065. CHECK(re_testZ2.FullMatch(utf8_string));
  1066. #ifdef SUPPORT_UCP
  1067. RE re_testZ3("(*UCP)(*UTF)...");
  1068. CHECK(re_testZ3.FullMatch(utf8_string));
  1069. RE re_testZ4("(*UCP)(*LIMIT_MATCH=1000)(*UTF)...");
  1070. CHECK(re_testZ4.FullMatch(utf8_string));
  1071. RE re_testZ5("(*UCP)(*LIMIT_MATCH=1000)(*ANY)(*UTF)...");
  1072. CHECK(re_testZ5.FullMatch(utf8_string));
  1073. #endif
  1074. #endif
  1075. // Check that '.' matches one byte or UTF-8 character
  1076. // according to the mode.
  1077. string ss;
  1078. RE re_test3("(.)");
  1079. CHECK(re_test3.PartialMatch(utf8_string, &ss));
  1080. CHECK_EQ(ss, string("\xe6"));
  1081. RE re_test4("(.)", pcrecpp::UTF8());
  1082. CHECK(re_test4.PartialMatch(utf8_string, &ss));
  1083. CHECK_EQ(ss, string("\xe6\x97\xa5"));
  1084. // Check that string matches itself in either mode
  1085. RE re_test5(utf8_string);
  1086. CHECK(re_test5.FullMatch(utf8_string));
  1087. RE re_test6(utf8_string, pcrecpp::UTF8());
  1088. CHECK(re_test6.FullMatch(utf8_string));
  1089. // Check that pattern matches string only in UTF8 mode
  1090. RE re_test7(utf8_pattern);
  1091. CHECK(!re_test7.FullMatch(utf8_string));
  1092. RE re_test8(utf8_pattern, pcrecpp::UTF8());
  1093. CHECK(re_test8.FullMatch(utf8_string));
  1094. }
  1095. // Check that ungreedy, UTF8 regular expressions don't match when they
  1096. // oughtn't -- see bug 82246.
  1097. {
  1098. // This code always worked.
  1099. const char* pattern = "\\w+X";
  1100. const string target = "a aX";
  1101. RE match_sentence(pattern);
  1102. RE match_sentence_re(pattern, pcrecpp::UTF8());
  1103. CHECK(!match_sentence.FullMatch(target));
  1104. CHECK(!match_sentence_re.FullMatch(target));
  1105. }
  1106. {
  1107. const char* pattern = "(?U)\\w+X";
  1108. const string target = "a aX";
  1109. RE match_sentence(pattern);
  1110. RE match_sentence_re(pattern, pcrecpp::UTF8());
  1111. CHECK(!match_sentence.FullMatch(target));
  1112. CHECK(!match_sentence_re.FullMatch(target));
  1113. }
  1114. #endif /* def SUPPORT_UTF */
  1115. printf("Testing error reporting\n");
  1116. { RE re("a\\1"); CHECK(!re.error().empty()); }
  1117. {
  1118. RE re("a[x");
  1119. CHECK(!re.error().empty());
  1120. }
  1121. {
  1122. RE re("a[z-a]");
  1123. CHECK(!re.error().empty());
  1124. }
  1125. {
  1126. RE re("a[[:foobar:]]");
  1127. CHECK(!re.error().empty());
  1128. }
  1129. {
  1130. RE re("a(b");
  1131. CHECK(!re.error().empty());
  1132. }
  1133. {
  1134. RE re("a\\");
  1135. CHECK(!re.error().empty());
  1136. }
  1137. // Test that recursion is stopped
  1138. TestRecursion();
  1139. // Test Options
  1140. if (getenv("VERBOSE_TEST") != NULL)
  1141. VERBOSE_TEST = true;
  1142. TestOptions();
  1143. // Test the constructors
  1144. TestConstructors();
  1145. // Done
  1146. printf("OK\n");
  1147. return 0;
  1148. }