archive_pathmatch.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459
  1. /*-
  2. * Copyright (c) 2003-2007 Tim Kientzle
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer
  10. * in this position and unchanged.
  11. * 2. Redistributions in binary form must reproduce the above copyright
  12. * notice, this list of conditions and the following disclaimer in the
  13. * documentation and/or other materials provided with the distribution.
  14. *
  15. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
  16. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  17. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  18. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
  19. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  20. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  21. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  22. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  23. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  24. * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. #include "archive_platform.h"
  27. __FBSDID("$FreeBSD$");
  28. #ifdef HAVE_STRING_H
  29. #include <string.h>
  30. #endif
  31. #ifdef HAVE_WCHAR_H
  32. #include <wchar.h>
  33. #endif
  34. #include "archive_pathmatch.h"
  35. /*
  36. * Check whether a character 'c' is matched by a list specification [...]:
  37. * * Leading '!' or '^' negates the class.
  38. * * <char>-<char> is a range of characters
  39. * * \<char> removes any special meaning for <char>
  40. *
  41. * Some interesting boundary cases:
  42. * a-d-e is one range (a-d) followed by two single characters - and e.
  43. * \a-\d is same as a-d
  44. * a\-d is three single characters: a, d, -
  45. * Trailing - is not special (so [a-] is two characters a and -).
  46. * Initial - is not special ([a-] is same as [-a] is same as [\\-a])
  47. * This function never sees a trailing \.
  48. * [] always fails
  49. * [!] always succeeds
  50. */
  51. static int
  52. pm_list(const char *start, const char *end, const char c, int flags)
  53. {
  54. const char *p = start;
  55. char rangeStart = '\0', nextRangeStart;
  56. int match = 1, nomatch = 0;
  57. /* This will be used soon... */
  58. (void)flags; /* UNUSED */
  59. /* If this is a negated class, return success for nomatch. */
  60. if ((*p == '!' || *p == '^') && p < end) {
  61. match = 0;
  62. nomatch = 1;
  63. ++p;
  64. }
  65. while (p < end) {
  66. nextRangeStart = '\0';
  67. switch (*p) {
  68. case '-':
  69. /* Trailing or initial '-' is not special. */
  70. if ((rangeStart == '\0') || (p == end - 1)) {
  71. if (*p == c)
  72. return (match);
  73. } else {
  74. char rangeEnd = *++p;
  75. if (rangeEnd == '\\')
  76. rangeEnd = *++p;
  77. if ((rangeStart <= c) && (c <= rangeEnd))
  78. return (match);
  79. }
  80. break;
  81. case '\\':
  82. ++p;
  83. /* Fall through */
  84. default:
  85. if (*p == c)
  86. return (match);
  87. nextRangeStart = *p; /* Possible start of range. */
  88. }
  89. rangeStart = nextRangeStart;
  90. ++p;
  91. }
  92. return (nomatch);
  93. }
  94. static int
  95. pm_list_w(const wchar_t *start, const wchar_t *end, const wchar_t c, int flags)
  96. {
  97. const wchar_t *p = start;
  98. wchar_t rangeStart = L'\0', nextRangeStart;
  99. int match = 1, nomatch = 0;
  100. /* This will be used soon... */
  101. (void)flags; /* UNUSED */
  102. /* If this is a negated class, return success for nomatch. */
  103. if ((*p == L'!' || *p == L'^') && p < end) {
  104. match = 0;
  105. nomatch = 1;
  106. ++p;
  107. }
  108. while (p < end) {
  109. nextRangeStart = L'\0';
  110. switch (*p) {
  111. case L'-':
  112. /* Trailing or initial '-' is not special. */
  113. if ((rangeStart == L'\0') || (p == end - 1)) {
  114. if (*p == c)
  115. return (match);
  116. } else {
  117. wchar_t rangeEnd = *++p;
  118. if (rangeEnd == L'\\')
  119. rangeEnd = *++p;
  120. if ((rangeStart <= c) && (c <= rangeEnd))
  121. return (match);
  122. }
  123. break;
  124. case L'\\':
  125. ++p;
  126. /* Fall through */
  127. default:
  128. if (*p == c)
  129. return (match);
  130. nextRangeStart = *p; /* Possible start of range. */
  131. }
  132. rangeStart = nextRangeStart;
  133. ++p;
  134. }
  135. return (nomatch);
  136. }
  137. /*
  138. * If s is pointing to "./", ".//", "./././" or the like, skip it.
  139. */
  140. static const char *
  141. pm_slashskip(const char *s) {
  142. while ((*s == '/')
  143. || (s[0] == '.' && s[1] == '/')
  144. || (s[0] == '.' && s[1] == '\0'))
  145. ++s;
  146. return (s);
  147. }
  148. static const wchar_t *
  149. pm_slashskip_w(const wchar_t *s) {
  150. while ((*s == L'/')
  151. || (s[0] == L'.' && s[1] == L'/')
  152. || (s[0] == L'.' && s[1] == L'\0'))
  153. ++s;
  154. return (s);
  155. }
  156. static int
  157. pm(const char *p, const char *s, int flags)
  158. {
  159. const char *end;
  160. /*
  161. * Ignore leading './', './/', '././', etc.
  162. */
  163. if (s[0] == '.' && s[1] == '/')
  164. s = pm_slashskip(s + 1);
  165. if (p[0] == '.' && p[1] == '/')
  166. p = pm_slashskip(p + 1);
  167. for (;;) {
  168. switch (*p) {
  169. case '\0':
  170. if (s[0] == '/') {
  171. if (flags & PATHMATCH_NO_ANCHOR_END)
  172. return (1);
  173. /* "dir" == "dir/" == "dir/." */
  174. s = pm_slashskip(s);
  175. }
  176. return (*s == '\0');
  177. case '?':
  178. /* ? always succeeds, unless we hit end of 's' */
  179. if (*s == '\0')
  180. return (0);
  181. break;
  182. case '*':
  183. /* "*" == "**" == "***" ... */
  184. while (*p == '*')
  185. ++p;
  186. /* Trailing '*' always succeeds. */
  187. if (*p == '\0')
  188. return (1);
  189. while (*s) {
  190. if (archive_pathmatch(p, s, flags))
  191. return (1);
  192. ++s;
  193. }
  194. return (0);
  195. case '[':
  196. /*
  197. * Find the end of the [...] character class,
  198. * ignoring \] that might occur within the class.
  199. */
  200. end = p + 1;
  201. while (*end != '\0' && *end != ']') {
  202. if (*end == '\\' && end[1] != '\0')
  203. ++end;
  204. ++end;
  205. }
  206. if (*end == ']') {
  207. /* We found [...], try to match it. */
  208. if (!pm_list(p + 1, end, *s, flags))
  209. return (0);
  210. p = end; /* Jump to trailing ']' char. */
  211. break;
  212. } else
  213. /* No final ']', so just match '['. */
  214. if (*p != *s)
  215. return (0);
  216. break;
  217. case '\\':
  218. /* Trailing '\\' matches itself. */
  219. if (p[1] == '\0') {
  220. if (*s != '\\')
  221. return (0);
  222. } else {
  223. ++p;
  224. if (*p != *s)
  225. return (0);
  226. }
  227. break;
  228. case '/':
  229. if (*s != '/' && *s != '\0')
  230. return (0);
  231. /* Note: pattern "/\./" won't match "/";
  232. * pm_slashskip() correctly stops at backslash. */
  233. p = pm_slashskip(p);
  234. s = pm_slashskip(s);
  235. if (*p == '\0' && (flags & PATHMATCH_NO_ANCHOR_END))
  236. return (1);
  237. --p; /* Counteract the increment below. */
  238. --s;
  239. break;
  240. case '$':
  241. /* '$' is special only at end of pattern and only
  242. * if PATHMATCH_NO_ANCHOR_END is specified. */
  243. if (p[1] == '\0' && (flags & PATHMATCH_NO_ANCHOR_END)){
  244. /* "dir" == "dir/" == "dir/." */
  245. return (*pm_slashskip(s) == '\0');
  246. }
  247. /* Otherwise, '$' is not special. */
  248. /* FALL THROUGH */
  249. default:
  250. if (*p != *s)
  251. return (0);
  252. break;
  253. }
  254. ++p;
  255. ++s;
  256. }
  257. }
  258. static int
  259. pm_w(const wchar_t *p, const wchar_t *s, int flags)
  260. {
  261. const wchar_t *end;
  262. /*
  263. * Ignore leading './', './/', '././', etc.
  264. */
  265. if (s[0] == L'.' && s[1] == L'/')
  266. s = pm_slashskip_w(s + 1);
  267. if (p[0] == L'.' && p[1] == L'/')
  268. p = pm_slashskip_w(p + 1);
  269. for (;;) {
  270. switch (*p) {
  271. case L'\0':
  272. if (s[0] == L'/') {
  273. if (flags & PATHMATCH_NO_ANCHOR_END)
  274. return (1);
  275. /* "dir" == "dir/" == "dir/." */
  276. s = pm_slashskip_w(s);
  277. }
  278. return (*s == L'\0');
  279. case L'?':
  280. /* ? always succeeds, unless we hit end of 's' */
  281. if (*s == L'\0')
  282. return (0);
  283. break;
  284. case L'*':
  285. /* "*" == "**" == "***" ... */
  286. while (*p == L'*')
  287. ++p;
  288. /* Trailing '*' always succeeds. */
  289. if (*p == L'\0')
  290. return (1);
  291. while (*s) {
  292. if (archive_pathmatch_w(p, s, flags))
  293. return (1);
  294. ++s;
  295. }
  296. return (0);
  297. case L'[':
  298. /*
  299. * Find the end of the [...] character class,
  300. * ignoring \] that might occur within the class.
  301. */
  302. end = p + 1;
  303. while (*end != L'\0' && *end != L']') {
  304. if (*end == L'\\' && end[1] != L'\0')
  305. ++end;
  306. ++end;
  307. }
  308. if (*end == L']') {
  309. /* We found [...], try to match it. */
  310. if (!pm_list_w(p + 1, end, *s, flags))
  311. return (0);
  312. p = end; /* Jump to trailing ']' char. */
  313. break;
  314. } else
  315. /* No final ']', so just match '['. */
  316. if (*p != *s)
  317. return (0);
  318. break;
  319. case L'\\':
  320. /* Trailing '\\' matches itself. */
  321. if (p[1] == L'\0') {
  322. if (*s != L'\\')
  323. return (0);
  324. } else {
  325. ++p;
  326. if (*p != *s)
  327. return (0);
  328. }
  329. break;
  330. case L'/':
  331. if (*s != L'/' && *s != L'\0')
  332. return (0);
  333. /* Note: pattern "/\./" won't match "/";
  334. * pm_slashskip() correctly stops at backslash. */
  335. p = pm_slashskip_w(p);
  336. s = pm_slashskip_w(s);
  337. if (*p == L'\0' && (flags & PATHMATCH_NO_ANCHOR_END))
  338. return (1);
  339. --p; /* Counteract the increment below. */
  340. --s;
  341. break;
  342. case L'$':
  343. /* '$' is special only at end of pattern and only
  344. * if PATHMATCH_NO_ANCHOR_END is specified. */
  345. if (p[1] == L'\0' && (flags & PATHMATCH_NO_ANCHOR_END)){
  346. /* "dir" == "dir/" == "dir/." */
  347. return (*pm_slashskip_w(s) == L'\0');
  348. }
  349. /* Otherwise, '$' is not special. */
  350. /* FALL THROUGH */
  351. default:
  352. if (*p != *s)
  353. return (0);
  354. break;
  355. }
  356. ++p;
  357. ++s;
  358. }
  359. }
  360. /* Main entry point. */
  361. int
  362. __archive_pathmatch(const char *p, const char *s, int flags)
  363. {
  364. /* Empty pattern only matches the empty string. */
  365. if (p == NULL || *p == '\0')
  366. return (s == NULL || *s == '\0');
  367. /* Leading '^' anchors the start of the pattern. */
  368. if (*p == '^') {
  369. ++p;
  370. flags &= ~PATHMATCH_NO_ANCHOR_START;
  371. }
  372. if (*p == '/' && *s != '/')
  373. return (0);
  374. /* Certain patterns anchor implicitly. */
  375. if (*p == '*' || *p == '/') {
  376. while (*p == '/')
  377. ++p;
  378. while (*s == '/')
  379. ++s;
  380. return (pm(p, s, flags));
  381. }
  382. /* If start is unanchored, try to match start of each path element. */
  383. if (flags & PATHMATCH_NO_ANCHOR_START) {
  384. for ( ; s != NULL; s = strchr(s, '/')) {
  385. if (*s == '/')
  386. s++;
  387. if (pm(p, s, flags))
  388. return (1);
  389. }
  390. return (0);
  391. }
  392. /* Default: Match from beginning. */
  393. return (pm(p, s, flags));
  394. }
  395. int
  396. __archive_pathmatch_w(const wchar_t *p, const wchar_t *s, int flags)
  397. {
  398. /* Empty pattern only matches the empty string. */
  399. if (p == NULL || *p == L'\0')
  400. return (s == NULL || *s == L'\0');
  401. /* Leading '^' anchors the start of the pattern. */
  402. if (*p == L'^') {
  403. ++p;
  404. flags &= ~PATHMATCH_NO_ANCHOR_START;
  405. }
  406. if (*p == L'/' && *s != L'/')
  407. return (0);
  408. /* Certain patterns anchor implicitly. */
  409. if (*p == L'*' || *p == L'/') {
  410. while (*p == L'/')
  411. ++p;
  412. while (*s == L'/')
  413. ++s;
  414. return (pm_w(p, s, flags));
  415. }
  416. /* If start is unanchored, try to match start of each path element. */
  417. if (flags & PATHMATCH_NO_ANCHOR_START) {
  418. for ( ; s != NULL; s = wcschr(s, L'/')) {
  419. if (*s == L'/')
  420. s++;
  421. if (pm_w(p, s, flags))
  422. return (1);
  423. }
  424. return (0);
  425. }
  426. /* Default: Match from beginning. */
  427. return (pm_w(p, s, flags));
  428. }