Glob.cxx 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447
  1. /* Distributed under the OSI-approved BSD 3-Clause License. See accompanying
  2. file Copyright.txt or https://cmake.org/licensing#kwsys for details. */
  3. #include "kwsysPrivate.h"
  4. #include KWSYS_HEADER(Glob.hxx)
  5. #include KWSYS_HEADER(Configure.hxx)
  6. #include KWSYS_HEADER(RegularExpression.hxx)
  7. #include KWSYS_HEADER(SystemTools.hxx)
  8. #include KWSYS_HEADER(Directory.hxx)
  9. // Work-around CMake dependency scanning limitation. This must
  10. // duplicate the above list of headers.
  11. #if 0
  12. #include "Configure.hxx.in"
  13. #include "Directory.hxx.in"
  14. #include "Glob.hxx.in"
  15. #include "RegularExpression.hxx.in"
  16. #include "SystemTools.hxx.in"
  17. #endif
  18. #include <algorithm>
  19. #include <string>
  20. #include <vector>
  21. #include <ctype.h>
  22. #include <stdio.h>
  23. #include <string.h>
  24. namespace KWSYS_NAMESPACE {
  25. #if defined(_WIN32) || defined(__APPLE__) || defined(__CYGWIN__)
  26. // On Windows, Apple, and Cygwin there is no difference between lower and upper case in file names
  27. #define KWSYS_GLOB_CASE_INDEPENDENT
  28. #endif
  29. #if defined(_WIN32) || defined(__CYGWIN__)
  30. // Handle network paths
  31. #define KWSYS_GLOB_SUPPORT_NETWORK_PATHS
  32. #endif
  33. class GlobInternals
  34. {
  35. public:
  36. std::vector<std::string> Files;
  37. std::vector<kwsys::RegularExpression> Expressions;
  38. };
  39. Glob::Glob()
  40. {
  41. this->Internals = new GlobInternals;
  42. this->Recurse = false;
  43. this->Relative = "";
  44. this->RecurseThroughSymlinks = true;
  45. // RecurseThroughSymlinks is true by default for backwards compatibility,
  46. // not because it's a good idea...
  47. this->FollowedSymlinkCount = 0;
  48. // Keep separate variables for directory listing for back compatibility
  49. this->ListDirs = true;
  50. this->RecurseListDirs = false;
  51. }
  52. Glob::~Glob()
  53. {
  54. delete this->Internals;
  55. }
  56. std::vector<std::string>& Glob::GetFiles()
  57. {
  58. return this->Internals->Files;
  59. }
  60. std::string Glob::PatternToRegex(const std::string& pattern,
  61. bool require_whole_string, bool preserve_case)
  62. {
  63. // Incrementally build the regular expression from the pattern.
  64. std::string regex = require_whole_string ? "^" : "";
  65. std::string::const_iterator pattern_first = pattern.begin();
  66. std::string::const_iterator pattern_last = pattern.end();
  67. for (std::string::const_iterator i = pattern_first; i != pattern_last; ++i) {
  68. int c = *i;
  69. if (c == '*') {
  70. // A '*' (not between brackets) matches any string.
  71. // We modify this to not match slashes since the original glob
  72. // pattern documentation was meant for matching file name
  73. // components separated by slashes.
  74. regex += "[^/]*";
  75. } else if (c == '?') {
  76. // A '?' (not between brackets) matches any single character.
  77. // We modify this to not match slashes since the original glob
  78. // pattern documentation was meant for matching file name
  79. // components separated by slashes.
  80. regex += "[^/]";
  81. } else if (c == '[') {
  82. // Parse out the bracket expression. It begins just after the
  83. // opening character.
  84. std::string::const_iterator bracket_first = i + 1;
  85. std::string::const_iterator bracket_last = bracket_first;
  86. // The first character may be complementation '!' or '^'.
  87. if (bracket_last != pattern_last &&
  88. (*bracket_last == '!' || *bracket_last == '^')) {
  89. ++bracket_last;
  90. }
  91. // If the next character is a ']' it is included in the brackets
  92. // because the bracket string may not be empty.
  93. if (bracket_last != pattern_last && *bracket_last == ']') {
  94. ++bracket_last;
  95. }
  96. // Search for the closing ']'.
  97. while (bracket_last != pattern_last && *bracket_last != ']') {
  98. ++bracket_last;
  99. }
  100. // Check whether we have a complete bracket string.
  101. if (bracket_last == pattern_last) {
  102. // The bracket string did not end, so it was opened simply by
  103. // a '[' that is supposed to be matched literally.
  104. regex += "\\[";
  105. } else {
  106. // Convert the bracket string to its regex equivalent.
  107. std::string::const_iterator k = bracket_first;
  108. // Open the regex block.
  109. regex += "[";
  110. // A regex range complement uses '^' instead of '!'.
  111. if (k != bracket_last && *k == '!') {
  112. regex += "^";
  113. ++k;
  114. }
  115. // Convert the remaining characters.
  116. for (; k != bracket_last; ++k) {
  117. // Backslashes must be escaped.
  118. if (*k == '\\') {
  119. regex += "\\";
  120. }
  121. // Store this character.
  122. regex += *k;
  123. }
  124. // Close the regex block.
  125. regex += "]";
  126. // Jump to the end of the bracket string.
  127. i = bracket_last;
  128. }
  129. } else {
  130. // A single character matches itself.
  131. int ch = c;
  132. if (!(('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') ||
  133. ('0' <= ch && ch <= '9'))) {
  134. // Escape the non-alphanumeric character.
  135. regex += "\\";
  136. }
  137. #if defined(KWSYS_GLOB_CASE_INDEPENDENT)
  138. else {
  139. // On case-insensitive systems file names are converted to lower
  140. // case before matching.
  141. if (!preserve_case) {
  142. ch = tolower(ch);
  143. }
  144. }
  145. #endif
  146. (void)preserve_case;
  147. // Store the character.
  148. regex.append(1, static_cast<char>(ch));
  149. }
  150. }
  151. if (require_whole_string) {
  152. regex += "$";
  153. }
  154. return regex;
  155. }
  156. bool Glob::RecurseDirectory(std::string::size_type start,
  157. const std::string& dir, GlobMessages* messages)
  158. {
  159. kwsys::Directory d;
  160. if (!d.Load(dir)) {
  161. return true;
  162. }
  163. unsigned long cc;
  164. std::string realname;
  165. std::string fname;
  166. for (cc = 0; cc < d.GetNumberOfFiles(); cc++) {
  167. fname = d.GetFile(cc);
  168. if (fname == "." || fname == "..") {
  169. continue;
  170. }
  171. if (start == 0) {
  172. realname = dir + fname;
  173. } else {
  174. realname = dir + "/" + fname;
  175. }
  176. #if defined(KWSYS_GLOB_CASE_INDEPENDENT)
  177. // On Windows and Apple, no difference between lower and upper case
  178. fname = kwsys::SystemTools::LowerCase(fname);
  179. #endif
  180. bool isDir = kwsys::SystemTools::FileIsDirectory(realname);
  181. bool isSymLink = kwsys::SystemTools::FileIsSymlink(realname);
  182. if (isDir && (!isSymLink || this->RecurseThroughSymlinks)) {
  183. if (isSymLink) {
  184. ++this->FollowedSymlinkCount;
  185. std::string realPathErrorMessage;
  186. std::string canonicalPath(
  187. SystemTools::GetRealPath(dir, &realPathErrorMessage));
  188. if (!realPathErrorMessage.empty()) {
  189. if (messages) {
  190. messages->push_back(Message(
  191. Glob::error, "Canonical path generation from path '" + dir +
  192. "' failed! Reason: '" + realPathErrorMessage + "'"));
  193. }
  194. return false;
  195. }
  196. if (std::find(this->VisitedSymlinks.begin(),
  197. this->VisitedSymlinks.end(),
  198. canonicalPath) == this->VisitedSymlinks.end()) {
  199. if (this->RecurseListDirs) {
  200. // symlinks are treated as directories
  201. this->AddFile(this->Internals->Files, realname);
  202. }
  203. this->VisitedSymlinks.push_back(canonicalPath);
  204. if (!this->RecurseDirectory(start + 1, realname, messages)) {
  205. this->VisitedSymlinks.pop_back();
  206. return false;
  207. }
  208. this->VisitedSymlinks.pop_back();
  209. }
  210. // else we have already visited this symlink - prevent cyclic recursion
  211. else if (messages) {
  212. std::string message;
  213. for (std::vector<std::string>::const_iterator pathIt =
  214. std::find(this->VisitedSymlinks.begin(),
  215. this->VisitedSymlinks.end(), canonicalPath);
  216. pathIt != this->VisitedSymlinks.end(); ++pathIt) {
  217. message += *pathIt + "\n";
  218. }
  219. message += canonicalPath + "/" + fname;
  220. messages->push_back(Message(Glob::cyclicRecursion, message));
  221. }
  222. } else {
  223. if (this->RecurseListDirs) {
  224. this->AddFile(this->Internals->Files, realname);
  225. }
  226. if (!this->RecurseDirectory(start + 1, realname, messages)) {
  227. return false;
  228. }
  229. }
  230. } else {
  231. if (!this->Internals->Expressions.empty() &&
  232. this->Internals->Expressions.rbegin()->find(fname)) {
  233. this->AddFile(this->Internals->Files, realname);
  234. }
  235. }
  236. }
  237. return true;
  238. }
  239. void Glob::ProcessDirectory(std::string::size_type start,
  240. const std::string& dir, GlobMessages* messages)
  241. {
  242. // std::cout << "ProcessDirectory: " << dir << std::endl;
  243. bool last = (start == this->Internals->Expressions.size() - 1);
  244. if (last && this->Recurse) {
  245. this->RecurseDirectory(start, dir, messages);
  246. return;
  247. }
  248. if (start >= this->Internals->Expressions.size()) {
  249. return;
  250. }
  251. kwsys::Directory d;
  252. if (!d.Load(dir)) {
  253. return;
  254. }
  255. unsigned long cc;
  256. std::string realname;
  257. std::string fname;
  258. for (cc = 0; cc < d.GetNumberOfFiles(); cc++) {
  259. fname = d.GetFile(cc);
  260. if (fname == "." || fname == "..") {
  261. continue;
  262. }
  263. if (start == 0) {
  264. realname = dir + fname;
  265. } else {
  266. realname = dir + "/" + fname;
  267. }
  268. #if defined(KWSYS_GLOB_CASE_INDEPENDENT)
  269. // On case-insensitive file systems convert to lower case for matching.
  270. fname = kwsys::SystemTools::LowerCase(fname);
  271. #endif
  272. // std::cout << "Look at file: " << fname << std::endl;
  273. // std::cout << "Match: "
  274. // << this->Internals->TextExpressions[start].c_str() << std::endl;
  275. // std::cout << "Real name: " << realname << std::endl;
  276. if ((!last && !kwsys::SystemTools::FileIsDirectory(realname)) ||
  277. (!this->ListDirs && last &&
  278. kwsys::SystemTools::FileIsDirectory(realname))) {
  279. continue;
  280. }
  281. if (this->Internals->Expressions[start].find(fname)) {
  282. if (last) {
  283. this->AddFile(this->Internals->Files, realname);
  284. } else {
  285. this->ProcessDirectory(start + 1, realname, messages);
  286. }
  287. }
  288. }
  289. }
  290. bool Glob::FindFiles(const std::string& inexpr, GlobMessages* messages)
  291. {
  292. std::string cexpr;
  293. std::string::size_type cc;
  294. std::string expr = inexpr;
  295. this->Internals->Expressions.clear();
  296. this->Internals->Files.clear();
  297. if (!kwsys::SystemTools::FileIsFullPath(expr)) {
  298. expr = kwsys::SystemTools::GetCurrentWorkingDirectory();
  299. expr += "/" + inexpr;
  300. }
  301. std::string fexpr = expr;
  302. std::string::size_type skip = 0;
  303. std::string::size_type last_slash = 0;
  304. for (cc = 0; cc < expr.size(); cc++) {
  305. if (cc > 0 && expr[cc] == '/' && expr[cc - 1] != '\\') {
  306. last_slash = cc;
  307. }
  308. if (cc > 0 && (expr[cc] == '[' || expr[cc] == '?' || expr[cc] == '*') &&
  309. expr[cc - 1] != '\\') {
  310. break;
  311. }
  312. }
  313. if (last_slash > 0) {
  314. // std::cout << "I can skip: " << fexpr.substr(0, last_slash)
  315. // << std::endl;
  316. skip = last_slash;
  317. }
  318. if (skip == 0) {
  319. #if defined(KWSYS_GLOB_SUPPORT_NETWORK_PATHS)
  320. // Handle network paths
  321. if (expr[0] == '/' && expr[1] == '/') {
  322. int cnt = 0;
  323. for (cc = 2; cc < expr.size(); cc++) {
  324. if (expr[cc] == '/') {
  325. cnt++;
  326. if (cnt == 2) {
  327. break;
  328. }
  329. }
  330. }
  331. skip = int(cc + 1);
  332. } else
  333. #endif
  334. // Handle drive letters on Windows
  335. if (expr[1] == ':' && expr[0] != '/') {
  336. skip = 2;
  337. }
  338. }
  339. if (skip > 0) {
  340. expr = expr.substr(skip);
  341. }
  342. cexpr = "";
  343. for (cc = 0; cc < expr.size(); cc++) {
  344. int ch = expr[cc];
  345. if (ch == '/') {
  346. if (!cexpr.empty()) {
  347. this->AddExpression(cexpr);
  348. }
  349. cexpr = "";
  350. } else {
  351. cexpr.append(1, static_cast<char>(ch));
  352. }
  353. }
  354. if (!cexpr.empty()) {
  355. this->AddExpression(cexpr);
  356. }
  357. // Handle network paths
  358. if (skip > 0) {
  359. this->ProcessDirectory(0, fexpr.substr(0, skip) + "/", messages);
  360. } else {
  361. this->ProcessDirectory(0, "/", messages);
  362. }
  363. return true;
  364. }
  365. void Glob::AddExpression(const std::string& expr)
  366. {
  367. this->Internals->Expressions.push_back(
  368. kwsys::RegularExpression(this->PatternToRegex(expr)));
  369. }
  370. void Glob::SetRelative(const char* dir)
  371. {
  372. if (!dir) {
  373. this->Relative = "";
  374. return;
  375. }
  376. this->Relative = dir;
  377. }
  378. const char* Glob::GetRelative()
  379. {
  380. if (this->Relative.empty()) {
  381. return KWSYS_NULLPTR;
  382. }
  383. return this->Relative.c_str();
  384. }
  385. void Glob::AddFile(std::vector<std::string>& files, const std::string& file)
  386. {
  387. if (!this->Relative.empty()) {
  388. files.push_back(kwsys::SystemTools::RelativePath(this->Relative, file));
  389. } else {
  390. files.push_back(file);
  391. }
  392. }
  393. } // namespace KWSYS_NAMESPACE