Find.pm 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151
  1. package File::Find;
  2. use 5.006;
  3. use strict;
  4. use warnings;
  5. use warnings::register;
  6. our $VERSION = '1.29';
  7. require Exporter;
  8. require Cwd;
  9. our @ISA = qw(Exporter);
  10. our @EXPORT = qw(find finddepth);
  11. use strict;
  12. my $Is_VMS;
  13. my $Is_Win32;
  14. require File::Basename;
  15. require File::Spec;
  16. # Should ideally be my() not our() but local() currently
  17. # refuses to operate on lexicals
  18. our %SLnkSeen;
  19. our ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
  20. $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat,
  21. $pre_process, $post_process, $dangling_symlinks);
  22. sub contract_name {
  23. my ($cdir,$fn) = @_;
  24. return substr($cdir,0,rindex($cdir,'/')) if $fn eq $File::Find::current_dir;
  25. $cdir = substr($cdir,0,rindex($cdir,'/')+1);
  26. $fn =~ s|^\./||;
  27. my $abs_name= $cdir . $fn;
  28. if (substr($fn,0,3) eq '../') {
  29. 1 while $abs_name =~ s!/[^/]*/\.\./+!/!;
  30. }
  31. return $abs_name;
  32. }
  33. sub PathCombine($$) {
  34. my ($Base,$Name) = @_;
  35. my $AbsName;
  36. if (substr($Name,0,1) eq '/') {
  37. $AbsName= $Name;
  38. }
  39. else {
  40. $AbsName= contract_name($Base,$Name);
  41. }
  42. # (simple) check for recursion
  43. my $newlen= length($AbsName);
  44. if ($newlen <= length($Base)) {
  45. if (($newlen == length($Base) || substr($Base,$newlen,1) eq '/')
  46. && $AbsName eq substr($Base,0,$newlen))
  47. {
  48. return undef;
  49. }
  50. }
  51. return $AbsName;
  52. }
  53. sub Follow_SymLink($) {
  54. my ($AbsName) = @_;
  55. my ($NewName,$DEV, $INO);
  56. ($DEV, $INO)= lstat $AbsName;
  57. while (-l _) {
  58. if ($SLnkSeen{$DEV, $INO}++) {
  59. if ($follow_skip < 2) {
  60. die "$AbsName is encountered a second time";
  61. }
  62. else {
  63. return undef;
  64. }
  65. }
  66. $NewName= PathCombine($AbsName, readlink($AbsName));
  67. unless(defined $NewName) {
  68. if ($follow_skip < 2) {
  69. die "$AbsName is a recursive symbolic link";
  70. }
  71. else {
  72. return undef;
  73. }
  74. }
  75. else {
  76. $AbsName= $NewName;
  77. }
  78. ($DEV, $INO) = lstat($AbsName);
  79. return undef unless defined $DEV; # dangling symbolic link
  80. }
  81. if ($full_check && defined $DEV && $SLnkSeen{$DEV, $INO}++) {
  82. if ( ($follow_skip < 1) || ((-d _) && ($follow_skip < 2)) ) {
  83. die "$AbsName encountered a second time";
  84. }
  85. else {
  86. return undef;
  87. }
  88. }
  89. return $AbsName;
  90. }
  91. our($dir, $name, $fullname, $prune);
  92. sub _find_dir_symlnk($$$);
  93. sub _find_dir($$$);
  94. # check whether or not a scalar variable is tainted
  95. # (code straight from the Camel, 3rd ed., page 561)
  96. sub is_tainted_pp {
  97. my $arg = shift;
  98. my $nada = substr($arg, 0, 0); # zero-length
  99. local $@;
  100. eval { eval "# $nada" };
  101. return length($@) != 0;
  102. }
  103. sub _find_opt {
  104. my $wanted = shift;
  105. die "invalid top directory" unless defined $_[0];
  106. # This function must local()ize everything because callbacks may
  107. # call find() or finddepth()
  108. local %SLnkSeen;
  109. local ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
  110. $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat,
  111. $pre_process, $post_process, $dangling_symlinks);
  112. local($dir, $name, $fullname, $prune);
  113. local *_ = \my $a;
  114. my $cwd = $wanted->{bydepth} ? Cwd::fastcwd() : Cwd::getcwd();
  115. if ($Is_VMS) {
  116. # VMS returns this by default in VMS format which just doesn't
  117. # work for the rest of this module.
  118. $cwd = VMS::Filespec::unixpath($cwd);
  119. # Apparently this is not expected to have a trailing space.
  120. # To attempt to make VMS/UNIX conversions mostly reversible,
  121. # a trailing slash is needed. The run-time functions ignore the
  122. # resulting double slash, but it causes the perl tests to fail.
  123. $cwd =~ s#/\z##;
  124. # This comes up in upper case now, but should be lower.
  125. # In the future this could be exact case, no need to change.
  126. }
  127. my $cwd_untainted = $cwd;
  128. my $check_t_cwd = 1;
  129. $wanted_callback = $wanted->{wanted};
  130. $bydepth = $wanted->{bydepth};
  131. $pre_process = $wanted->{preprocess};
  132. $post_process = $wanted->{postprocess};
  133. $no_chdir = $wanted->{no_chdir};
  134. $full_check = $Is_Win32 ? 0 : $wanted->{follow};
  135. $follow = $Is_Win32 ? 0 :
  136. $full_check || $wanted->{follow_fast};
  137. $follow_skip = $wanted->{follow_skip};
  138. $untaint = $wanted->{untaint};
  139. $untaint_pat = $wanted->{untaint_pattern};
  140. $untaint_skip = $wanted->{untaint_skip};
  141. $dangling_symlinks = $wanted->{dangling_symlinks};
  142. # for compatibility reasons (find.pl, find2perl)
  143. local our ($topdir, $topdev, $topino, $topmode, $topnlink);
  144. # a symbolic link to a directory doesn't increase the link count
  145. $avoid_nlink = $follow || $File::Find::dont_use_nlink;
  146. my ($abs_dir, $Is_Dir);
  147. Proc_Top_Item:
  148. foreach my $TOP (@_) {
  149. my $top_item = $TOP;
  150. $top_item = VMS::Filespec::unixify($top_item) if $Is_VMS;
  151. ($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item;
  152. if ($Is_Win32) {
  153. $top_item =~ s|[/\\]\z||
  154. unless $top_item =~ m{^(?:\w:)?[/\\]$};
  155. }
  156. else {
  157. $top_item =~ s|/\z|| unless $top_item eq '/';
  158. }
  159. $Is_Dir= 0;
  160. if ($follow) {
  161. if (substr($top_item,0,1) eq '/') {
  162. $abs_dir = $top_item;
  163. }
  164. elsif ($top_item eq $File::Find::current_dir) {
  165. $abs_dir = $cwd;
  166. }
  167. else { # care about any ../
  168. $top_item =~ s/\.dir\z//i if $Is_VMS;
  169. $abs_dir = contract_name("$cwd/",$top_item);
  170. }
  171. $abs_dir= Follow_SymLink($abs_dir);
  172. unless (defined $abs_dir) {
  173. if ($dangling_symlinks) {
  174. if (ref $dangling_symlinks eq 'CODE') {
  175. $dangling_symlinks->($top_item, $cwd);
  176. } else {
  177. warnings::warnif "$top_item is a dangling symbolic link\n";
  178. }
  179. }
  180. next Proc_Top_Item;
  181. }
  182. if (-d _) {
  183. $top_item =~ s/\.dir\z//i if $Is_VMS;
  184. _find_dir_symlnk($wanted, $abs_dir, $top_item);
  185. $Is_Dir= 1;
  186. }
  187. }
  188. else { # no follow
  189. $topdir = $top_item;
  190. unless (defined $topnlink) {
  191. warnings::warnif "Can't stat $top_item: $!\n";
  192. next Proc_Top_Item;
  193. }
  194. if (-d _) {
  195. $top_item =~ s/\.dir\z//i if $Is_VMS;
  196. _find_dir($wanted, $top_item, $topnlink);
  197. $Is_Dir= 1;
  198. }
  199. else {
  200. $abs_dir= $top_item;
  201. }
  202. }
  203. unless ($Is_Dir) {
  204. unless (($_,$dir) = File::Basename::fileparse($abs_dir)) {
  205. ($dir,$_) = ('./', $top_item);
  206. }
  207. $abs_dir = $dir;
  208. if (( $untaint ) && (is_tainted($dir) )) {
  209. ( $abs_dir ) = $dir =~ m|$untaint_pat|;
  210. unless (defined $abs_dir) {
  211. if ($untaint_skip == 0) {
  212. die "directory $dir is still tainted";
  213. }
  214. else {
  215. next Proc_Top_Item;
  216. }
  217. }
  218. }
  219. unless ($no_chdir || chdir $abs_dir) {
  220. warnings::warnif "Couldn't chdir $abs_dir: $!\n";
  221. next Proc_Top_Item;
  222. }
  223. $name = $abs_dir . $_; # $File::Find::name
  224. $_ = $name if $no_chdir;
  225. { $wanted_callback->() }; # protect against wild "next"
  226. }
  227. unless ( $no_chdir ) {
  228. if ( ($check_t_cwd) && (($untaint) && (is_tainted($cwd) )) ) {
  229. ( $cwd_untainted ) = $cwd =~ m|$untaint_pat|;
  230. unless (defined $cwd_untainted) {
  231. die "insecure cwd in find(depth)";
  232. }
  233. $check_t_cwd = 0;
  234. }
  235. unless (chdir $cwd_untainted) {
  236. die "Can't cd to $cwd: $!\n";
  237. }
  238. }
  239. }
  240. }
  241. # API:
  242. # $wanted
  243. # $p_dir : "parent directory"
  244. # $nlink : what came back from the stat
  245. # preconditions:
  246. # chdir (if not no_chdir) to dir
  247. sub _find_dir($$$) {
  248. my ($wanted, $p_dir, $nlink) = @_;
  249. my ($CdLvl,$Level) = (0,0);
  250. my @Stack;
  251. my @filenames;
  252. my ($subcount,$sub_nlink);
  253. my $SE= [];
  254. my $dir_name= $p_dir;
  255. my $dir_pref;
  256. my $dir_rel = $File::Find::current_dir;
  257. my $tainted = 0;
  258. my $no_nlink;
  259. if ($Is_Win32) {
  260. $dir_pref
  261. = ($p_dir =~ m{^(?:\w:[/\\]?|[/\\])$} ? $p_dir : "$p_dir/" );
  262. } elsif ($Is_VMS) {
  263. # VMS is returning trailing .dir on directories
  264. # and trailing . on files and symbolic links
  265. # in UNIX syntax.
  266. #
  267. $p_dir =~ s/\.(dir)?$//i unless $p_dir eq '.';
  268. $dir_pref = ($p_dir =~ m/[\]>]+$/ ? $p_dir : "$p_dir/" );
  269. }
  270. else {
  271. $dir_pref= ( $p_dir eq '/' ? '/' : "$p_dir/" );
  272. }
  273. local ($dir, $name, $prune, *DIR);
  274. unless ( $no_chdir || ($p_dir eq $File::Find::current_dir)) {
  275. my $udir = $p_dir;
  276. if (( $untaint ) && (is_tainted($p_dir) )) {
  277. ( $udir ) = $p_dir =~ m|$untaint_pat|;
  278. unless (defined $udir) {
  279. if ($untaint_skip == 0) {
  280. die "directory $p_dir is still tainted";
  281. }
  282. else {
  283. return;
  284. }
  285. }
  286. }
  287. unless (chdir ($Is_VMS && $udir !~ /[\/\[<]+/ ? "./$udir" : $udir)) {
  288. warnings::warnif "Can't cd to $udir: $!\n";
  289. return;
  290. }
  291. }
  292. # push the starting directory
  293. push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth;
  294. while (defined $SE) {
  295. unless ($bydepth) {
  296. $dir= $p_dir; # $File::Find::dir
  297. $name= $dir_name; # $File::Find::name
  298. $_= ($no_chdir ? $dir_name : $dir_rel ); # $_
  299. # prune may happen here
  300. $prune= 0;
  301. { $wanted_callback->() }; # protect against wild "next"
  302. next if $prune;
  303. }
  304. # change to that directory
  305. unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
  306. my $udir= $dir_rel;
  307. if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_rel) )) ) {
  308. ( $udir ) = $dir_rel =~ m|$untaint_pat|;
  309. unless (defined $udir) {
  310. if ($untaint_skip == 0) {
  311. die "directory (" . ($p_dir ne '/' ? $p_dir : '') . "/) $dir_rel is still tainted";
  312. } else { # $untaint_skip == 1
  313. next;
  314. }
  315. }
  316. }
  317. unless (chdir ($Is_VMS && $udir !~ /[\/\[<]+/ ? "./$udir" : $udir)) {
  318. warnings::warnif "Can't cd to (" .
  319. ($p_dir ne '/' ? $p_dir : '') . "/) $udir: $!\n";
  320. next;
  321. }
  322. $CdLvl++;
  323. }
  324. $dir= $dir_name; # $File::Find::dir
  325. # Get the list of files in the current directory.
  326. unless (opendir DIR, ($no_chdir ? $dir_name : $File::Find::current_dir)) {
  327. warnings::warnif "Can't opendir($dir_name): $!\n";
  328. next;
  329. }
  330. @filenames = readdir DIR;
  331. closedir(DIR);
  332. @filenames = $pre_process->(@filenames) if $pre_process;
  333. push @Stack,[$CdLvl,$dir_name,"",-2] if $post_process;
  334. # default: use whatever was specified
  335. # (if $nlink >= 2, and $avoid_nlink == 0, this will switch back)
  336. $no_nlink = $avoid_nlink;
  337. # if dir has wrong nlink count, force switch to slower stat method
  338. $no_nlink = 1 if ($nlink < 2);
  339. if ($nlink == 2 && !$no_nlink) {
  340. # This dir has no subdirectories.
  341. for my $FN (@filenames) {
  342. if ($Is_VMS) {
  343. # Big hammer here - Compensate for VMS trailing . and .dir
  344. # No win situation until this is changed, but this
  345. # will handle the majority of the cases with breaking the fewest
  346. $FN =~ s/\.dir\z//i;
  347. $FN =~ s#\.$## if ($FN ne '.');
  348. }
  349. next if $FN =~ $File::Find::skip_pattern;
  350. $name = $dir_pref . $FN; # $File::Find::name
  351. $_ = ($no_chdir ? $name : $FN); # $_
  352. { $wanted_callback->() }; # protect against wild "next"
  353. }
  354. }
  355. else {
  356. # This dir has subdirectories.
  357. $subcount = $nlink - 2;
  358. # HACK: insert directories at this position. so as to preserve
  359. # the user pre-processed ordering of files.
  360. # EG: directory traversal is in user sorted order, not at random.
  361. my $stack_top = @Stack;
  362. for my $FN (@filenames) {
  363. next if $FN =~ $File::Find::skip_pattern;
  364. if ($subcount > 0 || $no_nlink) {
  365. # Seen all the subdirs?
  366. # check for directoriness.
  367. # stat is faster for a file in the current directory
  368. $sub_nlink = (lstat ($no_chdir ? $dir_pref . $FN : $FN))[3];
  369. if (-d _) {
  370. --$subcount;
  371. $FN =~ s/\.dir\z//i if $Is_VMS;
  372. # HACK: replace push to preserve dir traversal order
  373. #push @Stack,[$CdLvl,$dir_name,$FN,$sub_nlink];
  374. splice @Stack, $stack_top, 0,
  375. [$CdLvl,$dir_name,$FN,$sub_nlink];
  376. }
  377. else {
  378. $name = $dir_pref . $FN; # $File::Find::name
  379. $_= ($no_chdir ? $name : $FN); # $_
  380. { $wanted_callback->() }; # protect against wild "next"
  381. }
  382. }
  383. else {
  384. $name = $dir_pref . $FN; # $File::Find::name
  385. $_= ($no_chdir ? $name : $FN); # $_
  386. { $wanted_callback->() }; # protect against wild "next"
  387. }
  388. }
  389. }
  390. }
  391. continue {
  392. while ( defined ($SE = pop @Stack) ) {
  393. ($Level, $p_dir, $dir_rel, $nlink) = @$SE;
  394. if ($CdLvl > $Level && !$no_chdir) {
  395. my $tmp;
  396. if ($Is_VMS) {
  397. $tmp = '[' . ('-' x ($CdLvl-$Level)) . ']';
  398. }
  399. else {
  400. $tmp = join('/',('..') x ($CdLvl-$Level));
  401. }
  402. die "Can't cd to $tmp from $dir_name: $!"
  403. unless chdir ($tmp);
  404. $CdLvl = $Level;
  405. }
  406. if ($Is_Win32) {
  407. $dir_name = ($p_dir =~ m{^(?:\w:[/\\]?|[/\\])$}
  408. ? "$p_dir$dir_rel" : "$p_dir/$dir_rel");
  409. $dir_pref = "$dir_name/";
  410. }
  411. elsif ($^O eq 'VMS') {
  412. if ($p_dir =~ m/[\]>]+$/) {
  413. $dir_name = $p_dir;
  414. $dir_name =~ s/([\]>]+)$/.$dir_rel$1/;
  415. $dir_pref = $dir_name;
  416. }
  417. else {
  418. $dir_name = "$p_dir/$dir_rel";
  419. $dir_pref = "$dir_name/";
  420. }
  421. }
  422. else {
  423. $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
  424. $dir_pref = "$dir_name/";
  425. }
  426. if ( $nlink == -2 ) {
  427. $name = $dir = $p_dir; # $File::Find::name / dir
  428. $_ = $File::Find::current_dir;
  429. $post_process->(); # End-of-directory processing
  430. }
  431. elsif ( $nlink < 0 ) { # must be finddepth, report dirname now
  432. $name = $dir_name;
  433. if ( substr($name,-2) eq '/.' ) {
  434. substr($name, length($name) == 2 ? -1 : -2) = '';
  435. }
  436. $dir = $p_dir;
  437. $_ = ($no_chdir ? $dir_name : $dir_rel );
  438. if ( substr($_,-2) eq '/.' ) {
  439. substr($_, length($_) == 2 ? -1 : -2) = '';
  440. }
  441. { $wanted_callback->() }; # protect against wild "next"
  442. }
  443. else {
  444. push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth;
  445. last;
  446. }
  447. }
  448. }
  449. }
  450. # API:
  451. # $wanted
  452. # $dir_loc : absolute location of a dir
  453. # $p_dir : "parent directory"
  454. # preconditions:
  455. # chdir (if not no_chdir) to dir
  456. sub _find_dir_symlnk($$$) {
  457. my ($wanted, $dir_loc, $p_dir) = @_; # $dir_loc is the absolute directory
  458. my @Stack;
  459. my @filenames;
  460. my $new_loc;
  461. my $updir_loc = $dir_loc; # untainted parent directory
  462. my $SE = [];
  463. my $dir_name = $p_dir;
  464. my $dir_pref;
  465. my $loc_pref;
  466. my $dir_rel = $File::Find::current_dir;
  467. my $byd_flag; # flag for pending stack entry if $bydepth
  468. my $tainted = 0;
  469. my $ok = 1;
  470. $dir_pref = ( $p_dir eq '/' ? '/' : "$p_dir/" );
  471. $loc_pref = ( $dir_loc eq '/' ? '/' : "$dir_loc/" );
  472. local ($dir, $name, $fullname, $prune, *DIR);
  473. unless ($no_chdir) {
  474. # untaint the topdir
  475. if (( $untaint ) && (is_tainted($dir_loc) )) {
  476. ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|; # parent dir, now untainted
  477. # once untainted, $updir_loc is pushed on the stack (as parent directory);
  478. # hence, we don't need to untaint the parent directory every time we chdir
  479. # to it later
  480. unless (defined $updir_loc) {
  481. if ($untaint_skip == 0) {
  482. die "directory $dir_loc is still tainted";
  483. }
  484. else {
  485. return;
  486. }
  487. }
  488. }
  489. $ok = chdir($updir_loc) unless ($p_dir eq $File::Find::current_dir);
  490. unless ($ok) {
  491. warnings::warnif "Can't cd to $updir_loc: $!\n";
  492. return;
  493. }
  494. }
  495. push @Stack,[$dir_loc,$updir_loc,$p_dir,$dir_rel,-1] if $bydepth;
  496. while (defined $SE) {
  497. unless ($bydepth) {
  498. # change (back) to parent directory (always untainted)
  499. unless ($no_chdir) {
  500. unless (chdir $updir_loc) {
  501. warnings::warnif "Can't cd to $updir_loc: $!\n";
  502. next;
  503. }
  504. }
  505. $dir= $p_dir; # $File::Find::dir
  506. $name= $dir_name; # $File::Find::name
  507. $_= ($no_chdir ? $dir_name : $dir_rel ); # $_
  508. $fullname= $dir_loc; # $File::Find::fullname
  509. # prune may happen here
  510. $prune= 0;
  511. lstat($_); # make sure file tests with '_' work
  512. { $wanted_callback->() }; # protect against wild "next"
  513. next if $prune;
  514. }
  515. # change to that directory
  516. unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
  517. $updir_loc = $dir_loc;
  518. if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_loc) )) ) {
  519. # untaint $dir_loc, what will be pushed on the stack as (untainted) parent dir
  520. ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|;
  521. unless (defined $updir_loc) {
  522. if ($untaint_skip == 0) {
  523. die "directory $dir_loc is still tainted";
  524. }
  525. else {
  526. next;
  527. }
  528. }
  529. }
  530. unless (chdir $updir_loc) {
  531. warnings::warnif "Can't cd to $updir_loc: $!\n";
  532. next;
  533. }
  534. }
  535. $dir = $dir_name; # $File::Find::dir
  536. # Get the list of files in the current directory.
  537. unless (opendir DIR, ($no_chdir ? $dir_loc : $File::Find::current_dir)) {
  538. warnings::warnif "Can't opendir($dir_loc): $!\n";
  539. next;
  540. }
  541. @filenames = readdir DIR;
  542. closedir(DIR);
  543. for my $FN (@filenames) {
  544. if ($Is_VMS) {
  545. # Big hammer here - Compensate for VMS trailing . and .dir
  546. # No win situation until this is changed, but this
  547. # will handle the majority of the cases with breaking the fewest.
  548. $FN =~ s/\.dir\z//i;
  549. $FN =~ s#\.$## if ($FN ne '.');
  550. }
  551. next if $FN =~ $File::Find::skip_pattern;
  552. # follow symbolic links / do an lstat
  553. $new_loc = Follow_SymLink($loc_pref.$FN);
  554. # ignore if invalid symlink
  555. unless (defined $new_loc) {
  556. if (!defined -l _ && $dangling_symlinks) {
  557. $fullname = undef;
  558. if (ref $dangling_symlinks eq 'CODE') {
  559. $dangling_symlinks->($FN, $dir_pref);
  560. } else {
  561. warnings::warnif "$dir_pref$FN is a dangling symbolic link\n";
  562. }
  563. }
  564. else {
  565. $fullname = $loc_pref . $FN;
  566. }
  567. $name = $dir_pref . $FN;
  568. $_ = ($no_chdir ? $name : $FN);
  569. { $wanted_callback->() };
  570. next;
  571. }
  572. if (-d _) {
  573. if ($Is_VMS) {
  574. $FN =~ s/\.dir\z//i;
  575. $FN =~ s#\.$## if ($FN ne '.');
  576. $new_loc =~ s/\.dir\z//i;
  577. $new_loc =~ s#\.$## if ($new_loc ne '.');
  578. }
  579. push @Stack,[$new_loc,$updir_loc,$dir_name,$FN,1];
  580. }
  581. else {
  582. $fullname = $new_loc; # $File::Find::fullname
  583. $name = $dir_pref . $FN; # $File::Find::name
  584. $_ = ($no_chdir ? $name : $FN); # $_
  585. { $wanted_callback->() }; # protect against wild "next"
  586. }
  587. }
  588. }
  589. continue {
  590. while (defined($SE = pop @Stack)) {
  591. ($dir_loc, $updir_loc, $p_dir, $dir_rel, $byd_flag) = @$SE;
  592. $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
  593. $dir_pref = "$dir_name/";
  594. $loc_pref = "$dir_loc/";
  595. if ( $byd_flag < 0 ) { # must be finddepth, report dirname now
  596. unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
  597. unless (chdir $updir_loc) { # $updir_loc (parent dir) is always untainted
  598. warnings::warnif "Can't cd to $updir_loc: $!\n";
  599. next;
  600. }
  601. }
  602. $fullname = $dir_loc; # $File::Find::fullname
  603. $name = $dir_name; # $File::Find::name
  604. if ( substr($name,-2) eq '/.' ) {
  605. substr($name, length($name) == 2 ? -1 : -2) = ''; # $File::Find::name
  606. }
  607. $dir = $p_dir; # $File::Find::dir
  608. $_ = ($no_chdir ? $dir_name : $dir_rel); # $_
  609. if ( substr($_,-2) eq '/.' ) {
  610. substr($_, length($_) == 2 ? -1 : -2) = '';
  611. }
  612. lstat($_); # make sure file tests with '_' work
  613. { $wanted_callback->() }; # protect against wild "next"
  614. }
  615. else {
  616. push @Stack,[$dir_loc, $updir_loc, $p_dir, $dir_rel,-1] if $bydepth;
  617. last;
  618. }
  619. }
  620. }
  621. }
  622. sub wrap_wanted {
  623. my $wanted = shift;
  624. if ( ref($wanted) eq 'HASH' ) {
  625. # RT #122547
  626. my %valid_options = map {$_ => 1} qw(
  627. wanted
  628. bydepth
  629. preprocess
  630. postprocess
  631. follow
  632. follow_fast
  633. follow_skip
  634. dangling_symlinks
  635. no_chdir
  636. untaint
  637. untaint_pattern
  638. untaint_skip
  639. );
  640. my @invalid_options = ();
  641. for my $v (keys %{$wanted}) {
  642. push @invalid_options, $v unless exists $valid_options{$v};
  643. }
  644. warn "Invalid option(s): @invalid_options" if @invalid_options;
  645. unless( exists $wanted->{wanted} and ref( $wanted->{wanted} ) eq 'CODE' ) {
  646. die 'no &wanted subroutine given';
  647. }
  648. if ( $wanted->{follow} || $wanted->{follow_fast}) {
  649. $wanted->{follow_skip} = 1 unless defined $wanted->{follow_skip};
  650. }
  651. if ( $wanted->{untaint} ) {
  652. $wanted->{untaint_pattern} = $File::Find::untaint_pattern
  653. unless defined $wanted->{untaint_pattern};
  654. $wanted->{untaint_skip} = 0 unless defined $wanted->{untaint_skip};
  655. }
  656. return $wanted;
  657. }
  658. elsif( ref( $wanted ) eq 'CODE' ) {
  659. return { wanted => $wanted };
  660. }
  661. else {
  662. die 'no &wanted subroutine given';
  663. }
  664. }
  665. sub find {
  666. my $wanted = shift;
  667. _find_opt(wrap_wanted($wanted), @_);
  668. }
  669. sub finddepth {
  670. my $wanted = wrap_wanted(shift);
  671. $wanted->{bydepth} = 1;
  672. _find_opt($wanted, @_);
  673. }
  674. # default
  675. $File::Find::skip_pattern = qr/^\.{1,2}\z/;
  676. $File::Find::untaint_pattern = qr|^([-+@\w./]+)$|;
  677. # These are hard-coded for now, but may move to hint files.
  678. if ($^O eq 'VMS') {
  679. $Is_VMS = 1;
  680. $File::Find::dont_use_nlink = 1;
  681. }
  682. elsif ($^O eq 'MSWin32') {
  683. $Is_Win32 = 1;
  684. }
  685. # this _should_ work properly on all platforms
  686. # where File::Find can be expected to work
  687. $File::Find::current_dir = File::Spec->curdir || '.';
  688. $File::Find::dont_use_nlink = 1
  689. if $^O eq 'os2' || $^O eq 'dos' || $^O eq 'amigaos' || $Is_Win32 ||
  690. $^O eq 'interix' || $^O eq 'cygwin' || $^O eq 'qnx' || $^O eq 'nto';
  691. # Set dont_use_nlink in your hint file if your system's stat doesn't
  692. # report the number of links in a directory as an indication
  693. # of the number of files.
  694. # See, e.g. hints/machten.sh for MachTen 2.2.
  695. unless ($File::Find::dont_use_nlink) {
  696. require Config;
  697. $File::Find::dont_use_nlink = 1 if ($Config::Config{'dont_use_nlink'});
  698. }
  699. # We need a function that checks if a scalar is tainted. Either use the
  700. # Scalar::Util module's tainted() function or our (slower) pure Perl
  701. # fallback is_tainted_pp()
  702. {
  703. local $@;
  704. eval { require Scalar::Util };
  705. *is_tainted = $@ ? \&is_tainted_pp : \&Scalar::Util::tainted;
  706. }
  707. 1;
  708. __END__
  709. #
  710. # Modified to ensure sub-directory traversal order is not inverted by stack
  711. # push and pops. That is remains in the same order as in the directory file,
  712. # or user pre-processing (EG:sorted).
  713. #
  714. =head1 NAME
  715. File::Find - Traverse a directory tree.
  716. =head1 SYNOPSIS
  717. use File::Find;
  718. find(\&wanted, @directories_to_search);
  719. sub wanted { ... }
  720. use File::Find;
  721. finddepth(\&wanted, @directories_to_search);
  722. sub wanted { ... }
  723. use File::Find;
  724. find({ wanted => \&process, follow => 1 }, '.');
  725. =head1 DESCRIPTION
  726. These are functions for searching through directory trees doing work
  727. on each file found similar to the Unix I<find> command. File::Find
  728. exports two functions, C<find> and C<finddepth>. They work similarly
  729. but have subtle differences.
  730. =over 4
  731. =item B<find>
  732. find(\&wanted, @directories);
  733. find(\%options, @directories);
  734. C<find()> does a depth-first search over the given C<@directories> in
  735. the order they are given. For each file or directory found, it calls
  736. the C<&wanted> subroutine. (See below for details on how to use the
  737. C<&wanted> function). Additionally, for each directory found, it will
  738. C<chdir()> into that directory and continue the search, invoking the
  739. C<&wanted> function on each file or subdirectory in the directory.
  740. =item B<finddepth>
  741. finddepth(\&wanted, @directories);
  742. finddepth(\%options, @directories);
  743. C<finddepth()> works just like C<find()> except that it invokes the
  744. C<&wanted> function for a directory I<after> invoking it for the
  745. directory's contents. It does a postorder traversal instead of a
  746. preorder traversal, working from the bottom of the directory tree up
  747. where C<find()> works from the top of the tree down.
  748. =back
  749. =head2 %options
  750. The first argument to C<find()> is either a code reference to your
  751. C<&wanted> function, or a hash reference describing the operations
  752. to be performed for each file. The
  753. code reference is described in L<The wanted function> below.
  754. Here are the possible keys for the hash:
  755. =over 3
  756. =item C<wanted>
  757. The value should be a code reference. This code reference is
  758. described in L<The wanted function> below. The C<&wanted> subroutine is
  759. mandatory.
  760. =item C<bydepth>
  761. Reports the name of a directory only AFTER all its entries
  762. have been reported. Entry point C<finddepth()> is a shortcut for
  763. specifying C<< { bydepth => 1 } >> in the first argument of C<find()>.
  764. =item C<preprocess>
  765. The value should be a code reference. This code reference is used to
  766. preprocess the current directory. The name of the currently processed
  767. directory is in C<$File::Find::dir>. Your preprocessing function is
  768. called after C<readdir()>, but before the loop that calls the C<wanted()>
  769. function. It is called with a list of strings (actually file/directory
  770. names) and is expected to return a list of strings. The code can be
  771. used to sort the file/directory names alphabetically, numerically,
  772. or to filter out directory entries based on their name alone. When
  773. I<follow> or I<follow_fast> are in effect, C<preprocess> is a no-op.
  774. =item C<postprocess>
  775. The value should be a code reference. It is invoked just before leaving
  776. the currently processed directory. It is called in void context with no
  777. arguments. The name of the current directory is in C<$File::Find::dir>. This
  778. hook is handy for summarizing a directory, such as calculating its disk
  779. usage. When I<follow> or I<follow_fast> are in effect, C<postprocess> is a
  780. no-op.
  781. =item C<follow>
  782. Causes symbolic links to be followed. Since directory trees with symbolic
  783. links (followed) may contain files more than once and may even have
  784. cycles, a hash has to be built up with an entry for each file.
  785. This might be expensive both in space and time for a large
  786. directory tree. See L</follow_fast> and L</follow_skip> below.
  787. If either I<follow> or I<follow_fast> is in effect:
  788. =over 6
  789. =item *
  790. It is guaranteed that an I<lstat> has been called before the user's
  791. C<wanted()> function is called. This enables fast file checks involving S<_>.
  792. Note that this guarantee no longer holds if I<follow> or I<follow_fast>
  793. are not set.
  794. =item *
  795. There is a variable C<$File::Find::fullname> which holds the absolute
  796. pathname of the file with all symbolic links resolved. If the link is
  797. a dangling symbolic link, then fullname will be set to C<undef>.
  798. =back
  799. This is a no-op on Win32.
  800. =item C<follow_fast>
  801. This is similar to I<follow> except that it may report some files more
  802. than once. It does detect cycles, however. Since only symbolic links
  803. have to be hashed, this is much cheaper both in space and time. If
  804. processing a file more than once (by the user's C<wanted()> function)
  805. is worse than just taking time, the option I<follow> should be used.
  806. This is also a no-op on Win32.
  807. =item C<follow_skip>
  808. C<follow_skip==1>, which is the default, causes all files which are
  809. neither directories nor symbolic links to be ignored if they are about
  810. to be processed a second time. If a directory or a symbolic link
  811. are about to be processed a second time, File::Find dies.
  812. C<follow_skip==0> causes File::Find to die if any file is about to be
  813. processed a second time.
  814. C<follow_skip==2> causes File::Find to ignore any duplicate files and
  815. directories but to proceed normally otherwise.
  816. =item C<dangling_symlinks>
  817. If true and a code reference, will be called with the symbolic link
  818. name and the directory it lives in as arguments. Otherwise, if true
  819. and warnings are on, warning "symbolic_link_name is a dangling
  820. symbolic link\n" will be issued. If false, the dangling symbolic link
  821. will be silently ignored.
  822. =item C<no_chdir>
  823. Does not C<chdir()> to each directory as it recurses. The C<wanted()>
  824. function will need to be aware of this, of course. In this case,
  825. C<$_> will be the same as C<$File::Find::name>.
  826. =item C<untaint>
  827. If find is used in taint-mode (-T command line switch or if EUID != UID
  828. or if EGID != GID) then internally directory names have to be untainted
  829. before they can be chdir'ed to. Therefore they are checked against a regular
  830. expression I<untaint_pattern>. Note that all names passed to the user's
  831. I<wanted()> function are still tainted. If this option is used while
  832. not in taint-mode, C<untaint> is a no-op.
  833. =item C<untaint_pattern>
  834. See above. This should be set using the C<qr> quoting operator.
  835. The default is set to C<qr|^([-+@\w./]+)$|>.
  836. Note that the parentheses are vital.
  837. =item C<untaint_skip>
  838. If set, a directory which fails the I<untaint_pattern> is skipped,
  839. including all its sub-directories. The default is to 'die' in such a case.
  840. =back
  841. =head2 The wanted function
  842. The C<wanted()> function does whatever verifications you want on
  843. each file and directory. Note that despite its name, the C<wanted()>
  844. function is a generic callback function, and does B<not> tell
  845. File::Find if a file is "wanted" or not. In fact, its return value
  846. is ignored.
  847. The wanted function takes no arguments but rather does its work
  848. through a collection of variables.
  849. =over 4
  850. =item C<$File::Find::dir> is the current directory name,
  851. =item C<$_> is the current filename within that directory
  852. =item C<$File::Find::name> is the complete pathname to the file.
  853. =back
  854. The above variables have all been localized and may be changed without
  855. affecting data outside of the wanted function.
  856. For example, when examining the file F</some/path/foo.ext> you will have:
  857. $File::Find::dir = /some/path/
  858. $_ = foo.ext
  859. $File::Find::name = /some/path/foo.ext
  860. You are chdir()'d to C<$File::Find::dir> when the function is called,
  861. unless C<no_chdir> was specified. Note that when changing to
  862. directories is in effect the root directory (F</>) is a somewhat
  863. special case inasmuch as the concatenation of C<$File::Find::dir>,
  864. C<'/'> and C<$_> is not literally equal to C<$File::Find::name>. The
  865. table below summarizes all variants:
  866. $File::Find::name $File::Find::dir $_
  867. default / / .
  868. no_chdir=>0 /etc / etc
  869. /etc/x /etc x
  870. no_chdir=>1 / / /
  871. /etc / /etc
  872. /etc/x /etc /etc/x
  873. When C<follow> or C<follow_fast> are in effect, there is
  874. also a C<$File::Find::fullname>. The function may set
  875. C<$File::Find::prune> to prune the tree unless C<bydepth> was
  876. specified. Unless C<follow> or C<follow_fast> is specified, for
  877. compatibility reasons (find.pl, find2perl) there are in addition the
  878. following globals available: C<$File::Find::topdir>,
  879. C<$File::Find::topdev>, C<$File::Find::topino>,
  880. C<$File::Find::topmode> and C<$File::Find::topnlink>.
  881. This library is useful for the C<find2perl> tool, which when fed,
  882. find2perl / -name .nfs\* -mtime +7 \
  883. -exec rm -f {} \; -o -fstype nfs -prune
  884. produces something like:
  885. sub wanted {
  886. /^\.nfs.*\z/s &&
  887. (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_)) &&
  888. int(-M _) > 7 &&
  889. unlink($_)
  890. ||
  891. ($nlink || (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_))) &&
  892. $dev < 0 &&
  893. ($File::Find::prune = 1);
  894. }
  895. Notice the C<_> in the above C<int(-M _)>: the C<_> is a magical
  896. filehandle that caches the information from the preceding
  897. C<stat()>, C<lstat()>, or filetest.
  898. Here's another interesting wanted function. It will find all symbolic
  899. links that don't resolve:
  900. sub wanted {
  901. -l && !-e && print "bogus link: $File::Find::name\n";
  902. }
  903. Note that you may mix directories and (non-directory) files in the list of
  904. directories to be searched by the C<wanted()> function.
  905. find(\&wanted, "./foo", "./bar", "./baz/epsilon");
  906. In the example above, no file in F<./baz/> other than F<./baz/epsilon> will be
  907. evaluated by C<wanted()>.
  908. See also the script C<pfind> on CPAN for a nice application of this
  909. module.
  910. =head1 WARNINGS
  911. If you run your program with the C<-w> switch, or if you use the
  912. C<warnings> pragma, File::Find will report warnings for several weird
  913. situations. You can disable these warnings by putting the statement
  914. no warnings 'File::Find';
  915. in the appropriate scope. See L<warnings> for more info about lexical
  916. warnings.
  917. =head1 CAVEAT
  918. =over 2
  919. =item $dont_use_nlink
  920. You can set the variable C<$File::Find::dont_use_nlink> to 1, if you want to
  921. force File::Find to always stat directories. This was used for file systems
  922. that do not have an C<nlink> count matching the number of sub-directories.
  923. Examples are ISO-9660 (CD-ROM), AFS, HPFS (OS/2 file system), FAT (DOS file
  924. system) and a couple of others.
  925. You shouldn't need to set this variable, since File::Find should now detect
  926. such file systems on-the-fly and switch itself to using stat. This works even
  927. for parts of your file system, like a mounted CD-ROM.
  928. If you do set C<$File::Find::dont_use_nlink> to 1, you will notice slow-downs.
  929. =item symlinks
  930. Be aware that the option to follow symbolic links can be dangerous.
  931. Depending on the structure of the directory tree (including symbolic
  932. links to directories) you might traverse a given (physical) directory
  933. more than once (only if C<follow_fast> is in effect).
  934. Furthermore, deleting or changing files in a symbolically linked directory
  935. might cause very unpleasant surprises, since you delete or change files
  936. in an unknown directory.
  937. =back
  938. =head1 BUGS AND CAVEATS
  939. Despite the name of the C<finddepth()> function, both C<find()> and
  940. C<finddepth()> perform a depth-first search of the directory
  941. hierarchy.
  942. =head1 HISTORY
  943. File::Find used to produce incorrect results if called recursively.
  944. During the development of perl 5.8 this bug was fixed.
  945. The first fixed version of File::Find was 1.01.
  946. =head1 SEE ALSO
  947. find, find2perl.
  948. =cut