Basename.pm 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402
  1. =head1 NAME
  2. File::Basename - Parse file paths into directory, filename and suffix.
  3. =head1 SYNOPSIS
  4. use File::Basename;
  5. ($name,$path,$suffix) = fileparse($fullname,@suffixlist);
  6. $name = fileparse($fullname,@suffixlist);
  7. $basename = basename($fullname,@suffixlist);
  8. $dirname = dirname($fullname);
  9. =head1 DESCRIPTION
  10. These routines allow you to parse file paths into their directory, filename
  11. and suffix.
  12. B<NOTE>: C<dirname()> and C<basename()> emulate the behaviours, and
  13. quirks, of the shell and C functions of the same name. See each
  14. function's documentation for details. If your concern is just parsing
  15. paths it is safer to use L<File::Spec>'s C<splitpath()> and
  16. C<splitdir()> methods.
  17. It is guaranteed that
  18. # Where $path_separator is / for Unix, \ for Windows, etc...
  19. dirname($path) . $path_separator . basename($path);
  20. is equivalent to the original path for all systems but VMS.
  21. =cut
  package File::Basename;

  # This module is loaded while perl itself is being built, when the "re"
  # extension may not be available yet.  It is only needed when running
  # under tainting, so it is pulled in conditionally.
  BEGIN {
      if (${^TAINT}) {
          require re;
          re->import('taint');
      }
  }

  use strict;
  use 5.006;
  use warnings;

  # Package globals: the Exporter plumbing, the module version, the file
  # system type currently in effect and the matching ignore-case flag.
  our (@ISA, @EXPORT, $VERSION, $Fileparse_fstype, $Fileparse_igncase);

  require Exporter;
  @ISA     = qw(Exporter);
  @EXPORT  = qw(fileparse fileparse_set_fstype basename dirname);
  $VERSION = "2.85";

  # Start out with the path conventions of the running operating system.
  fileparse_set_fstype($^O);
  40. =over 4
  41. =item C<fileparse>
  42. X<fileparse>
  43. my($filename, $dirs, $suffix) = fileparse($path);
  44. my($filename, $dirs, $suffix) = fileparse($path, @suffixes);
  45. my $filename = fileparse($path, @suffixes);
  46. The C<fileparse()> routine divides a file path into its $dirs, $filename
  47. and (optionally) the filename $suffix.
  48. $dirs contains everything up to and including the last
  49. directory separator in the $path including the volume (if applicable).
  50. The remainder of the $path is the $filename.
  51. # On Unix returns ("baz", "/foo/bar/", "")
  52. fileparse("/foo/bar/baz");
  53. # On Windows returns ("baz", 'C:\foo\bar\', "")
  54. fileparse('C:\foo\bar\baz');
  55. # On Unix returns ("", "/foo/bar/baz/", "")
  56. fileparse("/foo/bar/baz/");
  57. If @suffixes are given each element is a pattern (either a string or a
  58. C<qr//>) matched against the end of the $filename. The matching
  59. portion is removed and becomes the $suffix.
  60. # On Unix returns ("baz", "/foo/bar/", ".txt")
  61. fileparse("/foo/bar/baz.txt", qr/\.[^.]*/);
  62. If type is non-Unix (see L</fileparse_set_fstype>) then the pattern
  63. matching for suffix removal is performed case-insensitively, since
  64. those systems are not case-sensitive when opening existing files.
  65. You are guaranteed that C<$dirs . $filename . $suffix> will
  66. denote the same location as the original $path.
  67. =cut
  # fileparse($fullname, @suffices)
  #
  # Splits $fullname into file name, directory part and suffix, following the
  # rules of the file system type currently held in $Fileparse_fstype.
  #
  #   $fullname - the path to split; croaks if it is undefined.
  #   @suffices - optional patterns (plain strings or qr// objects).  Each is
  #               tried in turn against the very end of the file name and
  #               whatever matches is moved into the suffix.  Matching is
  #               case-insensitive when $Fileparse_igncase is true.
  #
  # Returns ($filename, $dirs, $suffix) in list context and only $filename in
  # scalar context.  Taint on $fullname, and on any suffix pattern that
  # matched, is propagated to the returned pieces.
  sub fileparse {
      my ($fullname, @suffices) = @_;

      unless (defined $fullname) {
          require Carp;
          Carp::croak("fileparse(): need a valid pathname");
      }

      my $orig_type = '';
      my ($type, $igncase) = ($Fileparse_fstype, $Fileparse_igncase);

      # A zero-length substring is empty but still carries $fullname's taint.
      my ($taint) = substr($fullname, 0, 0);

      if ($type eq "VMS" and $fullname =~ m{/}) {
          # We're doing Unix emulation
          $orig_type = $type;
          $type      = 'Unix';
      }

      my ($dirpath, $basename);

      if (grep { $type eq $_ } qw(MSDOS DOS MSWin32 Epoc)) {
          ($dirpath, $basename) = ($fullname =~ /^((?:.*[:\\\/])?)(.*)/s);
          $dirpath .= '.\\' unless $dirpath =~ /[\\\/]\z/;
      }
      elsif ($type eq "OS2") {
          ($dirpath, $basename) = ($fullname =~ m#^((?:.*[:\\/])?)(.*)#s);
          $dirpath = './' unless $dirpath;    # Can't be 0
          $dirpath .= '/' unless $dirpath =~ m#[\\/]\z#;
      }
      elsif ($type eq "MacOS") {
          ($dirpath, $basename) = ($fullname =~ /^(.*:)?(.*)/s);
          $dirpath = ':' unless $dirpath;
      }
      elsif ($type eq "AmigaOS") {
          ($dirpath, $basename) = ($fullname =~ /(.*[:\/])?(.*)/s);
          $dirpath = './' unless $dirpath;
      }
      elsif ($type eq 'VMS') {
          ($dirpath, $basename) = ($fullname =~ /^(.*[:>\]])?(.*)/s);
          $dirpath ||= '';                    # should always be defined
      }
      else {    # Default to Unix semantics.
          ($dirpath, $basename) = ($fullname =~ m{^(.*/)?(.*)}s);

          # /s lets (.*) run across embedded newlines; without it the rest
          # of the path after the first newline would be dropped.
          if ($orig_type eq 'VMS'
              and $fullname =~ m{^(/[^/]+/000000(/|$))(.*)}s)
          {
              # dev:[000000] is top of VMS tree, similar to Unix '/'
              # so strip it off and treat the rest as "normal"
              my $devspec   = $1;
              my $remainder = $3;
              ($dirpath, $basename) = ($remainder =~ m{^(.*/)?(.*)}s);
              $dirpath ||= '';                # should always be defined
              $dirpath = $devspec . $dirpath;
          }
          $dirpath = './' unless $dirpath;
      }

      my $tail = '';
      foreach my $suffix (@suffices) {
          # Anchor with \z rather than $: "$" also matches just before a
          # trailing newline, which would pull the suffix out of the middle
          # of the name and break the guarantee that
          # $dirs . $filename . $suffix is the original path.
          my $pat = ($igncase ? '(?i)' : '') . "($suffix)\\z";
          if ($basename =~ s/$pat//s) {
              $taint .= substr($suffix, 0, 0);
              $tail = $1 . $tail;
          }
      }

      # Ensure taint is propagated from the path to its pieces.
      $tail .= $taint;
      return wantarray ? ($basename .= $taint, $dirpath .= $taint, $tail)
                       : ($basename .= $taint);
  }
  133. =item C<basename>
  134. X<basename> X<filename>
  135. my $filename = basename($path);
  136. my $filename = basename($path, @suffixes);
  137. This function is provided for compatibility with the Unix shell command
  138. C<basename(1)>. It does B<NOT> always return the file name portion of a
  139. path as you might expect. To be safe, if you want the file name portion of
  140. a path use C<fileparse()>.
  141. C<basename()> returns the last level of a filepath even if the last
  142. level is clearly a directory. In effect, it is acting like C<pop()> for
  143. paths. This differs from C<fileparse()>'s behaviour.
  144. # Both return "bar"
  145. basename("/foo/bar");
  146. basename("/foo/bar/");
  147. @suffixes work as in C<fileparse()> except all regex metacharacters are
  148. quoted.
  149. # These two function calls are equivalent.
  150. my $filename = basename("/foo/bar/baz.txt", ".txt");
  151. my $filename = fileparse("/foo/bar/baz.txt", qr/\Q.txt\E/);
  152. Also note that in order to be compatible with the shell command,
  153. C<basename()> does not strip off a suffix if it is identical to the
  154. remaining characters in the filename.
  155. =cut
  # basename($path, @suffixes)
  #
  # Returns the last level of $path after trailing separators have been
  # stripped.  Each entry of @suffixes is treated as literal text and removed
  # from the end of the result, unless that would leave nothing.
  sub basename {
      my $path = shift;

      # From BSD basename(1):
      #   The basename utility deletes any prefix ending with the last slash
      #   '/' character present in string (after first stripping trailing
      #   slashes).
      _strip_trailing_sep($path);

      # Suffixes are literal here, so quote every regex metacharacter before
      # handing them to fileparse().
      my @literal = map { quotemeta($_) } @_;
      my ($name, $dirs, $tail) = fileparse($path, @literal);

      return $name if length $name;

      # Nothing is left of the name.  From BSD basename(1):
      #   The suffix is not stripped if it is identical to the remaining
      #   characters in string.
      # With no suffix either, fall back to the directory part so that
      # basename '/' == '/'.
      return length($tail) ? $tail : $dirs;
  }
  175. =item C<dirname>
  176. X<dirname>
  177. This function is provided for compatibility with the Unix shell
  178. command C<dirname(1)> and has inherited some of its quirks. In spite of
  179. its name it does B<NOT> always return the directory name as you might
  180. expect. To be safe, if you want the directory name of a path use
  181. C<fileparse()>.
  182. Only on VMS (where there is no ambiguity between the file and directory
  183. portions of a path) and AmigaOS (possibly due to an implementation quirk in
  184. this module) does C<dirname()> work like C<fileparse($path)>, returning just the
  185. $dirs.
  186. # On VMS and AmigaOS
  187. my $dirs = dirname($path);
  188. When using Unix or MSDOS syntax this emulates the C<dirname(1)> shell function
  189. which is subtly different from how C<fileparse()> works. It returns all but
  190. the last level of a file path even if the last level is clearly a directory.
  191. In effect, it is not returning the directory portion but simply the path one
  192. level up acting like C<chop()> for file paths.
  193. Also unlike C<fileparse()>, C<dirname()> does not include a trailing slash on
  194. its returned path.
  195. # returns /foo/bar. fileparse() would return /foo/bar/
  196. dirname("/foo/bar/baz");
  197. # also returns /foo/bar despite the fact that baz is clearly a
  198. # directory. fileparse() would return /foo/bar/baz/
  199. dirname("/foo/bar/baz/");
  200. # returns '.'. fileparse() would return 'foo/'
  201. dirname("foo/");
  202. Under VMS, if there is no directory information in the $path, then the
  203. current default device and directory is used.
  204. =cut
  # dirname($path)
  #
  # Returns the directory portion of $path under the rules of the current
  # $Fileparse_fstype.  Separator stripping is delegated to
  # _strip_trailing_sep(); VMS, MacOS and AmigaOS each get their own handling
  # below.
  sub dirname {
      my $path   = shift;
      my $fstype = $Fileparse_fstype;

      # A VMS path containing "/" is parsed as Unix: redo the call with the
      # file system type temporarily blanked so the default rules apply.
      if ($fstype eq 'VMS' and $path =~ m{/}) {
          local $File::Basename::Fileparse_fstype = '';
          return dirname($path);
      }

      my ($leaf, $dirs) = fileparse($path);

      if ($fstype eq 'VMS') {
          # No directory information: fall back to $ENV{DEFAULT}.
          $dirs ||= $ENV{DEFAULT};
          return $dirs;
      }

      if ($fstype eq 'MacOS') {
          # An empty leaf means the path ended in ":"; go up one level unless
          # the directory part is a single "name:" component.
          if (!length($leaf) && $dirs !~ /^[^:]+:\z/) {
              _strip_trailing_sep($dirs);
              ($leaf, $dirs) = fileparse($dirs);
          }
          $dirs .= ":" unless $dirs =~ /:\z/;
          return $dirs;
      }

      if ($fstype eq 'AmigaOS') {
          return $dirs if $dirs =~ /:\z/;
          chop $dirs;
          $dirs =~ s{[^:/]+\z}{} unless length($leaf);
          return $dirs;
      }

      # MSDOS, DOS, MSWin32, OS2 and everything else share one procedure;
      # the separator rules live in _strip_trailing_sep().
      _strip_trailing_sep($dirs);
      unless (length $leaf) {
          # The path ended in a separator: step up one more level.
          ($leaf, $dirs) = fileparse($dirs);
          _strip_trailing_sep($dirs);
      }
      return $dirs;
  }
  # _strip_trailing_sep($path)
  #
  # Strips the trailing path separator(s) from its argument IN PLACE (it
  # edits $_[0], i.e. the caller's variable), using the separator rules of
  # the current $Fileparse_fstype.  At least one character is always left,
  # so a lone root such as "/" is kept.
  sub _strip_trailing_sep {
      my $type = $Fileparse_fstype;

      if ($type eq 'MacOS') {
          # Drop a single trailing ":" unless the character before it is
          # also ":".
          $_[0] =~ s/([^:]):\z/$1/s;
      }
      elsif (grep { $type eq $_ } qw(MSDOS DOS MSWin32 OS2 Epoc)) {
          # Epoc is listed here because fileparse() splits Epoc paths with
          # the same backslash rules as MSDOS; without it a trailing "\"
          # would never be removed for that type.
          # The [^:] guard keeps the separator that follows a drive colon.
          $_[0] =~ s/([^:])[\\\/]*\z/$1/;
      }
      else {
          $_[0] =~ s{(.)/*\z}{$1}s;
      }
  }
  258. =item C<fileparse_set_fstype>
  259. X<filesystem>
  260. my $type = fileparse_set_fstype();
  261. my $previous_type = fileparse_set_fstype($type);
  262. Normally File::Basename will assume a file path type native to your current
  263. operating system (i.e. /foo/bar style on Unix, \foo\bar on Windows, etc...).
  264. With this function you can override that assumption.
  265. Valid $types are "MacOS", "VMS", "AmigaOS", "OS2", "RISCOS",
  266. "MSWin32", "DOS" (also "MSDOS" for backwards bug compatibility),
  267. "Epoc" and "Unix" (all case-insensitive). If an unrecognized $type is
  268. given "Unix" will be assumed.
  269. If you've selected VMS syntax, and the file specification you pass to
  270. one of these routines contains a "/", they assume you are using Unix
  271. emulation and apply the Unix syntax rules instead, for that function
  272. call only.
  273. =back
  274. =cut
  BEGIN {
      # File system types for which suffix matching ignores case, and the
      # full list of recognised types.  Both are private to the sub below.
      my @case_blind = qw(MacOS VMS AmigaOS OS2 RISCOS MSWin32 MSDOS DOS Epoc);
      my @known      = (@case_blind, qw(Unix));

      # fileparse_set_fstype([$type])
      #
      # With no argument, just reports the current file system type.  With
      # an argument, selects the known type whose name is a case-insensitive
      # prefix of $type ("Unix" when none matches) and updates the
      # ignore-case flag to go with it.  Returns the type that was in effect
      # before the call.
      sub fileparse_set_fstype {
          my $previous = $Fileparse_fstype;
          return $previous unless @_;

          my $requested = shift;

          # Should more than one known type match, the last one wins.
          my @hits = grep { $requested =~ /^$_/i } @known;
          $Fileparse_fstype = @hits ? $hits[-1] : 'Unix';

          $Fileparse_igncase =
              (grep { $_ eq $Fileparse_fstype } @case_blind) ? 1 : 0;

          return $previous;
      }
  }
  292. 1;
  293. =head1 SEE ALSO
  294. L<dirname(1)>, L<basename(1)>, L<File::Spec>