123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402 |
- =head1 NAME
- File::Basename - Parse file paths into directory, filename and suffix.
- =head1 SYNOPSIS
- use File::Basename;
- ($name,$path,$suffix) = fileparse($fullname,@suffixlist);
- $name = fileparse($fullname,@suffixlist);
- $basename = basename($fullname,@suffixlist);
- $dirname = dirname($fullname);
- =head1 DESCRIPTION
- These routines allow you to parse file paths into their directory, filename
- and suffix.
- B<NOTE>: C<dirname()> and C<basename()> emulate the behaviours, and
- quirks, of the shell and C functions of the same name. See each
- function's documentation for details. If your concern is just parsing
- paths it is safer to use L<File::Spec>'s C<splitpath()> and
- C<splitdir()> methods.
- It is guaranteed that
- # Where $path_separator is / for Unix, \ for Windows, etc...
- dirname($path) . $path_separator . basename($path);
- is equivalent to the original path for all systems but VMS.
- =cut
- package File::Basename;
- # File::Basename is used during the Perl build, when the re extension may
- # not be available, but we only actually need it if running under tainting.
- BEGIN {
- if (${^TAINT}) {
- require re;
- re->import('taint');
- }
- }
- use strict;
- use 5.006;
- use warnings;
- our(@ISA, @EXPORT, $VERSION, $Fileparse_fstype, $Fileparse_igncase);
- require Exporter;
- @ISA = qw(Exporter);
- @EXPORT = qw(fileparse fileparse_set_fstype basename dirname);
- $VERSION = "2.85";
- fileparse_set_fstype($^O);
- =over 4
- =item C<fileparse>
- X<fileparse>
- my($filename, $dirs, $suffix) = fileparse($path);
- my($filename, $dirs, $suffix) = fileparse($path, @suffixes);
- my $filename = fileparse($path, @suffixes);
- The C<fileparse()> routine divides a file path into its $dirs, $filename
- and (optionally) the filename $suffix.
- $dirs contains everything up to and including the last
- directory separator in the $path including the volume (if applicable).
- The remainder of the $path is the $filename.
- # On Unix returns ("baz", "/foo/bar/", "")
- fileparse("/foo/bar/baz");
- # On Windows returns ("baz", 'C:\foo\bar\', "")
- fileparse('C:\foo\bar\baz');
- # On Unix returns ("", "/foo/bar/baz/", "")
- fileparse("/foo/bar/baz/");
- If @suffixes are given each element is a pattern (either a string or a
- C<qr//>) matched against the end of the $filename. The matching
- portion is removed and becomes the $suffix.
- # On Unix returns ("baz", "/foo/bar/", ".txt")
- fileparse("/foo/bar/baz.txt", qr/\.[^.]*/);
- If type is non-Unix (see L</fileparse_set_fstype>) then the pattern
- matching for suffix removal is performed case-insensitively, since
- those systems are not case-sensitive when opening existing files.
- You are guaranteed that C<$dirs . $filename . $suffix> will
- denote the same location as the original $path.
- =cut
- sub fileparse {
- my($fullname,@suffices) = @_;
- unless (defined $fullname) {
- require Carp;
- Carp::croak("fileparse(): need a valid pathname");
- }
- my $orig_type = '';
- my($type,$igncase) = ($Fileparse_fstype, $Fileparse_igncase);
- my($taint) = substr($fullname,0,0); # Is $fullname tainted?
- if ($type eq "VMS" and $fullname =~ m{/} ) {
- # We're doing Unix emulation
- $orig_type = $type;
- $type = 'Unix';
- }
- my($dirpath, $basename);
- if (grep { $type eq $_ } qw(MSDOS DOS MSWin32 Epoc)) {
- ($dirpath,$basename) = ($fullname =~ /^((?:.*[:\\\/])?)(.*)/s);
- $dirpath .= '.\\' unless $dirpath =~ /[\\\/]\z/;
- }
- elsif ($type eq "OS2") {
- ($dirpath,$basename) = ($fullname =~ m#^((?:.*[:\\/])?)(.*)#s);
- $dirpath = './' unless $dirpath; # Can't be 0
- $dirpath .= '/' unless $dirpath =~ m#[\\/]\z#;
- }
- elsif ($type eq "MacOS") {
- ($dirpath,$basename) = ($fullname =~ /^(.*:)?(.*)/s);
- $dirpath = ':' unless $dirpath;
- }
- elsif ($type eq "AmigaOS") {
- ($dirpath,$basename) = ($fullname =~ /(.*[:\/])?(.*)/s);
- $dirpath = './' unless $dirpath;
- }
- elsif ($type eq 'VMS' ) {
- ($dirpath,$basename) = ($fullname =~ /^(.*[:>\]])?(.*)/s);
- $dirpath ||= ''; # should always be defined
- }
- else { # Default to Unix semantics.
- ($dirpath,$basename) = ($fullname =~ m{^(.*/)?(.*)}s);
- if ($orig_type eq 'VMS' and $fullname =~ m{^(/[^/]+/000000(/|$))(.*)}) {
- # dev:[000000] is top of VMS tree, similar to Unix '/'
- # so strip it off and treat the rest as "normal"
- my $devspec = $1;
- my $remainder = $3;
- ($dirpath,$basename) = ($remainder =~ m{^(.*/)?(.*)}s);
- $dirpath ||= ''; # should always be defined
- $dirpath = $devspec.$dirpath;
- }
- $dirpath = './' unless $dirpath;
- }
-
- my $tail = '';
- my $suffix = '';
- if (@suffices) {
- foreach $suffix (@suffices) {
- my $pat = ($igncase ? '(?i)' : '') . "($suffix)\$";
- if ($basename =~ s/$pat//s) {
- $taint .= substr($suffix,0,0);
- $tail = $1 . $tail;
- }
- }
- }
- # Ensure taint is propagated from the path to its pieces.
- $tail .= $taint;
- wantarray ? ($basename .= $taint, $dirpath .= $taint, $tail)
- : ($basename .= $taint);
- }
- =item C<basename>
- X<basename> X<filename>
- my $filename = basename($path);
- my $filename = basename($path, @suffixes);
- This function is provided for compatibility with the Unix shell command
- C<basename(1)>. It does B<NOT> always return the file name portion of a
- path as you might expect. To be safe, if you want the file name portion of
- a path use C<fileparse()>.
- C<basename()> returns the last level of a filepath even if the last
- level is clearly directory. In effect, it is acting like C<pop()> for
- paths. This differs from C<fileparse()>'s behaviour.
- # Both return "bar"
- basename("/foo/bar");
- basename("/foo/bar/");
- @suffixes work as in C<fileparse()> except all regex metacharacters are
- quoted.
- # These two function calls are equivalent.
- my $filename = basename("/foo/bar/baz.txt", ".txt");
- my $filename = fileparse("/foo/bar/baz.txt", qr/\Q.txt\E/);
- Also note that in order to be compatible with the shell command,
- C<basename()> does not strip off a suffix if it is identical to the
- remaining characters in the filename.
- =cut
- sub basename {
- my($path) = shift;
- # From BSD basename(1)
- # The basename utility deletes any prefix ending with the last slash '/'
- # character present in string (after first stripping trailing slashes)
- _strip_trailing_sep($path);
- my($basename, $dirname, $suffix) = fileparse( $path, map("\Q$_\E",@_) );
- # From BSD basename(1)
- # The suffix is not stripped if it is identical to the remaining
- # characters in string.
- if( length $suffix and !length $basename ) {
- $basename = $suffix;
- }
-
- # Ensure that basename '/' == '/'
- if( !length $basename ) {
- $basename = $dirname;
- }
- return $basename;
- }
- =item C<dirname>
- X<dirname>
- This function is provided for compatibility with the Unix shell
- command C<dirname(1)> and has inherited some of its quirks. In spite of
- its name it does B<NOT> always return the directory name as you might
- expect. To be safe, if you want the directory name of a path use
- C<fileparse()>.
- Only on VMS (where there is no ambiguity between the file and directory
- portions of a path) and AmigaOS (possibly due to an implementation quirk in
- this module) does C<dirname()> work like C<fileparse($path)>, returning just the
- $dirs.
- # On VMS and AmigaOS
- my $dirs = dirname($path);
- When using Unix or MSDOS syntax this emulates the C<dirname(1)> shell function
- which is subtly different from how C<fileparse()> works. It returns all but
- the last level of a file path even if the last level is clearly a directory.
- In effect, it is not returning the directory portion but simply the path one
- level up acting like C<chop()> for file paths.
- Also unlike C<fileparse()>, C<dirname()> does not include a trailing slash on
- its returned path.
- # returns /foo/bar. fileparse() would return /foo/bar/
- dirname("/foo/bar/baz");
- # also returns /foo/bar despite the fact that baz is clearly a
- # directory. fileparse() would return /foo/bar/baz/
- dirname("/foo/bar/baz/");
- # returns '.'. fileparse() would return 'foo/'
- dirname("foo/");
- Under VMS, if there is no directory information in the $path, then the
- current default device and directory is used.
- =cut
- sub dirname {
- my $path = shift;
- my($type) = $Fileparse_fstype;
- if( $type eq 'VMS' and $path =~ m{/} ) {
- # Parse as Unix
- local($File::Basename::Fileparse_fstype) = '';
- return dirname($path);
- }
- my($basename, $dirname) = fileparse($path);
- if ($type eq 'VMS') {
- $dirname ||= $ENV{DEFAULT};
- }
- elsif ($type eq 'MacOS') {
- if( !length($basename) && $dirname !~ /^[^:]+:\z/) {
- _strip_trailing_sep($dirname);
- ($basename,$dirname) = fileparse $dirname;
- }
- $dirname .= ":" unless $dirname =~ /:\z/;
- }
- elsif (grep { $type eq $_ } qw(MSDOS DOS MSWin32 OS2)) {
- _strip_trailing_sep($dirname);
- unless( length($basename) ) {
- ($basename,$dirname) = fileparse $dirname;
- _strip_trailing_sep($dirname);
- }
- }
- elsif ($type eq 'AmigaOS') {
- if ( $dirname =~ /:\z/) { return $dirname }
- chop $dirname;
- $dirname =~ s{[^:/]+\z}{} unless length($basename);
- }
- else {
- _strip_trailing_sep($dirname);
- unless( length($basename) ) {
- ($basename,$dirname) = fileparse $dirname;
- _strip_trailing_sep($dirname);
- }
- }
- $dirname;
- }
- # Strip the trailing path separator.
- sub _strip_trailing_sep {
- my $type = $Fileparse_fstype;
- if ($type eq 'MacOS') {
- $_[0] =~ s/([^:]):\z/$1/s;
- }
- elsif (grep { $type eq $_ } qw(MSDOS DOS MSWin32 OS2)) {
- $_[0] =~ s/([^:])[\\\/]*\z/$1/;
- }
- else {
- $_[0] =~ s{(.)/*\z}{$1}s;
- }
- }
- =item C<fileparse_set_fstype>
- X<filesystem>
- my $type = fileparse_set_fstype();
- my $previous_type = fileparse_set_fstype($type);
- Normally File::Basename will assume a file path type native to your current
- operating system (ie. /foo/bar style on Unix, \foo\bar on Windows, etc...).
- With this function you can override that assumption.
- Valid $types are "MacOS", "VMS", "AmigaOS", "OS2", "RISCOS",
- "MSWin32", "DOS" (also "MSDOS" for backwards bug compatibility),
- "Epoc" and "Unix" (all case-insensitive). If an unrecognized $type is
- given "Unix" will be assumed.
- If you've selected VMS syntax, and the file specification you pass to
- one of these routines contains a "/", they assume you are using Unix
- emulation and apply the Unix syntax rules instead, for that function
- call only.
- =back
- =cut
- BEGIN {
- my @Ignore_Case = qw(MacOS VMS AmigaOS OS2 RISCOS MSWin32 MSDOS DOS Epoc);
- my @Types = (@Ignore_Case, qw(Unix));
- sub fileparse_set_fstype {
- my $old = $Fileparse_fstype;
- if (@_) {
- my $new_type = shift;
- $Fileparse_fstype = 'Unix'; # default
- foreach my $type (@Types) {
- $Fileparse_fstype = $type if $new_type =~ /^$type/i;
- }
- $Fileparse_igncase =
- (grep $Fileparse_fstype eq $_, @Ignore_Case) ? 1 : 0;
- }
- return $old;
- }
- }
- 1;
- =head1 SEE ALSO
- L<dirname(1)>, L<basename(1)>, L<File::Spec>
|