glob.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159
  1. """Filename globbing utility."""
  2. import os
  3. import re
  4. import fnmatch
  5. __all__ = ["glob", "iglob", "escape"]
  6. def glob(pathname, *, recursive=False):
  7. """Return a list of paths matching a pathname pattern.
  8. The pattern may contain simple shell-style wildcards a la
  9. fnmatch. However, unlike fnmatch, filenames starting with a
  10. dot are special cases that are not matched by '*' and '?'
  11. patterns.
  12. If recursive is true, the pattern '**' will match any files and
  13. zero or more directories and subdirectories.
  14. """
  15. return list(iglob(pathname, recursive=recursive))
  16. def iglob(pathname, *, recursive=False):
  17. """Return an iterator which yields the paths matching a pathname pattern.
  18. The pattern may contain simple shell-style wildcards a la
  19. fnmatch. However, unlike fnmatch, filenames starting with a
  20. dot are special cases that are not matched by '*' and '?'
  21. patterns.
  22. If recursive is true, the pattern '**' will match any files and
  23. zero or more directories and subdirectories.
  24. """
  25. it = _iglob(pathname, recursive)
  26. if recursive and _isrecursive(pathname):
  27. s = next(it) # skip empty string
  28. assert not s
  29. return it
  30. def _iglob(pathname, recursive):
  31. dirname, basename = os.path.split(pathname)
  32. if not has_magic(pathname):
  33. if basename:
  34. if os.path.lexists(pathname):
  35. yield pathname
  36. else:
  37. # Patterns ending with a slash should match only directories
  38. if os.path.isdir(dirname):
  39. yield pathname
  40. return
  41. if not dirname:
  42. if recursive and _isrecursive(basename):
  43. yield from glob2(dirname, basename)
  44. else:
  45. yield from glob1(dirname, basename)
  46. return
  47. # `os.path.split()` returns the argument itself as a dirname if it is a
  48. # drive or UNC path. Prevent an infinite recursion if a drive or UNC path
  49. # contains magic characters (i.e. r'\\?\C:').
  50. if dirname != pathname and has_magic(dirname):
  51. dirs = _iglob(dirname, recursive)
  52. else:
  53. dirs = [dirname]
  54. if has_magic(basename):
  55. if recursive and _isrecursive(basename):
  56. glob_in_dir = glob2
  57. else:
  58. glob_in_dir = glob1
  59. else:
  60. glob_in_dir = glob0
  61. for dirname in dirs:
  62. for name in glob_in_dir(dirname, basename):
  63. yield os.path.join(dirname, name)
  64. # These 2 helper functions non-recursively glob inside a literal directory.
  65. # They return a list of basenames. `glob1` accepts a pattern while `glob0`
  66. # takes a literal basename (so it only has to check for its existence).
  67. def glob1(dirname, pattern):
  68. if not dirname:
  69. if isinstance(pattern, bytes):
  70. dirname = bytes(os.curdir, 'ASCII')
  71. else:
  72. dirname = os.curdir
  73. try:
  74. names = os.listdir(dirname)
  75. except OSError:
  76. return []
  77. if not _ishidden(pattern):
  78. names = [x for x in names if not _ishidden(x)]
  79. return fnmatch.filter(names, pattern)
  80. def glob0(dirname, basename):
  81. if not basename:
  82. # `os.path.split()` returns an empty basename for paths ending with a
  83. # directory separator. 'q*x/' should match only directories.
  84. if os.path.isdir(dirname):
  85. return [basename]
  86. else:
  87. if os.path.lexists(os.path.join(dirname, basename)):
  88. return [basename]
  89. return []
  90. # This helper function recursively yields relative pathnames inside a literal
  91. # directory.
  92. def glob2(dirname, pattern):
  93. assert _isrecursive(pattern)
  94. yield pattern[:0]
  95. yield from _rlistdir(dirname)
  96. # Recursively yields relative pathnames inside a literal directory.
  97. def _rlistdir(dirname):
  98. if not dirname:
  99. if isinstance(dirname, bytes):
  100. dirname = bytes(os.curdir, 'ASCII')
  101. else:
  102. dirname = os.curdir
  103. try:
  104. names = os.listdir(dirname)
  105. except os.error:
  106. return
  107. for x in names:
  108. if not _ishidden(x):
  109. yield x
  110. path = os.path.join(dirname, x) if dirname else x
  111. for y in _rlistdir(path):
  112. yield os.path.join(x, y)
  113. magic_check = re.compile('([*?[])')
  114. magic_check_bytes = re.compile(b'([*?[])')
  115. def has_magic(s):
  116. if isinstance(s, bytes):
  117. match = magic_check_bytes.search(s)
  118. else:
  119. match = magic_check.search(s)
  120. return match is not None
  121. def _ishidden(path):
  122. return path[0] in ('.', b'.'[0])
  123. def _isrecursive(pattern):
  124. if isinstance(pattern, bytes):
  125. return pattern == b'**'
  126. else:
  127. return pattern == '**'
  128. def escape(pathname):
  129. """Escape all special characters.
  130. """
  131. # Escaping is done by wrapping any of "*?[" between square brackets.
  132. # Metacharacters do not work in the drive part and shouldn't be escaped.
  133. drive, pathname = os.path.splitdrive(pathname)
  134. if isinstance(pathname, bytes):
  135. pathname = magic_check_bytes.sub(br'[\1]', pathname)
  136. else:
  137. pathname = magic_check.sub(r'[\1]', pathname)
  138. return drive + pathname