linknamespace.py 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217
  1. #!/usr/bin/python
  2. # Check that use of symbols declared in a given header does not result
  3. # in any symbols being brought in that are not reserved with external
  4. # linkage for the given standard.
  5. # Copyright (C) 2014-2019 Free Software Foundation, Inc.
  6. # This file is part of the GNU C Library.
  7. #
  8. # The GNU C Library is free software; you can redistribute it and/or
  9. # modify it under the terms of the GNU Lesser General Public
  10. # License as published by the Free Software Foundation; either
  11. # version 2.1 of the License, or (at your option) any later version.
  12. #
  13. # The GNU C Library is distributed in the hope that it will be useful,
  14. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16. # Lesser General Public License for more details.
  17. #
  18. # You should have received a copy of the GNU Lesser General Public
  19. # License along with the GNU C Library; if not, see
  20. # <http://www.gnu.org/licenses/>.
import argparse
from collections import defaultdict
import os.path
import re
import shlex
import subprocess
import sys
import tempfile

import glibcconform
  29. # The following whitelisted symbols are also allowed for now.
  30. #
  31. # * Bug 17576: stdin, stdout, stderr only reserved with external
  32. # linkage when stdio.h included (and possibly not then), not
  33. # generally.
  34. #
  35. # * Bug 18442: re_syntax_options wrongly brought in by regcomp and
  36. # used by re_comp.
  37. #
  38. WHITELIST = {'stdin', 'stdout', 'stderr', 're_syntax_options'}
  39. def list_syms(filename):
  40. """Return information about GLOBAL and WEAK symbols listed in readelf
  41. -s output."""
  42. ret = []
  43. cur_file = filename
  44. with open(filename, 'r') as syms_file:
  45. for line in syms_file:
  46. line = line.rstrip()
  47. if line.startswith('File: '):
  48. cur_file = line[len('File: '):]
  49. cur_file = cur_file.split('/')[-1]
  50. continue
  51. # Architecture-specific st_other bits appear inside [] and
  52. # disrupt the format of readelf output.
  53. line = re.sub(r'\[.*?\]', '', line)
  54. fields = line.split()
  55. if len(fields) < 8:
  56. continue
  57. bind = fields[4]
  58. ndx = fields[6]
  59. sym = fields[7]
  60. if bind not in ('GLOBAL', 'WEAK'):
  61. continue
  62. if not re.fullmatch('[A-Za-z0-9_]+', sym):
  63. continue
  64. ret.append((cur_file, sym, bind, ndx != 'UND'))
  65. return ret
  66. def main():
  67. """The main entry point."""
  68. parser = argparse.ArgumentParser(description='Check link-time namespace.')
  69. parser.add_argument('--header', metavar='HEADER',
  70. help='name of header')
  71. parser.add_argument('--standard', metavar='STD',
  72. help='standard to use when processing header')
  73. parser.add_argument('--cc', metavar='CC',
  74. help='C compiler to use')
  75. parser.add_argument('--flags', metavar='CFLAGS',
  76. help='Compiler flags to use with CC')
  77. parser.add_argument('--stdsyms', metavar='FILE',
  78. help='File with list of standard symbols')
  79. parser.add_argument('--libsyms', metavar='FILE',
  80. help='File with symbol information from libraries')
  81. parser.add_argument('--readelf', metavar='READELF',
  82. help='readelf program to use')
  83. args = parser.parse_args()
  84. # Load the list of symbols that are OK.
  85. stdsyms = set()
  86. with open(args.stdsyms, 'r') as stdsyms_file:
  87. for line in stdsyms_file:
  88. stdsyms.add(line.rstrip())
  89. stdsyms |= WHITELIST
  90. # Load information about GLOBAL and WEAK symbols defined or used
  91. # in the standard libraries.
  92. # Symbols from a given object, except for weak defined symbols.
  93. seen_syms = defaultdict(list)
  94. # Strong undefined symbols from a given object.
  95. strong_undef_syms = defaultdict(list)
  96. # Objects defining a given symbol (strongly or weakly).
  97. sym_objs = defaultdict(list)
  98. for file, name, bind, defined in list_syms(args.libsyms):
  99. if defined:
  100. sym_objs[name].append(file)
  101. if bind == 'GLOBAL' or not defined:
  102. seen_syms[file].append(name)
  103. if bind == 'GLOBAL' and not defined:
  104. strong_undef_syms[file].append(name)
  105. # Determine what ELF-level symbols are brought in by use of C-level
  106. # symbols declared in the given header.
  107. #
  108. # The rules followed are heuristic and so may produce false
  109. # positives and false negatives.
  110. #
  111. # * All undefined symbols are considered of signficance, but it is
  112. # possible that (a) any standard library definition is weak, so
  113. # can be overridden by the user's definition, and (b) the symbol
  114. # is only used conditionally and not if the program is limited to
  115. # standard functionality.
  116. #
  117. # * If a symbol reference is only brought in by the user using a
  118. # data symbol rather than a function from the standard library,
  119. # this will not be detected.
  120. #
  121. # * If a symbol reference is only brought in by crt*.o or libgcc,
  122. # this will not be detected.
  123. #
  124. # * If a symbol reference is only brought in through __builtin_foo
  125. # in a standard macro being compiled to call foo, this will not be
  126. # detected.
  127. #
  128. # * Header inclusions should be compiled several times with
  129. # different options such as -O2, -D_FORTIFY_SOURCE and
  130. # -D_FILE_OFFSET_BITS=64 to find out what symbols are undefined
  131. # from such a compilation; this is not yet implemented.
  132. #
  133. # * This script finds symbols referenced through use of macros on
  134. # the basis that if a macro calls an internal function, that
  135. # function must also be declared in the header. However, the
  136. # header might also declare implementation-namespace functions
  137. # that are not called by any standard macro in the header,
  138. # resulting in false positives for any symbols brought in only
  139. # through use of those implementation-namespace functions.
  140. #
  141. # * Namespace issues can apply for dynamic linking as well as
  142. # static linking, when a call is from one shared library to
  143. # another or uses a PLT entry for a call within a shared library;
  144. # such issues are only detected by this script if the same
  145. # namespace issue applies for static linking.
  146. seen_where = {}
  147. files_seen = set()
  148. all_undef = {}
  149. current_undef = {}
  150. compiler = '%s %s' % (args.cc, args.flags)
  151. c_syms = glibcconform.list_exported_functions(compiler, args.standard,
  152. args.header)
  153. with tempfile.TemporaryDirectory() as temp_dir:
  154. cincfile_name = os.path.join(temp_dir, 'undef.c')
  155. cincfile_o_name = os.path.join(temp_dir, 'undef.o')
  156. cincfile_sym_name = os.path.join(temp_dir, 'undef.sym')
  157. cincfile_text = ('#include <%s>\n%s\n'
  158. % (args.header,
  159. '\n'.join('void *__glibc_test_%s = (void *) &%s;'
  160. % (sym, sym) for sym in sorted(c_syms))))
  161. with open(cincfile_name, 'w') as cincfile:
  162. cincfile.write(cincfile_text)
  163. cmd = ('%s %s -D_ISOMAC %s -c %s -o %s'
  164. % (args.cc, args.flags, glibcconform.CFLAGS[args.standard],
  165. cincfile_name, cincfile_o_name))
  166. subprocess.check_call(cmd, shell=True)
  167. cmd = ('LC_ALL=C %s -W -s %s > %s'
  168. % (args.readelf, cincfile_o_name, cincfile_sym_name))
  169. subprocess.check_call(cmd, shell=True)
  170. for file, name, bind, defined in list_syms(cincfile_sym_name):
  171. if bind == 'GLOBAL' and not defined:
  172. sym_text = '[initial] %s' % name
  173. seen_where[name] = sym_text
  174. all_undef[name] = sym_text
  175. current_undef[name] = sym_text
  176. while current_undef:
  177. new_undef = {}
  178. for sym, cu_sym in sorted(current_undef.items()):
  179. for file in sym_objs[sym]:
  180. if file in files_seen:
  181. continue
  182. files_seen.add(file)
  183. for ssym in seen_syms[file]:
  184. if ssym not in seen_where:
  185. seen_where[ssym] = ('%s -> [%s] %s'
  186. % (cu_sym, file, ssym))
  187. for usym in strong_undef_syms[file]:
  188. if usym not in all_undef:
  189. usym_text = '%s -> [%s] %s' % (cu_sym, file, usym)
  190. all_undef[usym] = usym_text
  191. new_undef[usym] = usym_text
  192. current_undef = new_undef
  193. ret = 0
  194. for sym in sorted(seen_where):
  195. if sym.startswith('_'):
  196. continue
  197. if sym in stdsyms:
  198. continue
  199. print(seen_where[sym])
  200. ret = 1
  201. sys.exit(ret)
  202. if __name__ == '__main__':
  203. main()