cpu-tunables.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385
  1. /* x86 CPU feature tuning.
  2. This file is part of the GNU C Library.
  3. Copyright (C) 2017-2019 Free Software Foundation, Inc.
  4. The GNU C Library is free software; you can redistribute it and/or
  5. modify it under the terms of the GNU Lesser General Public
  6. License as published by the Free Software Foundation; either
  7. version 2.1 of the License, or (at your option) any later version.
  8. The GNU C Library is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. Lesser General Public License for more details.
  12. You should have received a copy of the GNU Lesser General Public
  13. License along with the GNU C Library; if not, see
  14. <http://www.gnu.org/licenses/>. */
  15. #if HAVE_TUNABLES
  16. # define TUNABLE_NAMESPACE cpu
  17. # include <stdbool.h>
  18. # include <stdint.h>
  19. # include <unistd.h> /* Get STDOUT_FILENO for _dl_printf. */
  20. # include <elf/dl-tunables.h>
  21. # include <string.h>
  22. # include <cpu-features.h>
  23. # include <ldsodefs.h>
  24. /* We can't use IFUNC memcmp nor strlen in init_cpu_features from libc.a
  25. since IFUNC must be set up by init_cpu_features. */
  26. # if defined USE_MULTIARCH && !defined SHARED
  27. # ifdef __x86_64__
  28. # define DEFAULT_MEMCMP __memcmp_sse2
  29. # else
  30. # define DEFAULT_MEMCMP __memcmp_ia32
  31. # endif
  32. extern __typeof (memcmp) DEFAULT_MEMCMP;
  33. # else
  34. # define DEFAULT_MEMCMP memcmp
  35. # endif
  36. # define CHECK_GLIBC_IFUNC_CPU_OFF(f, cpu_features, name, len) \
  37. _Static_assert (sizeof (#name) - 1 == len, #name " != " #len); \
  38. if (!DEFAULT_MEMCMP (f, #name, len)) \
  39. { \
  40. cpu_features->cpuid[index_cpu_##name].reg_##name \
  41. &= ~bit_cpu_##name; \
  42. break; \
  43. }
  44. /* Disable an ARCH feature NAME. We don't enable an ARCH feature which
  45. isn't available. */
  46. # define CHECK_GLIBC_IFUNC_ARCH_OFF(f, cpu_features, name, len) \
  47. _Static_assert (sizeof (#name) - 1 == len, #name " != " #len); \
  48. if (!DEFAULT_MEMCMP (f, #name, len)) \
  49. { \
  50. cpu_features->feature[index_arch_##name] \
  51. &= ~bit_arch_##name; \
  52. break; \
  53. }
  54. /* Enable/disable an ARCH feature NAME. */
  55. # define CHECK_GLIBC_IFUNC_ARCH_BOTH(f, cpu_features, name, disable, \
  56. len) \
  57. _Static_assert (sizeof (#name) - 1 == len, #name " != " #len); \
  58. if (!DEFAULT_MEMCMP (f, #name, len)) \
  59. { \
  60. if (disable) \
  61. cpu_features->feature[index_arch_##name] \
  62. &= ~bit_arch_##name; \
  63. else \
  64. cpu_features->feature[index_arch_##name] \
  65. |= bit_arch_##name; \
  66. break; \
  67. }
  68. /* Enable/disable an ARCH feature NAME. Enable an ARCH feature only
  69. if the ARCH feature NEED is also enabled. */
  70. # define CHECK_GLIBC_IFUNC_ARCH_NEED_ARCH_BOTH(f, cpu_features, name, \
  71. need, disable, len) \
  72. _Static_assert (sizeof (#name) - 1 == len, #name " != " #len); \
  73. if (!DEFAULT_MEMCMP (f, #name, len)) \
  74. { \
  75. if (disable) \
  76. cpu_features->feature[index_arch_##name] \
  77. &= ~bit_arch_##name; \
  78. else if (CPU_FEATURES_ARCH_P (cpu_features, need)) \
  79. cpu_features->feature[index_arch_##name] \
  80. |= bit_arch_##name; \
  81. break; \
  82. }
  83. /* Enable/disable an ARCH feature NAME. Enable an ARCH feature only
  84. if the CPU feature NEED is also enabled. */
  85. # define CHECK_GLIBC_IFUNC_ARCH_NEED_CPU_BOTH(f, cpu_features, name, \
  86. need, disable, len) \
  87. _Static_assert (sizeof (#name) - 1 == len, #name " != " #len); \
  88. if (!DEFAULT_MEMCMP (f, #name, len)) \
  89. { \
  90. if (disable) \
  91. cpu_features->feature[index_arch_##name] \
  92. &= ~bit_arch_##name; \
  93. else if (CPU_FEATURES_CPU_P (cpu_features, need)) \
  94. cpu_features->feature[index_arch_##name] \
  95. |= bit_arch_##name; \
  96. break; \
  97. }
  98. attribute_hidden
  99. void
  100. TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp)
  101. {
  102. /* The current IFUNC selection is based on microbenchmarks in glibc.
  103. It should give the best performance for most workloads. But other
  104. choices may have better performance for a particular workload or on
  105. the hardware which wasn't available when the selection was made.
  106. The environment variable:
  107. GLIBC_TUNABLES=glibc.cpu.hwcaps=-xxx,yyy,-zzz,....
  108. can be used to enable CPU/ARCH feature yyy, disable CPU/ARCH feature
  109. yyy and zzz, where the feature name is case-sensitive and has to
  110. match the ones in cpu-features.h. It can be used by glibc developers
  111. to tune for a new processor or override the IFUNC selection to
  112. improve performance for a particular workload.
  113. NOTE: the IFUNC selection may change over time. Please check all
  114. multiarch implementations when experimenting. */
  115. const char *p = valp->strval;
  116. struct cpu_features *cpu_features = &GLRO(dl_x86_cpu_features);
  117. size_t len;
  118. do
  119. {
  120. const char *c, *n;
  121. bool disable;
  122. size_t nl;
  123. for (c = p; *c != ','; c++)
  124. if (*c == '\0')
  125. break;
  126. len = c - p;
  127. disable = *p == '-';
  128. if (disable)
  129. {
  130. n = p + 1;
  131. nl = len - 1;
  132. }
  133. else
  134. {
  135. n = p;
  136. nl = len;
  137. }
  138. switch (nl)
  139. {
  140. default:
  141. break;
  142. case 3:
  143. if (disable)
  144. {
  145. CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, AVX, 3);
  146. CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, CX8, 3);
  147. CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, FMA, 3);
  148. CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, HTT, 3);
  149. CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, IBT, 3);
  150. CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, RTM, 3);
  151. }
  152. break;
  153. case 4:
  154. if (disable)
  155. {
  156. CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, AVX2, 4);
  157. CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, BMI1, 4);
  158. CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, BMI2, 4);
  159. CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, CMOV, 4);
  160. CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, ERMS, 4);
  161. CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, FMA4, 4);
  162. CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, SSE2, 4);
  163. CHECK_GLIBC_IFUNC_ARCH_OFF (n, cpu_features, I586, 4);
  164. CHECK_GLIBC_IFUNC_ARCH_OFF (n, cpu_features, I686, 4);
  165. }
  166. break;
  167. case 5:
  168. if (disable)
  169. {
  170. CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, LZCNT, 5);
  171. CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, MOVBE, 5);
  172. CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, SHSTK, 5);
  173. CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, SSSE3, 5);
  174. }
  175. break;
  176. case 6:
  177. if (disable)
  178. {
  179. CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, POPCNT, 6);
  180. CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, SSE4_1, 6);
  181. CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, SSE4_2, 6);
  182. }
  183. break;
  184. case 7:
  185. if (disable)
  186. {
  187. CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, AVX512F, 7);
  188. CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, OSXSAVE, 7);
  189. }
  190. break;
  191. case 8:
  192. if (disable)
  193. {
  194. CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, AVX512CD, 8);
  195. CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, AVX512BW, 8);
  196. CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, AVX512DQ, 8);
  197. CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, AVX512ER, 8);
  198. CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, AVX512PF, 8);
  199. CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, AVX512VL, 8);
  200. }
  201. CHECK_GLIBC_IFUNC_ARCH_BOTH (n, cpu_features, Slow_BSF,
  202. disable, 8);
  203. break;
  204. case 10:
  205. if (disable)
  206. {
  207. CHECK_GLIBC_IFUNC_ARCH_OFF (n, cpu_features, AVX_Usable,
  208. 10);
  209. CHECK_GLIBC_IFUNC_ARCH_OFF (n, cpu_features, FMA_Usable,
  210. 10);
  211. }
  212. break;
  213. case 11:
  214. if (disable)
  215. {
  216. CHECK_GLIBC_IFUNC_ARCH_OFF (n, cpu_features, AVX2_Usable,
  217. 11);
  218. CHECK_GLIBC_IFUNC_ARCH_OFF (n, cpu_features, FMA4_Usable,
  219. 11);
  220. }
  221. CHECK_GLIBC_IFUNC_ARCH_BOTH (n, cpu_features, Prefer_ERMS,
  222. disable, 11);
  223. CHECK_GLIBC_IFUNC_ARCH_NEED_CPU_BOTH (n, cpu_features,
  224. Slow_SSE4_2, SSE4_2,
  225. disable, 11);
  226. CHECK_GLIBC_IFUNC_ARCH_BOTH (n, cpu_features, Prefer_FSRM,
  227. disable, 11);
  228. break;
  229. case 13:
  230. if (disable)
  231. {
  232. /* Update xsave_state_size to XSAVE state size. */
  233. cpu_features->xsave_state_size
  234. = cpu_features->xsave_state_full_size;
  235. CHECK_GLIBC_IFUNC_ARCH_OFF (n, cpu_features,
  236. XSAVEC_Usable, 13);
  237. }
  238. break;
  239. case 14:
  240. if (disable)
  241. {
  242. CHECK_GLIBC_IFUNC_ARCH_OFF (n, cpu_features,
  243. AVX512F_Usable, 14);
  244. }
  245. break;
  246. case 15:
  247. if (disable)
  248. {
  249. CHECK_GLIBC_IFUNC_ARCH_OFF (n, cpu_features,
  250. AVX512DQ_Usable, 15);
  251. }
  252. CHECK_GLIBC_IFUNC_ARCH_BOTH (n, cpu_features, Fast_Rep_String,
  253. disable, 15);
  254. break;
  255. case 16:
  256. {
  257. CHECK_GLIBC_IFUNC_ARCH_NEED_ARCH_BOTH
  258. (n, cpu_features, Prefer_No_AVX512, AVX512F_Usable,
  259. disable, 16);
  260. }
  261. break;
  262. case 18:
  263. {
  264. CHECK_GLIBC_IFUNC_ARCH_BOTH (n, cpu_features,
  265. Fast_Copy_Backward, disable,
  266. 18);
  267. }
  268. break;
  269. case 19:
  270. {
  271. CHECK_GLIBC_IFUNC_ARCH_BOTH (n, cpu_features,
  272. Fast_Unaligned_Load, disable,
  273. 19);
  274. CHECK_GLIBC_IFUNC_ARCH_BOTH (n, cpu_features,
  275. Fast_Unaligned_Copy, disable,
  276. 19);
  277. }
  278. break;
  279. case 20:
  280. {
  281. CHECK_GLIBC_IFUNC_ARCH_NEED_ARCH_BOTH
  282. (n, cpu_features, Prefer_No_VZEROUPPER, AVX_Usable,
  283. disable, 20);
  284. }
  285. break;
  286. case 21:
  287. {
  288. CHECK_GLIBC_IFUNC_ARCH_BOTH (n, cpu_features,
  289. Prefer_MAP_32BIT_EXEC, disable,
  290. 21);
  291. }
  292. break;
  293. case 23:
  294. {
  295. CHECK_GLIBC_IFUNC_ARCH_NEED_ARCH_BOTH
  296. (n, cpu_features, AVX_Fast_Unaligned_Load, AVX_Usable,
  297. disable, 23);
  298. }
  299. break;
  300. case 24:
  301. {
  302. CHECK_GLIBC_IFUNC_ARCH_NEED_ARCH_BOTH
  303. (n, cpu_features, MathVec_Prefer_No_AVX512,
  304. AVX512F_Usable, disable, 24);
  305. }
  306. break;
  307. case 26:
  308. {
  309. CHECK_GLIBC_IFUNC_ARCH_NEED_CPU_BOTH
  310. (n, cpu_features, Prefer_PMINUB_for_stringop, SSE2,
  311. disable, 26);
  312. }
  313. break;
  314. }
  315. p += len + 1;
  316. }
  317. while (*p != '\0');
  318. }
  319. # if CET_ENABLED
  320. # include <cet-tunables.h>
  321. attribute_hidden
  322. void
  323. TUNABLE_CALLBACK (set_x86_ibt) (tunable_val_t *valp)
  324. {
  325. if (DEFAULT_MEMCMP (valp->strval, "on", sizeof ("on")) == 0)
  326. {
  327. GL(dl_x86_feature_1)[1] &= ~((1 << CET_MAX) - 1);
  328. GL(dl_x86_feature_1)[1] |= CET_ALWAYS_ON;
  329. }
  330. else if (DEFAULT_MEMCMP (valp->strval, "off", sizeof ("off")) == 0)
  331. {
  332. GL(dl_x86_feature_1)[1] &= ~((1 << CET_MAX) - 1);
  333. GL(dl_x86_feature_1)[1] |= CET_ALWAYS_OFF;
  334. }
  335. else if (DEFAULT_MEMCMP (valp->strval, "permissive",
  336. sizeof ("permissive")) == 0)
  337. {
  338. GL(dl_x86_feature_1)[1] &= ~((1 << CET_MAX) - 1);
  339. GL(dl_x86_feature_1)[1] |= CET_PERMISSIVE;
  340. }
  341. }
  342. attribute_hidden
  343. void
  344. TUNABLE_CALLBACK (set_x86_shstk) (tunable_val_t *valp)
  345. {
  346. if (DEFAULT_MEMCMP (valp->strval, "on", sizeof ("on")) == 0)
  347. {
  348. GL(dl_x86_feature_1)[1] &= ~(((1 << CET_MAX) - 1) << CET_MAX);
  349. GL(dl_x86_feature_1)[1] |= (CET_ALWAYS_ON << CET_MAX);
  350. }
  351. else if (DEFAULT_MEMCMP (valp->strval, "off", sizeof ("off")) == 0)
  352. {
  353. GL(dl_x86_feature_1)[1] &= ~(((1 << CET_MAX) - 1) << CET_MAX);
  354. GL(dl_x86_feature_1)[1] |= (CET_ALWAYS_OFF << CET_MAX);
  355. }
  356. else if (DEFAULT_MEMCMP (valp->strval, "permissive",
  357. sizeof ("permissive")) == 0)
  358. {
  359. GL(dl_x86_feature_1)[1] &= ~(((1 << CET_MAX) - 1) << CET_MAX);
  360. GL(dl_x86_feature_1)[1] |= (CET_PERMISSIVE << CET_MAX);
  361. }
  362. }
  363. # endif
  364. #endif