perf.c 17 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759
  1. /* Copyright (C) 2002-2019 Free Software Foundation, Inc.
  2. This file is part of the GNU C Library.
  3. Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
  4. The GNU C Library is free software; you can redistribute it and/or
  5. modify it under the terms of the GNU Lesser General Public
  6. License as published by the Free Software Foundation; either
  7. version 2.1 of the License, or (at your option) any later version.
  8. The GNU C Library is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. Lesser General Public License for more details.
  12. You should have received a copy of the GNU Lesser General Public
  13. License along with the GNU C Library; if not, see
  14. <http://www.gnu.org/licenses/>. */
  15. #define _GNU_SOURCE 1
  16. #include <argp.h>
  17. #include <error.h>
  18. #include <errno.h>
  19. #include <fcntl.h>
  20. #include <inttypes.h>
  21. #include <limits.h>
  22. #include <pthread.h>
  23. #include <signal.h>
  24. #include <stdbool.h>
  25. #include <stdlib.h>
  26. #include <string.h>
  27. #include <time.h>
  28. #include <unistd.h>
  29. #include <sys/param.h>
  30. #include <sys/types.h>
  /* Largest value accepted for the thread-count options.  May be
     overridden on the compiler command line.  */
  #if !defined MAX_THREADS
  # define MAX_THREADS 100000
  #endif

  /* Number of concurrently used threads when --threads is not given.
     May be overridden on the compiler command line.  */
  #if !defined DEFAULT_THREADS
  # define DEFAULT_THREADS 50
  #endif
  /* Keys of the options which only have a long name.  */
  #define OPT_TO_THREAD 300
  #define OPT_TO_PROCESS 301
  #define OPT_SYNC_SIGNAL 302
  #define OPT_SYNC_JOIN 303
  #define OPT_TOPLEVEL 304

  /* Option table handed to argp.  Entries which consist of a
     documentation string only start a new group in the --help output;
     the all-zero entry terminates the table.  */
  static const struct argp_option options[] =
  {
    { NULL, 0, NULL, 0,
      "This is a test for threads so we allow the user to select the number of "
      "threads which are used at any one time. Independently the total number of "
      "rounds can be selected. This is the total number of threads which will have "
      "run when the process terminates:" },
    { "threads", 't', "NUMBER", 0, "Number of threads used at once" },
    { "starts", 's', "NUMBER", 0, "Total number of working threads" },
    { "toplevel", OPT_TOPLEVEL, "NUMBER", 0,
      "Number of toplevel threads which start the other threads; this "
      "implies --sync-join" },

    { NULL, 0, NULL, 0,
      "Each thread can do one of two things: sleep or do work. The latter is 100% "
      "CPU bound. The work load is the probability a thread does work. All values "
      "from zero to 100 (inclusive) are valid. How often each thread repeats this "
      "can be determined by the number of rounds. The work cost determines how long "
      "each work session (not sleeping) takes. If it is zero a thread would "
      "effectively do nothing. By setting the number of rounds to zero the thread "
      "does no work at all and pure thread creation times can be measured." },
    { "workload", 'w', "PERCENT", 0, "Percentage of time spent working" },
    { "workcost", 'c', "NUMBER", 0,
      "Factor in the cost of each round of working" },
    { "rounds", 'r', "NUMBER", 0, "Number of rounds each thread runs" },

    { NULL, 0, NULL, 0,
      "There are a number of different methods how thread creation can be "
      "synchronized. Synchronization is necessary since the number of concurrently "
      "running threads is limited." },
    { "sync-signal", OPT_SYNC_SIGNAL, NULL, 0,
      "Synchronize using a signal (default)" },
    { "sync-join", OPT_SYNC_JOIN, NULL, 0, "Synchronize using pthread_join" },

    { NULL, 0, NULL, 0,
      "One parameter for each thread's execution is the size of the stack. If this "
      "parameter is not used the system's default stack size is used. If many "
      "threads are used the stack size should be chosen quite small." },
    { "stacksize", 'S', "BYTES", 0, "Size of threads stack" },
    { "guardsize", 'g', "BYTES", 0,
      "Size of stack guard area; must fit into the stack" },

    { NULL, 0, NULL, 0, "Signal options:" },
    { "to-thread", OPT_TO_THREAD, NULL, 0, "Send signal to main thread" },
    { "to-process", OPT_TO_PROCESS, NULL, 0,
      "Send signal to process (default)" },

    { NULL, 0, NULL, 0, "Administrative options:" },
    { "progress", 'p', NULL, 0, "Show signs of progress" },
    { "timing", 'T', NULL, 0,
      "Measure time from startup to the last thread finishing" },

    { NULL, 0, NULL, 0, NULL }
  };
  90. /* Prototype for option handler. */
  91. static error_t parse_opt (int key, char *arg, struct argp_state *state);
  92. /* Data structure to communicate with argp functions. */
  93. static struct argp argp =
  94. {
  95. options, parse_opt
  96. };
  97. static unsigned long int threads = DEFAULT_THREADS;
  98. static unsigned long int workload = 75;
  99. static unsigned long int workcost = 20;
  100. static unsigned long int rounds = 10;
  101. static long int starts = 5000;
  102. static unsigned long int stacksize;
  103. static long int guardsize = -1;
  104. static bool progress;
  105. static bool timing;
  106. static bool to_thread;
  107. static unsigned long int toplevel = 1;
  108. static long int running;
  109. static pthread_mutex_t running_mutex = PTHREAD_MUTEX_INITIALIZER;
  110. static pid_t pid;
  111. static pthread_t tmain;
  112. static clockid_t cl;
  113. static struct timespec start_time;
  114. static pthread_mutex_t sum_mutex = PTHREAD_MUTEX_INITIALIZER;
  115. unsigned int sum;
  116. static enum
  117. {
  118. sync_signal,
  119. sync_join
  120. }
  121. sync_method;
  122. /* We use 64bit values for the times. */
  123. typedef unsigned long long int hp_timing_t;
  124. /* Attributes for all created threads. */
  125. static pthread_attr_t attr;
  126. static void *
  127. work (void *arg)
  128. {
  129. unsigned long int i;
  130. unsigned int state = (unsigned long int) arg;
  131. for (i = 0; i < rounds; ++i)
  132. {
  133. /* Determine what to do. */
  134. unsigned int rnum;
  135. /* Uniform distribution. */
  136. do
  137. rnum = rand_r (&state);
  138. while (rnum >= UINT_MAX - (UINT_MAX % 100));
  139. rnum %= 100;
  140. if (rnum < workload)
  141. {
  142. int j;
  143. int a[4] = { i, rnum, i + rnum, rnum - i };
  144. if (progress)
  145. write (STDERR_FILENO, "c", 1);
  146. for (j = 0; j < workcost; ++j)
  147. {
  148. a[0] += a[3] >> 12;
  149. a[1] += a[2] >> 20;
  150. a[2] += a[1] ^ 0x3423423;
  151. a[3] += a[0] - a[1];
  152. }
  153. pthread_mutex_lock (&sum_mutex);
  154. sum += a[0] + a[1] + a[2] + a[3];
  155. pthread_mutex_unlock (&sum_mutex);
  156. }
  157. else
  158. {
  159. /* Just sleep. */
  160. struct timespec tv;
  161. tv.tv_sec = 0;
  162. tv.tv_nsec = 10000000;
  163. if (progress)
  164. write (STDERR_FILENO, "w", 1);
  165. nanosleep (&tv, NULL);
  166. }
  167. }
  168. return NULL;
  169. }
  170. static void *
  171. thread_function (void *arg)
  172. {
  173. work (arg);
  174. pthread_mutex_lock (&running_mutex);
  175. if (--running <= 0 && starts <= 0)
  176. {
  177. /* We are done. */
  178. if (progress)
  179. write (STDERR_FILENO, "\n", 1);
  180. if (timing)
  181. {
  182. struct timespec end_time;
  183. if (clock_gettime (cl, &end_time) == 0)
  184. {
  185. end_time.tv_sec -= start_time.tv_sec;
  186. end_time.tv_nsec -= start_time.tv_nsec;
  187. if (end_time.tv_nsec < 0)
  188. {
  189. end_time.tv_nsec += 1000000000;
  190. --end_time.tv_sec;
  191. }
  192. printf ("\nRuntime: %lu.%09lu seconds\n",
  193. (unsigned long int) end_time.tv_sec,
  194. (unsigned long int) end_time.tv_nsec);
  195. }
  196. }
  197. printf ("Result: %08x\n", sum);
  198. exit (0);
  199. }
  200. pthread_mutex_unlock (&running_mutex);
  201. if (sync_method == sync_signal)
  202. {
  203. if (to_thread)
  204. /* This code sends a signal to the main thread. */
  205. pthread_kill (tmain, SIGUSR1);
  206. else
  207. /* Use this code to test sending a signal to the process. */
  208. kill (pid, SIGUSR1);
  209. }
  210. if (progress)
  211. write (STDERR_FILENO, "f", 1);
  212. return NULL;
  213. }
  /* Work order handed to one toplevel thread; see start_threads.  */
  struct start_info
  {
    /* Number of worker threads this toplevel thread creates in total.  */
    unsigned int starts;
    /* Number of slots for worker threads which are not yet joined.  */
    unsigned int threads;
  };
  219. static void *
  220. start_threads (void *arg)
  221. {
  222. struct start_info *si = arg;
  223. unsigned int starts = si->starts;
  224. pthread_t ths[si->threads];
  225. unsigned int state = starts;
  226. unsigned int n;
  227. unsigned int i = 0;
  228. int err;
  229. if (progress)
  230. write (STDERR_FILENO, "T", 1);
  231. memset (ths, '\0', sizeof (pthread_t) * si->threads);
  232. while (starts-- > 0)
  233. {
  234. if (ths[i] != 0)
  235. {
  236. /* Wait for the threads in the order they were created. */
  237. err = pthread_join (ths[i], NULL);
  238. if (err != 0)
  239. error (EXIT_FAILURE, err, "cannot join thread");
  240. if (progress)
  241. write (STDERR_FILENO, "f", 1);
  242. }
  243. err = pthread_create (&ths[i], &attr, work,
  244. (void *) (long) (rand_r (&state) + starts + i));
  245. if (err != 0)
  246. error (EXIT_FAILURE, err, "cannot start thread");
  247. if (progress)
  248. write (STDERR_FILENO, "t", 1);
  249. if (++i == si->threads)
  250. i = 0;
  251. }
  252. n = i;
  253. do
  254. {
  255. if (ths[i] != 0)
  256. {
  257. err = pthread_join (ths[i], NULL);
  258. if (err != 0)
  259. error (EXIT_FAILURE, err, "cannot join thread");
  260. if (progress)
  261. write (STDERR_FILENO, "f", 1);
  262. }
  263. if (++i == si->threads)
  264. i = 0;
  265. }
  266. while (i != n);
  267. if (progress)
  268. write (STDERR_FILENO, "F", 1);
  269. return NULL;
  270. }
  271. int
  272. main (int argc, char *argv[])
  273. {
  274. int remaining;
  275. sigset_t ss;
  276. pthread_t th;
  277. pthread_t *ths = NULL;
  278. int empty = 0;
  279. int last;
  280. bool cont = true;
  281. /* Parse and process arguments. */
  282. argp_parse (&argp, argc, argv, 0, &remaining, NULL);
  283. if (sync_method == sync_join)
  284. {
  285. ths = (pthread_t *) calloc (threads, sizeof (pthread_t));
  286. if (ths == NULL)
  287. error (EXIT_FAILURE, errno,
  288. "cannot allocate memory for thread descriptor array");
  289. last = threads;
  290. }
  291. else
  292. {
  293. ths = &th;
  294. last = 1;
  295. }
  296. if (toplevel > threads)
  297. {
  298. printf ("resetting number of toplevel threads to %lu to not surpass number to concurrent threads\n",
  299. threads);
  300. toplevel = threads;
  301. }
  302. if (timing)
  303. {
  304. if (clock_getcpuclockid (0, &cl) != 0
  305. || clock_gettime (cl, &start_time) != 0)
  306. timing = false;
  307. }
  308. /* We need this later. */
  309. pid = getpid ();
  310. tmain = pthread_self ();
  311. /* We use signal SIGUSR1 for communication between the threads and
  312. the main thread. We only want sychronous notification. */
  313. if (sync_method == sync_signal)
  314. {
  315. sigemptyset (&ss);
  316. sigaddset (&ss, SIGUSR1);
  317. if (sigprocmask (SIG_BLOCK, &ss, NULL) != 0)
  318. error (EXIT_FAILURE, errno, "cannot set signal mask");
  319. }
  320. /* Create the thread attributes. */
  321. pthread_attr_init (&attr);
  322. /* If the user provided a stack size use it. */
  323. if (stacksize != 0
  324. && pthread_attr_setstacksize (&attr, stacksize) != 0)
  325. puts ("could not set stack size; will use default");
  326. /* And stack guard size. */
  327. if (guardsize != -1
  328. && pthread_attr_setguardsize (&attr, guardsize) != 0)
  329. puts ("invalid stack guard size; will use default");
  330. /* All threads are created detached if we are not using pthread_join
  331. to synchronize. */
  332. if (sync_method != sync_join)
  333. pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
  334. if (sync_method == sync_signal)
  335. {
  336. while (1)
  337. {
  338. int err;
  339. bool do_wait = false;
  340. pthread_mutex_lock (&running_mutex);
  341. if (starts-- < 0)
  342. cont = false;
  343. else
  344. do_wait = ++running >= threads && starts > 0;
  345. pthread_mutex_unlock (&running_mutex);
  346. if (! cont)
  347. break;
  348. if (progress)
  349. write (STDERR_FILENO, "t", 1);
  350. err = pthread_create (&ths[empty], &attr, thread_function,
  351. (void *) starts);
  352. if (err != 0)
  353. error (EXIT_FAILURE, err, "cannot start thread %lu", starts);
  354. if (++empty == last)
  355. empty = 0;
  356. if (do_wait)
  357. sigwaitinfo (&ss, NULL);
  358. }
  359. /* Do nothing anymore. On of the threads will terminate the program. */
  360. sigfillset (&ss);
  361. sigdelset (&ss, SIGINT);
  362. while (1)
  363. sigsuspend (&ss);
  364. }
  365. else
  366. {
  367. pthread_t ths[toplevel];
  368. struct start_info si[toplevel];
  369. unsigned int i;
  370. for (i = 0; i < toplevel; ++i)
  371. {
  372. unsigned int child_starts = starts / (toplevel - i);
  373. unsigned int child_threads = threads / (toplevel - i);
  374. int err;
  375. si[i].starts = child_starts;
  376. si[i].threads = child_threads;
  377. err = pthread_create (&ths[i], &attr, start_threads, &si[i]);
  378. if (err != 0)
  379. error (EXIT_FAILURE, err, "cannot start thread");
  380. starts -= child_starts;
  381. threads -= child_threads;
  382. }
  383. for (i = 0; i < toplevel; ++i)
  384. {
  385. int err = pthread_join (ths[i], NULL);
  386. if (err != 0)
  387. error (EXIT_FAILURE, err, "cannot join thread");
  388. }
  389. /* We are done. */
  390. if (progress)
  391. write (STDERR_FILENO, "\n", 1);
  392. if (timing)
  393. {
  394. struct timespec end_time;
  395. if (clock_gettime (cl, &end_time) == 0)
  396. {
  397. end_time.tv_sec -= start_time.tv_sec;
  398. end_time.tv_nsec -= start_time.tv_nsec;
  399. if (end_time.tv_nsec < 0)
  400. {
  401. end_time.tv_nsec += 1000000000;
  402. --end_time.tv_sec;
  403. }
  404. printf ("\nRuntime: %lu.%09lu seconds\n",
  405. (unsigned long int) end_time.tv_sec,
  406. (unsigned long int) end_time.tv_nsec);
  407. }
  408. }
  409. printf ("Result: %08x\n", sum);
  410. exit (0);
  411. }
  412. /* NOTREACHED */
  413. return 0;
  414. }
  415. /* Handle program arguments. */
  416. static error_t
  417. parse_opt (int key, char *arg, struct argp_state *state)
  418. {
  419. unsigned long int num;
  420. long int snum;
  421. switch (key)
  422. {
  423. case 't':
  424. num = strtoul (arg, NULL, 0);
  425. if (num <= MAX_THREADS)
  426. threads = num;
  427. else
  428. printf ("\
  429. number of threads limited to %u; recompile with a higher limit if necessary",
  430. MAX_THREADS);
  431. break;
  432. case 'w':
  433. num = strtoul (arg, NULL, 0);
  434. if (num <= 100)
  435. workload = num;
  436. else
  437. puts ("workload must be between 0 and 100 percent");
  438. break;
  439. case 'c':
  440. workcost = strtoul (arg, NULL, 0);
  441. break;
  442. case 'r':
  443. rounds = strtoul (arg, NULL, 0);
  444. break;
  445. case 's':
  446. starts = strtoul (arg, NULL, 0);
  447. break;
  448. case 'S':
  449. num = strtoul (arg, NULL, 0);
  450. if (num >= PTHREAD_STACK_MIN)
  451. stacksize = num;
  452. else
  453. printf ("minimum stack size is %d\n", PTHREAD_STACK_MIN);
  454. break;
  455. case 'g':
  456. snum = strtol (arg, NULL, 0);
  457. if (snum < 0)
  458. printf ("invalid guard size %s\n", arg);
  459. else
  460. guardsize = snum;
  461. break;
  462. case 'p':
  463. progress = true;
  464. break;
  465. case 'T':
  466. timing = true;
  467. break;
  468. case OPT_TO_THREAD:
  469. to_thread = true;
  470. break;
  471. case OPT_TO_PROCESS:
  472. to_thread = false;
  473. break;
  474. case OPT_SYNC_SIGNAL:
  475. sync_method = sync_signal;
  476. break;
  477. case OPT_SYNC_JOIN:
  478. sync_method = sync_join;
  479. break;
  480. case OPT_TOPLEVEL:
  481. num = strtoul (arg, NULL, 0);
  482. if (num < MAX_THREADS)
  483. toplevel = num;
  484. else
  485. printf ("\
  486. number of threads limited to %u; recompile with a higher limit if necessary",
  487. MAX_THREADS);
  488. sync_method = sync_join;
  489. break;
  490. default:
  491. return ARGP_ERR_UNKNOWN;
  492. }
  493. return 0;
  494. }
  495. static hp_timing_t
  496. get_clockfreq (void)
  497. {
  498. /* We read the information from the /proc filesystem. It contains at
  499. least one line like
  500. cpu MHz : 497.840237
  501. or also
  502. cpu MHz : 497.841
  503. We search for this line and convert the number in an integer. */
  504. static hp_timing_t result;
  505. int fd;
  506. /* If this function was called before, we know the result. */
  507. if (result != 0)
  508. return result;
  509. fd = open ("/proc/cpuinfo", O_RDONLY);
  510. if (__glibc_likely (fd != -1))
  511. {
  512. /* XXX AFAIK the /proc filesystem can generate "files" only up
  513. to a size of 4096 bytes. */
  514. char buf[4096];
  515. ssize_t n;
  516. n = read (fd, buf, sizeof buf);
  517. if (__builtin_expect (n, 1) > 0)
  518. {
  519. char *mhz = memmem (buf, n, "cpu MHz", 7);
  520. if (__glibc_likely (mhz != NULL))
  521. {
  522. char *endp = buf + n;
  523. int seen_decpoint = 0;
  524. int ndigits = 0;
  525. /* Search for the beginning of the string. */
  526. while (mhz < endp && (*mhz < '0' || *mhz > '9') && *mhz != '\n')
  527. ++mhz;
  528. while (mhz < endp && *mhz != '\n')
  529. {
  530. if (*mhz >= '0' && *mhz <= '9')
  531. {
  532. result *= 10;
  533. result += *mhz - '0';
  534. if (seen_decpoint)
  535. ++ndigits;
  536. }
  537. else if (*mhz == '.')
  538. seen_decpoint = 1;
  539. ++mhz;
  540. }
  541. /* Compensate for missing digits at the end. */
  542. while (ndigits++ < 6)
  543. result *= 10;
  544. }
  545. }
  546. close (fd);
  547. }
  548. return result;
  549. }
  /* Store in *CLOCK_ID the ID of the CPU-time clock of process PID.

     Only the calling process is supported: PID must be zero or the
     caller's own process ID, otherwise EPERM is returned.  Returns zero
     on success and ENOENT if CLOCK_PROCESS_CPUTIME_ID is not available
     at compile time.  The error is the return value; errno is not set.  */
  int
  clock_getcpuclockid (pid_t pid, clockid_t *clock_id)
  {
    /* We don't allow any process ID but our own.  */
    if (! (pid == 0 || pid == getpid ()))
      return EPERM;

  #ifndef CLOCK_PROCESS_CPUTIME_ID
    /* We don't have a timer for that.  */
    return ENOENT;
  #else
    /* Store the number.  */
    *clock_id = CLOCK_PROCESS_CPUTIME_ID;
    return 0;
  #endif
  }
  /* HP_TIMING_NOW (Var) stores the current value of the CPU's cycle
     counter in Var, which must be a 64-bit integer lvalue.  The macro is
     a statement, not an expression.

     The compiler's own architecture macros are tested since plain `i386'
     is not predefined in strict ISO C mode, and `__asm__' is used for the
     same reason instead of `asm'.  */
  #if defined __i386__ || defined i386
  # define HP_TIMING_NOW(Var) __asm__ __volatile__ ("rdtsc" : "=A" (Var))
  #elif defined __x86_64__
  # define HP_TIMING_NOW(Var) \
    do \
      { \
        unsigned int _hi, _lo; \
        __asm__ __volatile__ ("rdtsc" : "=a" (_lo), "=d" (_hi)); \
        (Var) = ((unsigned long long int) _hi << 32) | _lo; \
      } \
    while (0)
  #elif defined __ia64__
  # define HP_TIMING_NOW(Var) \
    __asm__ __volatile__ ("mov %0=ar.itc" : "=r" (Var) : : "memory")
  #else
  # error "HP_TIMING_NOW missing"
  #endif
  577. /* Get current value of CLOCK and store it in TP. */
  578. int
  579. clock_gettime (clockid_t clock_id, struct timespec *tp)
  580. {
  581. int retval = -1;
  582. switch (clock_id)
  583. {
  584. case CLOCK_PROCESS_CPUTIME_ID:
  585. {
  586. static hp_timing_t freq;
  587. hp_timing_t tsc;
  588. /* Get the current counter. */
  589. HP_TIMING_NOW (tsc);
  590. if (freq == 0)
  591. {
  592. freq = get_clockfreq ();
  593. if (freq == 0)
  594. return EINVAL;
  595. }
  596. /* Compute the seconds. */
  597. tp->tv_sec = tsc / freq;
  598. /* And the nanoseconds. This computation should be stable until
  599. we get machines with about 16GHz frequency. */
  600. tp->tv_nsec = ((tsc % freq) * UINT64_C (1000000000)) / freq;
  601. retval = 0;
  602. }
  603. break;
  604. default:
  605. errno = EINVAL;
  606. break;
  607. }
  608. return retval;
  609. }