/*
 * dm-block-manager.c
 *
 * Copyright (C) 2011 Red Hat, Inc.
 *
 * This file is released under the GPL.
 */
#include "dm-block-manager.h"
#include "dm-persistent-data-internal.h"
#include "../dm-bufio.h"

#include <linux/crc32c.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/rwsem.h>
#include <linux/device-mapper.h>
#include <linux/stacktrace.h>

#define DM_MSG_PREFIX "block manager"

/*----------------------------------------------------------------*/

/*
 * This is a read/write semaphore with a couple of differences.
 *
 * i) There is a restriction on the number of concurrent read locks that
 * may be held at once.  This is just an implementation detail.
 *
 * ii) Recursive locking attempts are detected and return EINVAL.  A stack
 * trace is also emitted for the previous lock acquisition.
 *
 * iii) Priority is given to write locks.
 */
#define MAX_HOLDERS 4
#define MAX_STACK 10

typedef unsigned long stack_entries[MAX_STACK];

struct block_lock {
	spinlock_t lock;
	__s32 count;
	struct list_head waiters;
	struct task_struct *holders[MAX_HOLDERS];

#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	struct stack_trace traces[MAX_HOLDERS];
	stack_entries entries[MAX_HOLDERS];
#endif
};

struct waiter {
	struct list_head list;
	struct task_struct *task;
	int wants_write;
};

static unsigned __find_holder(struct block_lock *lock,
			      struct task_struct *task)
{
	unsigned i;

	for (i = 0; i < MAX_HOLDERS; i++)
		if (lock->holders[i] == task)
			break;

	BUG_ON(i == MAX_HOLDERS);
	return i;
}

/* call this *after* you increment lock->count */
static void __add_holder(struct block_lock *lock, struct task_struct *task)
{
	unsigned h = __find_holder(lock, NULL);
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	struct stack_trace *t;
#endif

	get_task_struct(task);
	lock->holders[h] = task;

#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	t = lock->traces + h;
	t->nr_entries = 0;
	t->max_entries = MAX_STACK;
	t->entries = lock->entries[h];
	t->skip = 2;
	save_stack_trace(t);
#endif
}

/* call this *before* you decrement lock->count */
static void __del_holder(struct block_lock *lock, struct task_struct *task)
{
	unsigned h = __find_holder(lock, task);
	lock->holders[h] = NULL;
	put_task_struct(task);
}

static int __check_holder(struct block_lock *lock)
{
	unsigned i;

	for (i = 0; i < MAX_HOLDERS; i++) {
		if (lock->holders[i] == current) {
			DMERR("recursive lock detected in metadata");
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
			DMERR("previously held here:");
			print_stack_trace(lock->traces + i, 4);

			DMERR("subsequent acquisition attempted here:");
			dump_stack();
#endif
			return -EINVAL;
		}
	}

	return 0;
}
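
/*
 * Park the current task until a waker clears w->task.  The loop guards
 * against spurious wakeups: we only return once __wake_waiter() has
 * handed us the lock and set w->task to NULL.
 */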
static void __wait(struct waiter *w)
{
	for (;;) {
		set_task_state(current, TASK_UNINTERRUPTIBLE);

		if (!w->task)
			break;

		schedule();
	}

	set_task_state(current, TASK_RUNNING);
}
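
/*
 * Remove the waiter from the list and wake its task.  The waiter
 * structure lives on the waiter's stack, so it must not be touched
 * after w->task is cleared; the barrier orders the list removal and
 * the read of w->task before that store.
 */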
static void __wake_waiter(struct waiter *w)
{
	struct task_struct *task;

	list_del(&w->list);
	task = w->task;
	smp_mb();
	w->task = NULL;
	wake_up_process(task);
}

/*
 * We either wake a few readers or a single writer.
 */
static void __wake_many(struct block_lock *lock)
{
	struct waiter *w, *tmp;

	BUG_ON(lock->count < 0);
	list_for_each_entry_safe(w, tmp, &lock->waiters, list) {
		if (lock->count >= MAX_HOLDERS)
			return;

		if (w->wants_write) {
			if (lock->count > 0)
				return; /* still read locked */

			lock->count = -1;
			__add_holder(lock, w->task);
			__wake_waiter(w);
			return;
		}

		lock->count++;
		__add_holder(lock, w->task);
		__wake_waiter(w);
	}
}

static void bl_init(struct block_lock *lock)
{
	int i;

	spin_lock_init(&lock->lock);
	lock->count = 0;
	INIT_LIST_HEAD(&lock->waiters);
	for (i = 0; i < MAX_HOLDERS; i++)
		lock->holders[i] = NULL;
}

static int __available_for_read(struct block_lock *lock)
{
	return lock->count >= 0 &&
		lock->count < MAX_HOLDERS &&
		list_empty(&lock->waiters);
}
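
/*
 * Take the lock for reading, sleeping if there is a writer, a queued
 * waiter, or MAX_HOLDERS readers already.  Returns -EINVAL if the
 * caller already holds this lock (recursion).
 */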
static int bl_down_read(struct block_lock *lock)
{
	int r;
	struct waiter w;

	spin_lock(&lock->lock);
	r = __check_holder(lock);
	if (r) {
		spin_unlock(&lock->lock);
		return r;
	}

	if (__available_for_read(lock)) {
		lock->count++;
		__add_holder(lock, current);
		spin_unlock(&lock->lock);
		return 0;
	}

	get_task_struct(current);

	w.task = current;
	w.wants_write = 0;
	list_add_tail(&w.list, &lock->waiters);
	spin_unlock(&lock->lock);

	__wait(&w);
	put_task_struct(current);
	return 0;
}
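
/*
 * As bl_down_read(), but returns -EWOULDBLOCK instead of sleeping.
 */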
static int bl_down_read_nonblock(struct block_lock *lock)
{
	int r;

	spin_lock(&lock->lock);
	r = __check_holder(lock);
	if (r)
		goto out;

	if (__available_for_read(lock)) {
		lock->count++;
		__add_holder(lock, current);
		r = 0;
	} else
		r = -EWOULDBLOCK;

out:
	spin_unlock(&lock->lock);
	return r;
}

static void bl_up_read(struct block_lock *lock)
{
	spin_lock(&lock->lock);
	BUG_ON(lock->count <= 0);
	__del_holder(lock, current);
	--lock->count;
	if (!list_empty(&lock->waiters))
		__wake_many(lock);
	spin_unlock(&lock->lock);
}
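
/*
 * Take the lock for writing.  A writer only gets in immediately when
 * the lock is completely free; otherwise it queues at the head of the
 * waiter list so it is woken ahead of any readers.  Returns -EINVAL on
 * recursive acquisition.
 */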
static int bl_down_write(struct block_lock *lock)
{
	int r;
	struct waiter w;

	spin_lock(&lock->lock);
	r = __check_holder(lock);
	if (r) {
		spin_unlock(&lock->lock);
		return r;
	}

	if (lock->count == 0 && list_empty(&lock->waiters)) {
		lock->count = -1;
		__add_holder(lock, current);
		spin_unlock(&lock->lock);
		return 0;
	}

	get_task_struct(current);
	w.task = current;
	w.wants_write = 1;

	/*
	 * Writers given priority. We know there's only one mutator in the
	 * system, so ignoring the ordering reversal.
	 */
	list_add(&w.list, &lock->waiters);
	spin_unlock(&lock->lock);

	__wait(&w);
	put_task_struct(current);

	return 0;
}
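
/*
 * Drop the write lock and wake whoever is waiting next (either one
 * writer or a batch of readers, see __wake_many()).
 */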
static void bl_up_write(struct block_lock *lock)
{
	spin_lock(&lock->lock);
	__del_holder(lock, current);
	lock->count = 0;
	if (!list_empty(&lock->waiters))
		__wake_many(lock);
	spin_unlock(&lock->lock);
}

static void report_recursive_bug(dm_block_t b, int r)
{
	if (r == -EINVAL)
		DMERR("recursive acquisition of block %llu requested.",
		      (unsigned long long) b);
}

/*----------------------------------------------------------------*/

/*
 * Block manager is currently implemented using dm-bufio.  struct
 * dm_block_manager and struct dm_block map directly onto a couple of
 * structs in the bufio interface.  I want to retain the freedom to move
 * away from bufio in the future.  So these structs are just cast within
 * this .c file, rather than making it through to the public interface.
 */
static struct dm_buffer *to_buffer(struct dm_block *b)
{
	return (struct dm_buffer *) b;
}

dm_block_t dm_block_location(struct dm_block *b)
{
	return dm_bufio_get_block_number(to_buffer(b));
}
EXPORT_SYMBOL_GPL(dm_block_location);

void *dm_block_data(struct dm_block *b)
{
	return dm_bufio_get_block_data(to_buffer(b));
}
EXPORT_SYMBOL_GPL(dm_block_data);
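
/*
 * Per-buffer bookkeeping kept in dm-bufio's aux data: the validator
 * that has vouched for the block's content, the lock protecting it,
 * and a flag recording whether the current holder took it for writing.
 */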
struct buffer_aux {
	struct dm_block_validator *validator;
	struct block_lock lock;
	int write_locked;
};

static void dm_block_manager_alloc_callback(struct dm_buffer *buf)
{
	struct buffer_aux *aux = dm_bufio_get_aux_data(buf);

	aux->validator = NULL;
	bl_init(&aux->lock);
}

static void dm_block_manager_write_callback(struct dm_buffer *buf)
{
	struct buffer_aux *aux = dm_bufio_get_aux_data(buf);

	if (aux->validator) {
		aux->validator->prepare_for_write(aux->validator, (struct dm_block *) buf,
			 dm_bufio_get_block_size(dm_bufio_get_client(buf)));
	}
}

/*----------------------------------------------------------------
 * Public interface
 *--------------------------------------------------------------*/
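
/*
 * Minimal usage sketch of the interface below (illustrative only:
 * error handling is omitted, bdev and cache_size are assumed to come
 * from the caller, and do_something_with() is a stand-in for the
 * caller's code):
 *
 *	struct dm_block_manager *bm;
 *	struct dm_block *blk;
 *
 *	bm = dm_block_manager_create(bdev, 4096, cache_size, 1);
 *	if (!dm_bm_read_lock(bm, 0, NULL, &blk)) {
 *		do_something_with(dm_block_data(blk));
 *		dm_bm_unlock(blk);
 *	}
 *	dm_block_manager_destroy(bm);
 */
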
struct dm_block_manager {
	struct dm_bufio_client *bufio;
	bool read_only:1;
};

struct dm_block_manager *dm_block_manager_create(struct block_device *bdev,
						 unsigned block_size,
						 unsigned cache_size,
						 unsigned max_held_per_thread)
{
	int r;
	struct dm_block_manager *bm;

	bm = kmalloc(sizeof(*bm), GFP_KERNEL);
	if (!bm) {
		r = -ENOMEM;
		goto bad;
	}

	bm->bufio = dm_bufio_client_create(bdev, block_size, max_held_per_thread,
					   sizeof(struct buffer_aux),
					   dm_block_manager_alloc_callback,
					   dm_block_manager_write_callback);
	if (IS_ERR(bm->bufio)) {
		r = PTR_ERR(bm->bufio);
		kfree(bm);
		goto bad;
	}

	bm->read_only = false;

	return bm;

bad:
	return ERR_PTR(r);
}
EXPORT_SYMBOL_GPL(dm_block_manager_create);

void dm_block_manager_destroy(struct dm_block_manager *bm)
{
	dm_bufio_client_destroy(bm->bufio);
	kfree(bm);
}
EXPORT_SYMBOL_GPL(dm_block_manager_destroy);

unsigned dm_bm_block_size(struct dm_block_manager *bm)
{
	return dm_bufio_get_block_size(bm->bufio);
}
EXPORT_SYMBOL_GPL(dm_bm_block_size);

dm_block_t dm_bm_nr_blocks(struct dm_block_manager *bm)
{
	return dm_bufio_get_device_size(bm->bufio);
}
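
/*
 * Validation is lazy: the first lock of a buffer runs v->check() and
 * remembers the validator; later locks only insist that the same
 * validator is used again.
 */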
static int dm_bm_validate_buffer(struct dm_block_manager *bm,
				 struct dm_buffer *buf,
				 struct buffer_aux *aux,
				 struct dm_block_validator *v)
{
	if (unlikely(!aux->validator)) {
		int r;
		if (!v)
			return 0;
		r = v->check(v, (struct dm_block *) buf, dm_bufio_get_block_size(bm->bufio));
		if (unlikely(r)) {
			DMERR_LIMIT("%s validator check failed for block %llu", v->name,
				    (unsigned long long) dm_bufio_get_block_number(buf));
			return r;
		}
		aux->validator = v;
	} else {
		if (unlikely(aux->validator != v)) {
			DMERR_LIMIT("validator mismatch (old=%s vs new=%s) for block %llu",
				    aux->validator->name, v ? v->name : "NULL",
				    (unsigned long long) dm_bufio_get_block_number(buf));
			return -EINVAL;
		}
	}

	return 0;
}
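
/*
 * The locking functions below all follow the same pattern: get the
 * buffer from bufio, take the per-buffer block_lock, then validate.
 * Any failure releases whatever was acquired, so the caller never has
 * to clean up after an error.
 */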
int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b,
		    struct dm_block_validator *v,
		    struct dm_block **result)
{
	struct buffer_aux *aux;
	void *p;
	int r;

	p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result);
	if (IS_ERR(p))
		return PTR_ERR(p);

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_read(&aux->lock);
	if (unlikely(r)) {
		dm_bufio_release(to_buffer(*result));
		report_recursive_bug(b, r);
		return r;
	}

	aux->write_locked = 0;

	r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
	if (unlikely(r)) {
		bl_up_read(&aux->lock);
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_read_lock);

int dm_bm_write_lock(struct dm_block_manager *bm,
		     dm_block_t b, struct dm_block_validator *v,
		     struct dm_block **result)
{
	struct buffer_aux *aux;
	void *p;
	int r;

	if (bm->read_only)
		return -EPERM;

	p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result);
	if (IS_ERR(p))
		return PTR_ERR(p);

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_write(&aux->lock);
	if (r) {
		dm_bufio_release(to_buffer(*result));
		report_recursive_bug(b, r);
		return r;
	}

	aux->write_locked = 1;

	r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
	if (unlikely(r)) {
		bl_up_write(&aux->lock);
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_write_lock);

int dm_bm_read_try_lock(struct dm_block_manager *bm,
			dm_block_t b, struct dm_block_validator *v,
			struct dm_block **result)
{
	struct buffer_aux *aux;
	void *p;
	int r;

	p = dm_bufio_get(bm->bufio, b, (struct dm_buffer **) result);
	if (IS_ERR(p))
		return PTR_ERR(p);
	if (unlikely(!p))
		return -EWOULDBLOCK;

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_read_nonblock(&aux->lock);
	if (r < 0) {
		dm_bufio_release(to_buffer(*result));
		report_recursive_bug(b, r);
		return r;
	}
	aux->write_locked = 0;

	r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
	if (unlikely(r)) {
		bl_up_read(&aux->lock);
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	return 0;
}
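
/*
 * Write lock a block without reading it from disk: the buffer is
 * simply zeroed, so no validation is needed and the caller's validator
 * is installed directly.
 */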
int dm_bm_write_lock_zero(struct dm_block_manager *bm,
			  dm_block_t b, struct dm_block_validator *v,
			  struct dm_block **result)
{
	int r;
	struct buffer_aux *aux;
	void *p;

	if (bm->read_only)
		return -EPERM;

	p = dm_bufio_new(bm->bufio, b, (struct dm_buffer **) result);
	if (IS_ERR(p))
		return PTR_ERR(p);

	memset(p, 0, dm_bm_block_size(bm));

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_write(&aux->lock);
	if (r) {
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	aux->write_locked = 1;
	aux->validator = v;

	return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_write_lock_zero);
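
/*
 * Dropping a write lock marks the buffer dirty; dm-bufio writes it
 * back later, or when dm_bm_flush() is called.
 */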
void dm_bm_unlock(struct dm_block *b)
{
	struct buffer_aux *aux;
	aux = dm_bufio_get_aux_data(to_buffer(b));

	if (aux->write_locked) {
		dm_bufio_mark_buffer_dirty(to_buffer(b));
		bl_up_write(&aux->lock);
	} else
		bl_up_read(&aux->lock);

	dm_bufio_release(to_buffer(b));
}
EXPORT_SYMBOL_GPL(dm_bm_unlock);

int dm_bm_flush(struct dm_block_manager *bm)
{
	if (bm->read_only)
		return -EPERM;

	return dm_bufio_write_dirty_buffers(bm->bufio);
}
EXPORT_SYMBOL_GPL(dm_bm_flush);

void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b)
{
	dm_bufio_prefetch(bm->bufio, b, 1);
}

bool dm_bm_is_read_only(struct dm_block_manager *bm)
{
	return bm->read_only;
}
EXPORT_SYMBOL_GPL(dm_bm_is_read_only);

void dm_bm_set_read_only(struct dm_block_manager *bm)
{
	bm->read_only = true;
}
EXPORT_SYMBOL_GPL(dm_bm_set_read_only);

void dm_bm_set_read_write(struct dm_block_manager *bm)
{
	bm->read_only = false;
}
EXPORT_SYMBOL_GPL(dm_bm_set_read_write);
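
/*
 * Checksum helper: crc32c over the data with an all-ones seed, xor'd
 * with the caller-supplied init_xor.
 */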
u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor)
{
	return crc32c(~(u32) 0, data, len) ^ init_xor;
}
EXPORT_SYMBOL_GPL(dm_bm_checksum);

/*----------------------------------------------------------------*/

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
MODULE_DESCRIPTION("Immutable metadata library for dm");

/*----------------------------------------------------------------*/