准备 MySQL 内核版本: 8.0.17
在 MySQL 中,当两个或两个以上的事务各自持有锁、同时又请求对方持有的锁,并形成一个循环的依赖关系时,就会产生死锁. 多个事务同时锁定同一个资源时,也可能产生死锁. 在一个事务系统中,死锁是确实存在并且不能完全避免的. InnoDB 会在事务申请锁且需要等待时触发死锁检测,一旦发现死锁就选择其中一个事务回滚.
在 MySQL 中,事务在申请 record lock 后假如无法立即获取锁会进行死锁检测. 在事务的回滚中,会释放该事务持有的所有 lock.
用户可以通过配置 --innodb-deadlock-detect[={OFF|ON}] 选择是否打开死锁检测.
死锁检测 我们从源码层面分析 MySQL 的死锁检测机制,这样可以更直观地了解它的工作原理. MySQL 的死锁检测算法是深度优先搜索,如果在搜索过程中发现了环,就说明发生了死锁. 为了避免死锁检测开销过大,如果搜索深度超过了 200(LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK),也同样认为发生了死锁.
基本的代码流程如下,其中 add_to_waitq() 是申请 Record Lock 的入口函数:
 --------------------------
| RecLock::add_to_waitq () |
 --------------------------
            |
            |        --------------------
            | -->   | RecLock::create () |
            |        --------------------
            |                 |
            |                 |        ------------------------
            |                 | -->   | RecLock::lock_alloc () |
            |                 |        ------------------------
            |                 |
            |                 |        ----------------------
            |                 | -->   | RecLock::lock_add () |
            |                          ----------------------
            |
            |        ------------------------
            | -->   | RecLock::jump_queue () |
            |        ------------------------
            |
            |        ----------------------------
            | -->   | RecLock::deadlock_check () |
                     ----------------------------
                                  |
                                  |        ---------------------------------------
                                  | -->   | DeadlockChecker::check_and_resolve () |
                                  |        ---------------------------------------
                                  |
                                  |        -----------------------------------
                                  | -->   | RecLock::check_deadlock_result () |
                                           -----------------------------------
死锁检测的主流程代码在 DeadlockChecker::check_and_resolve() 中:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
/**
 * Runs deadlock detection for `trx`, which is about to wait on `lock`, and
 * rolls back other transactions chosen as victims until the search either
 * finds no victim or picks `trx` itself.
 *
 * The caller must hold the lock mutex and the mutex of `trx` (both are
 * asserted). The mutex of `trx` is released while searching and re-acquired
 * before returning.
 *
 * @param lock  the lock request that `trx` wants to wait for
 * @param trx   the transaction issuing the request
 * @return `trx` when TRX_FORCE_ROLLBACK_ASYNC is set on it, when the search
 *         was abandoned as too deep, or when the search selected `trx` as the
 *         victim; NULL when detection is disabled or no victim remains.
 */
const trx_t *DeadlockChecker::check_and_resolve (const lock_t *lock, trx_t *trx) {
  ut_ad (lock_mutex_own ());
  ut_ad (trx_mutex_own (trx));
  check_trx_state (trx);
  ut_ad (!srv_read_only_mode);
  if (trx->in_innodb & TRX_FORCE_ROLLBACK_ASYNC) {
    /* Flag already set on this transaction: report it as the victim
    without searching. */
    return (trx);
  } else if (!innobase_deadlock_detect) {
    /* Detection is switched off: report "no victim" without searching. */
    return (NULL );
  }
  /* Remember the ownership-tracking flag, then drop the trx mutex for the
  duration of the search; both are restored before returning. */
  const bool was_trx_mutex_ownership_tracked = trx->owns_mutex;
  trx->owns_mutex = false ;
  trx_mutex_exit (trx);
  const trx_t *victim_trx;
  /* Each pass builds a fresh checker and searches again; the loop repeats for
  as long as the search keeps picking a transaction other than `trx`. */
  do {
    DeadlockChecker checker (trx, lock, s_lock_mark_counter) ;
    victim_trx = checker.search ();
    if (checker.is_too_deep ()) {
      /* Search gave up: the victim is `trx` itself (asserted below). */
      ut_ad (trx == checker.m_start);
      ut_ad (trx == victim_trx);
      rollback_print (victim_trx, lock);
      MONITOR_INC (MONITOR_DEADLOCK);
      break ;
    } else if (victim_trx != NULL && victim_trx != trx) {
      /* Another transaction was chosen: roll it back and search again. */
      ut_ad (victim_trx == checker.m_wait_lock->trx);
      checker.trx_rollback ();
      lock_deadlock_found = true ;
      MONITOR_INC (MONITOR_DEADLOCK);
    }
  } while (victim_trx != NULL && victim_trx != trx);
  trx_mutex_enter (trx);
  trx->owns_mutex = was_trx_mutex_ownership_tracked;
  return (victim_trx);
}
MySQL 死锁检测判断是否存在死锁的核心代码在函数 DeadlockChecker::search() 中:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114
/**
 * Iterative depth-first search over the waits-for relation, starting from
 * the lock request `m_wait_lock` of transaction `m_start`.
 *
 * The caller must hold the lock mutex and must NOT hold the mutex of
 * `m_start` (both are asserted).
 *
 * @return the result of select_victim() when a lock that `m_wait_lock` has to
 *         wait for belongs to `m_start` (a cycle back to the start);
 *         `m_start` when the search is abandoned as too deep (`m_too_deep`
 *         is set); a null pointer when the search finishes without finding
 *         a cycle.
 */
const trx_t *DeadlockChecker::search () {
  ut_ad (lock_mutex_own ());
  ut_ad (!trx_mutex_own (m_start));
  ut_ad (m_start != NULL );
  ut_ad (m_wait_lock != NULL );
  check_trx_state (m_wait_lock->trx);
  ut_ad (m_mark_start <= s_lock_mark_counter);
  ulint heap_no;
  const lock_t *lock = get_first_lock (&heap_no);
  for (;;) {
    /* The same lock must never be examined twice. */
    ut_ad (lock == NULL || !is_visited (lock));
    /* Current level exhausted: backtrack through the saved frames until one
    yields a next lock or none are left. The guard must be `> 0` (it was
    `> 1`), otherwise the last saved frame is never popped and the
    `m_n_elems == 0` assertion at the end of the function cannot hold. */
    while (m_n_elems > 0 && lock == NULL ) {
      pop (lock, heap_no);
      lock = get_next_lock (lock, heap_no);
    }
    if (lock == NULL ) {
      /* Nothing left to examine anywhere: the search is finished. */
      break ;
    } else if (lock == m_wait_lock) {
      /* Reached the waiting request itself: stamp its transaction with a
      fresh mark and force a backtrack on the next iteration. */
      lock->trx->lock.deadlock_mark = ++s_lock_mark_counter;
      ut_ad (s_lock_mark_counter > 0 );
      lock = NULL ;
    } else if (!lock_has_to_wait (m_wait_lock, lock)) {
      /* No wait dependency on this lock: move on to the next one. */
      lock = get_next_lock (lock, heap_no);
    } else if (lock->trx == m_start) {
      /* The dependency leads back to the starting transaction: a cycle. */
      notify (lock);
      return (select_victim ());
    } else if (is_too_deep ()) {
      /* Search budget exceeded: give up and report the starting trx. */
      m_too_deep = true ;
      return (m_start);
    } else if (lock->trx_que_state () == TRX_QUE_LOCK_WAIT) {
      /* The owner of this lock is itself waiting: save the current position
      and descend into what that transaction is waiting for. */
      ++m_cost;
      if (!push (lock, heap_no)) {
        m_too_deep = true ;
        return (m_start);
      }
      m_wait_lock = lock->trx->lock.wait_lock;
      lock = get_first_lock (&heap_no);
      if (is_visited (lock)) {
        lock = get_next_lock (lock, heap_no);
      }
    } else {
      /* The owner is not waiting, so it cannot extend a cycle. */
      lock = get_next_lock (lock, heap_no);
    }
  }
  ut_a (lock == NULL && m_n_elems == 0 );
  /* No deadlock found. */
  return (0 );
}
select_victim() 返回一个被选中、需要回滚的事务. MySQL 并不会遍历所有的 trx 来选择一个代价较小的事务,仅仅在 m_start 和 m_wait_lock->trx 这两个事务中选一个优先级较低的事务回滚.