[PATCH] microoptimize locking primitives by avoiding unnecessary atomic ops

From: Mateusz Guzik <mjguzik@gmail.com>
Date: Fri, 27 May 2016 21:17:01 +0200

Hello there,

Quite some time ago I posted a trivial patch to the locking primitives.
Currently, the inline fast path tries an atomic op and, if that fails,
calls the actual (slow path) function, which immediately retries the
very same op.

The obvious optimisation is to check whether the lock is available
first and only then attempt the atomic op. A cmpset takes the cache
line in exclusive state even when it fails, so skipping it while the
lock is visibly held avoids bouncing the line between CPUs under
contention.
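
To make the pattern concrete, here is a minimal standalone sketch using
C11 atomics. This is only an illustration: the kernel code below uses
the atomic(9) primitives instead, and struct lock, try_lock_old and
try_lock_new are made-up names.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define LOCK_UNOWNED	((uintptr_t)0)

struct lock {
	_Atomic uintptr_t owner;	/* 0 == free, otherwise owner tid */
};

/* Old fast path: always issue the atomic op, even if the lock is held. */
static bool
try_lock_old(struct lock *lk, uintptr_t tid)
{
	uintptr_t v = LOCK_UNOWNED;

	return (atomic_compare_exchange_strong(&lk->owner, &v, tid));
}

/*
 * New fast path: peek at the lock word first, so that a held lock
 * costs only a shared read instead of a failed compare-and-swap.
 */
static bool
try_lock_new(struct lock *lk, uintptr_t tid)
{
	uintptr_t v = LOCK_UNOWNED;

	if (atomic_load_explicit(&lk->owner, memory_order_relaxed) !=
	    LOCK_UNOWNED)
		return (false);
	return (atomic_compare_exchange_strong(&lk->owner, &v, tid));
}

The slow paths in the patch do the same thing in a loop: retry the
cheap read until the lock looks free and only then attempt the cmpset.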

There were concerns about the way it was done previously, as it relied
on volatile behaving in a specific way.

Later a simplified version was posted which should be free of that
concern, but the thread died.

I refer you to https://lists.freebsd.org/pipermail/freebsd-current/2015-November/058100.html
for simple benchmark results.

I would like to get the patch in before the 11 freeze.

diff --git a/sys/kern/kern_lock.c b/sys/kern/kern_lock.c
index 34d7b34..1b10623 100644
--- a/sys/kern/kern_lock.c
+++ b/sys/kern/kern_lock.c
@@ -787,8 +787,10 @@ __lockmgr_args(struct lock *lk, u_int flags, struct lock_object *ilk,
 			break;
 		}
 
-		while (!atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED,
-		    tid)) {
+		for (;;) {
+			if (lk->lk_lock == LK_UNLOCKED &&
+			    atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid))
+				break;
 #ifdef HWPMC_HOOKS
 			PMC_SOFT_CALL( , , lock, failed);
 #endif
@@ -1124,7 +1126,11 @@ __lockmgr_args(struct lock *lk, u_int flags, struct lock_object *ilk,
 			    __func__, iwmesg, file, line);
 		}
 
-		while (!atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid)) {
+		for (;;) {
+			if (lk->lk_lock == LK_UNLOCKED &&
+			    atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid))
+				break;
+
 #ifdef HWPMC_HOOKS
 			PMC_SOFT_CALL( , , lock, failed);
 #endif
diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c
index 3f62d17..d167205 100644
--- a/sys/kern/kern_mutex.c
+++ b/sys/kern/kern_mutex.c
@@ -419,7 +419,9 @@ __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts,
 	all_time -= lockstat_nsecs(&m->lock_object);
 #endif
 
-	while (!_mtx_obtain_lock(m, tid)) {
+	for (;;) {
+		if (m->mtx_lock == MTX_UNOWNED && _mtx_obtain_lock(m, tid))
+			break;
 #ifdef KDTRACE_HOOKS
 		spin_cnt++;
 #endif
@@ -602,8 +604,9 @@ _mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t tid, int opts,
 #ifdef KDTRACE_HOOKS
 	spin_time -= lockstat_nsecs(&m->lock_object);
 #endif
-	while (!_mtx_obtain_lock(m, tid)) {
-
+	for (;;) {
+		if (m->mtx_lock == MTX_UNOWNED && _mtx_obtain_lock(m, tid))
+			break;
 		/* Give interrupts a chance while we spin. */
 		spinlock_exit();
 		while (m->mtx_lock != MTX_UNOWNED) {
@@ -675,7 +678,9 @@ retry:
 			    m->lock_object.lo_name, file, line));
 		WITNESS_CHECKORDER(&m->lock_object,
 		    opts | LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL);
-		while (!_mtx_obtain_lock(m, tid)) {
+		for (;;) {
+			if (m->mtx_lock == MTX_UNOWNED && _mtx_obtain_lock(m, tid))
+				break;
 			if (m->mtx_lock == tid) {
 				m->mtx_recurse++;
 				break;
diff --git a/sys/kern/kern_rwlock.c b/sys/kern/kern_rwlock.c
index 6541724..6a904d2 100644
--- a/sys/kern/kern_rwlock.c
+++ b/sys/kern/kern_rwlock.c
@@ -771,7 +771,9 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file,
 	all_time -= lockstat_nsecs(&rw->lock_object);
 	state = rw->rw_lock;
 #endif
-	while (!_rw_write_lock(rw, tid)) {
+	for (;;) {
+		if (rw->rw_lock == RW_UNLOCKED && _rw_write_lock(rw, tid))
+			break;
 #ifdef KDTRACE_HOOKS
 		spin_cnt++;
 #endif
diff --git a/sys/kern/kern_sx.c b/sys/kern/kern_sx.c
index 96e117b..2a81c04 100644
--- a/sys/kern/kern_sx.c
+++ b/sys/kern/kern_sx.c
@@ -544,7 +544,10 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file,
 	all_time -= lockstat_nsecs(&sx->lock_object);
 	state = sx->sx_lock;
 #endif
-	while (!atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, tid)) {
+	for (;;) {
+		if (sx->sx_lock == SX_LOCK_UNLOCKED &&
+		    atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, tid))
+			break;
 #ifdef KDTRACE_HOOKS
 		spin_cnt++;
 #endif
diff --git a/sys/sys/mutex.h b/sys/sys/mutex.h
index a9ec072..0443922 100644
--- a/sys/sys/mutex.h
+++ b/sys/sys/mutex.h
@@ -185,7 +185,7 @@ void	thread_lock_flags_(struct thread *, int, const char *, int);
 #define __mtx_lock(mp, tid, opts, file, line) do {			\
 	uintptr_t _tid = (uintptr_t)(tid);				\
 									\
-	if (!_mtx_obtain_lock((mp), _tid))				\
+	if (((mp)->mtx_lock != MTX_UNOWNED || !_mtx_obtain_lock((mp), _tid)))\
 		_mtx_lock_sleep((mp), _tid, (opts), (file), (line));	\
 	else								\
 		LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(adaptive__acquire,	\
@@ -203,7 +203,7 @@ void	thread_lock_flags_(struct thread *, int, const char *, int);
 	uintptr_t _tid = (uintptr_t)(tid);				\
 									\
 	spinlock_enter();						\
-	if (!_mtx_obtain_lock((mp), _tid)) {				\
+	if (((mp)->mtx_lock != MTX_UNOWNED || !_mtx_obtain_lock((mp), _tid))) {\
 		if ((mp)->mtx_lock == _tid)				\
 			(mp)->mtx_recurse++;				\
 		else							\
@@ -232,7 +232,7 @@ void	thread_lock_flags_(struct thread *, int, const char *, int);
 									\
 	if ((mp)->mtx_recurse == 0)					\
 		LOCKSTAT_PROFILE_RELEASE_LOCK(adaptive__release, mp);	\
-	if (!_mtx_release_lock((mp), _tid))				\
+	if ((mp)->mtx_lock != _tid || !_mtx_release_lock((mp), _tid))	\
 		_mtx_unlock_sleep((mp), (opts), (file), (line));	\
 } while (0)
 
diff --git a/sys/sys/rwlock.h b/sys/sys/rwlock.h
index f8947c5..6b4f505 100644
--- a/sys/sys/rwlock.h
+++ b/sys/sys/rwlock.h
@@ -96,7 +96,7 @@
 #define	__rw_wlock(rw, tid, file, line) do {				\
 	uintptr_t _tid = (uintptr_t)(tid);				\
 									\
-	if (!_rw_write_lock((rw), _tid))				\
+	if ((rw)->rw_lock != RW_UNLOCKED || !_rw_write_lock((rw), _tid))\
 		_rw_wlock_hard((rw), _tid, (file), (line));		\
 	else 								\
 		LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw,	\
@@ -112,7 +112,7 @@
 	else {								\
 		LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw,	\
 		    LOCKSTAT_WRITER);					\
-		if (!_rw_write_unlock((rw), _tid))			\
+		if ((rw)->rw_lock != _tid || !_rw_write_unlock((rw), _tid))\
 			_rw_wunlock_hard((rw), _tid, (file), (line));	\
 	}								\
 } while (0)
diff --git a/sys/sys/sx.h b/sys/sys/sx.h
index 03b51d3..57a31d9 100644
--- a/sys/sys/sx.h
+++ b/sys/sys/sx.h
@@ -150,7 +150,8 @@ __sx_xlock(struct sx *sx, struct thread *td, int opts, const char *file,
 	uintptr_t tid = (uintptr_t)td;
 	int error = 0;
 
-	if (!atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, tid))
+	if (sx->sx_lock != SX_LOCK_UNLOCKED ||
+	    !atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, tid))
 		error = _sx_xlock_hard(sx, tid, opts, file, line);
 	else 
 		LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx,
@@ -168,7 +169,8 @@ __sx_xunlock(struct sx *sx, struct thread *td, const char *file, int line)
 	if (sx->sx_recurse == 0)
 		LOCKSTAT_PROFILE_RELEASE_RWLOCK(sx__release, sx,
 		    LOCKSTAT_WRITER);
-	if (!atomic_cmpset_rel_ptr(&sx->sx_lock, tid, SX_LOCK_UNLOCKED))
+	if (sx->sx_lock != tid ||
+	    !atomic_cmpset_rel_ptr(&sx->sx_lock, tid, SX_LOCK_UNLOCKED))
 		_sx_xunlock_hard(sx, tid, file, line);
 }
 