Web lists-archives.com

[PATCH 2/3] sched/swait: Switch to full exclusive mode




Linus noted that swait basically implements exclusive mode -- because
swake_up() only wakes a single waiter. And because of that it should
take care to properly deal with the interruptible case.

In short, the problem is that swake_up() can race with a signal. In
this this case it is possible the swake_up() 'wakes' the waiter that
is already on the way out because it just got a signal and the wakeup
gets lost.

The normal wait code is very careful and avoids this situation, make
sure we do too.

Copy the exact exclusive semantics from wait.

Suggested-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
 include/linux/swait.h |   11 ++++++-----
 kernel/sched/swait.c  |   22 +++++++++++++++++-----
 2 files changed, 23 insertions(+), 10 deletions(-)

--- a/include/linux/swait.h
+++ b/include/linux/swait.h
@@ -38,8 +38,8 @@
  *    all wakeups are TASK_NORMAL in order to avoid O(n) lookups for the right
  *    sleeper state.
  *
- *  - the exclusive mode; because this requires preserving the list order
- *    and this is hard.
+ *  - the !exclusive mode; because that leads to O(n) wakeups, everything is
+ *    exclusive.
  *
  *  - custom wake callback functions; because you cannot give any guarantees
  *    about random code. This also allows swait to be used in RT, such that
@@ -167,9 +167,10 @@ extern long prepare_to_swait_event(struc
 extern void __finish_swait(struct swait_queue_head *q, struct swait_queue *wait);
 extern void finish_swait(struct swait_queue_head *q, struct swait_queue *wait);
 
-/* as per ___wait_event() but for swait, therefore "exclusive == 0" */
+/* as per ___wait_event() but for swait, therefore "exclusive == 1" */
 #define ___swait_event(wq, condition, state, ret, cmd)			\
 ({									\
+	__label__ __out;						\
 	struct swait_queue __wait;					\
 	long __ret = ret;						\
 									\
@@ -182,13 +183,13 @@ extern void finish_swait(struct swait_qu
 									\
 		if (___wait_is_interruptible(state) && __int) {		\
 			__ret = __int;					\
-			break;						\
+			goto __out;					\
 		}							\
 									\
 		cmd;							\
 	}								\
 	finish_swait(&wq, &__wait);					\
-	__ret;								\
+__out:	__ret;								\
 })
 
 #define __swait_event(wq, condition)					\
--- a/kernel/sched/swait.c
+++ b/kernel/sched/swait.c
@@ -73,7 +73,7 @@ static void __prepare_to_swait(struct sw
 {
 	wait->task = current;
 	if (list_empty(&wait->task_list))
-		list_add(&wait->task_list, &q->task_list);
+		list_add_tail(&wait->task_list, &q->task_list);
 }
 
 void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state)
@@ -89,12 +89,24 @@ EXPORT_SYMBOL(prepare_to_swait);
 
 long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state)
 {
-	if (signal_pending_state(state, current))
-		return -ERESTARTSYS;
+	unsigned long flags;
+	long ret = 0;
 
-	prepare_to_swait(q, wait, state);
+	raw_spin_lock_irqsave(&q->lock, flags);
+	if (unlikely(signal_pending_state(state, current))) {
+		/*
+		 * See prepare_to_wait_event(). TL;DR, subsequent swake_up()
+		 * must not see us.
+		 */
+		list_del_init(&wait->task_list);
+		ret = -ERESTARTSYS;
+	} else {
+		__prepare_to_swait(q, wait);
+		set_current_state(state);
+	}
+	raw_spin_unlock_irqrestore(&q->lock, flags);
 
-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL(prepare_to_swait_event);