sched: prevent wakeup over-scheduling

Prevent wakeup over-scheduling.  Once a task has been preempted by a
task of the same or lower priority, it becomes ineligible for repeated
preemption by such peers until it has been ticked or has slept.
Instead, the task is marked for preemption at the next tick.  Tasks of
higher priority still preempt immediately.
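
For illustration, a small standalone userspace sketch of the gating
follows (not kernel code; the helpers wakeup_preempts_now() and
tick_forces_resched() and the struct entity are hypothetical, and lower
prio values mean higher priority, as in the kernel):

	#include <stdio.h>

	struct entity {
		int prio;		/* lower value = higher priority */
		int peer_preempt;	/* peer wakeups seen since last tick/sleep */
	};

	/* Wakeup path: does waking task 'p' preempt 'curr' right now? */
	static int wakeup_preempts_now(struct entity *curr, const struct entity *p)
	{
		if (p->prio < curr->prio)	/* higher priority: always preempt */
			return 1;
		return !curr->peer_preempt++;	/* only the first peer wakeup */
	}

	/* Tick path: a pending peer preemption forces a resched at the tick. */
	static int tick_forces_resched(struct entity *curr)
	{
		int pending = curr->peer_preempt;

		curr->peer_preempt = 0;		/* ticked: eligible again */
		return pending;
	}

	int main(void)
	{
		struct entity curr = { .prio = 120, .peer_preempt = 0 };
		struct entity peer = { .prio = 120 };
		struct entity high = { .prio = 110 };

		printf("%d\n", wakeup_preempts_now(&curr, &peer)); /* 1: preempts now */
		printf("%d\n", wakeup_preempts_now(&curr, &peer)); /* 0: deferred to tick */
		printf("%d\n", wakeup_preempts_now(&curr, &high)); /* 1: higher prio wins */
		printf("%d\n", tick_forces_resched(&curr));        /* nonzero: resched at tick */
		printf("%d\n", wakeup_preempts_now(&curr, &peer)); /* 1: eligible again */
		return 0;
	}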

Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 04233c8..8be5b57 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -912,6 +912,7 @@
 	struct load_weight	load;		/* for load-balancing */
 	struct rb_node		run_node;
 	unsigned int		on_rq;
+	int			peer_preempt;
 
 	u64			exec_start;
 	u64			sum_exec_runtime;
diff --git a/kernel/sched.c b/kernel/sched.c
index 0bd8f2c..e8051bd 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -445,6 +445,7 @@
 	SCHED_FEAT_TREE_AVG             = 4,
 	SCHED_FEAT_APPROX_AVG           = 8,
 	SCHED_FEAT_WAKEUP_PREEMPT	= 16,
+	SCHED_FEAT_PREEMPT_RESTRICT	= 32,
 };
 
 const_debug unsigned int sysctl_sched_features =
@@ -452,7 +453,8 @@
 		SCHED_FEAT_START_DEBIT		*1 |
 		SCHED_FEAT_TREE_AVG		*0 |
 		SCHED_FEAT_APPROX_AVG		*0 |
-		SCHED_FEAT_WAKEUP_PREEMPT	*1;
+		SCHED_FEAT_WAKEUP_PREEMPT	*1 |
+		SCHED_FEAT_PREEMPT_RESTRICT	*1;
 
 #define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x)
 
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 3843ec7..f819f94 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -526,6 +526,7 @@
 
 	update_stats_dequeue(cfs_rq, se);
 	if (sleep) {
+		se->peer_preempt = 0;
 #ifdef CONFIG_SCHEDSTATS
 		if (entity_is_task(se)) {
 			struct task_struct *tsk = task_of(se);
@@ -553,8 +554,10 @@
 
 	ideal_runtime = sched_slice(cfs_rq, curr);
 	delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
-	if (delta_exec > ideal_runtime)
+	if (delta_exec > ideal_runtime ||
+			(sched_feat(PREEMPT_RESTRICT) && curr->peer_preempt))
 		resched_task(rq_of(cfs_rq)->curr);
+	curr->peer_preempt = 0;
 }
 
 static void
@@ -839,8 +842,12 @@
 		if (unlikely(se->load.weight != NICE_0_LOAD))
 			gran = calc_delta_fair(gran, &se->load);
 
-		if (delta > gran)
-			resched_task(curr);
+		if (delta > gran) {
+			int now = !sched_feat(PREEMPT_RESTRICT);
+
+			if (now || p->prio < curr->prio || !se->peer_preempt++)
+				resched_task(curr);
+		}
 	}
 }
 
@@ -1034,6 +1041,7 @@
 	check_spread(cfs_rq, curr);
 	__enqueue_entity(cfs_rq, se);
 	account_entity_enqueue(cfs_rq, se);
+	se->peer_preempt = 0;
 	resched_task(rq->curr);
 }