/linux+v2.6.19/include/linux/sched.h
/* Set the TIF_NEED_RESCHED thread flag on @tsk. */
1503static inline void set_tsk_need_resched(struct task_struct *tsk)
1504{
1505 set_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
1506}
1507
/* Clear the TIF_NEED_RESCHED thread flag on @tsk. */
1508static inline void clear_tsk_need_resched(struct task_struct *tsk)
1509{
1510 clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
1511}
1512
/*
 * Test the TIF_SIGPENDING thread flag of @p.
 * The unlikely() annotation marks "flag set" as the uncommon case.
 */
1513static inline int signal_pending(struct task_struct *p)
1514{
1515 return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING));
1516}
1517
/*
 * Test the TIF_NEED_RESCHED thread flag; "flag set" is annotated as the
 * uncommon case. Takes no task argument -- presumably test_thread_flag()
 * operates on the current task's thread_info; confirm against its definition.
 */
1518static inline int need_resched(void)
1519{
1520 return unlikely(test_thread_flag(TIF_NEED_RESCHED));
1521}
///////////////////////////////////////////////////////////////////////////////
/linux+v2.6.19/kernel/sched.c
991/*
992 * resched_task - mark a task 'to be rescheduled now'.
993 *
994 * On UP this means the setting of the need_resched flag, on SMP it
995 * might also involve a cross-CPU call to trigger the scheduler on
996 * the target CPU.
997 */
998#ifdef CONFIG_SMP
999
1000#ifndef tsk_is_polling
1001#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
1002#endif
1003
/*
 * SMP variant: mark @p as needing a reschedule and, when @p belongs to a
 * different CPU that is not polling the flag, notify that CPU.
 * The caller must hold the lock of @p's runqueue (asserted below).
 */
1004static void resched_task(struct task_struct *p)
1005{
1006 int cpu;
1007
1008 assert_spin_locked(&task_rq(p)->lock);
1009
/* Flag already set: nothing further is done (no repeated notification). */
1010 if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED)))
1011 return;
1012
1013 set_tsk_thread_flag(p, TIF_NEED_RESCHED);
1014
/* @p is on the CPU executing this code: setting the flag is sufficient. */
1015 cpu = task_cpu(p);
1016 if (cpu == smp_processor_id())
1017 return;
1018
1019 /* NEED_RESCHED must be visible before we test polling */
1020 smp_mb();
/* A task with TIF_POLLING_NRFLAG set is skipped; otherwise signal its CPU. */
1021 if (!tsk_is_polling(p))
1022 smp_send_reschedule(cpu);
1023}
1024#else
/*
 * Non-SMP variant: with a single CPU there is nobody to notify, so this
 * only asserts that @p's runqueue lock is held and sets the
 * need_resched flag on @p.
 */
1025static inline void resched_task(struct task_struct *p)
1026{
1027 assert_spin_locked(&task_rq(p)->lock);
1028 set_tsk_need_resched(p);
1029}
1030#endif
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
1366/***
1367 * try_to_wake_up - wake up a thread
1368 * @p: the to-be-woken-up thread
1369 * @state: the mask of task states that can be woken
1370 * @sync: do a synchronous wakeup?
1371 *
1372 * Put it on the run-queue if it's not already there. The "current"
1373 * thread is always on the run-queue (except when the actual
1374 * re-schedule is in progress), and as such you're allowed to do
1375 * the simpler "current->state = TASK_RUNNING" to mark yourself
1376 * runnable without the overhead of this.
1377 *
1378 * returns failure only if the task is already active.
1379 */
1380static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
/*
 * Wake @p if it is in any of the TASK_STOPPED, TASK_TRACED,
 * TASK_INTERRUPTIBLE or TASK_UNINTERRUPTIBLE states, as a
 * non-synchronous wakeup (sync argument is 0).
 * Returns whatever try_to_wake_up() returns.
 */
1538int fastcall wake_up_process(struct task_struct *p)
1539{
1540 return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED |
1541 TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0);
1542}
1543EXPORT_SYMBOL(wake_up_process);
/*
 * Like wake_up_process(), but the caller supplies the mask of task
 * states (@state) that may be woken. Non-synchronous (sync is 0).
 * Returns whatever try_to_wake_up() returns.
 */
1545int fastcall wake_up_state(struct task_struct *p, unsigned int state)
1546{
1547 return try_to_wake_up(p, state, 0);
1548}
1616/*
1617 * wake_up_new_task - wake up a newly created task for the first time.
1618 *
1619 * This function will do some initial scheduler statistics housekeeping
1620 * that must be done for every newly created context, then puts the task
1621 * on the runqueue and wakes it.
1622 */
1623void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
3571/*
3572 * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just
3573 * wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve
3574 * number) then we wake all the non-exclusive tasks and one exclusive task.
3575 *
3576 * There are circumstances in which we can try to wake a task which has already
3577 * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
3578 * zero in this (rare) case, and we handle it by continuing to scan the queue.
3579 */
3580static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
3581 int nr_exclusive, int sync, void *key)
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
3595/**
3596 * __wake_up - wake up threads blocked on a waitqueue.
3597 * @q: the waitqueue
3598 * @mode: which threads
3599 * @nr_exclusive: how many wake-one or wake-many threads to wake up
3600 * @key: is directly passed to the wakeup function
3601 */
3602void fastcall __wake_up(wait_queue_head_t *q, unsigned int mode,
3603 int nr_exclusive, void *key)
3604{
3605 unsigned long flags;
3606
/* The queue is walked with q->lock held; the saved IRQ state lives in flags. */
3607 spin_lock_irqsave(&q->lock, flags);
/* The fourth argument (sync) is fixed at 0: a non-synchronous wakeup. */
3608 __wake_up_common(q, mode, nr_exclusive, 0, key);
3609 spin_unlock_irqrestore(&q->lock, flags);
3610}
3611EXPORT_SYMBOL(__wake_up);
/*
 * Wake function that forwards to try_to_wake_up(): the task to wake is
 * taken from curr->private, @mode is passed as the state mask and @sync
 * is passed through unchanged. @key is not used here.
 */
3564int default_wake_function(wait_queue_t *curr, unsigned mode, int sync,
3565 void *key)
3566{
3567 return try_to_wake_up(curr->private, mode, sync);
3568}
3569EXPORT_SYMBOL(default_wake_function);
/*
 * Signal completion @x: increment x->done and wake waiters on x->wait,
 * all while holding x->wait.lock.
 */
3652void fastcall complete(struct completion *x)
3653{
3654 unsigned long flags;
3655
3656 spin_lock_irqsave(&x->wait.lock, flags);
3657 x->done++;
/*
 * nr_exclusive = 1, sync = 0, key = NULL: per the __wake_up_common()
 * description this wakes all non-exclusive waiters and one exclusive
 * waiter, in either the uninterruptible or interruptible state.
 */
3658 __wake_up_common(&x->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,
3659 1, 0, NULL);
3660 spin_unlock_irqrestore(&x->wait.lock, flags);
3661}
3662EXPORT_SYMBOL(complete);
User Preemption
User preemption occurs when the kernel is about to return to user-space
and need_resched is set; in that case, the scheduler is invoked.
If the kernel is returning to user-space, it knows it is in a safe quiescent state.
In other words, if it is safe to continue executing the current task,
it is also safe to pick a new task to execute. Consequently,
whenever the kernel is preparing to return to user-space either on return from
an interrupt or after a system call, the value of need_resched is checked.
If it is set, the scheduler is invoked to select a new (more fit) process to execute.
Both the return paths for return from interrupt and return from system call
are architecture dependent and typically implemented in assembly in entry.S
(which, aside from kernel entry code, also contains kernel exit code).
In short, user preemption can occur:
- When returning to user-space from a system call
- When returning to user-space from an interrupt handler
Kernel Preemption
The Linux kernel, unlike most other Unix variants and many other operating systems,
is a fully preemptive kernel. In non-preemptive kernels, kernel code runs until completion.
That is, the scheduler is not capable of rescheduling a task while it is in the kernel:
kernel code is scheduled cooperatively, not preemptively.
Kernel code runs until it finishes (returns to user-space) or explicitly blocks.
In the 2.6 kernel, however, the Linux kernel became preemptive:
It is now possible to preempt a task at any point, so long as the kernel is in a state in which it is safe to reschedule.
So when is it safe to reschedule? The kernel is capable of preempting a task
running in the kernel so long as it does not hold a lock. That is, locks are used as
markers of regions of non-preemptibility. Because the kernel is SMP-safe,
if a lock is not held, the current code is reentrant and capable of being preempted.
The first change in supporting kernel preemption was the addition of a preemption counter,
preempt_count, to each process's thread_info. This counter begins at zero and increments
once for each lock that is acquired and decrements once for each lock that is released.
When the counter is zero, the kernel is preemptible. Upon return from interrupt,
if returning to kernel-space, the kernel checks the values of need_resched and preempt_count.
If need_resched is set and preempt_count is zero, then a more important task is runnable and
it is safe to preempt. Thus, the scheduler is invoked. If preempt_count is nonzero,
a lock is held and it is unsafe to reschedule. In that case, the interrupt returns
as usual to the currently executing task. When all the locks that the current task is holding are released,
preempt_count returns to zero. At that time, the unlock code checks whether need_resched is set.
If so, the scheduler is invoked. Enabling and disabling kernel preemption is sometimes
required in kernel code and is discussed in Chapter 9.
Kernel preemption can also occur explicitly, when a task in the kernel blocks or explicitly calls schedule().
This form of kernel preemption has always been supported because no additional logic is
required to ensure that the kernel is in a state that is safe to preempt.
It is assumed that the code that explicitly calls schedule() knows it is safe to reschedule.
Kernel preemption can occur:
- When an interrupt handler exits, before returning to kernel-space
- When kernel code becomes preemptible again
- If a task in the kernel explicitly calls schedule()
- If a task in the kernel blocks (which results in a call to schedule())