/linux+v2.6.19/include/linux/sched.h
static inline void set_tsk_need_resched(struct task_struct *tsk)
{
	set_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
}

static inline void clear_tsk_need_resched(struct task_struct *tsk)
{
	clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
}

static inline int signal_pending(struct task_struct *p)
{
	return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING));
}

static inline int need_resched(void)
{
	return unlikely(test_thread_flag(TIF_NEED_RESCHED));
}
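For context, a long-running kernel loop typically polls these helpers to stay cooperative. The sketch below is not kernel source; crunch_items() and its "item" processing are hypothetical, it only shows how need_resched() and signal_pending() are meant to be consulted.

#include <linux/sched.h>
#include <linux/errno.h>

/* Hypothetical long-running loop that cooperates with the scheduler. */
static int crunch_items(int nr_items)
{
	int i;

	for (i = 0; i < nr_items; i++) {
		/* ... process item i ... */

		if (signal_pending(current))
			return -EINTR;	/* a signal is pending for this task */

		if (need_resched())
			schedule();	/* voluntarily give up the CPU */
	}
	return 0;
}

In real code, cond_resched() roughly packages the need_resched()/schedule() pair shown here.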
///////////////////////////////////////////////////////////////////////////////
/linux+v2.6.19/kernel/sched.c
/*
 * resched_task - mark a task 'to be rescheduled now'.
 *
 * On UP this means the setting of the need_resched flag, on SMP it
 * might also involve a cross-CPU call to trigger the scheduler on
 * the target CPU.
 */
#ifdef CONFIG_SMP

#ifndef tsk_is_polling
#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
#endif

static void resched_task(struct task_struct *p)
{
	int cpu;

	assert_spin_locked(&task_rq(p)->lock);

	if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED)))
		return;

	set_tsk_thread_flag(p, TIF_NEED_RESCHED);

	cpu = task_cpu(p);
	if (cpu == smp_processor_id())
		return;

	/* NEED_RESCHED must be visible before we test polling */
	smp_mb();
	if (!tsk_is_polling(p))
		smp_send_reschedule(cpu);
}
#else
static inline void resched_task(struct task_struct *p)
{
	assert_spin_locked(&task_rq(p)->lock);
	set_tsk_need_resched(p);
}
#endif
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
/***
 * try_to_wake_up - wake up a thread
 * @p: the to-be-woken-up thread
 * @state: the mask of task states that can be woken
 * @sync: do a synchronous wakeup?
 *
 * Put it on the run-queue if it's not already there. The "current"
 * thread is always on the run-queue (except when the actual
 * re-schedule is in progress), and as such you're allowed to do
 * the simpler "current->state = TASK_RUNNING" to mark yourself
 * runnable without the overhead of this.
 *
 * returns failure only if the task is already active.
 */
static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
int fastcall wake_up_process(struct task_struct *p)
{
	return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED |
				 TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0);
}
EXPORT_SYMBOL(wake_up_process);

int fastcall wake_up_state(struct task_struct *p, unsigned int state)
{
	return try_to_wake_up(p, state, 0);
}
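As a usage illustration (not from the kernel tree), a kernel thread often sleeps in TASK_INTERRUPTIBLE and is put back on the runqueue with wake_up_process(); the thread function and names below are hypothetical.

#include <linux/kthread.h>
#include <linux/sched.h>

/* Hypothetical worker: sleeps until someone wakes it, then handles work. */
static int demo_thread_fn(void *data)
{
	while (!kthread_should_stop()) {
		set_current_state(TASK_INTERRUPTIBLE);
		/* a real thread rechecks its work condition here so an
		 * early wakeup is not lost */
		schedule();		/* sleep until woken */
		/* ... handle whatever we were woken up for ... */
	}
	return 0;
}

/* Producer side: wake_up_process() makes the sleeping thread runnable. */
static void kick_worker(struct task_struct *worker)
{
	wake_up_process(worker);	/* ends up in try_to_wake_up() */
}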
/*
 * wake_up_new_task - wake up a newly created task for the first time.
 *
 * This function will do some initial scheduler statistics housekeeping
 * that must be done for every newly created context, then puts the task
 * on the runqueue and wakes it.
 */
void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)

/*
 * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just
 * wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve
 * number) then we wake all the non-exclusive tasks and one exclusive task.
 *
 * There are circumstances in which we can try to wake a task which has already
 * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
 * zero in this (rare) case, and we handle it by continuing to scan the queue.
 */
static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
			     int nr_exclusive, int sync, void *key)
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
/**
 * __wake_up - wake up threads blocked on a waitqueue.
 * @q: the waitqueue
 * @mode: which threads
 * @nr_exclusive: how many wake-one or wake-many threads to wake up
 * @key: is directly passed to the wakeup function
 */
void fastcall __wake_up(wait_queue_head_t *q, unsigned int mode,
			int nr_exclusive, void *key)
{
	unsigned long flags;

	spin_lock_irqsave(&q->lock, flags);
	__wake_up_common(q, mode, nr_exclusive, 0, key);
	spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(__wake_up);

int default_wake_function(wait_queue_t *curr, unsigned mode, int sync,
			  void *key)
{
	return try_to_wake_up(curr->private, mode, sync);
}
EXPORT_SYMBOL(default_wake_function);
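Drivers rarely call __wake_up() directly; they use the wait_event_*()/wake_up*() wrappers, which end up here with default_wake_function() as the per-waiter callback. A minimal sketch with hypothetical names (data_wq, data_ready, and the two functions):

#include <linux/wait.h>
#include <linux/sched.h>

static DECLARE_WAIT_QUEUE_HEAD(data_wq);	/* hypothetical waitqueue */
static int data_ready;

/* Consumer: sleeps until data_ready is non-zero (or a signal arrives). */
static int wait_for_data(void)
{
	return wait_event_interruptible(data_wq, data_ready);
}

/* Producer: sets the condition, then wakes the sleepers. */
static void publish_data(void)
{
	data_ready = 1;
	wake_up_interruptible(&data_wq);	/* __wake_up(&data_wq, TASK_INTERRUPTIBLE, 1, NULL) */
}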
void fastcall complete(struct completion *x)
{
	unsigned long flags;

	spin_lock_irqsave(&x->wait.lock, flags);
	x->done++;
	__wake_up_common(&x->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,
			 1, 0, NULL);
	spin_unlock_irqrestore(&x->wait.lock, flags);
}
EXPORT_SYMBOL(complete);
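Typical use pairs complete() with wait_for_completion() on a completion object; the names below are illustrative only.

#include <linux/completion.h>

static DECLARE_COMPLETION(demo_done);	/* hypothetical completion */

/* Waiter: blocks until the event is signalled. */
static void wait_for_demo(void)
{
	wait_for_completion(&demo_done);
}

/* Signaller: bumps ->done and wakes one waiter, via complete() above. */
static void demo_finished(void)
{
	complete(&demo_done);
}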
User Preemption
User preemption occurs when the kernel is about to return to user-space,
need_resched is set, and therefore, the scheduler is invoked.
If the kernel is returning to user-space, it knows it is in a safe quiescent state.
In other words, if it is safe to continue executing the current task,
it is also safe to pick a new task to execute. Consequently,
whenever the kernel is preparing to return to user-space either on return from
an interrupt or after a system call, the value of need_resched is checked.
If it is set, the scheduler is invoked to select a new (more fit) process to execute.
Both the return paths for return from interrupt and return from system call
are architecture dependent and typically implemented in assembly in entry.S
(which, aside from kernel entry code, also contains kernel exit code).
In short, user preemption can occur:
When returning to user-space from a system call
When returning to user-space from an interrupt handler
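The actual check lives in the architecture's entry code (see the i386 entry.S excerpt near the end of this section). The function below is only a rough, hypothetical C rendering of that flow, not real kernel code.

#include <linux/sched.h>

/* Hypothetical C sketch of the return-to-user-space work loop. */
static void exit_to_user_sketch(void)
{
	/* user preemption: keep rescheduling while need_resched stays set */
	while (test_thread_flag(TIF_NEED_RESCHED))
		schedule();

	if (signal_pending(current)) {
		/* entry.S calls do_notify_resume() here to deliver signals */
	}
}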
Kernel Preemption
The Linux kernel, unlike most other Unix variants and many other operating systems,
is a fully preemptive kernel. In non-preemptive kernels, kernel code runs until completion.
That is, the scheduler is not capable of rescheduling a task while it is in the kernel.
Kernel code is scheduled cooperatively, not preemptively.
Kernel code runs until it finishes (returns to user-space) or explicitly blocks.
In the 2.6 kernel, however, the Linux kernel became preemptive:
It is now possible to preempt a task at any point, so long as the kernel is in a state in which it is safe to reschedule.
So when is it safe to reschedule? The kernel is capable of preempting a task
running in the kernel so long as it does not hold a lock. That is, locks are used as
markers of regions of non-preemptibility. Because the kernel is SMP-safe,
if a lock is not held, the current code is reentrant and capable of being preempted.
The first change in supporting kernel preemption was the addition of a preemption counter,
preempt_count, to each process's thread_info. This counter begins at zero and increments
once for each lock that is acquired and decrements once for each lock that is released.
When the counter is zero, the kernel is preemptible. Upon return from interrupt,
if returning to kernel-space, the kernel checks the values of need_resched and preempt_count.
If need_resched is set and preempt_count is zero, then a more important task is runnable and
it is safe to preempt. Thus, the scheduler is invoked. If preempt_count is nonzero,
a lock is held and it is unsafe to reschedule. In that case, the interrupt returns
as usual to the currently executing task. When all the locks that the current task is holding are released,
preempt_count returns to zero. At that time, the unlock code checks whether need_resched is set.
If so, the scheduler is invoked. Enabling and disabling kernel preemption is sometimes
required in kernel code and is discussed in Chapter 9.
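As a concrete (hypothetical) illustration of the counter: preempt_disable()/preempt_enable() adjust preempt_count directly, and with CONFIG_PREEMPT the spinlock primitives do the same, which is why the unlock path is where the deferred need_resched check happens.

#include <linux/preempt.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_lock);	/* hypothetical lock */

static void preempt_count_demo(void)
{
	preempt_disable();		/* preempt_count++: region is non-preemptible */
	/* ... per-CPU work that must not be preempted ... */
	preempt_enable();		/* preempt_count--; if it reaches zero and
					 * need_resched is set, the scheduler runs */

	spin_lock(&demo_lock);		/* with CONFIG_PREEMPT this also bumps preempt_count */
	/* ... critical section ... */
	spin_unlock(&demo_lock);	/* drops the count and rechecks need_resched */
}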
Kernel preemption can also occur explicitly, when a task in the kernel blocks or explicitly calls schedule().
This form of kernel preemption has always been supported because no additional logic is
required to ensure that the kernel is in a state that is safe to preempt.
It is assumed that the code that explicitly calls schedule() knows it is safe to reschedule.
Kernel preemption can occur:
When an interrupt handler exits, before returning to kernel-space
When kernel code becomes preemptible again
If a task in the kernel explicitly calls schedule()
If a task in the kernel blocks (which results in a call to schedule())
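The explicit form is just ordinary blocking code. A hypothetical sketch of the classic open-coded sleep pattern (the hand-written equivalent of the wait_event helpers shown earlier), ending in a direct schedule() call:

#include <linux/sched.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(event_wq);	/* hypothetical names */
static int event_happened;

static void wait_for_event(void)
{
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait(&event_wq, &wait, TASK_INTERRUPTIBLE);
		if (event_happened)
			break;
		schedule();		/* the task blocks; the scheduler picks another */
	}
	finish_wait(&event_wq, &wait);
}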
The kernel has supported kernel preemption since 2.6. On a system without kernel preemption, kernel code runs until it completes; that is, a process cannot be preempted while it is in kernel mode (of course, a process running in kernel mode can voluntarily give up the CPU, for example when a system-call service routine releases the CPU because it has to wait for a resource; this case is called a planned process switch). For a process switch triggered by an asynchronous event (such as an interrupt), however, a preemptive kernel behaves differently from a non-preemptive one; that case is called a forced process switch.
//kernel/softirq.c
void local_bh_enable(void)
{
	WARN_ON(irqs_disabled());
	/*
	 * Keep preemption disabled until we are done with
	 * softirq processing:
	 */
	//drop the softirq part of the count (SOFTIRQ_OFFSET), keeping one
	//preemption-disable reference until softirq processing is done
	preempt_count() -= SOFTIRQ_OFFSET - 1;

	if (unlikely(!in_interrupt() && local_softirq_pending()))
		do_softirq();		//process the pending softirqs

	//drop the remaining preempt count
	dec_preempt_count();
	//check whether a kernel-preemption reschedule is needed
	preempt_check_resched();
}
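For reference, the disable side is local_bh_disable(); a common (hypothetical) pattern is to bracket process-context access to data shared with softirq/tasklet code, so pending softirqs run, and the preemption check fires, only at local_bh_enable():

#include <linux/interrupt.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(stats_lock);	/* hypothetical data shared with a softirq */
static unsigned long stats_counter;

static void bump_stats(void)
{
	local_bh_disable();		/* adds SOFTIRQ_OFFSET to preempt_count */
	spin_lock(&stats_lock);
	stats_counter++;
	spin_unlock(&stats_lock);
	local_bh_enable();		/* may run pending softirqs, then
					 * preempt_check_resched(), as shown above */
}

spin_lock_bh()/spin_unlock_bh() combine the two steps.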
//include/linux/preempt.h
#define preempt_check_resched() \
do { \
	/* if need_resched is set, reschedule via preempt_schedule() */ \
	if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) \
		preempt_schedule(); \
} while (0)
//kernel/sched.c
asmlinkage void __sched preempt_schedule(void)
{
	struct thread_info *ti = current_thread_info();

	/*
	 * If there is a non-zero preempt_count or interrupts are disabled,
	 * we do not want to preempt the current task. Just return..
	 */
	//preemption is not allowed while local interrupts are disabled
	//or while preempt_count is non-zero
	if (unlikely(ti->preempt_count || irqs_disabled()))
		return;

need_resched:
	ti->preempt_count = PREEMPT_ACTIVE;
	//invoke the scheduler
	schedule();
	ti->preempt_count = 0;

	/* we could miss a preemption opportunity between schedule and now */
	barrier();
	if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
		goto need_resched;
}
/* Return-to-user-space path: only the thread-flag work (need_resched, sigpending) has to be checked */
ENTRY(resume_userspace)			# the interrupt or exception occurred while the task was in user space
	cli				# make sure we don't miss an interrupt
					# setting need_resched or sigpending
					# between sampling and the iret
	movl TI_flags(%ebp), %ecx
	andl $_TIF_WORK_MASK, %ecx	# is there any work to be done on
					# int/exception return?
	jne work_pending		# there is still work to do
	jmp restore_all			# all work done, restore processor state

# restore processor state
restore_all:
	RESTORE_ALL

	# perform work that needs to be done immediately before resumption
	ALIGN
# handle the remaining work
work_pending:
	testb $_TIF_NEED_RESCHED, %cl	# does the task need to be rescheduled?
	jz work_notifysig		# no reschedule needed
# rescheduling is needed
work_resched:
	call schedule			# invoke the scheduler
	cli				# make sure we don't miss an interrupt
					# setting need_resched or sigpending
					# between sampling and the iret
	movl TI_flags(%ebp), %ecx
	andl $_TIF_WORK_MASK, %ecx	# is there any work to be done other
					# than syscall tracing?
	jz restore_all			# nothing else to do, restore processor state
	testb $_TIF_NEED_RESCHED, %cl
	jnz work_resched		# need_resched was set again, reschedule once more

# vm86 and signal handling
work_notifysig:				# deal with pending signals and
					# notify-resume requests
	testl $VM_MASK, EFLAGS(%esp)	# returning to vm86 mode?
	movl %esp, %eax
	jne work_notifysig_v86		# returning to kernel-space or
					# vm86-space
	xorl %edx, %edx
	call do_notify_resume		# deliver pending signals
	jmp restore_all

	ALIGN
work_notifysig_v86:
	pushl %ecx			# save ti_flags for do_notify_resume
	call save_v86_state		# %eax contains pt_regs pointer
	popl %ecx
	movl %eax, %esp
	xorl %edx, %edx
	call do_notify_resume		# signal handling
	jmp restore_all