内核从2.6开始支持内核抢占。对于非抢占式内核,内核代码可以一直执行直到完成,也就是说,进程处于内核态时是不能被抢占的(当然,运行于内核态的进程可以主动放弃CPU,比如在系统调用服务例程中,内核代码因等待资源而放弃CPU,这种情况叫做计划性进程切换(planned process switch))。但是,对于由异步事件(比如中断)引起的进程切换,抢占式内核与非抢占式内核是有区别的:在抢占式内核中,这种切换可以发生在进程处于内核态的时候,叫做强制性进程切换(forced process switch)。
//kernel/softirq.c
/*
 * local_bh_enable - lower the softirq part of the preempt count and run
 * any softirqs that became pending in the meantime.
 *
 * Sequence, as visible here: warn if interrupts are disabled, subtract
 * (SOFTIRQ_OFFSET - 1) from the preempt count, run do_softirq() when not in
 * interrupt context and softirqs are pending, drop the remaining count of
 * one, and finally check whether a reschedule is due.
 */
void local_bh_enable(void)
{
WARN_ON(irqs_disabled());
/*
* Keep preemption disabled until we are done with
* softirq processing:
*/
/*
 * Removes SOFTIRQ_OFFSET from the count but leaves an extra 1 behind;
 * that leftover is what the comment above relies on to keep preemption
 * disabled while softirqs run. It is dropped by dec_preempt_count() below.
 */
preempt_count() -= SOFTIRQ_OFFSET - 1;
if (unlikely(!in_interrupt() && local_softirq_pending()))
do_softirq(); /* process the pending softirqs */
/* decrement the preempt count by one (the leftover from above) */
dec_preempt_count();
/* check whether a kernel preemption (reschedule) is needed */
preempt_check_resched();
}
//include/linux/preempt.h
/*
 * preempt_check_resched - reschedule immediately if one is pending.
 *
 * Tests TIF_NEED_RESCHED on the current thread and calls
 * preempt_schedule() when it is set. Wrapped in do { } while (0) so the
 * macro behaves as a single statement.
 *
 * Annotations inside the body must be block comments placed before the
 * line-continuation backslash: a line holding only a // comment without a
 * trailing backslash terminates the macro definition early.
 */
#define preempt_check_resched() \
do { \
	/* act only when need_resched is set */ \
	if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) \
		preempt_schedule(); /* preempt: pick another task */ \
} while (0)
//kernel/sched.c
/*
 * preempt_schedule - entry point for kernel preemption.
 *
 * Returns without doing anything when the current thread has a non-zero
 * preempt_count or local interrupts are disabled. Otherwise it marks the
 * thread with PREEMPT_ACTIVE, calls schedule(), clears the count again and
 * repeats for as long as TIF_NEED_RESCHED is still set afterwards.
 */
asmlinkage void __sched preempt_schedule(void)
{
	struct thread_info *self = current_thread_info();

	/*
	 * Preemption is not allowed while preempt_count is non-zero or
	 * local interrupts are off: leave the current task running.
	 */
	if (unlikely(self->preempt_count || irqs_disabled()))
		return;

	do {
		self->preempt_count = PREEMPT_ACTIVE;
		/* hand the CPU to the scheduler */
		schedule();
		self->preempt_count = 0;

		/* we could miss a preemption opportunity between schedule and now */
		barrier();
	} while (unlikely(test_thread_flag(TIF_NEED_RESCHED)));
}
/*
 * resume_userspace: return path to user space.
 * With interrupts off, the thread flags are loaded through %ebp and masked
 * with _TIF_WORK_MASK. A non-zero result branches to work_pending (where
 * need_resched is examined); otherwise control jumps to restore_all.
 * NOTE(review): %ebp presumably holds the thread_info pointer -- confirm
 * against the code that precedes this excerpt.
 */
ENTRY(resume_userspace) # return to user space: the interrupt or exception arrived while the task was in user space
cli # make sure we don't miss an interrupt
# setting need_resched or sigpending
# between sampling and the iret
movl TI_flags(%ebp), %ecx
andl $_TIF_WORK_MASK, %ecx # is there any work to be done on
# int/exception return?
jne work_pending # some work flag is still set
jmp restore_all # nothing left to do: restore the processor state
# Restore the processor state. RESTORE_ALL is a macro defined outside this
# excerpt; presumably it pops the saved registers and returns -- confirm.
restore_all:
RESTORE_ALL
# perform work that needs to be done immediately before resumption
ALIGN
# Handle the remaining work. %ecx holds the thread flags already masked
# with _TIF_WORK_MASK (loaded in resume_userspace or in work_resched below).
work_pending:
testb $_TIF_NEED_RESCHED, %cl # is a reschedule requested?
jz work_notifysig # no reschedule needed: go to the signal/notify handling
# A reschedule is needed.
work_resched:
call schedule # run the scheduler
cli # make sure we don't miss an interrupt
# setting need_resched or sigpending
# between sampling and the iret
movl TI_flags(%ebp), %ecx
/* re-read the flags: is anything else still pending? */
andl $_TIF_WORK_MASK, %ecx # is there any work to be done other
# than syscall tracing?
jz restore_all # nothing else pending: restore the processor state
testb $_TIF_NEED_RESCHED, %cl
jnz work_resched # need_resched was set again: schedule once more
# vm86 check and signal handling. Reached by falling through from
# work_resched or by the jz in work_pending.
work_notifysig: # deal with pending signals and
# notify-resume requests
testl $VM_MASK, EFLAGS(%esp) # is VM_MASK set in the saved EFLAGS (vm86 mode)?
movl %esp, %eax # mov leaves the flags from testl intact; presumably the pt_regs argument -- confirm
jne work_notifysig_v86 # returning to kernel-space or
# vm86-space
xorl %edx, %edx # zero %edx before the call
# process pending signals
call do_notify_resume
jmp restore_all
ALIGN
# vm86 variant of the signal path: save_v86_state is called first, the value
# it returns in %eax becomes the new stack pointer, and only then is
# do_notify_resume invoked.
work_notifysig_v86:
pushl %ecx # save ti_flags for do_notify_resume
call save_v86_state # %eax contains pt_regs pointer
popl %ecx
movl %eax, %esp # switch the stack pointer to the value returned in %eax
xorl %edx, %edx # zero %edx before the call
call do_notify_resume # process pending signals
jmp restore_all
1、指令之间没有数据依赖
如:a = b * 3; c = d + 5;, 就没有依赖;
如:a = b * 3; c = a + 5; ,“写后读”依赖。第二条指令需要a的值作为输入,而a的值依赖于第一条指令的计算结果;
如:a = b * 3; a = d + 5; ,“写后写”依赖。不过虽然第二条指令一定要在第一条指令修改a的值之后才能修改a的值(确保最终a的值是d + 5的结果),但是其实两条指令是可以并行执行的,最后将结果commit到a的时候再串行就OK了;
如:a = b * 3; b = d + 5;, “读后写”依赖。同样,虽然第二条指令一定不能在第一条指令读取b的值之前就将b的值修改(确保第一条指令读到的是旧值),但是只要确保第一条指令先拿到 b的旧值、或者直接跟生成b的旧值的那条指令关联上,之后两条指令还是可以并行执行的;
2、CPU功能部件充足
CPU中用来执行具体操作的功能部件是有限的,假设CPU只有一个乘法器。
如:a = b * 3; c = d + 5; ,一个使用乘法器、另一个使用加法器,互不影响就可以并行;
如:a = b * 3; c = d * 5; ,两条指令都需要使用这个仅有的乘法器,就只能串行了(当然也未必是第一条指令先占用乘法器,因为可能它所依赖的b的值尚未就绪,而第二条指令所需要的d已经就绪);
/*
 * run - strided pass over an array, accumulating calcu() of every element.
 *
 * For each starting offset 0 .. step-1 the array is walked with stride
 * `step`, so every index below `size` is visited exactly once when
 * step > 0. With step <= 0 nothing is visited and 0 is returned.
 *
 * array: elements to process (read only)
 * size:  number of elements in array
 * step:  stride between consecutive accesses, and number of passes
 *
 * Returns the sum of calcu(array[idx]) over all visited indices.
 */
int run(const int *array, int size, int step)
{
    int sum = 0;

    printf("run...\n");
    for (int offset = 0; offset < step; offset++) {
        int idx = offset;

        while (idx < size) {
            /*
             * An lfence is issued before every element; presumably so that
             * loads of successive iterations are not overlapped by
             * out-of-order execution -- confirm against the benchmark notes.
             */
            asm volatile("lfence");
            sum += calcu(array[idx]);
            idx += step;
        }
    }
    return sum;
}
$ for x in 1 2 4 8 16 32; do ./prefetch.normal test.tar.gz 1024 $x; done
array size: 468787200, step: 1024. run with prefetch(1)...
time cost: 36.262511, result: 1309150882
array size: 468787200, step: 1024. run with prefetch(2)...
time cost: 29.902517, result: 1309150882
array size: 468787200, step: 1024. run with prefetch(4)...
time cost: 28.052798, result: 1309150882
array size: 468787200, step: 1024. run with prefetch(8)...
time cost: 26.040215, result: 1309150882
array size: 468787200, step: 1024. run with prefetch(16)...
time cost: 26.198825, result: 1309150882
array size: 468787200, step: 1024. run with prefetch(32)...
time cost: 25.910506, result: 1309150882
/*
 * run_withprefetch - strided pass over an array that touches a later
 * element before processing the current one.
 *
 * The traversal visits each starting offset 0 .. step-1 with stride
 * `step`. Before calcu(array[j]) is evaluated, the element
 * `prefetch` strides ahead (index j + step * prefetch) is loaded into
 * %eax and discarded, provided that index lies inside the array.
 *
 * array:    elements to process (read only)
 * size:     number of elements in array
 * step:     stride between consecutive accesses, and number of passes
 * prefetch: how many strides ahead of j the early load reaches
 *
 * Returns the sum of calcu(array[j]) over all visited indices.
 */
int run_withprefetch(const int *array, int size, int step, int prefetch)
{
    /*
     * Loop-invariant look-ahead distance, computed in long long: the int
     * product step * prefetch can overflow, which is undefined behaviour.
     */
    const long long lookahead = (long long)step * prefetch;
    int result = 0;

    printf("run with prefetch(%d)...\n", prefetch);
    for (int i = 0; i < step; i++) {
        for (int j = i; j < size; j += step) {
            const long long k = j + lookahead;

            /*
             * Both bounds are checked: a negative prefetch (or a wrapped
             * product in the old int arithmetic) would otherwise pass
             * k < size and read before the start of the array.
             */
            if (k >= 0 && k < size) {
                const int *addr = &array[k];
                /* plain load whose value is never used; it only touches addr */
                asm volatile("mov (%0), %%eax"::"r"(addr):"eax");
            }
            result += calcu(array[j]);
        }
    }
    return result;
}
重跑case-4:
[case-4.1]$ g++ -O2 prefetch.cpp
[case-4.1]$ ./a.out test.tar.gz 1024 4
array size: 468787200, step: 1024. run with prefetch(4)...
time cost: 37.312423, result: 1309150882
一般情况下,函数返回某个对象时,如果将返回值声明为const,多用于操作符的重载。
通常,不建议用const修饰函数的返回值类型为某个对象或某个对象的引用的情况。
原因如下:
如果返回值是const对象(const A test = A实例)或对某个对象的const引用(const A& test = A实例),
则返回值具有const属性:通过它只能访问类A中的公有(保护)数据成员和const成员函数,
并且不允许对其进行赋值操作,这在一般情况下很少用到。