/*
* We restart softirq processing MAX_SOFTIRQ_RESTART times,
* and we fall back to softirqd after that.
*
* This number has been established via experimentation.
* The two things to balance is latency against fairness -
* we want to handle softirqs as soon as possible, but they
* should not be able to lock up the box.
*/
#define MAX_SOFTIRQ_RESTART 10

asmlinkage void __do_softirq(void)
{
        struct softirq_action *h;
        __u32 pending;
        int max_restart = MAX_SOFTIRQ_RESTART;
        int cpu;

        pending = local_softirq_pending();
        account_system_vtime(current);

        __local_bh_disable((unsigned long)__builtin_return_address(0));
        trace_softirq_enter();

        cpu = smp_processor_id();
restart:
        /* Reset the pending bitmask before enabling irqs */
        set_softirq_pending(0);

        local_irq_enable();

        h = softirq_vec;

        do {
                if (pending & 1) {
                        int prev_count = preempt_count();

                        h->action(h);

                        if (unlikely(prev_count != preempt_count())) {
                                printk(KERN_ERR "huh, entered softirq %td %p"
                                       "with preempt_count %08x,"
                                       " exited with %08x?\n", h - softirq_vec,
                                       h->action, prev_count, preempt_count());
                                preempt_count() = prev_count;
                        }

                        rcu_bh_qsctr_inc(cpu);
                }
                h++;
                pending >>= 1;
        } while (pending);

        local_irq_disable();

        pending = local_softirq_pending();
        if (pending && --max_restart)
                goto restart;

        if (pending)
                wakeup_softirqd();

        trace_softirq_exit();
        account_system_vtime(current);
        _local_bh_enable();
}
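__do_softirq() drains every pending softirq with interrupts enabled, then disables interrupts and re-checks: if handlers raised new softirqs while running, it loops back, but only for up to MAX_SOFTIRQ_RESTART passes before waking the softirqd thread to take over, so a softirq storm cannot monopolize the CPU indefinitely. The user-space model below isolates that bounded-restart pattern; pending_mask, handle(), run_pending() and wakeup_fallback() are invented for the demonstration and merely stand in for local_softirq_pending(), softirq_vec[nr].action, __do_softirq() and wakeup_softirqd().

#include <stdio.h>

#define MAX_RESTART 10

static unsigned int pending_mask;     /* stands in for local_softirq_pending() */
static int reraise_budget = 12;       /* fakes handlers that keep raising work */

static void handle(int nr)
{
        printf("handled softirq %d\n", nr);
        if (reraise_budget-- > 0)
                pending_mask |= 1u;   /* handler raised softirq 0 again */
}

static void wakeup_fallback(void)     /* stands in for wakeup_softirqd() */
{
        printf("still pending, deferring to softirqd thread\n");
}

static void run_pending(void)
{
        int max_restart = MAX_RESTART;
        unsigned int pending = pending_mask;

restart:
        pending_mask = 0;             /* reset before "re-enabling irqs" */
        for (int nr = 0; pending; nr++, pending >>= 1)
                if (pending & 1)
                        handle(nr);

        /* work raised while we ran? loop again, but only a bounded
         * number of times, so a storm cannot lock up this CPU */
        pending = pending_mask;
        if (pending && --max_restart)
                goto restart;
        if (pending)
                wakeup_fallback();
}

int main(void)
{
        pending_mask = 0x5;           /* softirqs 0 and 2 initially pending */
        run_pending();
        return 0;
}

Running it, the re-raised work is handled inline for ten passes, after which the fallback fires: exactly the latency-versus-fairness compromise the comment above MAX_SOFTIRQ_RESTART describes.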
Linux's scheduling algorithm is a relatively self-contained module and comparatively easy to understand, which is why so many systems hackers like to try their hand at improving it. Yet the scheduler can fairly be called a mysterious, elusive spirit: a change to a single key parameter can greatly improve the efficiency of the whole system.
For ordinary processes, CPU time is divided evenly among the processes themselves, so this kind of fair sharing is defined from the point of view of the process. Bach proposed fair-share scheduling in 1986 to address exactly this. Compared with the three policies built into Linux, fair-share scheduling is a more abstract policy: it holds that CPU time should be allocated according to the groups that own the processes (for Linux, the users), thereby implementing fairness from the point of view of the user. For example, if user A owns three runnable processes and user B owns one, per-process fairness gives A 75% of the CPU, whereas fair-share scheduling gives each user 50%.
Judging by the kernel's structure, there are many ways to implement this algorithm. We can make small changes to the scheduling-related code, such as adjusting a few data structures and modifying the schedule() function; we could also go about it in a much more elaborate way, for instance rewriting schedule() outright to obtain the desired behaviour. One thing must be kept firmly in mind, however: most of the Linux kernel takes smallness and efficiency as its highest goal, so an improved algorithm must stay as close to that goal as possible. A sketch of the lightweight approach is given below.
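As a concrete illustration of the lightweight approach, the following sketch scales the weight of a SCHED_OTHER process down by the number of runnable processes its owner has, so that selection pressure equalizes across users rather than processes. This is only a user-space sketch under stated assumptions: struct task, nr_runnable_for_user() and fair_share_goodness() are hypothetical stand-ins for task_struct, the per-user bookkeeping, and the SCHED_OTHER branch of goodness(); a real patch would maintain the per-user count incrementally (e.g. in struct user_struct) rather than rescanning the runqueue.

#include <stdio.h>

/* Hypothetical, simplified stand-ins for the kernel structures. */
struct task {
        int counter;        /* remaining clock-ticks, as in task_struct */
        int nice;           /* -20..19 */
        int uid;            /* owning user */
};

/* In a real patch this count would be kept up to date incrementally,
 * not recomputed on every call. */
static int nr_runnable_for_user(struct task *rq, int n, int uid)
{
        int count = 0;
        for (int i = 0; i < n; i++)
                if (rq[i].uid == uid)
                        count++;
        return count;
}

/* Fair-share variant of the SCHED_OTHER branch of goodness(): the
 * per-process weight is divided by the owner's runnable-process count,
 * so each *user*, not each process, converges on an equal CPU share. */
static int fair_share_goodness(struct task *p, struct task *rq, int n)
{
        int weight = p->counter;
        if (!weight)
                return 0;             /* out of time, recalculate */
        weight += 20 - p->nice;
        return weight / nr_runnable_for_user(rq, n, p->uid);
}

int main(void)
{
        /* User 1 owns three runnable processes, user 2 owns one. */
        struct task rq[] = {
                { 6, 0, 1 }, { 6, 0, 1 }, { 6, 0, 1 }, { 6, 0, 2 },
        };
        for (int i = 0; i < 4; i++)
                printf("task %d (uid %d): weight %d\n", i, rq[i].uid,
                       fair_share_goodness(&rq[i], rq, 4));
        return 0;
}

With equal counters and nice values, user 2's lone process scores 26 while each of user 1's three processes scores 8, so user 2 wins the next several selections; over time each user, rather than each process, receives a roughly equal share.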
==================== kernel/sched.c 528 541 ====================
[schedule()]
528  handle_softirq_back:
529
530          /*
531           * 'sched_data' is protected by the fact that we can run
532           * only one process per CPU.
533           */
534          sched_data = & aligned_data[this_cpu].schedule_data;
535
536          spin_lock_irq(&runqueue_lock);
537
538          /* move an exhausted RR process to be last.. */
539          if (prev->policy == SCHED_RR)
540                  goto move_rr_last;
541  move_rr_back:
==================== kernel/sched.c 123 187 ====================
[schedule()>goodness()]
123  /*
124   * This is the function that decides how desirable a process is..
125   * You can weigh different processes against each other depending
126   * on what CPU they've run on lately etc to try to handle cache
127   * and TLB miss penalties.
128   *
129   * Return values:
130   *   -1000: never select this
131   *       0: out of time, recalculate counters (but it might still be
132   *          selected)
133   *     +ve: "goodness" value (the larger, the better)
134   *   +1000: realtime process, select this.
135   */
136
137  static inline int goodness(struct task_struct * p, int this_cpu, struct mm_struct *this_mm)
138  {
139          int weight;
140
141          /*
142           * select the current process after every other
143           * runnable process, but before the idle thread.
144           * Also, don't trigger a counter recalculation.
145           */
146          weight = -1;
147          if (p->policy & SCHED_YIELD)
148                  goto out;
149
150          /*
151           * Non-RT process - normal case first.
152           */
153          if (p->policy == SCHED_OTHER) {
154                  /*
155                   * Give the process a first-approximation goodness value
156                   * according to the number of clock-ticks it has left.
157                   *
158                   * Don't do any other calculations if the time slice is
159                   * over..
160                   */
161                  weight = p->counter;
162                  if (!weight)
163                          goto out;
164
165  #ifdef CONFIG_SMP
166                  /* Give a largish advantage to the same processor... */
167                  /* (this is equivalent to penalizing other processors) */
168                  if (p->processor == this_cpu)
169                          weight += PROC_CHANGE_PENALTY;
170  #endif
171
172                  /* .. and a slight advantage to the current MM */
173                  if (p->mm == this_mm || !p->mm)
174                          weight += 1;
175                  weight += 20 - p->nice;
176                  goto out;
177          }
178
179          /*
180           * Realtime process, select the first one on the
181           * runqueue (taking priorities within processes
182           * into account).
183           */
184          weight = 1000 + p->rt_priority;
185  out:
186          return weight;
187  }
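To get a feel for the numbers: on a uniprocessor, a SCHED_OTHER task with counter == 6, nice == 0, and the current mm scores 6 + 1 + (20 - 0) = 27; a task whose time slice is exhausted (counter == 0) scores 0; and a realtime task with rt_priority == 50 scores 1000 + 50 = 1050. Realtime tasks therefore always beat ordinary ones, and among ordinary tasks the remaining time slice and the nice value dominate the choice.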
==================== kernel/sched.c 578 596 ====================
[schedule()]
578          /* Do we need to recalculate counters? */
579          if (!c)
580                  goto recalculate;
581          /*
582           * from this point on nothing can prevent us from
583           * switching to the next task, save this fact in
584           * sched_data.
585           */
586          sched_data->curr = next;
587  #ifdef CONFIG_SMP
     .....
590  #endif
591          spin_unlock_irq(&runqueue_lock);
592
593          if (prev == next)
594                  goto same_process;
595
596  #ifdef CONFIG_SMP
==================== kernel/sched.c 612 657 ====================
612  #endif /* CONFIG_SMP */
613
614          kstat.context_swtch++;
615          /*
616           * there are 3 processes which are affected by a context switch:
617           *
618           * prev == .... ==> (last => next)
619           *
620           * It's the 'much more previous' 'prev' that is on next's stack,
621           * but prev is set to (the just run) 'last' process by switch_to().
622           * This might sound slightly confusing but makes tons of sense.
623           */
624          prepare_to_switch();
625          {
626                  struct mm_struct *mm = next->mm;
627                  struct mm_struct *oldmm = prev->active_mm;
628                  if (!mm) {
629                          if (next->active_mm) BUG();
630                          next->active_mm = oldmm;
631                          atomic_inc(&oldmm->mm_count);
632                          enter_lazy_tlb(oldmm, next, this_cpu);
633                  } else {
634                          if (next->active_mm != mm) BUG();
635                          switch_mm(oldmm, mm, next, this_cpu);
636                  }
637
638                  if (!prev->mm) {
639                          prev->active_mm = NULL;
640                          mmdrop(oldmm);
641                  }
642          }
643
644          /*
645           * This just switches the register state and the
646           * stack.
647           */
648          switch_to(prev, next, prev);
649          __schedule_tail(prev);
650
651  same_process:
652          reacquire_kernel_lock(current);
653          if (current->need_resched)
654                  goto need_resched_back;
655
656          return;
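The block at lines 626-641 is the subtle part of the switch: a kernel thread has no address space of its own (next->mm == NULL), so instead of flushing the TLB it borrows the outgoing task's active_mm and takes a reference on it; when the kernel thread is later switched away from, the borrowed mm is dropped again. The user-space model below reproduces just that borrow/drop accounting. struct mm, struct task, and this mmdrop() are simplified stand-ins for the kernel's definitions, invented for the demonstration.

#include <stdio.h>

/* Simplified stand-ins for the kernel structures (illustration only). */
struct mm {
        int mm_count;                  /* like mm_struct.mm_count */
        const char *name;
};

struct task {
        const char *comm;
        struct mm *mm;                 /* NULL for kernel threads */
        struct mm *active_mm;          /* mm actually loaded in the MMU */
};

static void mmdrop(struct mm *mm)
{
        if (--mm->mm_count == 0)
                printf("  freeing %s\n", mm->name);
}

/* The mm hand-off logic from schedule(), lines 626-641 above. */
static void switch_mm_logic(struct task *prev, struct task *next)
{
        struct mm *mm = next->mm;
        struct mm *oldmm = prev->active_mm;

        if (!mm) {
                /* kernel thread: borrow prev's address space (lazy TLB) */
                next->active_mm = oldmm;
                oldmm->mm_count++;
                printf("%s borrows %s (no flush)\n", next->comm, oldmm->name);
        } else {
                next->active_mm = mm;
                printf("%s switches to %s\n", next->comm, mm->name);
        }

        if (!prev->mm) {
                /* prev was a kernel thread: return the borrowed mm */
                prev->active_mm = NULL;
                mmdrop(oldmm);
        }
}

int main(void)
{
        struct mm init_mm = { 1, "init_mm" };
        struct task shell = { "shell", &init_mm, &init_mm };
        struct task kthread = { "kswapd", NULL, NULL };

        switch_mm_logic(&shell, &kthread);   /* user -> kernel thread */
        switch_mm_logic(&kthread, &shell);   /* kernel thread -> user */
        printf("init_mm.mm_count = %d\n", init_mm.mm_count);
        return 0;
}

Running it shows init_mm's reference count returning to 1 after the round trip, which is exactly the invariant mmdrop() protects in the kernel.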