KyrieGuo
2022/05/31阅读:24主题:橙心
调度器源码分析
1、调度器及其功能
-
内核中用来安排进程执行的模块称为调度器(scheduler),它可以切换进程状态(process state)。例如执行、可中断睡眠、不可中断睡眠、退出、暂停等。
-
调度器是CPU(中央处理器)的管理员,主要负责完成两件事:一是选择某些就绪进程来执行,二是打断某些正在执行的进程,让它们变为就绪态。调度器分配CPU时间的基本依据就是进程的优先级。上下文切换(context switch):将CPU上正在执行的进程切换为另一个进程的过程,由内核承担此任务,负责保存被切换出去的进程的CPU状态,并恢复即将运行的进程的CPU状态。
2、调度类sched_class结构体与调度类
sched_class结构体表示调度类,定义在kernel/sched/sched.h
/*
 * Table of callbacks that a scheduling class implements. The scheduler core
 * reaches class-specific behaviour through these function pointers.
 */
struct sched_class {
/*
 * The system has several scheduling classes chained into a list ordered by
 * scheduling priority; next points to the following class in that list.
 */
const struct sched_class *next;
/*
 * Add a task to the run queue.
 * NOTE(review): the original note says the scheduling entity is stored in the
 * red-black tree and nr_running is incremented; the implementation is not
 * shown in this excerpt -- confirm against the concrete class.
 */
void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
/*
 * Remove a task from the run queue.
 * NOTE(review): the original note says nr_running is decremented; not
 * visible in this excerpt -- confirm.
 */
void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
/*
 * Give up the CPU.
 * NOTE(review): the original note describes this as a dequeue followed by an
 * enqueue that places the entity at the rightmost end of the red-black tree;
 * not visible in this excerpt -- confirm.
 */
void (*yield_task) (struct rq *rq);
/* No original note; presumably yields the CPU in favour of task p -- TODO confirm. */
bool (*yield_to_task) (struct rq *rq, struct task_struct *p, bool preempt);
/* Check whether the current task can be preempted by a new task. */
void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags);
/*
* It is the responsibility of the pick_next_task() method that will
* return the next task to call put_prev_task() on the @prev task or
* something equivalent.
*
* May return RETRY_TASK when it finds a higher prio class has runnable
* tasks.
*/
/* Select the next task that should run. */
struct task_struct * (*pick_next_task) (struct rq *rq,
struct task_struct *prev);
/* Put the task back on the run queue. */
void (*put_prev_task) (struct rq *rq, struct task_struct *p);
#ifdef CONFIG_SMP
/* Select a suitable CPU for the task. */
int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
/* Migrate the task to another CPU. */
void (*migrate_task_rq)(struct task_struct *p);
/* Hooks used specifically when waking a task up. */
void (*task_waking) (struct task_struct *task);
void (*task_woken) (struct rq *this_rq, struct task_struct *task);
/* Change the task's CPU affinity. */
void (*set_cpus_allowed)(struct task_struct *p,
const struct cpumask *newmask);
/* Bring the run queue online. */
void (*rq_online)(struct rq *rq);
/* Take the run queue offline. */
void (*rq_offline)(struct rq *rq);
#endif
/* Called when a task changes its scheduling class or its task group. */
void (*set_curr_task) (struct rq *rq);
/*
 * Per-class tick handler; it may cause a task switch and so drives
 * preemption of the running task.
 */
void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);
/*
 * Called when a task is created; initialisation differs between scheduling
 * policies.
 */
void (*task_fork) (struct task_struct *p);
/* Used when a task exits. */
void (*task_dead) (struct task_struct *p);
/*
* The switched_from() call is allowed to drop rq->lock, therefore we
* cannot assume the switched_from/switched_to pair is serialized by
* rq->lock. They are however serialized by p->pi_lock.
*/
/*
 * NOTE(review): the original note calls these hooks for "task switch
 * operations"; the names suggest they fire when a task leaves / joins a
 * scheduling class rather than on a context switch -- confirm.
 */
void (*switched_from) (struct rq *this_rq, struct task_struct *task);
void (*switched_to) (struct rq *this_rq, struct task_struct *task);
/* Priority-change hook; oldprio carries the previous priority (per its name). */
void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
int oldprio);
unsigned int (*get_rr_interval) (struct rq *rq,
struct task_struct *task);
void (*update_curr) (struct rq *rq);
#ifdef CONFIG_FAIR_GROUP_SCHED
/* Only present when CONFIG_FAIR_GROUP_SCHED is defined. */
void (*task_move_group) (struct task_struct *p);
#endif
};
成员
-
enqueue_task:向就绪队列添加一个进程,某个任务进入可运行状态时,该函数将会调用。
-
dequeue_task:将一个进程从就绪队列中进行删除,当某个任务退出可运行状态时调用该函数,它将从红黑树中去掉对应调度实体。
-
yield_task:在进程想要自愿放弃对处理器的控制权时,可使用sched_yield系统调用,内核随后会调用该函数来处理。
-
check_preempt_curr:检查当前运行的任务是否应被新进程抢占。
-
pick_next_task:选择接下来要运行的最合适的实体。
-
put_prev_task:在用另一个进程代替当前运行的进程时调用。
-
set_curr_task:当任务修改它的调度类或修改它的任务组时,将调用这个函数
-
task_tick:在每次激活周期调度器时,由周期性调度器调用
Linux调度类
调度类:dl_sched_class、rt_sched_class、fair_sched_class及idle_sched_class等。每个进程都有对应一种调度策略,每一种调度策略又对应一种调度类(每一个调度类对应多种调度策略)
/*
 * The scheduling class instances, declared here from highest to lowest
 * priority: stop > dl > rt > fair > idle.
 */
extern const struct sched_class stop_sched_class;
extern const struct sched_class dl_sched_class;
extern const struct sched_class rt_sched_class;
extern const struct sched_class fair_sched_class;
extern const struct sched_class idle_sched_class;
-
rt_sched_class类 实时调度器(调度策略:SCHED_FIFO、SCHED_RR) -
fair_sched_class类 完全公平调度器(调度策略:SCHED_NORMAL、SCHED_BATCH、SCHED_IDLE)
SCHED_FIFO调度策略的实时进程永远比SCHED_NORMAL调度策略的普通进程优先运行,具体实现可以参考pick_next_task函数。
-
调度类的优先级顺序:stop_sched_class > dl_sched_class > rt_sched_class > fair_sched_class > idle_sched_class
// Highest-priority class: its thread interrupts every other thread and is
// never interrupted by any other task. Its .next links to dl_sched_class.
const struct sched_class stop_sched_class = {
.next = &dl_sched_class,
.enqueue_task = enqueue_task_stop,
.dequeue_task = dequeue_task_stop,
.yield_task = yield_task_stop,
.check_preempt_curr = check_preempt_curr_stop,
.pick_next_task = pick_next_task_stop,
.put_prev_task = put_prev_task_stop,
#ifdef CONFIG_SMP
.select_task_rq = select_task_rq_stop,
.set_cpus_allowed = set_cpus_allowed_common,
#endif
.set_curr_task = set_curr_task_stop,
.task_tick = task_tick_stop,
.get_rr_interval = get_rr_interval_stop,
.prio_changed = prio_changed_stop,
.switched_to = switched_to_stop,
.update_curr = update_curr_stop,
};
// dl scheduling class: its .next links to rt_sched_class, the following
// class in the priority-ordered list.
const struct sched_class dl_sched_class = {
.next = &rt_sched_class,
.enqueue_task = enqueue_task_dl,
.dequeue_task = dequeue_task_dl,
.yield_task = yield_task_dl,
.check_preempt_curr = check_preempt_curr_dl,
.pick_next_task = pick_next_task_dl,
.put_prev_task = put_prev_task_dl,
#ifdef CONFIG_SMP
.select_task_rq = select_task_rq_dl,
.set_cpus_allowed = set_cpus_allowed_dl,
.rq_online = rq_online_dl,
.rq_offline = rq_offline_dl,
.task_woken = task_woken_dl,
#endif
.set_curr_task = set_curr_task_dl,
.task_tick = task_tick_dl,
.task_fork = task_fork_dl,
.task_dead = task_dead_dl,
.prio_changed = prio_changed_dl,
.switched_from = switched_from_dl,
.switched_to = switched_to_dl,
.update_curr = update_curr_dl,
};
// Applies to real-time threads. Its .next links to fair_sched_class.
const struct sched_class rt_sched_class = {
.next = &fair_sched_class,
.enqueue_task = enqueue_task_rt,
.dequeue_task = dequeue_task_rt,
.yield_task = yield_task_rt,
.check_preempt_curr = check_preempt_curr_rt,
.pick_next_task = pick_next_task_rt,
.put_prev_task = put_prev_task_rt,
#ifdef CONFIG_SMP
.select_task_rq = select_task_rq_rt,
.set_cpus_allowed = set_cpus_allowed_common,
.rq_online = rq_online_rt,
.rq_offline = rq_offline_rt,
.task_woken = task_woken_rt,
// Note: switched_from is only set when CONFIG_SMP is defined.
.switched_from = switched_from_rt,
#endif
.set_curr_task = set_curr_task_rt,
.task_tick = task_tick_rt,
.get_rr_interval = get_rr_interval_rt,
.prio_changed = prio_changed_rt,
.switched_to = switched_to_rt,
.update_curr = update_curr_rt,
};
// The first thread on each CPU (PID 0, "swapper") is a static thread whose
// scheduling class is idle_sched_class. Per the original note it typically
// runs during boot, and a dump is produced on CPU exceptions (not verifiable
// from this excerpt).
const struct sched_class idle_sched_class = {
/* .next is NULL */
/* no enqueue/yield_task for idle tasks */
/* dequeue is not valid, we print a debug message there: */
.dequeue_task = dequeue_task_idle,
.check_preempt_curr = check_preempt_curr_idle,
.pick_next_task = pick_next_task_idle,
.put_prev_task = put_prev_task_idle,
#ifdef CONFIG_SMP
.select_task_rq = select_task_rq_idle,
.set_cpus_allowed = set_cpus_allowed_common,
#endif
.set_curr_task = set_curr_task_idle,
.task_tick = task_tick_idle,
.get_rr_interval = get_rr_interval_idle,
.prio_changed = prio_changed_idle,
.switched_to = switched_to_idle,
.update_curr = update_curr_idle,
};
// Completely Fair Scheduler (CFS) class, used by ordinary threads.
// Its .next links to idle_sched_class.
const struct sched_class fair_sched_class = {
.next = &idle_sched_class,
.enqueue_task = enqueue_task_fair,
.dequeue_task = dequeue_task_fair,
.yield_task = yield_task_fair,
.yield_to_task = yield_to_task_fair,
.check_preempt_curr = check_preempt_wakeup,
.pick_next_task = pick_next_task_fair,
.put_prev_task = put_prev_task_fair,
#ifdef CONFIG_SMP
.select_task_rq = select_task_rq_fair,
.migrate_task_rq = migrate_task_rq_fair,
.rq_online = rq_online_fair,
.rq_offline = rq_offline_fair,
.task_waking = task_waking_fair,
// Note: task_dead is only set when CONFIG_SMP is defined.
.task_dead = task_dead_fair,
.set_cpus_allowed = set_cpus_allowed_common,
#endif
.set_curr_task = set_curr_task_fair,
.task_tick = task_tick_fair,
.task_fork = task_fork_fair,
.prio_changed = prio_changed_fair,
.switched_from = switched_from_fair,
.switched_to = switched_to_fair,
.get_rr_interval = get_rr_interval_fair,
.update_curr = update_curr_fair,
#ifdef CONFIG_FAIR_GROUP_SCHED
.task_move_group = task_move_group_fair,
#endif
};
-
SCHED_NORMAL,SCHED_BATCH,SCHED_IDLE直接被映射到fair_sched_class; -
SCHED_RR,SCHED_FIFO与rt_sched_class进行相关联;
Linux调度核心在选择下一个合适的task运行时,会按照优先级顺序依次调用各调度类的pick_next_task函数
3、优先级
-
task_struct结构体中采用三个成员表示进程的优先级:prio和normal_prio表示动态优先级,static_prio表示进程的静态优先级 -
内核将任务优先级划分,实时优先级范围是0到MAX_RT_PRIO-1(即99),而普通进程的静态优先级范围是从MAX_RT_PRIO到MAX_PRIO-1(即100-139)
/*
 * Real-time priorities occupy 0 .. MAX_RT_PRIO-1 (0-99); MAX_RT_PRIO is
 * therefore the first priority value available to normal tasks.
 */
#define MAX_USER_RT_PRIO 100
#define MAX_RT_PRIO MAX_USER_RT_PRIO
/*
 * Total number of priority levels. Normal tasks use MAX_RT_PRIO .. MAX_PRIO-1.
 * NICE_WIDTH is defined outside this excerpt; the 100-139 range quoted in the
 * text implies a value of 40.
 */
#define MAX_PRIO (MAX_RT_PRIO + NICE_WIDTH)
/* Midpoint of the normal-task priority range. */
#define DEFAULT_PRIO (MAX_RT_PRIO + NICE_WIDTH / 2)
进程分类:
实时进程(Real-Time Process):优先级高、需要立即被执行的进程
普通进程(Normal Process):优先级低,更长执行时间的进程
进程的优先级用一个0-139的整数来表示,数字越小,优先级越高。其中优先级0-99留给实时进程,100-139留给普通进程。
作者介绍