2023-12-20

Computer System II Lab 5 ：RV64 内核线程调度

引言：

线程是操作系统调度的基本单位，当一个线程的时间片结束之后，操作系统需要选择一个新的线程来占有CPU。本实验需要完成RV64架构下的线程调度机制，探究与线程有关的一些问题。 ### 1 实验目的

2 实验步骤

2.1 实验准备

按照实验指导在仓库下添加修改一些文件，包括defs.h,proc.h等

并在proc.c中补全dummy函数和一些函数的定义。

2.2 task_init 线程初始化

按照实验指导并结合已给出的代码逻辑补全

void task_init() {
    idle = (struct task_struct*) kalloc();
    // 1. 调用 kalloc() 为 idle 分配一个物理页
    idle->state = TASK_RUNNING;
    // 2. 设置 state 为 TASK_RUNNING;
    idle->counter = 0;
    idle->priority = 0;
    // 3. 由于 idle 不参与调度 可以将其 counter / priority 设置为 0
    idle->pid = 0;
	 // 4. 设置 idle 的 pid 为 0
    current = idle;
    task[0] = idle;
	// 5. 将 current 和 task[0] 指向 idle

    
    int i = 0;
    for(i = 1;i < NR_TASKS; i++){
        struct task_struct * temp = (struct task_struct *)kalloc();
        temp->state = TASK_RUNNING;
        temp->counter = 0;
        temp->priority = (uint64)rand()%(PRIORITY_MAX-PRIORITY_MIN+1)+PRIORITY_MIN;
        temp->pid = i;
        (temp->thread).ra = (uint64)__dummy;
        (temp->thread).sp = (uint64)temp+PGSIZE;
        task[i] = temp;
    }
    // 1. 参考 idle 的设置, 为 task[1] ~ task[NR_TASKS - 1] 进行初始化
    // 2. 其中每个线程的 state 为 TASK_RUNNING, counter 为 0, priority 使用 rand() 来设置, pid 为该线程在线程数组中的下标。
    // 3. 为 task[1] ~ task[NR_TASKS - 1] 设置 `thread_struct` 中的 `ra` 和 `sp`, 
    // 4. 其中 `ra` 设置为 __dummy （见 4.3.2）的地址， `sp` 设置为 该线程申请的物理页的高地址
    


    printk("...proc_init done!\n");
}

在设置每个线程的priority时，需要注意数值的范围。因取余后数值是从0开始的，所以我们要加上Priority_MIN使得最后得到的数值是符合要求的。

在C语言中，得到范围在（a，b）中的随机数：
1
result = rand() % (b - a + 1) + a;

2.3 switch_to 线程切换

当线程切换时需要保存该线程的上下文到该线程的运行栈上。现在我们需要计算一下具体保存在栈的什么位置。

__swtich_to接受两个参数，一个是当前线程的地址a0，一个是下一个现成的地址a1。结构体的定义如下。

struct thread_struct {
    uint64 ra;
    uint64 sp;
    uint64 s[12];
};

/* 线程数据结构 */
struct task_struct {
    struct thread_info* thread_info;
    uint64 state;    // 线程状态
    uint64 counter;  // 运行剩余时间 
    uint64 priority; // 运行优先级
    uint64 pid;      // 线程id

    struct thread_struct thread;
};

其中，thread_info是8bytes的地址值，剩下四个都是8bytes的unsigned int型常量。

所以ra相对于地址值的偏移量为5 * 8 = 40

即我们需要把当前线程的寄存器值保存在a0 + 40的位置，并且从下一个线程的40 + a1处开始恢复寄存器。

.globl __switch_to
__switch_to:
    sd ra, 40(a0)
    sd sp, 48(a0)
    sd s0, 56(a0)
    sd s1, 64(a0)
    sd s2, 72(a0)
    sd s3, 80(a0)
    sd s4, 88(a0)
    sd s5, 96(a0)
    sd s6, 104(a0)
    sd s7, 112(a0)
    sd s8, 120(a0)
    sd s9, 128(a0)
    sd s10, 136(a0)
    sd s11, 144(a0)

    ld ra, 40(a1)
    ld sp, 48(a1)
    ld s0, 56(a1)
    ld s1, 64(a1)
    ld s2, 72(a1)
    ld s3, 80(a1)
    ld s4, 88(a1)
    ld s5, 96(a1)
    ld s6, 104(a1)
    ld s7, 112(a1)
    ld s8, 120(a1)
    ld s9, 128(a1)
    ld s10, 136(a1)
    ld s11, 144(a1)
    ret

在切换时，需要对两个线程进行pid值的比较，如果两个线程pid值相等，则不进行切换。

extern void __switch_to(struct task_struct* prev, struct task_struct* next);

void switch_to(struct task_struct* next) {
    if (next->pid != current->pid) {
        printk("\nswitch to [PID = %d PRIORITY = %d COUNTER = %d]\n", next->pid, next->priority, next->counter);
        struct task_struct* prev = current;
        current = next; // 切换
        __switch_to(prev, next);
    }
}

2.4 do_timer 调度入口

这里需要判断当前线程是否已经运行完毕，是则使用schedule函数进行调度，如果不是继续运行当前线程并执行counter--，直到运行结束。

void do_timer(){
    if(current == idle || current -> counter == 0) schedule();
    else{
        current->counter--;
        if(current->counter == 0) schedule();
    }
}

2.5 schedule 线程调度

在这里我们考虑这几种情况：

当前线程运行完毕，需要切换到下一个进行运行
所有线程均运行完毕，需重新设置
在切换线程时保证先运行counter少的。

根据实验指导中提供的linux执行该算法的源码，编写schedule函数

void schedule(){
    int i , next, c;
    struct task_struct ** p;
    while(1){
        c = -1;
        next = 0;
        i = NR_TASKS;
        p = &task[NR_TASKS];
        while(--i){
            if(!*--p) continue;
            if((*p)->state == TASK_RUNNING && (signed)(*p)->counter > c){
                c = (*p)->counter;
                next = i;
            }
        }
        if(c) break;
        for (int j = 1; j < NR_TASKS; ++j) {
            task[j]->counter = task[j]->priority;
            if (j == 1) printk("\n");
            printk("SET [PID = %d PRIORITY = %d COUNTER = %d]\n", task[j]->pid, task[j]->priority, task[j]->counter);
        }
        
    }
    switch_to(task[next]);
}

需要注意的是，由于我们的counter设置的是unsigned int，如果根据linux的源码执行的话，0 > -1是错误的，因为这时候-1也会被当成是unsigned类型变量。所以我们需要在比较之前加上一个signed指定有符号比较。

linux源码：

while (1) {
		c = -1;
		next = 0;
		i = NR_TASKS;
		p = &task[NR_TASKS];
		while (--i) {
			if (!*--p)
				continue;
			if ((*p)->state == TASK_RUNNING && (*p)->counter > c)
				c = (*p)->counter, next = i;
		}
		if (c) break;
		for(p = &LAST_TASK ; p > &FIRST_TASK ; --p)
			if (*p)
				(*p)->counter = ((*p)->counter >> 1) +
						(*p)->priority;
	}
	switch_to(next);

2.6 _start段更改

在最后的跳转之前分别调用mm_init和task_init

如果不对内存初始化，在task_init的时候就会在kalloc这里死掉

.........
call mm_init
call task_init
j start_kernel

3 实验结果

make run运行结果如图：

4 思考题

4.1 在 RV6 4 中一共用 3 2 个通用寄存器，为什么 `context_switch` 中只保存了 14 个？

在swtich_to中调用__swtich_to时，（由于swtich函数是用C语言写的）会自动把一些寄存保存在栈上，不需要我们手动保存。

所以__switch_to只需要把C语言中没用保存的部分，保存在栈上即可。

4.2 当线程第一次调用时，其 `ra` 所代表的返回点是 `__dummy`。那么在之后的线程调用中 `context_switch` 中，`ra` 保存 / 恢复的函数返回点是什么呢？请同学用g db 尝试追踪一次完整的线程切换流程，并关注每一次 `ra` 的变换。

追踪完整的线程切换流程，我们现在switch_to处打断点

查看__swtich_to段的地址，并在上下两天sd ra，ld ra处打断点

分别查看ra的值

sd：

ld：

这是第一次，即对线程进行初始化。根据推测出可知前几次都是，所以我们直接c, 每次在这两个断点处查看ra的值

第二次

第三次

第四次

可以看到，从第四次开始，所有线程都已经有了保存的上下文，所以恢复的ra也就都是switch_to+136

引言：