当前位置：网站首页>setcontext getcontext makecontext swapcontext

setcontext getcontext makecontext swapcontext

2022-04-23 14:11:00 【Mrpre】

Linux上下文切换以及协程

上下文切换，听起来虚无缥缈，什么是上下文，切换又是指的是什么？其实上下文就可以理解为一个进程所运行的相关的寄存器值，即包括sp/bp/pc等值，换句话说，一个上下文，就是包括了能够恢复进程运行所需要的所有必要的东西。所谓的切换，那是多进程的操作系统必要的功能，一个CPU能够运行多个进程（看起来），那么必然要在多个进程之间不停的切换，A切换到B时，必然需要保存A相关信息，这样才能从B切换回来时接着运行A,且正确的运行A。

不过上面描述的都是OS的事情，一个进程或者一个线程在内核都是由struct task_struct描述的，OS的调度对象就是task_struct。而在用户态想实现类似的功能，那么必然需要对应的库函数。根据上面的描述，我们已经知道，要实现所谓的调度，那么必须保存相关寄存器信息。

先看例test1.c

#include <ucontext.h>
#include <stdio.h>

int done = 0;

/*
 * test1: jump back to an earlier point of the same function.
 *
 * getcontext() snapshots the current registers and setcontext() restores
 * them, so execution resumes right after the getcontext() call a second
 * time. The global flag `done` distinguishes the two passes.
 */
int main()
{
	ucontext_t saved;

	getcontext(&saved);
	if (!done)
	{
		/* First pass: raise the flag, then jump back to the snapshot. */
		done = 1;
		setcontext(&saved);
		return 0; /* a successful setcontext() never returns here */
	}

	/* Second pass: we arrived here through setcontext(). */
	printf("return from getcontext,exit\n");
	return 0;
}

程序先调用getcontext把当前寄存器信息保存到context中，然后执行setcontext。所谓的setcontext，就是把context中保存的寄存器信息恢复到当前CPU的寄存器中，也就是说，强制把context中的pc、bp、sp等值赋给当前CPU的寄存器。显而易见，程序又跳回到了getcontext处。

上面这个例子和goto一样，那直接用goto不就行了？那看下面这个例子（test2.c）：

#include <ucontext.h>
#include <stdio.h>

int done = 0;

/*
 * Resume the execution state stored in `context` from inside another
 * function, setting the global `done` flag first so the resumed code can
 * tell that it is on its second pass.
 *
 * Returns: does not return when setcontext() succeeds; otherwise the
 * failure value of setcontext() (-1, errno set) is handed back, so this
 * non-void function always returns a defined value.
 */
int func1(ucontext_t *context)
{
	done = 1;
	return setcontext(context);
}
/*
 * test2: the same jump as test1, but setcontext() is issued from a
 * different function (func1), which a plain goto cannot do.
 */
int main()
{
	ucontext_t saved;

	getcontext(&saved);
	if (!done)
	{
		/* First pass: func1() sets `done` and jumps back to the snapshot. */
		func1(&saved);
		return 0;
	}

	/* Second pass: resumed right after getcontext() by func1(). */
	printf("return from getcontext,exit\n");
	return 0;
}

goto 一定做不了函数之间的跳转，只能做函数内部的本地跳转。当然 getcontext 和 setcontext 肯定不仅仅只有这些功能，看下面这个例子。

//test3.c

#include <ucontext.h>
#include <stdio.h>
#include <malloc.h>
/* Entry function of the new context: prints a marker line and returns. */
void func()
{
        fputs("in func\n", stdout);
}

/*
 * test3: run func() in a context that has its own heap-allocated stack.
 *
 * getcontext() initialises the context, makecontext() redirects it to
 * func(), and setcontext() switches to it. uc_link is NULL, so there is
 * no context to continue with once func() returns and control never
 * comes back to main().
 *
 * Returns 1 if any step fails; on success setcontext() does not return.
 */
int main()
{
        ucontext_t context;

        if (getcontext(&context) == -1)
        {
                perror("getcontext");
                return 1;
        }

        /* Give the new context a dedicated stack. */
        context.uc_stack.ss_sp = malloc(10000);
        if (context.uc_stack.ss_sp == NULL)
        {
                perror("malloc");
                return 1;
        }
        context.uc_stack.ss_size = 10000;
        context.uc_link = NULL;
        makecontext(&context, func, 0);

        setcontext(&context);

        /* Only reached when setcontext() failed. */
        perror("setcontext");
        free(context.uc_stack.ss_sp);
        return 1;
}

先用getcontext初始化context，然后makecontext，指定跳转的函数，最后再用setcontext把上下文切换到func函数。

除此之外，还为新的context指定了新的栈，为什么呢？因为如果不指定栈，那么栈还是getcontext时获取的sp、bp指针，sp、bp描述的是main函数的栈大小。如果main函数的栈大小是100字节，但是你所要执行的func需要1000字节的栈，显然不够用。

看到这，肯定好多人心里很是疑惑，如果执行完func，还会返回到main吗？答案是不会。为什么不会？难道func不是main调用的吗？怎么会不返回到main呢？下面我们就讲讲这个问题，不过讲这个问题前，希望大家自行去了解一下x86、x64的调用规则。

我们知道一个main作为一个函数，肯定是被其他地方调用的。
我们写一个简单的main函数，然后断点打住，推栈，发现其调用者是 __libc_start_main：

Breakpoint 3, 0x00000000004004d8 in main () at main.c:7
7	}
(gdb) disassemble
Dump of assembler code for function main:
   0x00000000004004c4 <+0>:	push   %rbp
   0x00000000004004c5 <+1>:	mov    %rsp,%rbp
   0x00000000004004c8 <+4>:	mov    $0xa,%edi
   0x00000000004004cd <+9>:	callq  0x4003b8 <putchar@plt>
   0x00000000004004d2 <+14>:	mov    $0x0,%eax
   0x00000000004004d7 <+19>:	leaveq
=> 0x00000000004004d8 <+20>:	retq
End of assembler dump.
(gdb) i r
rax            0x0	0
rbx            0x0	0
rcx            0xffffffff	4294967295
rdx            0x7ffff77d1e10	140737345560080
rsi            0x7ffff7ff7000	140737354100736
rdi            0x0	0
rbp            0xcc	0xcc
rsp            0x7fffffffe528	0x7fffffffe528
r8             0xffffffff	4294967295
r9             0xa	10
r10            0xffffffff	4294967295
r11            0x246	582
r12            0x4003e0	4195296
r13            0x7fffffffe600	140737488348672
r14            0x0	0
r15            0x0	0
rip            0x4004d8	0x4004d8 <main+20>
eflags         0x246	[ PF ZF IF ]
cs             0x33	51
ss             0x2b	43
ds             0x0	0
es             0x0	0
fs             0x0	0
gs             0x0	0
(gdb) x/40xg 0x7fffffffe528
0x7fffffffe528:	0x00007ffff7460d5d	0x0000000000000000
0x7fffffffe538:	0x00007fffffffe608	0x0000000100000000
0x7fffffffe548:	0x00000000004004c4	0x0000000000000000
0x7fffffffe558:	0x6fcc4be0773d5f7c	0x00000000004003e0
0x7fffffffe568:	0x00007fffffffe600	0x0000000000000000
0x7fffffffe578:	0x0000000000000000	0x9033b41fbd5d5f7c
0x7fffffffe588:	0x9033a56c6d0d5f7c	0x00007fff00000000
0x7fffffffe598:	0x0000000000000000	0x0000000000000000
0x7fffffffe5a8:	0x00000000004004f0	0x00007fffffffe608
0x7fffffffe5b8:	0x0000000000000001	0x0000000000000000
0x7fffffffe5c8:	0x0000000000000000	0x00000000004003e0
0x7fffffffe5d8:	0x00007fffffffe600	0x0000000000000000
0x7fffffffe5e8:	0x0000000000400409	0x00007fffffffe5f8
0x7fffffffe5f8:	0x000000000000001c	0x0000000000000001
0x7fffffffe608:	0x00007fffffffe805	0x0000000000000000
0x7fffffffe618:	0x00007fffffffe822	0x00007fffffffe83d
0x7fffffffe628:	0x00007fffffffe84d	0x00007fffffffe861
0x7fffffffe638:	0x00007fffffffe872	0x00007fffffffee7f
0x7fffffffe648:	0x00007fffffffee95	0x00007fffffffeea6
0x7fffffffe658:	0x00007fffffffeebb	0x00007fffffffeec7
(gdb) info symbol 0x00007ffff7460d5d
__libc_start_main + 253 in section .text of /lib64/libc.so.6

main函数返回后，接着执行0x00007ffff7460d5d处的指令，也就是直接调用了exit，结束进程。

0x00007ffff7460d5d：

   0x00007ffff7460d56 <+246>:	mov    (%rax),%rdx
   0x00007ffff7460d59 <+249>:	callq  *0x18(%rsp)
   0x00007ffff7460d5d <+253>:	mov    %eax,%edi
   0x00007ffff7460d5f <+255>:	callq  0x7ffff7477a40 <exit>

说了那么多，那么我们 test3.c 的func是谁调用的？如果简单的想
其调用栈肯定是
main->setcontext->func ，但是我上面说了，并不是这样的。

我们在func打断点看一下：

Breakpoint 1, func () at test3.c:6
6		printf("in func\n");
Missing separate debuginfos, use: debuginfo-install glibc-2.12-1.166.alios6.7.x86_64
(gdb) bt
#0  func () at test3.c:6
#1  0x00007ffff74858f0 in ?? () from /lib64/libc.so.6
#2  0x0000000000000000 in ?? ()
(gdb) disassemble
Dump of assembler code for function func:
   0x00000000004005f4 <+0>:	push   %rbp
   0x00000000004005f5 <+1>:	mov    %rsp,%rbp
=> 0x00000000004005f8 <+4>:	mov    $0x400778,%edi
   0x00000000004005fd <+9>:	callq  0x4004a8 <puts@plt>
   0x0000000000400602 <+14>:	leaveq
   0x0000000000400603 <+15>:	retq
End of assembler dump.
(gdb) i r
rax            0x0	0
rbx            0x603710	6305552
rcx            0x0	0
rdx            0x7fffffffe618	140737488348696
rsi            0x7fffffffe608	140737488348680
rdi            0x7fffffffe170	140737488347504
rbp            0x603700	0x603700
rsp            0x603700	0x603700
r8             0x7ffff77d1300	140737345557248
r9             0x7ffff7debac0	140737351957184
r10            0x8	8
r11            0x246	582
r12            0x400510	4195600
r13            0x7fffffffe600	140737488348672
r14            0x0	0
r15            0x0	0
rip            0x4005f8	0x4005f8 <func+4>
eflags         0x246	[ PF ZF IF ]
cs             0x33	51
ss             0x2b	43
ds             0x0	0
es             0x0	0
fs             0x0	0
gs             0x0	0
(gdb) x/40xg 0x603700
0x603700:	0x00007fffffffe520	0x00007ffff74858f0
0x603710:	0x0000000000000000	0x0000000000000000
0x603720:	0x0000000000000000	0x00000000000208e1
0x603730:	0x0000000000000000	0x0000000000000000
0x603740:	0x0000000000000000	0x0000000000000000
0x603750:	0x0000000000000000	0x0000000000000000
0x603760:	0x0000000000000000	0x0000000000000000
0x603770:	0x0000000000000000	0x0000000000000000
0x603780:	0x0000000000000000	0x0000000000000000
0x603790:	0x0000000000000000	0x0000000000000000
0x6037a0:	0x0000000000000000	0x0000000000000000
0x6037b0:	0x0000000000000000	0x0000000000000000
0x6037c0:	0x0000000000000000	0x0000000000000000
0x6037d0:	0x0000000000000000	0x0000000000000000
0x6037e0:	0x0000000000000000	0x0000000000000000
0x6037f0:	0x0000000000000000	0x0000000000000000
0x603800:	0x0000000000000000	0x0000000000000000
0x603810:	0x0000000000000000	0x0000000000000000
0x603820:	0x0000000000000000	0x0000000000000000
0x603830:	0x0000000000000000	0x0000000000000000
(gdb) info symbol 0x00007ffff74858f0
__start_context in section .text of /lib64/libc.so.6

func的栈地址是0x603700 他是我们的main函数malloc得到的地址+栈大小从而得到的栈顶（还有一定的偏移）。
现在问题来了，func是由__start_context调用的，而且从 x/40xg 0x603700 指令可以看出，除了__start_context，栈其余全是0，也就意味着没有人调用__start_context。也意味着没有从main->setcontext … 一路调用到func。很奇怪吧，但是仔细想想，所谓栈，也就是内存，内存里是什么东西，肯定是有人放进去的。

其实能够猜到__start_context是被刻意安排在栈中的。即刻意安排在0x603700中的，好让func执行 retq返回时，读取的pc指针是__start_context。

ss_sp就是我们在main函数中malloc的地址，sp = ss_sp + ss_size就指向了栈顶。
我们在sp[0]安排了一个地址&__start_context，而func的函数栈就是这个sp，func在最后执行ret时，会pop 这个 sp[0]，然后放到自己的pc指针上，然后跳到pc处，也就是说，func的返回地址就是__start_context。

好，现在看看__start_context干了些什么事情，因为从前面推栈的结果来看没人调用__start_context，所以猜想里面肯定直接调用了exit。

(gdb) disassemble __start_context
Dump of assembler code for function __start_context:
   0x00007ffff74858f0 <+0>:	mov    %rbx,%rsp
   0x00007ffff74858f3 <+3>:	pop    %rdi
   0x00007ffff74858f4 <+4>:	test   %rdi,%rdi
   0x00007ffff74858f7 <+7>:	je     0x7ffff74858fe <__start_context+14>
   0x00007ffff74858f9 <+9>:	callq  0x7ffff7483090 <setcontext>
   0x00007ffff74858fe <+14>:	mov    %rax,%rdi
   0x00007ffff7485901 <+17>:	callq  0x7ffff7477a40 <exit>
   0x00007ffff7485906 <+22>:	hlt
End of assembler dump.

func返回到__start_context后，就exit。

总结一下，在执行func时，在其栈顶放了一个__start_context，这样func执行ret时，就执行到了__start_context。换句话说，并不是__start_context调用了func，而是func返回到了__start_context。

仔细看__start_context函数的汇编，它还有一种可能就是不执行 exit，而是执行setcontext，判断条件就是%rdi是否是0，不是0就执行setcontext，这个又是什么逻辑呢。且看test4.c

#include <ucontext.h>
#include <stdio.h>
#include <malloc.h>
int did = 0;
/*
 * Entry function of the new context: prints a marker line and sets the
 * global `did` flag so main() can tell that func() has already run.
 */
void func()
{
	fputs("in func\n", stdout);
	did = 1;
}

/*
 * test4: run func() on its own stack and continue in main() afterwards.
 *
 * `rt` is captured right after `context`; it is installed as uc_link, so
 * when func() returns execution resumes just after getcontext(&rt). By
 * then func() has set `did` to 1, which selects the "continue" branch.
 *
 * Returns 0 after resuming from func(), 1 if any step fails.
 */
int main()
{
	ucontext_t context, rt;

	if (getcontext(&context) == -1)
	{
		perror("getcontext");
		return 1;
	}
	if (getcontext(&rt) == -1)
	{
		perror("getcontext");
		return 1;
	}
	/* Second arrival: func() has finished and uc_link brought us back. */
	if (did == 1)
	{
		printf("continue from func\n");
		return 0;
	}

	/* Give the new context a dedicated stack. */
	context.uc_stack.ss_sp = malloc(10000);
	if (context.uc_stack.ss_sp == NULL)
	{
		perror("malloc");
		return 1;
	}
	context.uc_stack.ss_size = 10000;
	/* Context to resume once func() returns. */
	context.uc_link = &rt;
	makecontext(&context, func, 0);

	setcontext(&context);

	/* Only reached when setcontext() failed. */
	perror("setcontext");
	free(context.uc_stack.ss_sp);
	return 1;
}

test4.c比test3.c就多了一个对uc_link的赋值（还有一些流程控制变量）。uc_link指定了func执行完之后接着执行的上下文：如果为空，则执行完func后exit，就像test3.c一样；如果uc_link不为空，则执行完func后，接着执行uc_link所指定的上下文。

这与前面看到的__start_context的逻辑一致。

接下来，看test4.c，我们从getcontext(&rt);处返回了，但是能不能从setcontext处返回呢，当然可以，方法1就是getcontext(&rt);返回后，goto到setcontext 后面。但是glibc还提供了一个接口就是swapcontext。且看下面那个例子。

#include <ucontext.h>
#include <stdio.h>
#include <malloc.h>
int did = 0;
/*
 * Entry function of the new context: prints a marker line and sets the
 * global `did` flag to record that it ran.
 */
void func()
{
	fputs("in func\n", stdout);
	did = 1;
}

/*
 * swapcontext example: run func() on its own stack, then come back to
 * the statement following swapcontext().
 *
 * swapcontext() stores the current context in `rt` and switches to
 * `context`. `rt` is also installed as uc_link, so when func() returns
 * execution continues right after the swapcontext() call.
 *
 * Returns 0 on success, 1 if any step fails.
 */
int main()
{
	ucontext_t context, rt;

	if (getcontext(&context) == -1)
	{
		perror("getcontext");
		return 1;
	}

	/* Give the new context a dedicated stack. */
	context.uc_stack.ss_sp = malloc(10000);
	if (context.uc_stack.ss_sp == NULL)
	{
		perror("malloc");
		return 1;
	}
	context.uc_stack.ss_size = 10000;
	/* Context to resume once func() returns. */
	context.uc_link = &rt;
	makecontext(&context, func, 0);

	if (swapcontext(&rt, &context) == -1)
	{
		perror("swapcontext");
		free(context.uc_stack.ss_sp);
		return 1;
	}

	/* func() has finished, so its stack is no longer in use. */
	free(context.uc_stack.ss_sp);

	printf("finally return\n");
	return 0;
}

swapcontext 做两件事：一是把当前的上下文保存到rt中，二是切换到context。context指定的上下文执行完成之后，就跳到了swapcontext后面了。

注意：这些xxxcontext函数给予了用户态程序“调度的功能”，这和os的调度不同，os是对各个线程，即task_struct进行切换调度，而各个线程内部，也就是用户态程序，可以用上面这些库函数进行颗粒度更小的调度（说切换更合理）。这就是所谓的协程的概念。

版权声明
本文为[Mrpre]所创，转载请带上原文链接，感谢
https://wonderful.blog.csdn.net/article/details/78699865

当前位置：网站首页>setcontext getcontext makecontext swapcontext

setcontext getcontext makecontext swapcontext

Linux上下文切换以及协程

边栏推荐

猜你喜欢

随机推荐