From aa731ee1d74ff8b8bc34279c8fbd8829cee8c5d3 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 17 Oct 2012 00:23:25 +0400 Subject: [PATCH] core: Support task scheduler policies and priorities No magic here, just fetch info using getpriority and sched_getxxx calls. Good news is that the mentioned syscalls take pid as argument and do work with it, i.e. -- no need for parasite help here. Restore is split into prep -- copy sched bits from image into restorer args -- and the restore itself. It's done to avoid restoring tasks info with IDLE priority ;) To make sure the restorer does not fail, sched bits are validated for sanity at the prep stage. A minimal sanity test is also included. Signed-off-by: Pavel Emelyanov --- cr-dump.c | 59 +++++++++++++++ cr-restore.c | 47 ++++++++++++ include/restorer.h | 10 +++ include/syscall-types.h | 1 + include/syscall-x86-64.def | 2 + protobuf/core.proto | 3 + restorer.c | 19 +++++ test/zdtm.sh | 1 + test/zdtm/live/static/Makefile | 1 + test/zdtm/live/static/sched_prio00.c | 108 +++++++++++++++++++++++++++ 10 files changed, 251 insertions(+) create mode 100644 test/zdtm/live/static/sched_prio00.c diff --git a/cr-dump.c b/cr-dump.c index fdd7810ed..76cd7629c 100644 --- a/cr-dump.c +++ b/cr-dump.c @@ -18,6 +18,9 @@ #include #include +#include +#include + #include #include "protobuf.h" @@ -98,6 +101,54 @@ err: return ret; } +static int dump_sched_info(int pid, ThreadCoreEntry *tc) +{ + int ret; + struct sched_param sp; + + BUILD_BUG_ON(SCHED_OTHER != 0); /* default in proto message */ + + ret = sched_getscheduler(pid); + if (ret < 0) { + pr_perror("Can't get sched policy for %d", pid); + return -1; + } + + pr_info("%d has %d sched policy\n", pid, ret); + tc->has_sched_policy = true; + tc->sched_policy = ret; + + if ((ret == SCHED_RR) || (ret == SCHED_FIFO)) { + ret = sched_getparam(pid, &sp); + if (ret < 0) { + pr_perror("Can't get sched param for %d", pid); + return -1; + } + + pr_info("\tdumping %d prio for %d\n", sp.sched_priority, pid); + 
tc->has_sched_prio = true; + tc->sched_prio = sp.sched_priority; + } + + /* + * The nice is ignored for RT sched policies, but is stored + * in kernel. Thus we have to take it with us in the image. + */ + + errno = 0; + ret = getpriority(PRIO_PROCESS, pid); + if (errno) { + pr_perror("Can't get nice for %d", pid); + return -1; + } + + pr_info("\tdumping %d nice for %d\n", ret, pid); + tc->has_sched_nice = true; + tc->sched_nice = ret; + + return 0; +} + struct cr_fdset *glob_fdset; static int collect_fds(pid_t pid, struct parasite_drain_fd *dfds) @@ -880,6 +931,10 @@ static int dump_task_core_all(pid_t pid, const struct proc_pid_stat *stat, core->tc->task_state = TASK_ALIVE; core->tc->exit_code = 0; + ret = dump_sched_info(pid, core->thread_core); + if (ret) + goto err_free; + ret = pb_write_one(fd_core, core, PB_CORE); if (ret < 0) { pr_info("ERROR\n"); @@ -1277,6 +1332,10 @@ static int dump_task_thread(struct parasite_ctl *parasite_ctl, struct pid *tid) pr_info("%d: tid_address=%p\n", pid, taddr); core->thread_info->clear_tid_addr = (u64) taddr; + ret = dump_sched_info(pid, core->thread_core); + if (ret) + goto err_free; + pr_info("OK\n"); fd_core = open_image(CR_FD_CORE, O_DUMP, tid->virt); diff --git a/cr-restore.c b/cr-restore.c index b12b3cf20..3e61f57de 100644 --- a/cr-restore.c +++ b/cr-restore.c @@ -1258,6 +1258,45 @@ static int remap_restorer_blob(void *addr) return 0; } +static int validate_sched_parm(struct rst_sched_param *sp) +{ + if ((sp->nice < -20) || (sp->nice > 19)) + return 0; + + switch (sp->policy) { + case SCHED_RR: + case SCHED_FIFO: + return ((sp->prio > 0) && (sp->prio < 100)); + case SCHED_IDLE: + case SCHED_OTHER: + case SCHED_BATCH: + return sp->prio == 0; + } + + return 0; +} + +static int prep_sched_info(struct rst_sched_param *sp, ThreadCoreEntry *tc) +{ + if (!tc->has_sched_policy) { + sp->policy = SCHED_OTHER; + sp->nice = 0; + return 0; + } + + sp->policy = tc->sched_policy; + sp->nice = tc->sched_nice; + sp->prio = 
tc->sched_prio; + + if (!validate_sched_parm(sp)) { + pr_err("Inconsistent sched params received (%d.%d.%d)\n", + sp->policy, sp->nice, sp->prio); + return -1; + } + + return 0; +} + static int sigreturn_restore(pid_t pid, CoreEntry *core, struct list_head *tgt_vmas, int nr_vmas) { long restore_task_vma_len; @@ -1427,6 +1466,10 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core, struct list_head *tgt_v task_args->has_futex = true; task_args->futex_rla = core->thread_core->futex_rla; task_args->futex_rla_len = core->thread_core->futex_rla_len; + + ret = prep_sched_info(&task_args->sp, core->thread_core); + if (ret) + goto err; } /* No longer need it */ @@ -1494,6 +1537,10 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core, struct list_head *tgt_v thread_args[i].has_futex = true; thread_args[i].futex_rla = core->thread_core->futex_rla; thread_args[i].futex_rla_len = core->thread_core->futex_rla_len; + + ret = prep_sched_info(&thread_args[i].sp, core->thread_core); + if (ret) + goto err; } core_entry__free_unpacked(core, NULL); diff --git a/include/restorer.h b/include/restorer.h index 3b9fbcfed..732180c42 100644 --- a/include/restorer.h +++ b/include/restorer.h @@ -56,6 +56,12 @@ struct restore_mem_zone { #define first_on_heap(ptr, heap) ((typeof(ptr))heap) #define next_on_heap(ptr, prev) ((typeof(ptr))((long)(prev) + sizeof(*(prev)))) +struct rst_sched_param { + int policy; + int nice; + int prio; +}; + /* Make sure it's pow2 in size */ struct thread_restore_args { struct restore_mem_zone mem_zone; @@ -68,6 +74,8 @@ struct thread_restore_args { bool has_futex; u64 futex_rla; u32 futex_rla_len; + + struct rst_sched_param sp; } __aligned(sizeof(long)); struct task_restore_core_args { @@ -113,6 +121,8 @@ struct task_restore_core_args { int *rst_tcp_socks; int rst_tcp_socks_size; + + struct rst_sched_param sp; } __aligned(sizeof(long)); struct pt_regs { diff --git a/include/syscall-types.h b/include/syscall-types.h index 01f64c537..261e288af 100644 --- 
a/include/syscall-types.h +++ b/include/syscall-types.h @@ -10,6 +10,7 @@ #include #include +#include #include "types.h" diff --git a/include/syscall-x86-64.def b/include/syscall-x86-64.def index 6cc54275a..1a3d75961 100644 --- a/include/syscall-x86-64.def +++ b/include/syscall-x86-64.def @@ -54,6 +54,8 @@ __NR_setfsgid 123 sys_setfsgid (int fsgid) __NR_getsid 124 sys_getsid (void) __NR_capset 126 sys_capset (struct cap_header *h, struct cap_data *d) __NR_personality 135 sys_personality (unsigned int personality) +__NR_setpriority 141 sys_setpriority (int which, int who, int nice) +__NR_sched_setscheduler 144 sys_sched_setscheduler (int pid, int policy, struct sched_param *p) __NR_prctl 157 sys_prctl (int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) __NR_arch_prctl 158 sys_arch_prctl (int option, unsigned long addr) __NR_mount 165 sys_mount (char *dev_nmae, char *dir_name, char *type, unsigned long flags, void *data) diff --git a/protobuf/core.proto b/protobuf/core.proto index 3e09bdd75..72a2c7eca 100644 --- a/protobuf/core.proto +++ b/protobuf/core.proto @@ -69,6 +69,9 @@ message thread_info_x86 { message thread_core_entry { required uint64 futex_rla = 1; required uint32 futex_rla_len = 2; + optional sint32 sched_nice = 3; + optional uint32 sched_policy = 4; + optional uint32 sched_prio = 5; } message core_entry { diff --git a/restorer.c b/restorer.c index 04337a9b2..53f058b61 100644 --- a/restorer.c +++ b/restorer.c @@ -127,6 +127,21 @@ static void restore_creds(CredsEntry *ce) sys_capset(&hdr, data); } +static void restore_sched_info(struct rst_sched_param *p) +{ + struct sched_param parm; + + if ((p->policy == SCHED_OTHER) && (p->nice == 0)) + return; + + pr_info("Restoring scheduler params %d.%d.%d\n", + p->policy, p->nice, p->prio); + + sys_setpriority(PRIO_PROCESS, 0, p->nice); + parm.sched_priority = p->prio; + sys_sched_setscheduler(0, p->policy, &parm); +} + /* * Threads restoration via sigreturn. 
Note it's locked * routine and calls for unlock at the end. @@ -152,6 +167,8 @@ long __export_restore_thread(struct thread_restore_args *args) } } + restore_sched_info(&args->sp); + rt_sigframe = (void *)args->mem_zone.rt_sigframe + 8; #define CPREGT1(d) rt_sigframe->uc.uc_mcontext.d = args->gpregs.d @@ -442,6 +459,8 @@ long __export_restore_task(struct task_restore_core_args *args) } } + restore_sched_info(&args->sp); + /* * We need to prepare a valid sigframe here, so * after sigreturn the kernel will pick up the diff --git a/test/zdtm.sh b/test/zdtm.sh index 54b942db9..928e3ee7f 100644 --- a/test/zdtm.sh +++ b/test/zdtm.sh @@ -19,6 +19,7 @@ static/write_read02 static/write_read10 static/wait00 static/vdso00 +static/sched_prio00 static/file_shared static/timers static/futex diff --git a/test/zdtm/live/static/Makefile b/test/zdtm/live/static/Makefile index a8ed43e50..64d3c15c2 100644 --- a/test/zdtm/live/static/Makefile +++ b/test/zdtm/live/static/Makefile @@ -19,6 +19,7 @@ TST_NOFILE = \ mprotect00 \ timers \ unbound_sock \ + sched_prio00 \ socket_listen \ socket_udp \ socket6_udp \ diff --git a/test/zdtm/live/static/sched_prio00.c b/test/zdtm/live/static/sched_prio00.c new file mode 100644 index 000000000..39496d0fa --- /dev/null +++ b/test/zdtm/live/static/sched_prio00.c @@ -0,0 +1,108 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "Check sched prios to be preserved"; +const char *test_author = "Pavel Emelyanov "; + +#define NRTASKS 4 + +static int do_nothing(void) +{ + while (1) + sleep(10); + + return -1; +} + +static void kill_all(int *pid, int n) +{ + int i; + + for (i = 0; i < n; i++) + kill(pid[i], SIGKILL); +} + +int main(int argc, char ** argv) +{ + int pid[NRTASKS], i, parm[NRTASKS], ret; + + test_init(argc, argv); + + /* first 3 -- normal */ + parm[0] = -20; + parm[1] = 19; + parm[2] = 1; + parm[3] = 3; + + /* next 1 -- RR */ + + for (i = 0; i < NRTASKS; i++) { + 
pid[i] = fork(); + if (!pid[i]) + return do_nothing(); + + if (i < 3) { + if (setpriority(PRIO_PROCESS, pid[i], parm[i])) { + err("Can't set prio %d", i); + kill_all(pid, i); + return -1; + } + } else { + struct sched_param p; + + p.sched_priority = parm[i]; + if (sched_setscheduler(pid[i], SCHED_RR, &p)) { + err("Can't set policy %d", i); + kill_all(pid, i); + return -1; + } + } + } + + test_daemon(); + test_waitsig(); + + for (i = 0; i < NRTASKS; i++) { + if (i < 3) { + errno = 0; + ret = getpriority(PRIO_PROCESS, pid[i]); + if (errno) { + fail("No prio for task %d", i); + break; + } + + if (ret != parm[i]) { + fail("Broken nice for %d", i); + break; + } + } else { + struct sched_param p; + + ret = sched_getscheduler(pid[i]); + if (ret != SCHED_RR) { + fail("Broken/No policy for %d", i); + break; + } + + ret = sched_getparam(pid[i], &p); + if (ret < 0 || p.sched_priority != parm[i]) { + fail("Broken prio for %d", i); + break; + } + } + } + + if (i == NRTASKS) + pass(); + + kill_all(pid, NRTASKS); + return 0; +}