diff --git a/Makefile b/Makefile
index f7452ac47..abd65e8ba 100644
--- a/Makefile
+++ b/Makefile
@@ -38,6 +38,7 @@ OBJS += log.o
 OBJS += libnetlink.o
 OBJS += sockets.o
 OBJS += files.o
+OBJS += namespaces.o
 
 OBJS-BLOB += parasite.o
 SRCS-BLOB += $(patsubst %.o,%.c,$(OBJS-BLOB))
diff --git a/cr-dump.c b/cr-dump.c
index 47d70f456..5e1642a80 100644
--- a/cr-dump.c
+++ b/cr-dump.c
@@ -26,7 +26,7 @@
 #include "ptrace.h"
 #include "util.h"
 #include "sockets.h"
-
+#include "namespaces.h"
 #include "image.h"
 #include "proc_parse.h"
 #include "parasite-syscall.h"
@@ -1273,6 +1273,12 @@ int cr_dump_tasks(pid_t pid, struct cr_options *opts)
 	if (collect_pstree(pid, &pstree_list))
 		goto err;
 
+	if (opts->with_namespaces) {
+		ret = dump_namespaces(pid);
+		if (ret < 0)
+			goto err;
+	}
+
 	/*
 	 * Ignore collection errors by now since we may not want
 	 * to dump the missed sockets. But later, when we will start
diff --git a/cr-restore.c b/cr-restore.c
index d8561f428..2529ff17f 100644
--- a/cr-restore.c
+++ b/cr-restore.c
@@ -36,6 +36,7 @@
 #include "proc_parse.h"
 #include "restorer-blob.h"
 #include "crtools.h"
+#include "namespaces.h"
 
 /*
  * real_pid member formerly served cases when
@@ -1238,15 +1239,17 @@ static int restore_one_task(int pid)
 #define STACK_SIZE (8 * 4096)
 struct cr_clone_arg {
 	int pid, fd;
+	unsigned long clone_flags;
 };
 
-static inline int fork_with_pid(int pid)
+static inline int fork_with_pid(int pid, unsigned long ns_clone_flags)
 {
 	int ret = -1;
 	char buf[32];
 	struct cr_clone_arg ca;
 	void *stack;
 
+	pr_info("Forking task with %d pid (flags %lx)\n", pid, ns_clone_flags);
 	stack = mmap(NULL, STACK_SIZE,
 			PROT_WRITE | PROT_READ,
 			MAP_PRIVATE | MAP_GROWSDOWN | MAP_ANONYMOUS, -1, 0);
@@ -1257,6 +1260,7 @@ static inline int fork_with_pid(int pid)
 
 	snprintf(buf, sizeof(buf), "%d", pid - 1);
 	ca.pid = pid;
+	ca.clone_flags = ns_clone_flags;
 	ca.fd = open(LAST_PID_PATH, O_RDWR);
 	if (ca.fd < 0) {
 		pr_perror("%d: Can't open %s\n", pid, LAST_PID_PATH);
@@ -1272,7 +1276,7 @@ static inline int fork_with_pid(int pid)
 		goto err_unlock;
 
 	ret = clone(restore_task_with_children, stack + STACK_SIZE,
-			SIGCHLD, &ca);
+			ns_clone_flags | SIGCHLD, &ca);
 	if (ret < 0)
 		pr_perror("Can't fork for %d\n", pid);
 
@@ -1320,6 +1324,12 @@ static int restore_task_with_children(void *_arg)
 		exit(-1);
 	}
 
+	if (ca->clone_flags) {
+		ret = prepare_namespace(pid, ca->clone_flags);
+		if (ret)
+			exit(-1);
+	}
+
 	/*
 	 * The block mask will be restored in sigresturn.
 	 *
@@ -1378,7 +1388,7 @@ static int restore_task_with_children(void *_arg)
 
 	pr_info("%d: Restoring %d children:\n", pid, e.nr_children);
 	for (i = 0; i < e.nr_children; i++) {
-		ret = fork_with_pid(pids[i]);
+		ret = fork_with_pid(pids[i], 0);
 		if (ret < 0)
 			exit(1);
 	}
@@ -1393,6 +1403,7 @@ static int restore_root_task(int fd, struct cr_options *opts)
 	struct pstree_entry e;
 	int ret, i;
 	struct sigaction act;
+	unsigned long ns_clone_flags;
 
 	ret = read(fd, &e, sizeof(e));
 	if (ret != sizeof(e)) {
@@ -1416,7 +1427,19 @@ static int restore_root_task(int fd, struct cr_options *opts)
 		return -1;
 	}
 
-	ret = fork_with_pid(e.pid);
+	/*
+	 * FIXME -- currently we assume that all the tasks live
+	 * in the same set of namespaces. This is done to debug
+	 * the ns contents dumping/restoring. Need to revisit
+	 * this later.
+	 */
+
+	if (opts->with_namespaces)
+		ns_clone_flags = 0;
+	else
+		ns_clone_flags = 0;
+
+	ret = fork_with_pid(e.pid, ns_clone_flags);
 	if (ret < 0)
 		return -1;
 
diff --git a/cr-show.c b/cr-show.c
index 15bcf409c..456f814f6 100644
--- a/cr-show.c
+++ b/cr-show.c
@@ -13,7 +13,7 @@
 
 #include "types.h"
 #include "list.h"
-
+#include "namespaces.h"
 #include "compiler.h"
 #include "crtools.h"
 #include "util.h"
@@ -502,6 +502,10 @@ static int cr_show_all(unsigned long pid, struct cr_options *opts)
 
 	close_cr_fdset(&cr_fdset);
 
+	ret = try_show_namespaces(pid);
+	if (ret)
+		goto out;
+
 	list_for_each_entry(item, &pstree_list, list) {
 
 		cr_fdset = prep_cr_fdset_for_restore(item->pid, CR_FD_DESC_TASK);
diff --git a/crtools.c b/crtools.c
index d8f71a9de..815413649 100644
--- a/crtools.c
+++ b/crtools.c
@@ -229,7 +229,7 @@ int main(int argc, char *argv[])
 	int action = -1;
 	int log_inited = 0;
 
-	static const char short_opts[] = "df:p:t:hcD:o:";
+	static const char short_opts[] = "df:p:t:hcD:o:n";
 
 	BUILD_BUG_ON(PAGE_SIZE != PAGE_IMAGE_SIZE);
 
@@ -275,6 +275,9 @@ int main(int argc, char *argv[])
 				return -1;
 			log_inited = 1;
 			break;
+		case 'n':
+			opts.with_namespaces = true;
+			break;
 		case 'h':
 		default:
 			goto usage;
diff --git a/include/crtools.h b/include/crtools.h
index 7af6a48ec..4028d8f24 100644
--- a/include/crtools.h
+++ b/include/crtools.h
@@ -46,11 +46,12 @@ enum cr_task_state {
 };
 
 struct cr_options {
-	bool leader_only;
-	enum cr_task_state final_state;
-	bool show_pages_content;
-	char *show_dump_file;
-	bool restore_detach;
+	enum cr_task_state final_state;
+	char *show_dump_file;
+	bool leader_only;
+	bool show_pages_content;
+	bool restore_detach;
+	bool with_namespaces;
 };
 
 /* file descriptors template */
diff --git a/include/namespaces.h b/include/namespaces.h
new file mode 100644
index 000000000..ee4aad60c
--- /dev/null
+++ b/include/namespaces.h
@@ -0,0 +1,6 @@
+#ifndef __CR_NS_H__
+#define __CR_NS_H__
+int dump_namespaces(int pid);
+int prepare_namespace(int pid, unsigned long clone_flags);
+int try_show_namespaces(int pid);
+#endif
diff --git a/include/syscall-codes.h b/include/syscall-codes.h
index 8881c2cd0..79f5f58d1 100644
--- a/include/syscall-codes.h
+++ b/include/syscall-codes.h
@@ -38,6 +38,7 @@
 #define __NR_get_thread_area 211
 #define __NR_restart_syscall 219
 #define __NR_msync 227
+#define __NR_setns 308
 
 #else /* CONFIG_X86_64 */
 # error x86-32 bit mode not yet implemented
diff --git a/include/syscall.h b/include/syscall.h
index 7081d40ba..382b31d02 100644
--- a/include/syscall.h
+++ b/include/syscall.h
@@ -308,6 +308,13 @@ static long always_inline sys_msync(void *addr, unsigned long length, int flags)
 	return syscall3(__NR_msync, (long)addr, length, (long)flags);
 }
 
+static long always_inline sys_setns(int fd, int nstype)
+{
+	return syscall2(__NR_setns, (long)fd, (long)nstype);
+}
+
+#define setns sys_setns
+
 #else /* CONFIG_X86_64 */
 # error x86-32 bit mode not yet implemented
 #endif /* CONFIG_X86_64 */
diff --git a/namespaces.c b/namespaces.c
new file mode 100644
index 000000000..fa1969c94
--- /dev/null
+++ b/namespaces.c
@@ -0,0 +1,105 @@
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/wait.h>
+#include "util.h"
+#include "crtools.h"
+#include "syscall.h"
+#include "namespaces.h"
+
+/*
+ * Enter the @ns namespace of task @pid: open /proc/<pid>/ns/<ns>
+ * and pass the descriptor to setns() together with @type.
+ *
+ * Returns the setns() result, or -1 when the ns file can't be opened.
+ */
+static int switch_ns(int pid, int type, const char *ns)
+{
+	char buf[32];
+	int nsfd, ret = -1;
+
+	snprintf(buf, sizeof(buf), "/proc/%d/ns/%s", pid, ns);
+	nsfd = open(buf, O_RDONLY);
+	if (nsfd < 0) {
+		pr_perror("Can't open %s\n", buf);
+		goto out;
+	}
+
+	ret = setns(nsfd, type);
+	if (ret < 0)
+		pr_perror("Can't setns %d/%s\n", pid, ns);
+
+	close(nsfd);
+out:
+	return ret;
+}
+
+/* Stub: nothing is dumped yet. Called in the forked ns dumper. */
+static int do_dump_namespaces(int ns_pid)
+{
+	return 0;
+}
+
+/*
+ * Fork a helper that dumps the namespaces of @ns_pid and wait for it.
+ * Returns 0 if the helper exited with status 0, -1 otherwise.
+ */
+int dump_namespaces(int ns_pid)
+{
+	int pid, ret, status;
+
+	/*
+	 * The setns syscall is cool, we can switch to the other
+	 * namespace and then return back to our initial one, but
+	 * for me it's much easier just to fork another task and
+	 * let it do the job, all the more so it can be done in
+	 * parallel with task dumping routine.
+	 *
+	 * However, the question how to dump sockets from the target
+	 * net namespace with this is still open
+	 */
+
+	pr_info("Dumping %d's namespaces\n", ns_pid);
+
+	pid = fork();
+	if (pid < 0) {
+		pr_perror("Can't fork ns dumper\n");
+		return -1;
+	}
+
+	if (pid == 0) {
+		ret = do_dump_namespaces(ns_pid);
+		exit(ret);
+	}
+
+	ret = waitpid(pid, &status, 0);
+	if (ret != pid) {
+		pr_perror("Can't wait ns dumper\n");
+		return -1;
+	}
+
+	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
+		pr_err("Namespaces dumping finished with error %d\n", status);
+		return -1;
+	}
+
+	pr_info("Namespaces dump complete\n");
+	return 0;
+}
+
+/* Stub for namespace restore: only logs @pid and @clone_flags for now. */
+int prepare_namespace(int pid, unsigned long clone_flags)
+{
+	int ret = 0;
+
+	pr_info("Restoring namespaces %d flags %lx\n",
+			pid, clone_flags);
+
+	return ret;
+}
+
+/* Stub: namespace images are not shown yet. */
+int try_show_namespaces(int ns_pid)
+{
+	return 0;
+}