#define _XOPEN_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "compiler.h" #include "asm/types.h" #include "list.h" #include "util.h" #include "rst-malloc.h" #include "image.h" #include "vma.h" #include "cr_options.h" #include "servicefd.h" #define VMA_OPT_LEN 128 static void vma_opt_str(const struct vma_area *v, char *opt) { int p = 0; #define opt2s(_o, _s) do { \ if (v->vma.status & _o) \ p += sprintf(opt + p, _s " "); \ } while (0) opt[p] = '\0'; opt2s(VMA_AREA_REGULAR, "reg"); opt2s(VMA_AREA_STACK, "stk"); opt2s(VMA_AREA_VSYSCALL, "vsys"); opt2s(VMA_AREA_VDSO, "vdso"); opt2s(VMA_FORCE_READ, "frd"); opt2s(VMA_AREA_HEAP, "heap"); opt2s(VMA_FILE_PRIVATE, "fp"); opt2s(VMA_FILE_SHARED, "fs"); opt2s(VMA_ANON_SHARED, "as"); opt2s(VMA_ANON_PRIVATE, "ap"); opt2s(VMA_AREA_SYSVIPC, "sysv"); opt2s(VMA_AREA_SOCKET, "sk"); #undef opt2s } void pr_vma(unsigned int loglevel, const struct vma_area *vma_area) { char opt[VMA_OPT_LEN]; memset(opt, 0, VMA_OPT_LEN); if (!vma_area) return; vma_opt_str(vma_area, opt); print_on_level(loglevel, "%#"PRIx64"-%#"PRIx64" (%"PRIi64"K) prot %#x flags %#x off %#"PRIx64" " "%s shmid: %#"PRIx64"\n", vma_area->vma.start, vma_area->vma.end, KBYTES(vma_area_len(vma_area)), vma_area->vma.prot, vma_area->vma.flags, vma_area->vma.pgoff, opt, vma_area->vma.shmid); } int close_safe(int *fd) { int ret = 0; if (*fd > -1) { ret = close(*fd); if (!ret) *fd = -1; else pr_perror("Unable to close fd %d", *fd); } return ret; } int reopen_fd_as_safe(char *file, int line, int new_fd, int old_fd, bool allow_reuse_fd) { int tmp; if (old_fd != new_fd) { if (!allow_reuse_fd) { if (fcntl(new_fd, F_GETFD) != -1 || errno != EBADF) { if (new_fd < 3) { /* * Standard descriptors. */ pr_warn("fd %d already in use (called at %s:%d)\n", new_fd, file, line); } else { pr_err("fd %d already in use (called at %s:%d)\n", new_fd, file, line); return -1; } } } tmp = dup2(old_fd, new_fd); if (tmp < 0) { pr_perror("Dup %d -> %d failed (called at %s:%d)", old_fd, new_fd, file, line); return tmp; } /* Just to have error message if failed */ close_safe(&old_fd); } return 0; } int move_img_fd(int *img_fd, int want_fd) { if (*img_fd == want_fd) { int tmp; tmp = dup(*img_fd); if (tmp < 0) { pr_perror("Can't dup file"); return -1; } close(*img_fd); *img_fd = tmp; } return 0; } static pid_t open_proc_pid = 0; static int open_proc_fd = -1; int close_pid_proc(void) { int ret = 0; if (open_proc_fd >= 0) ret = close(open_proc_fd); open_proc_fd = -1; open_proc_pid = 0; return ret; } void close_proc() { close_pid_proc(); close_service_fd(PROC_FD_OFF); } int set_proc_fd(int fd) { if (install_service_fd(PROC_FD_OFF, fd) < 0) return -1; return 0; } static int open_proc_sfd(char *path) { int fd, ret; close_proc(); fd = open(path, O_DIRECTORY | O_RDONLY); if (fd == -1) { pr_err("Can't open %s\n", path); return -1; } ret = install_service_fd(PROC_FD_OFF, fd); close(fd); if (ret < 0) return -1; return 0; } inline int open_pid_proc(pid_t pid) { char path[18]; int fd; int dfd; if (pid == open_proc_pid) return open_proc_fd; close_pid_proc(); dfd = get_service_fd(PROC_FD_OFF); if (dfd < 0) { if (open_proc_sfd("/proc") < 0) return -1; dfd = get_service_fd(PROC_FD_OFF); } snprintf(path, sizeof(path), "%d", pid); fd = openat(dfd, path, O_RDONLY); if (fd < 0) pr_perror("Can't open %s", path); else { open_proc_fd = fd; open_proc_pid = pid; } return fd; } int do_open_proc(pid_t pid, int flags, const char *fmt, ...) { char path[128]; va_list args; int dirfd = open_pid_proc(pid); if (dirfd < 0) return -1; va_start(args, fmt); vsnprintf(path, sizeof(path), fmt, args); va_end(args); return openat(dirfd, path, flags); } static int service_fd_rlim_cur; static int service_fd_id = 0; int init_service_fd(void) { struct rlimit rlimit; /* * Service FDs are those that most likely won't * conflict with any 'real-life' ones */ if (getrlimit(RLIMIT_NOFILE, &rlimit)) { pr_perror("Can't get rlimit"); return -1; } service_fd_rlim_cur = (int)rlimit.rlim_cur; BUG_ON(service_fd_rlim_cur < SERVICE_FD_MAX); return 0; } static int __get_service_fd(enum sfd_type type, int service_fd_id) { return service_fd_rlim_cur - type - SERVICE_FD_MAX * service_fd_id; } static DECLARE_BITMAP(sfd_map, SERVICE_FD_MAX); int reserve_service_fd(enum sfd_type type) { int sfd = __get_service_fd(type, service_fd_id); BUG_ON((int)type <= SERVICE_FD_MIN || (int)type >= SERVICE_FD_MAX); set_bit(type, sfd_map); return sfd; } int install_service_fd(enum sfd_type type, int fd) { int sfd = __get_service_fd(type, service_fd_id); BUG_ON((int)type <= SERVICE_FD_MIN || (int)type >= SERVICE_FD_MAX); if (dup2(fd, sfd) != sfd) { pr_perror("Dup %d -> %d failed", fd, sfd); return -1; } set_bit(type, sfd_map); return sfd; } int get_service_fd(enum sfd_type type) { BUG_ON((int)type <= SERVICE_FD_MIN || (int)type >= SERVICE_FD_MAX); if (!test_bit(type, sfd_map)) return -1; return __get_service_fd(type, service_fd_id); } int criu_get_image_dir(void) { return get_service_fd(IMG_FD_OFF); } int close_service_fd(enum sfd_type type) { int fd; fd = get_service_fd(type); if (fd < 0) return 0; if (close_safe(&fd)) return -1; clear_bit(type, sfd_map); return 0; } int clone_service_fd(int id) { int ret = -1, i; if (service_fd_id == id) return 0; for (i = SERVICE_FD_MIN + 1; i < SERVICE_FD_MAX; i++) { int old = __get_service_fd(i, service_fd_id); int new = __get_service_fd(i, id); ret = dup2(old, new); if (ret == -1) { if (errno == EBADF) continue; pr_perror("Unable to clone %d->%d", old, new); } } service_fd_id = id; ret = 0; return ret; } bool is_any_service_fd(int fd) { return fd > __get_service_fd(SERVICE_FD_MAX, service_fd_id) && fd < __get_service_fd(SERVICE_FD_MIN, service_fd_id); } bool is_service_fd(int fd, enum sfd_type type) { return fd == get_service_fd(type); } int copy_file(int fd_in, int fd_out, size_t bytes) { ssize_t written = 0; size_t chunk = bytes ? bytes : 4096; while (1) { ssize_t ret; ret = sendfile(fd_out, fd_in, NULL, chunk); if (ret < 0) { pr_perror("Can't send data to ghost file"); return -1; } if (ret == 0) { if (bytes && (written != bytes)) { pr_err("Ghost file size mismatch %zu/%zu\n", written, bytes); return -1; } break; } written += ret; } return 0; } #ifndef ANON_INODE_FS_MAGIC # define ANON_INODE_FS_MAGIC 0x09041934 #endif bool is_anon_inode(struct statfs *statfs) { return statfs->f_type == ANON_INODE_FS_MAGIC; } int read_fd_link(int lfd, char *buf, size_t size) { char t[32]; ssize_t ret; snprintf(t, sizeof(t), "/proc/self/fd/%d", lfd); ret = readlink(t, buf, size); if (ret < 0) { pr_perror("Can't read link of fd %d", lfd); return -1; } else if ((size_t)ret == size) { pr_err("Buffer for read link of fd %d is too small\n", lfd); return -1; } buf[ret] = 0; return ret; } int is_anon_link_type(int lfd, char *type) { char link[32], aux[32]; if (read_fd_link(lfd, link, sizeof(link)) < 0) return -1; snprintf(aux, sizeof(aux), "anon_inode:%s", type); return !strcmp(link, aux); } void *shmalloc(size_t bytes) { return rst_mem_alloc(bytes, RM_SHARED); } /* Only last chunk can be released */ void shfree_last(void *ptr) { rst_mem_free_last(RM_SHARED); } int run_scripts(char *action) { struct script *script; int ret = 0; if (setenv("CRTOOLS_SCRIPT_ACTION", action, 1)) { pr_perror("Can't set CRTOOLS_SCRIPT_ACTION=%s", action); return -1; } list_for_each_entry(script, &opts.scripts, node) ret |= system(script->path); unsetenv("CRTOOLS_SCRIPT_ACTION"); return ret; } #define DUP_SAFE(fd, out) \ ({ \ int ret__; \ ret__ = dup(fd); \ if (ret__ == -1) { \ pr_perror("dup(%d) failed", fd); \ goto out; \ } \ ret__; \ }) /* * If "in" is negative, stdin will be closed. * If "out" or "err" are negative, a log file descriptor will be used. */ int cr_system(int in, int out, int err, char *cmd, char *const argv[]) { sigset_t blockmask, oldmask; int ret = -1, status; pid_t pid; sigemptyset(&blockmask); sigaddset(&blockmask, SIGCHLD); if (sigprocmask(SIG_BLOCK, &blockmask, &oldmask) == -1) { pr_perror("Can not set mask of blocked signals"); return -1; } pid = fork(); if (pid == -1) { pr_perror("fork() failed"); goto out; } else if (pid == 0) { if (out < 0) out = log_get_fd(); if (err < 0) err = log_get_fd(); /* * out, err, in should be a separate fds, * because reopen_fd_as() closes an old fd */ if (err == out || err == in) err = DUP_SAFE(err, out_chld); if (out == in) out = DUP_SAFE(out, out_chld); if (move_img_fd(&out, STDIN_FILENO) || move_img_fd(&err, STDIN_FILENO)) goto out_chld; if (in < 0) { close(STDIN_FILENO); } else { if (reopen_fd_as_nocheck(STDIN_FILENO, in)) goto out_chld; } if (move_img_fd(&err, STDOUT_FILENO)) goto out_chld; if (reopen_fd_as_nocheck(STDOUT_FILENO, out)) goto out_chld; if (reopen_fd_as_nocheck(STDERR_FILENO, err)) goto out_chld; execvp(cmd, argv); pr_perror("exec failed"); out_chld: _exit(1); } while (1) { ret = waitpid(pid, &status, 0); if (ret == -1) { pr_perror("waitpid() failed"); goto out; } if (WIFEXITED(status)) { if (WEXITSTATUS(status)) pr_err("exited, status=%d\n", WEXITSTATUS(status)); break; } else if (WIFSIGNALED(status)) { pr_err("killed by signal %d\n", WTERMSIG(status)); break; } else if (WIFSTOPPED(status)) { pr_err("stopped by signal %d\n", WSTOPSIG(status)); } else if (WIFCONTINUED(status)) { pr_err("continued\n"); } } ret = status ? -1 : 0; out: if (sigprocmask(SIG_SETMASK, &oldmask, NULL) == -1) { pr_perror("Can not unset mask of blocked signals"); BUG(); } return ret; } int is_root_user() { if (geteuid() != 0) { pr_err("You need to be root to run this command\n"); return 0; } return 1; }