mirror of
https://github.com/checkpoint-restore/criu
synced 2025-08-27 04:18:27 +00:00
We see that libbsd redefines __has_include to be always true, which breaks such checks for rseq. The idea behind this patch is to remove the use of libbsd functions and always export our replacement functions, using __strlcat and __strlcpy everywhere in the existing code: `git grep --files-with-matches "strlcat" | xargs sed -i 's/strlcat/__strlcat/g'` and `git grep --files-with-matches "strlcpy" | xargs sed -i 's/strlcpy/__strlcpy/g'`. Fixes: #2036. Suggested-by: Andrei Vagin <avagin@google.com>. Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
2845 lines
62 KiB
C
2845 lines
62 KiB
C
#include <stdio.h>
|
|
#include <unistd.h>
|
|
#include <fcntl.h>
|
|
#include <sys/mman.h>
|
|
#include <sys/types.h>
|
|
#include <dirent.h>
|
|
#include <errno.h>
|
|
#include <sys/stat.h>
|
|
#include <string.h>
|
|
#include <ctype.h>
|
|
#include <linux/fs.h>
|
|
#include <sys/sysmacros.h>
|
|
|
|
#include "types.h"
|
|
#include "common/list.h"
|
|
#include "util.h"
|
|
#include "mount.h"
|
|
#include "filesystems.h"
|
|
#include "mman.h"
|
|
#include "cpu.h"
|
|
#include "file-lock.h"
|
|
#include "pstree.h"
|
|
#include "fsnotify.h"
|
|
#include "posix-timer.h"
|
|
#include "kerndat.h"
|
|
#include "vdso.h"
|
|
#include "vma.h"
|
|
#include "mem.h"
|
|
#include "bfd.h"
|
|
#include "proc_parse.h"
|
|
#include "fdinfo.h"
|
|
#include "parasite.h"
|
|
#include "cr_options.h"
|
|
#include "sysfs_parse.h"
|
|
#include "seccomp.h"
|
|
#include "string.h"
|
|
#include "namespaces.h"
|
|
#include "cgroup.h"
|
|
#include "cgroup-props.h"
|
|
#include "timerfd.h"
|
|
#include "path.h"
|
|
#include "fault-injection.h"
|
|
#include "memfd.h"
|
|
#include "hugetlb.h"
|
|
|
|
#include "protobuf.h"
|
|
#include "images/fdinfo.pb-c.h"
|
|
#include "images/mnt.pb-c.h"
|
|
#include "plugin.h"
|
|
|
|
#include <stdlib.h>
|
|
|
|
/*
 * Fallback definitions of the sigevent notification kinds, used only
 * when the included headers have not provided them already.
 */
#ifndef SIGEV_SIGNAL
#define SIGEV_SIGNAL 0 /* notify via signal */
#endif

#ifndef SIGEV_NONE
#define SIGEV_NONE 1 /* other notification: meaningless */
#endif

#ifndef SIGEV_THREAD
#define SIGEV_THREAD 2 /* deliver via thread creation */
#endif

#ifndef SIGEV_THREAD_ID
#define SIGEV_THREAD_ID 4 /* deliver to thread */
#endif
|
|
|
|
#define BUF_SIZE 4096 /* Good enough value - can be changed */

/*
 * Scratch area shared by the parsers in this file. The extra @end byte
 * sits right behind the data array and stays '\0' as long as nobody
 * writes to it (static storage is zero-initialised).
 */
struct buffer {
	char buf[BUF_SIZE];
	char end; /* '\0' */
};

/* The single static instance and a shorthand pointer to its data array. */
static struct buffer __buf;
static char *buf = __buf.buf;
|
|
|
|
/*
 * Name under which AIO ring buffer mappings show up in proc.
 */
#define AIO_FNAME "/[aio]"
|
|
|
|
/* check the @line starts with "%lx-%lx" format */
|
|
/*
 * Check that @line starts with a "%lx-%lx " style range: a run of
 * lowercase hex digits, a '-', another run of lowercase hex digits and
 * a single space. Either run of digits may be empty, and uppercase hex
 * digits are not treated as address characters.
 */
static bool __is_vma_range_fmt(char *line)
{
	const char *pos = line;
	int field;

	for (field = 0; field < 2; field++) {
		/* Skip the address digits; the NUL terminator ends the run too. */
		for (; (*pos >= '0' && *pos <= '9') || (*pos >= 'a' && *pos <= 'f'); pos++)
			;

		/* The first address is closed by '-', the second one by ' '. */
		if (*pos++ != (field == 0 ? '-' : ' '))
			return false;
	}

	return true;
}
|
|
|
|
/* Exported wrapper around __is_vma_range_fmt(): true if @line starts with an address range. */
bool is_vma_range_fmt(char *line)
{
	bool looks_like_range = __is_vma_range_fmt(line);

	return looks_like_range;
}
|
|
|
|
bool handle_vma_plugin(int *fd, struct stat *stat)
|
|
{
|
|
int ret;
|
|
|
|
ret = run_plugins(HANDLE_DEVICE_VMA, *fd, stat);
|
|
if (ret < 0) {
|
|
pr_perror("handle_device_vma plugin failed");
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
 * Parse the payload of a "VmFlags:" line.
 *
 * @buf is split in place with strtok() on spaces and newlines. Only the
 * first two characters of every token are compared against the known
 * mnemonics:
 *  - mmap() related ones OR the matching MAP_* bit into @flags,
 *  - madvise() related ones set bit number MADV_* in @madv,
 *  - "io" and "pf" set @io_pf to 1.
 * Anything else is just ignored.
 */
static void __parse_vmflags(char *buf, u32 *flags, u64 *madv, int *io_pf)
{
	enum vmflag_kind { VMFLAG_MMAP, VMFLAG_MADV, VMFLAG_IO_PF };
	static const struct {
		char mnemonic[3];
		enum vmflag_kind kind;
		unsigned long val;
	} known[] = {
		/* mmap() block */
		{ "gd", VMFLAG_MMAP, MAP_GROWSDOWN },
		{ "lo", VMFLAG_MMAP, MAP_LOCKED },
		{ "nr", VMFLAG_MMAP, MAP_NORESERVE },
		{ "ht", VMFLAG_MMAP, MAP_HUGETLB },
		/* madvise() block */
		{ "sr", VMFLAG_MADV, MADV_SEQUENTIAL },
		{ "rr", VMFLAG_MADV, MADV_RANDOM },
		{ "dc", VMFLAG_MADV, MADV_DONTFORK },
		{ "dd", VMFLAG_MADV, MADV_DONTDUMP },
		{ "mg", VMFLAG_MADV, MADV_MERGEABLE },
		{ "hg", VMFLAG_MADV, MADV_HUGEPAGE },
		{ "nh", VMFLAG_MADV, MADV_NOHUGEPAGE },
		/* vmsplice doesn't work for VM_IO and VM_PFNMAP mappings. */
		{ "io", VMFLAG_IO_PF, 0 },
		{ "pf", VMFLAG_IO_PF, 0 },
	};
	char *tok;
	size_t i;

	if (!buf[0])
		return;

	for (tok = strtok(buf, " \n"); tok; tok = strtok(NULL, " \n")) {
		for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
			/* Two-character prefix match; the mnemonics are all distinct. */
			if (tok[0] != known[i].mnemonic[0] || tok[1] != known[i].mnemonic[1])
				continue;

			switch (known[i].kind) {
			case VMFLAG_MMAP:
				*flags |= known[i].val;
				break;
			case VMFLAG_MADV:
				*madv |= (1ul << known[i].val);
				break;
			case VMFLAG_IO_PF:
				*io_pf = 1;
				break;
			}
			break;
		}
	}
}
|
|
|
|
/*
 * Exported wrapper around __parse_vmflags(): parses the "VmFlags:"
 * payload in @buf (modified in place) into @flags, @madv and @io_pf.
 */
void parse_vmflags(char *buf, u32 *flags, u64 *madv, int *io_pf)
{
	__parse_vmflags(buf, flags, madv, io_pf);
	return;
}
|
|
|
|
static void parse_vma_vmflags(char *buf, struct vma_area *vma_area)
|
|
{
|
|
int io_pf = 0;
|
|
|
|
__parse_vmflags(buf, &vma_area->e->flags, &vma_area->e->madv, &io_pf);
|
|
|
|
/*
|
|
* vmsplice doesn't work for VM_IO and VM_PFNMAP mappings, the
|
|
* only exception is VVAR area that mapped by the kernel as
|
|
* VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP
|
|
*/
|
|
if (io_pf && !vma_area_is(vma_area, VMA_AREA_VVAR) && !vma_entry_is(vma_area->e, VMA_FILE_SHARED))
|
|
vma_area->e->status |= VMA_UNSUPP;
|
|
|
|
if (vma_area->e->madv)
|
|
vma_area->e->has_madv = true;
|
|
}
|
|
|
|
static inline int is_anon_shmem_map(dev_t dev)
|
|
{
|
|
return kdat.shmem_dev == dev;
|
|
}
|
|
|
|
/*
 * Identity of the file behind a mapping as parsed from an smaps range
 * line, plus the vma it was last recorded for.
 */
struct vma_file_info {
	int dev_maj;	      /* device major number */
	int dev_min;	      /* device minor number */
	unsigned long ino;    /* inode number */
	struct vma_area *vma; /* area this info belongs to */
};
|
|
|
|
static inline int vfi_equal(struct vma_file_info *a, struct vma_file_info *b)
|
|
{
|
|
return ((a->ino ^ b->ino) | (a->dev_maj ^ b->dev_maj) | (a->dev_min ^ b->dev_min)) == 0;
|
|
}
|
|
|
|
static int vma_get_mapfile_flags(struct vma_area *vma, DIR *mfd, char *path)
|
|
{
|
|
struct stat stat;
|
|
|
|
if (fstatat(dirfd(mfd), path, &stat, AT_SYMLINK_NOFOLLOW) < 0) {
|
|
if (errno == ENOENT) {
|
|
/* Just mapping w/o map_files link */
|
|
return 0;
|
|
}
|
|
pr_perror("Failed fstatat on map %" PRIx64 "", vma->e->start);
|
|
return -1;
|
|
}
|
|
|
|
switch (stat.st_mode & 0600) {
|
|
case 0200:
|
|
vma->e->fdflags = O_WRONLY;
|
|
break;
|
|
case 0400:
|
|
vma->e->fdflags = O_RDONLY;
|
|
break;
|
|
case 0600:
|
|
vma->e->fdflags = O_RDWR;
|
|
break;
|
|
}
|
|
vma->e->has_fdflags = true;
|
|
return 0;
|
|
}
|
|
|
|
static int vma_stat(struct vma_area *vma, int fd)
|
|
{
|
|
vma->vmst = xmalloc(sizeof(struct stat));
|
|
if (!vma->vmst)
|
|
return -1;
|
|
|
|
/*
|
|
* For AUFS support, we need to check if the symbolic link
|
|
* points to a branch. If it does, we cannot fstat() its file
|
|
* descriptor because it would return a different dev/ino than
|
|
* the real file. If fixup_aufs_vma_fd() returns positive,
|
|
* it means that it has stat()'ed using the full pathname.
|
|
* Zero return means that the symbolic link does not point to
|
|
* a branch and we can do fstat() below.
|
|
*/
|
|
if (opts.aufs) {
|
|
int ret;
|
|
|
|
ret = fixup_aufs_vma_fd(vma, fd);
|
|
if (ret < 0)
|
|
return -1;
|
|
if (ret > 0)
|
|
return 0;
|
|
}
|
|
|
|
if (fstat(fd, vma->vmst) < 0) {
|
|
pr_perror("Failed fstat on map %" PRIx64 "", vma->e->start);
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int vma_get_mapfile_user(const char *fname, struct vma_area *vma, struct vma_file_info *vfi, int *vm_file_fd,
|
|
const char *path)
|
|
{
|
|
int fd, hugetlb_flag = 0;
|
|
dev_t vfi_dev;
|
|
|
|
/*
|
|
* Kernel prohibits reading map_files for users. The
|
|
* best we can do here is fill stat using the information
|
|
* from smaps file and ... hope for the better :\
|
|
*
|
|
* Here we'll miss AIO-s and sockets :(
|
|
*/
|
|
|
|
if (fname[0] == '\0') {
|
|
/*
|
|
* Another bad thing is that kernel first checks
|
|
* for permission access to ANY map_files link,
|
|
* then checks for its existence. So we have to
|
|
* check for file path being empty to "emulate"
|
|
* the ENOENT case.
|
|
*/
|
|
|
|
if (vfi->dev_maj != 0 || vfi->dev_min != 0 || vfi->ino != 0) {
|
|
pr_err("Strange file mapped at %lx [%s]:%d.%d.%ld\n", (unsigned long)vma->e->start, fname,
|
|
vfi->dev_maj, vfi->dev_min, vfi->ino);
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
} else if (fname[0] != '/') {
|
|
/*
|
|
* This should be some kind of
|
|
* special mapping like [heap], [vdso]
|
|
* and such, the caller should take care
|
|
* of the @fname and vma status.
|
|
*/
|
|
return 0;
|
|
}
|
|
|
|
vfi_dev = makedev(vfi->dev_maj, vfi->dev_min);
|
|
|
|
if (is_memfd(vfi_dev)) {
|
|
char tmp[PATH_MAX];
|
|
__strlcpy(tmp, fname, PATH_MAX);
|
|
strip_deleted(tmp, strlen(tmp));
|
|
|
|
/*
|
|
* The error EPERM will be shown in the following pr_perror().
|
|
* It comes from the previous open() call.
|
|
*/
|
|
pr_perror("Can't open mapped [%s]", tmp);
|
|
|
|
/*
|
|
* TODO Perhaps we could do better than failing and dump the
|
|
* memory like what is being done in shmem.c
|
|
*/
|
|
return -1;
|
|
}
|
|
|
|
if (is_hugetlb_dev(vfi_dev, &hugetlb_flag) || is_anon_shmem_map(vfi_dev)) {
|
|
if (!(vma->e->flags & MAP_SHARED))
|
|
vma->e->status |= VMA_ANON_PRIVATE;
|
|
else
|
|
vma->e->status |= VMA_ANON_SHARED;
|
|
|
|
vma->e->flags |= MAP_ANONYMOUS;
|
|
vma->e->shmid = vfi->ino;
|
|
vma->e->flags |= hugetlb_flag;
|
|
|
|
if (!strncmp(fname, "/SYSV", 5)) {
|
|
vma->e->status |= VMA_AREA_SYSVIPC;
|
|
} else if (vma->e->flags & MAP_SHARED) {
|
|
if (fault_injected(FI_HUGE_ANON_SHMEM_ID))
|
|
vma->e->shmid += FI_HUGE_ANON_SHMEM_ID_BASE;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
pr_info("Failed to open map_files/%s, try to go via [%s] path\n", path, fname);
|
|
fd = open(fname, O_RDONLY);
|
|
if (fd < 0) {
|
|
pr_perror("Can't open mapped [%s]", fname);
|
|
return -1;
|
|
}
|
|
|
|
if (vma_stat(vma, fd)) {
|
|
close(fd);
|
|
return -1;
|
|
}
|
|
|
|
if (vma->vmst->st_dev != vfi_dev || vma->vmst->st_ino != vfi->ino) {
|
|
pr_err("Failed to resolve mapping %lx filename\n", (unsigned long)vma->e->start);
|
|
close(fd);
|
|
return -1;
|
|
}
|
|
|
|
*vm_file_fd = fd;
|
|
return 0;
|
|
}
|
|
|
|
static int vma_get_mapfile(const char *fname, struct vma_area *vma, DIR *mfd, struct vma_file_info *vfi,
|
|
struct vma_file_info *prev_vfi, int *vm_file_fd)
|
|
{
|
|
char path[32];
|
|
int flags;
|
|
|
|
/* Figure out if it's file mapping */
|
|
snprintf(path, sizeof(path), "%" PRIx64 "-%" PRIx64, vma->e->start, vma->e->end);
|
|
|
|
if (vma_get_mapfile_flags(vma, mfd, path))
|
|
return -1;
|
|
|
|
if (prev_vfi->vma && vfi_equal(vfi, prev_vfi)) {
|
|
struct vma_area *prev = prev_vfi->vma;
|
|
|
|
/*
|
|
* If vfi is equal (!) and negative @vm_file_fd --
|
|
* we have nothing to borrow for sure.
|
|
*/
|
|
if (*vm_file_fd < 0)
|
|
return 0;
|
|
|
|
pr_debug("vma %" PRIx64 " borrows vfi from previous %" PRIx64 "\n", vma->e->start, prev->e->start);
|
|
if (prev->e->status & VMA_AREA_SOCKET)
|
|
vma->e->status |= VMA_AREA_SOCKET | VMA_AREA_REGULAR;
|
|
|
|
/*
|
|
* FIXME -- in theory there can be vmas that have
|
|
* dev:ino match, but live in different mount
|
|
* namespaces. However, we only borrow files for
|
|
* subsequent vmas. These are _very_ likely to
|
|
* have files from the same namespaces.
|
|
*/
|
|
vma->file_borrowed = true;
|
|
|
|
return 0;
|
|
}
|
|
close_safe(vm_file_fd);
|
|
|
|
/*
|
|
* Note that we "open" it in dumper process space
|
|
* so later we might refer to it via /proc/self/fd/vm_file_fd
|
|
* if needed.
|
|
*/
|
|
flags = O_PATH;
|
|
if (vfi->dev_maj == 0)
|
|
/*
|
|
* Opening with O_PATH omits calling kernel ->open
|
|
* method, thus for some special files their type
|
|
* detection might be broken. Thus we open those with
|
|
* the O_RDONLY to potentially get ENXIO and check
|
|
* it below.
|
|
*/
|
|
flags = O_RDONLY;
|
|
|
|
*vm_file_fd = openat(dirfd(mfd), path, flags);
|
|
if (*vm_file_fd < 0) {
|
|
if (errno == ENOENT)
|
|
/* Just mapping w/o map_files link */
|
|
return 0;
|
|
|
|
if (errno == ENXIO) {
|
|
struct stat buf;
|
|
|
|
if (fstatat(dirfd(mfd), path, &buf, 0))
|
|
return -1;
|
|
|
|
if (S_ISSOCK(buf.st_mode)) {
|
|
pr_info("Found socket mapping @%" PRIx64 "\n", vma->e->start);
|
|
vma->vm_socket_id = buf.st_ino;
|
|
vma->e->status |= VMA_AREA_SOCKET | VMA_AREA_REGULAR;
|
|
return 0;
|
|
}
|
|
|
|
if ((buf.st_mode & S_IFMT) == 0 && !strncmp(fname, AIO_FNAME, sizeof(AIO_FNAME) - 1)) {
|
|
/* AIO ring, let's try */
|
|
close_safe(vm_file_fd);
|
|
vma->e->status = VMA_AREA_AIORING;
|
|
return 0;
|
|
}
|
|
|
|
pr_err("Unknown shit %o (%s)\n", buf.st_mode, fname);
|
|
return -1;
|
|
}
|
|
|
|
if (errno == EPERM && !opts.aufs)
|
|
return vma_get_mapfile_user(fname, vma, vfi, vm_file_fd, path);
|
|
|
|
pr_perror("Can't open map_files");
|
|
return -1;
|
|
}
|
|
|
|
return vma_stat(vma, *vm_file_fd);
|
|
}
|
|
|
|
int parse_self_maps_lite(struct vm_area_list *vms)
|
|
{
|
|
struct vma_area *prev = NULL;
|
|
struct bfd maps;
|
|
char *buf;
|
|
|
|
vm_area_list_init(vms);
|
|
|
|
maps.fd = open_proc(PROC_SELF, "maps");
|
|
if (maps.fd < 0)
|
|
return -1;
|
|
|
|
if (bfdopenr(&maps))
|
|
return -1;
|
|
|
|
while (1) {
|
|
struct vma_area *vma;
|
|
char *end;
|
|
unsigned long s, e;
|
|
|
|
buf = breadline(&maps);
|
|
if (!buf)
|
|
break;
|
|
if (IS_ERR(buf))
|
|
goto err;
|
|
|
|
s = strtoul(buf, &end, 16);
|
|
e = strtoul(end + 1, NULL, 16);
|
|
|
|
if (prev && prev->e->end == s)
|
|
/*
|
|
* This list is needed for one thing only -- to
|
|
* get the idea of what parts of current address
|
|
* space are busy. So merge them altogether.
|
|
*/
|
|
prev->e->end = e;
|
|
else {
|
|
vma = alloc_vma_area();
|
|
if (!vma)
|
|
goto err;
|
|
|
|
vma->e->start = s;
|
|
vma->e->end = e;
|
|
list_add_tail(&vma->list, &vms->h);
|
|
vms->nr++;
|
|
prev = vma;
|
|
}
|
|
|
|
pr_debug("Parsed %" PRIx64 "-%" PRIx64 " vma\n", prev->e->start, prev->e->end);
|
|
}
|
|
|
|
bclose(&maps);
|
|
return 0;
|
|
|
|
err:
|
|
bclose(&maps);
|
|
return -1;
|
|
}
|
|
|
|
static inline int handle_vdso_vma(struct vma_area *vma)
|
|
{
|
|
vma->e->status |= VMA_AREA_REGULAR;
|
|
if ((vma->e->prot & VDSO_PROT) == VDSO_PROT)
|
|
vma->e->status |= VMA_AREA_VDSO;
|
|
return 0;
|
|
}
|
|
|
|
static inline int handle_vvar_vma(struct vma_area *vma)
|
|
{
|
|
vma->e->status |= VMA_AREA_REGULAR;
|
|
if ((vma->e->prot & VVAR_PROT) == VVAR_PROT)
|
|
vma->e->status |= VMA_AREA_VVAR;
|
|
return 0;
|
|
}
|
|
|
|
static int handle_vma(pid_t pid, struct vma_area *vma_area, const char *file_path, DIR *map_files_dir,
|
|
struct vma_file_info *vfi, struct vma_file_info *prev_vfi, int *vm_file_fd)
|
|
{
|
|
if (vma_get_mapfile(file_path, vma_area, map_files_dir, vfi, prev_vfi, vm_file_fd))
|
|
goto err_bogus_mapfile;
|
|
|
|
if (vma_area->e->status != 0)
|
|
return 0;
|
|
|
|
if (!strcmp(file_path, "[vsyscall]") || !strcmp(file_path, "[vectors]")) {
|
|
vma_area->e->status |= VMA_AREA_VSYSCALL;
|
|
} else if (!strcmp(file_path, "[vdso]")) {
|
|
if (handle_vdso_vma(vma_area))
|
|
goto err;
|
|
} else if (!strcmp(file_path, "[vvar]")) {
|
|
if (handle_vvar_vma(vma_area))
|
|
goto err;
|
|
} else if (!strcmp(file_path, "[heap]")) {
|
|
vma_area->e->status |= VMA_AREA_REGULAR | VMA_AREA_HEAP;
|
|
} else {
|
|
vma_area->e->status = VMA_AREA_REGULAR;
|
|
}
|
|
|
|
/*
|
|
* Some mapping hints for restore, we save this on
|
|
* disk and restore might need to analyze it.
|
|
*/
|
|
if (vma_area->file_borrowed) {
|
|
struct vma_area *prev = prev_vfi->vma;
|
|
|
|
/*
|
|
* Pick-up flags that might be set in the branch below.
|
|
* Status is copied as-is as it should be zero here,
|
|
* and have full match with the previous.
|
|
*/
|
|
vma_area->e->flags |= (prev->e->flags & MAP_ANONYMOUS);
|
|
vma_area->e->status = prev->e->status;
|
|
vma_area->e->shmid = prev->e->shmid;
|
|
vma_area->vmst = prev->vmst;
|
|
vma_area->mnt_id = prev->mnt_id;
|
|
|
|
if (!(vma_area->e->status & VMA_AREA_SYSVIPC)) {
|
|
vma_area->e->status &= ~(VMA_FILE_PRIVATE | VMA_FILE_SHARED);
|
|
if (vma_area->e->flags & MAP_PRIVATE)
|
|
vma_area->e->status |= VMA_FILE_PRIVATE;
|
|
else
|
|
vma_area->e->status |= VMA_FILE_SHARED;
|
|
}
|
|
} else if (*vm_file_fd >= 0) {
|
|
struct stat *st_buf = vma_area->vmst;
|
|
int hugetlb_flag = 0;
|
|
|
|
if (S_ISREG(st_buf->st_mode)) {
|
|
/* regular file mapping -- supported */;
|
|
pr_debug("Found regular file mapping, OK\n");
|
|
} else if (S_ISCHR(st_buf->st_mode) && (st_buf->st_rdev == DEVZERO)) {
|
|
/* devzero mapping -- also makes sense */;
|
|
pr_debug("Found devzero mapping, OK\n");
|
|
} else if (handle_vma_plugin(vm_file_fd, st_buf)) {
|
|
pr_info("Found device file mapping, plugin is available\n");
|
|
vma_area->e->status |= VMA_EXT_PLUGIN;
|
|
} else {
|
|
/* non-regular mapping with no supporting plugin */
|
|
pr_err("Can't handle non-regular mapping on %d's map %" PRIx64 "\n", pid, vma_area->e->start);
|
|
goto err;
|
|
}
|
|
|
|
if ((is_anon_shmem_map(st_buf->st_dev) || is_hugetlb_dev(st_buf->st_dev, NULL)) &&
|
|
!strncmp(file_path, "/SYSV", 5)) {
|
|
vma_area->e->flags |= MAP_ANONYMOUS;
|
|
vma_area->e->status |= VMA_ANON_SHARED;
|
|
vma_area->e->shmid = st_buf->st_ino;
|
|
if (!(vma_area->e->flags & MAP_SHARED))
|
|
goto err_bogus_mapping;
|
|
pr_info("path: %s\n", file_path);
|
|
vma_area->e->status |= VMA_AREA_SYSVIPC;
|
|
} else {
|
|
/* We dump memfd backed mapping, both normal and hugepage anonymous share
|
|
* mapping using memfd approach when possible.
|
|
*/
|
|
if (is_memfd(st_buf->st_dev) || is_anon_shmem_map(st_buf->st_dev) ||
|
|
can_dump_with_memfd_hugetlb(st_buf->st_dev, &hugetlb_flag, file_path, vma_area)) {
|
|
vma_area->e->status |= VMA_AREA_MEMFD;
|
|
vma_area->e->flags |= hugetlb_flag;
|
|
if (fault_injected(FI_HUGE_ANON_SHMEM_ID))
|
|
vma_area->e->shmid += FI_HUGE_ANON_SHMEM_ID_BASE;
|
|
} else if (is_hugetlb_dev(st_buf->st_dev, &hugetlb_flag)) {
|
|
vma_area->e->flags |= hugetlb_flag;
|
|
vma_area->e->flags |= MAP_ANONYMOUS;
|
|
|
|
if (vma_area->e->flags & MAP_SHARED) {
|
|
vma_area->e->status |= VMA_ANON_SHARED;
|
|
vma_area->e->shmid = st_buf->st_ino;
|
|
} else {
|
|
vma_area->e->status |= VMA_ANON_PRIVATE;
|
|
}
|
|
|
|
close_safe(vm_file_fd);
|
|
return 0;
|
|
}
|
|
|
|
if (vma_area->e->flags & MAP_PRIVATE)
|
|
vma_area->e->status |= VMA_FILE_PRIVATE;
|
|
else
|
|
vma_area->e->status |= VMA_FILE_SHARED;
|
|
}
|
|
|
|
/*
|
|
* We cannot use the mnt_id value provided by the kernel
|
|
* for vm_file_fd if it is an AUFS file (the value is
|
|
* wrong). In such a case, fixup_aufs_vma_fd() has set
|
|
* mnt_id to -1 to mimic pre-3.15 kernels that didn't
|
|
* have mnt_id.
|
|
*/
|
|
if (vma_area->mnt_id != -1 && get_fd_mntid(*vm_file_fd, &vma_area->mnt_id))
|
|
return -1;
|
|
} else {
|
|
/*
|
|
* No file but mapping -- anonymous one.
|
|
*/
|
|
if (vma_area->e->flags & MAP_SHARED) {
|
|
vma_area->e->status |= VMA_ANON_SHARED;
|
|
vma_area->e->shmid = vfi->ino;
|
|
} else {
|
|
vma_area->e->status |= VMA_ANON_PRIVATE;
|
|
}
|
|
vma_area->e->flags |= MAP_ANONYMOUS;
|
|
}
|
|
|
|
return 0;
|
|
err:
|
|
return -1;
|
|
err_bogus_mapping:
|
|
pr_err("Bogus mapping 0x%" PRIx64 "-0x%" PRIx64 " (flags: %#x vm_file_fd: %d)\n", vma_area->e->start,
|
|
vma_area->e->end, vma_area->e->flags, *vm_file_fd);
|
|
goto err;
|
|
|
|
err_bogus_mapfile:
|
|
pr_perror("Can't open %d's mapfile link %" PRIx64, pid, vma_area->e->start);
|
|
goto err;
|
|
}
|
|
|
|
static int vma_list_add(struct vma_area *vma_area, struct vm_area_list *vma_area_list, unsigned long *prev_end,
|
|
struct vma_file_info *vfi, struct vma_file_info *prev_vfi)
|
|
{
|
|
if (vma_area->e->status & VMA_EXT_PLUGIN) {
|
|
/* Unsupported VMAs that provide special plugins for
|
|
* backup can be treated as regular VMAs and criu
|
|
* should only save their metadata in the dump files.
|
|
* There can be several special backup plugins hooks
|
|
* that might run at different stages during checkpoint
|
|
* and restore.
|
|
*/
|
|
pr_debug("Device file mapping %016" PRIx64 "-%016" PRIx64 " supported via device plugins\n",
|
|
vma_area->e->start, vma_area->e->end);
|
|
} else if (vma_area->e->status & VMA_UNSUPP) {
|
|
pr_err("Unsupported mapping found %016" PRIx64 "-%016" PRIx64 "\n", vma_area->e->start,
|
|
vma_area->e->end);
|
|
return -1;
|
|
}
|
|
|
|
/* Add a guard page only if here is enough space for it */
|
|
if (vma_has_guard_gap_hidden(vma_area) && *prev_end < vma_area->e->start)
|
|
vma_area->e->start -= PAGE_SIZE; /* Guard page */
|
|
*prev_end = vma_area->e->end;
|
|
|
|
list_add_tail(&vma_area->list, &vma_area_list->h);
|
|
vma_area_list->nr++;
|
|
if (vma_area_is_private(vma_area, kdat.task_size)) {
|
|
unsigned long pages;
|
|
|
|
pages = vma_area_len(vma_area) / PAGE_SIZE;
|
|
vma_area_list->nr_priv_pages += pages;
|
|
vma_area_list->nr_priv_pages_longest = max(vma_area_list->nr_priv_pages_longest, pages);
|
|
} else if (vma_area_is(vma_area, VMA_ANON_SHARED)) {
|
|
unsigned long pages;
|
|
|
|
pages = vma_area_len(vma_area) / PAGE_SIZE;
|
|
vma_area_list->nr_shared_pages_longest = max(vma_area_list->nr_shared_pages_longest, pages);
|
|
}
|
|
|
|
*prev_vfi = *vfi;
|
|
prev_vfi->vma = vma_area;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* On s390 we have old kernels where the global task size assumption of
|
|
* criu does not work. See also compel_task_size() for s390.
|
|
*/
|
|
static int task_size_check(pid_t pid, VmaEntry *entry)
|
|
{
|
|
#ifdef __s390x__
|
|
if (entry->end <= kdat.task_size)
|
|
return 0;
|
|
pr_err("Can't dump high memory region %lx-%lx of task %d because kernel commit ee71d16d22bb is missing\n",
|
|
entry->start, entry->end, pid);
|
|
return -1;
|
|
#else
|
|
return 0;
|
|
#endif
|
|
}
|
|
|
|
/*
 * Parse /proc/<pid>/smaps into @vma_area_list.
 *
 * Every address-range line starts a new area; the "VmFlags:" line that
 * follows refines it. An area is added to the list only when the next
 * range line (or the end of file) is reached, i.e. after its VmFlags
 * have been seen. For file mappings @dump_filemap, when not NULL, is
 * called with the area and the descriptor of the mapped file.
 *
 * Returns 0 on success, -1 on error.
 */
int parse_smaps(pid_t pid, struct vm_area_list *vma_area_list, dump_filemap_t dump_filemap)
{
	struct vma_area *vma_area = NULL;
	unsigned long start, end, pgoff, prev_end = 0;
	char r, w, x, s;
	int ret = -1, vm_file_fd = -1;
	struct vma_file_info vfi;
	struct vma_file_info prev_vfi = {};

	DIR *map_files_dir = NULL;
	struct bfd f;

	vm_area_list_init(vma_area_list);

	f.fd = open_proc(pid, "smaps");
	if (f.fd < 0)
		goto err_n;

	if (bfdopenr(&f))
		goto err_n;

	map_files_dir = opendir_proc(pid, "map_files");
	if (!map_files_dir) /* old kernel? */
		goto err;

	for (;;) {
		int num, path_off;
		char *str;

		str = breadline(&f);
		if (IS_ERR(str))
			goto err;

		/* A NULL @str means end of file and is handled further down. */
		if (str != NULL && !__is_vma_range_fmt(str)) {
			/* Of the per-area detail lines only VmFlags matters. */
			if (strncmp(str, "VmFlags: ", 9) == 0) {
				BUG_ON(!vma_area);
				parse_vma_vmflags(&str[9], vma_area);
			}
			continue;
		}

		/* The previous area is complete now, put it on the list. */
		if (vma_area && vma_list_add(vma_area, vma_area_list, &prev_end, &vfi, &prev_vfi))
			goto err;

		if (str == NULL)
			break;

		vma_area = alloc_vma_area();
		if (!vma_area)
			goto err;

		num = sscanf(str, "%lx-%lx %c%c%c%c %lx %x:%x %lu %n", &start, &end, &r, &w, &x, &s, &pgoff,
			     &vfi.dev_maj, &vfi.dev_min, &vfi.ino, &path_off);
		if (num < 10) {
			pr_err("Can't parse: %s\n", str);
			goto err;
		}

		vma_area->e->start = start;
		vma_area->e->end = end;
		vma_area->e->pgoff = pgoff;
		vma_area->e->prot = PROT_NONE;

		if (task_size_check(pid, vma_area->e))
			goto err;

		if (r == 'r')
			vma_area->e->prot |= PROT_READ;
		if (w == 'w')
			vma_area->e->prot |= PROT_WRITE;
		if (x == 'x')
			vma_area->e->prot |= PROT_EXEC;

		switch (s) {
		case 's':
			vma_area->e->flags = MAP_SHARED;
			break;
		case 'p':
			vma_area->e->flags = MAP_PRIVATE;
			break;
		default:
			pr_err("Unexpected VMA met (%c)\n", s);
			goto err;
		}

		/* @path_off is where the path column starts within the line. */
		if (handle_vma(pid, vma_area, str + path_off, map_files_dir, &vfi, &prev_vfi, &vm_file_fd))
			goto err;

		if (vma_entry_is(vma_area->e, VMA_FILE_PRIVATE) || vma_entry_is(vma_area->e, VMA_FILE_SHARED)) {
			if (dump_filemap && dump_filemap(vma_area, vm_file_fd))
				goto err;
		} else if (vma_entry_is(vma_area->e, VMA_AREA_AIORING)) {
			vma_area_list->nr_aios++;
		}
	}

	/* The last area is on the list, it must not be freed below. */
	vma_area = NULL;
	ret = 0;

err:
	bclose(&f);
err_n:
	close_safe(&vm_file_fd);
	if (map_files_dir)
		closedir(map_files_dir);

	xfree(vma_area);
	return ret;
}
|
|
|
|
int parse_pid_stat(pid_t pid, struct proc_pid_stat *s)
|
|
{
|
|
char *tok, *p;
|
|
int fd;
|
|
int n;
|
|
|
|
fd = open_proc(pid, "stat");
|
|
if (fd < 0)
|
|
return -1;
|
|
|
|
n = read(fd, buf, BUF_SIZE);
|
|
close(fd);
|
|
if (n < 1) {
|
|
pr_err("stat for %d is corrupted\n", pid);
|
|
return -1;
|
|
}
|
|
|
|
memset(s, 0, sizeof(*s));
|
|
|
|
tok = strchr(buf, ' ');
|
|
if (!tok)
|
|
goto err;
|
|
*tok++ = '\0';
|
|
if (*tok != '(')
|
|
goto err;
|
|
|
|
s->pid = atoi(buf);
|
|
|
|
p = strrchr(tok + 1, ')');
|
|
if (!p)
|
|
goto err;
|
|
*tok = '\0';
|
|
*p = '\0';
|
|
|
|
__strlcpy(s->comm, tok + 1, sizeof(s->comm));
|
|
|
|
n = sscanf(p + 1,
|
|
" %c %d %d %d %d %d %u %lu %lu %lu %lu "
|
|
"%lu %lu %ld %ld %ld %ld %d %d %llu %lu %ld %lu %lu %lu %lu "
|
|
"%lu %lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld "
|
|
"%lu %lu %lu %lu %lu %lu %lu %d",
|
|
&s->state, &s->ppid, &s->pgid, &s->sid, &s->tty_nr, &s->tty_pgrp, &s->flags, &s->min_flt,
|
|
&s->cmin_flt, &s->maj_flt, &s->cmaj_flt, &s->utime, &s->stime, &s->cutime, &s->cstime, &s->priority,
|
|
&s->nice, &s->num_threads, &s->zero0, &s->start_time, &s->vsize, &s->mm_rss, &s->rsslim,
|
|
&s->start_code, &s->end_code, &s->start_stack, &s->esp, &s->eip, &s->sig_pending, &s->sig_blocked,
|
|
&s->sig_ignored, &s->sig_handled, &s->wchan, &s->zero1, &s->zero2, &s->exit_signal, &s->task_cpu,
|
|
&s->rt_priority, &s->policy, &s->delayacct_blkio_ticks, &s->gtime, &s->cgtime, &s->start_data,
|
|
&s->end_data, &s->start_brk, &s->arg_start, &s->arg_end, &s->env_start, &s->env_end, &s->exit_code);
|
|
if (n < 50)
|
|
goto err;
|
|
|
|
return 0;
|
|
|
|
err:
|
|
pr_err("Parsing %d's stat failed (#fields do not match)\n", pid);
|
|
return -1;
|
|
}
|
|
|
|
int prepare_loginuid(unsigned int value)
|
|
{
|
|
int fd, ret = 0;
|
|
char buf[11]; /* 4294967295 is maximum for u32 */
|
|
|
|
fd = open_proc_rw(PROC_SELF, "loginuid");
|
|
if (fd < 0)
|
|
return -1;
|
|
|
|
snprintf(buf, 11, "%u", value);
|
|
|
|
if (write(fd, buf, 11) < 0) {
|
|
pr_warn("Write %s to /proc/self/loginuid failed: %s\n", buf, strerror(errno));
|
|
ret = -1;
|
|
}
|
|
close(fd);
|
|
return ret;
|
|
}
|
|
|
|
unsigned int parse_pid_loginuid(pid_t pid, int *err, bool ignore_noent)
|
|
{
|
|
int fd;
|
|
ssize_t num;
|
|
|
|
*err = 0;
|
|
fd = __open_proc(pid, (ignore_noent) ? ENOENT : 0, O_RDONLY, "loginuid");
|
|
if (fd < 0)
|
|
goto out;
|
|
|
|
num = read(fd, buf, 10);
|
|
close(fd);
|
|
if (num < 0) {
|
|
pr_perror("Unable to read /proc/%d/loginuid", pid);
|
|
goto out;
|
|
}
|
|
buf[num] = '\0';
|
|
|
|
return strtol(buf, NULL, 10);
|
|
|
|
out:
|
|
*err = -1;
|
|
return INVALID_UID; /* unset value */
|
|
}
|
|
|
|
int parse_pid_oom_score_adj(pid_t pid, int *err)
|
|
{
|
|
int fd;
|
|
ssize_t num;
|
|
|
|
*err = 0;
|
|
fd = open_proc(pid, "oom_score_adj");
|
|
if (fd < 0)
|
|
goto out;
|
|
|
|
num = read(fd, buf, 10);
|
|
close(fd);
|
|
if (num < 0) {
|
|
pr_perror("Unable to read /proc/%d/oom_score_adj", pid);
|
|
goto out;
|
|
}
|
|
buf[num] = '\0';
|
|
|
|
return strtol(buf, NULL, 10);
|
|
|
|
out:
|
|
*err = -1;
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Parse the four ids of a "Uid:" or "Gid:" status line.
 *
 * @str points at the first number; consecutive numbers are separated
 * by exactly one character. The values are stored in @arr[0..3].
 *
 * Returns 0 when exactly four numbers were found and nothing follows
 * the last one, -1 otherwise. A line that ends early or has an empty
 * field is rejected without reading past its terminator. Values are
 * parsed as unsigned so that ids above INT_MAX are not clamped where
 * long is 32 bits wide.
 */
static int ids_parse(char *str, unsigned int *arr)
{
	char *pos = str, *end = str;
	int i;

	for (i = 0; i < 4; i++) {
		arr[i] = strtoul(pos, &end, 10);
		if (end == pos)
			return -1; /* no digits where a number was expected */

		if (i == 3)
			break;

		if (*end == '\0')
			return -1; /* fewer than four numbers */

		/* Step over the single separator character. */
		pos = end + 1;
	}

	return *end ? -1 : 0;
}
|
|
|
|
static int cap_parse(char *str, unsigned int *res)
|
|
{
|
|
int i, ret;
|
|
|
|
for (i = 0; i < PROC_CAP_SIZE; i++) {
|
|
ret = sscanf(str, "%08x", &res[PROC_CAP_SIZE - 1 - i]);
|
|
if (ret != 1)
|
|
return -1;
|
|
str += 8;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int parse_pid_status(pid_t pid, struct seize_task_status *ss, void *data)
|
|
{
|
|
struct proc_status_creds *cr = container_of(ss, struct proc_status_creds, s);
|
|
struct bfd f;
|
|
int done = 0;
|
|
int ret = -1;
|
|
char *str;
|
|
bool parsed_seccomp = false;
|
|
int expected_done;
|
|
|
|
f.fd = open_proc(pid, "status");
|
|
if (f.fd < 0)
|
|
return -1;
|
|
|
|
cr->s.sigpnd = 0;
|
|
cr->s.shdpnd = 0;
|
|
cr->s.sigblk = 0;
|
|
cr->s.seccomp_mode = SECCOMP_MODE_DISABLED;
|
|
|
|
if (bfdopenr(&f))
|
|
return -1;
|
|
|
|
while (done < 14) {
|
|
str = breadline(&f);
|
|
if (str == NULL)
|
|
break;
|
|
if (IS_ERR(str))
|
|
goto err_parse;
|
|
|
|
if (!strncmp(str, "State:", 6)) {
|
|
cr->s.state = str[7];
|
|
done++;
|
|
continue;
|
|
}
|
|
|
|
if (!strncmp(str, "PPid:", 5)) {
|
|
if (sscanf(str, "PPid:\t%d", &cr->s.ppid) != 1) {
|
|
pr_err("Unable to parse: %s\n", str);
|
|
goto err_parse;
|
|
}
|
|
done++;
|
|
continue;
|
|
}
|
|
|
|
if (!strncmp(str, "NSpid:", 6)) {
|
|
/* Get a thread ID in the thread PID namespace. */
|
|
char *last;
|
|
|
|
last = strrchr(str, '\t');
|
|
if (!last || sscanf(last, "%d", &cr->s.vpid) != 1) {
|
|
pr_err("Unable to parse: %s\n", str);
|
|
goto err_parse;
|
|
}
|
|
|
|
done++;
|
|
continue;
|
|
}
|
|
|
|
if (!strncmp(str, "Uid:", 4)) {
|
|
if (ids_parse(str + 5, cr->uids))
|
|
goto err_parse;
|
|
|
|
done++;
|
|
continue;
|
|
}
|
|
|
|
if (!strncmp(str, "Gid:", 4)) {
|
|
if (ids_parse(str + 5, cr->gids))
|
|
goto err_parse;
|
|
|
|
done++;
|
|
continue;
|
|
}
|
|
|
|
if (!strncmp(str, "CapInh:", 7)) {
|
|
if (cap_parse(str + 8, cr->cap_inh))
|
|
goto err_parse;
|
|
|
|
done++;
|
|
continue;
|
|
}
|
|
|
|
if (!strncmp(str, "CapEff:", 7)) {
|
|
if (cap_parse(str + 8, cr->cap_eff))
|
|
goto err_parse;
|
|
|
|
done++;
|
|
continue;
|
|
}
|
|
|
|
if (!strncmp(str, "CapPrm:", 7)) {
|
|
if (cap_parse(str + 8, cr->cap_prm))
|
|
goto err_parse;
|
|
|
|
done++;
|
|
continue;
|
|
}
|
|
|
|
if (!strncmp(str, "CapBnd:", 7)) {
|
|
if (cap_parse(str + 8, cr->cap_bnd))
|
|
goto err_parse;
|
|
|
|
done++;
|
|
continue;
|
|
}
|
|
|
|
if (!strncmp(str, "Seccomp:", 8)) {
|
|
if (sscanf(str + 9, "%d", &cr->s.seccomp_mode) != 1) {
|
|
goto err_parse;
|
|
}
|
|
|
|
parsed_seccomp = true;
|
|
done++;
|
|
continue;
|
|
}
|
|
|
|
if (!strncmp(str, "ShdPnd:", 7)) {
|
|
unsigned long long sigpnd;
|
|
|
|
if (sscanf(str + 7, "%llx", &sigpnd) != 1)
|
|
goto err_parse;
|
|
cr->s.shdpnd |= sigpnd;
|
|
|
|
done++;
|
|
continue;
|
|
}
|
|
if (!strncmp(str, "SigPnd:", 7)) {
|
|
unsigned long long sigpnd;
|
|
|
|
if (sscanf(str + 7, "%llx", &sigpnd) != 1)
|
|
goto err_parse;
|
|
cr->s.sigpnd |= sigpnd;
|
|
|
|
done++;
|
|
continue;
|
|
}
|
|
if (!strncmp(str, "SigBlk:", 7)) {
|
|
unsigned long long sigblk = 0;
|
|
|
|
if (sscanf(str + 7, "%llx", &sigblk) != 1)
|
|
goto err_parse;
|
|
cr->s.sigblk |= sigblk;
|
|
|
|
done++;
|
|
continue;
|
|
}
|
|
}
|
|
|
|
/* seccomp and nspids are optional */
|
|
expected_done = (parsed_seccomp ? 12 : 11);
|
|
if (kdat.has_nspid)
|
|
expected_done++;
|
|
if (done == expected_done)
|
|
ret = 0;
|
|
|
|
err_parse:
|
|
if (ret)
|
|
pr_err("Error parsing proc status file\n");
|
|
bclose(&f);
|
|
return ret;
|
|
}
|
|
|
|
/* One entry of an option table: option name and the flag bits it stands for. */
struct opt2flag {
	char *opt;     /* option name; NULL terminates a table */
	unsigned flag; /* bits OR-ed into the result when the name matches */
};
|
|
|
|
/*
 * Callback for do_opt2flag(): handles "gid=<id>" and "uid=<id>".
 *
 * The id is passed through userns_gid() / userns_uid() and the
 * resulting option, followed by a ',', is appended to @unknown at
 * offset *@uoff, which is advanced past it.
 *
 * Returns true if @opt was one of the two options, false otherwise.
 * @unknown is written to without a NULL check.
 */
static bool sb_opt_cb(char *opt, char *unknown, size_t *uoff)
{
	unsigned int id;

	/* %u, not %d: the conversion has to match the unsigned target. */
	if (sscanf(opt, "gid=%u", &id) == 1)
		*uoff += sprintf(unknown + *uoff, "gid=%d", userns_gid(id));
	else if (sscanf(opt, "uid=%u", &id) == 1)
		*uoff += sprintf(unknown + *uoff, "uid=%d", userns_uid(id));
	else
		return false;

	unknown[*uoff] = ',';
	(*uoff)++;
	return true;
}
|
|
|
|
static int do_opt2flag(char *opt, unsigned *flags, const struct opt2flag *opts, char *unknown,
|
|
bool (*cb)(char *opt, char *unknown, size_t *uoff))
|
|
{
|
|
int i;
|
|
char *end;
|
|
size_t uoff = 0;
|
|
|
|
while (1) {
|
|
end = strchr(opt, ',');
|
|
if (end)
|
|
*end = '\0';
|
|
|
|
for (i = 0; opts[i].opt != NULL; i++)
|
|
if (!strcmp(opts[i].opt, opt)) {
|
|
(*flags) |= opts[i].flag;
|
|
break;
|
|
}
|
|
|
|
if (opts[i].opt == NULL && cb && !cb(opt, unknown, &uoff)) {
|
|
if (!unknown) {
|
|
pr_err("Unknown option [%s]\n", opt);
|
|
return -1;
|
|
}
|
|
|
|
strcpy(unknown + uoff, opt);
|
|
uoff += strlen(opt);
|
|
unknown[uoff] = ',';
|
|
uoff++;
|
|
}
|
|
|
|
if (!end) {
|
|
if (uoff)
|
|
uoff--;
|
|
if (unknown)
|
|
unknown[uoff] = '\0';
|
|
break;
|
|
} else
|
|
opt = end + 1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int parse_mnt_flags(char *opt, unsigned *flags)
|
|
{
|
|
static const struct opt2flag mnt_opt2flag[] = {
|
|
{
|
|
"rw",
|
|
0,
|
|
},
|
|
{
|
|
"ro",
|
|
MS_RDONLY,
|
|
},
|
|
{
|
|
"nosuid",
|
|
MS_NOSUID,
|
|
},
|
|
{
|
|
"nodev",
|
|
MS_NODEV,
|
|
},
|
|
{
|
|
"noexec",
|
|
MS_NOEXEC,
|
|
},
|
|
{
|
|
"noatime",
|
|
MS_NOATIME,
|
|
},
|
|
{
|
|
"nodiratime",
|
|
MS_NODIRATIME,
|
|
},
|
|
{
|
|
"relatime",
|
|
MS_RELATIME,
|
|
},
|
|
{},
|
|
};
|
|
|
|
if (do_opt2flag(opt, flags, mnt_opt2flag, NULL, NULL))
|
|
return -1;
|
|
|
|
/* Otherwise the kernel assumes RELATIME by default */
|
|
if ((*flags & (MS_RELATIME | MS_NOATIME)) == 0)
|
|
*flags |= MS_STRICTATIME;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int parse_sb_opt(char *opt, unsigned *flags, char *uopt)
|
|
{
|
|
static const struct opt2flag sb_opt2flag[] = {
|
|
{
|
|
"rw",
|
|
0,
|
|
},
|
|
{
|
|
"ro",
|
|
MS_RDONLY,
|
|
},
|
|
{
|
|
"sync",
|
|
MS_SYNC,
|
|
},
|
|
{
|
|
"dirsync",
|
|
MS_DIRSYNC,
|
|
},
|
|
{
|
|
"mad",
|
|
MS_MANDLOCK,
|
|
},
|
|
{},
|
|
};
|
|
|
|
return do_opt2flag(opt, flags, sb_opt2flag, uopt, sb_opt_cb);
|
|
}
|
|
|
|
static int parse_mnt_opt(char *str, struct mount_info *mi, int *off)
|
|
{
|
|
char *istr = str, *end;
|
|
|
|
while (1) {
|
|
end = strchr(str, ' ');
|
|
if (!end) {
|
|
pr_err("Error parsing mount options\n");
|
|
return -1;
|
|
}
|
|
|
|
*end = '\0';
|
|
if (!strncmp(str, "-", 1))
|
|
break;
|
|
else if (!strncmp(str, "shared:", 7)) {
|
|
mi->flags |= MS_SHARED;
|
|
mi->shared_id = atoi(str + 7);
|
|
} else if (!strncmp(str, "master:", 7)) {
|
|
mi->flags |= MS_SLAVE;
|
|
mi->master_id = atoi(str + 7);
|
|
} else if (!strncmp(str, "propagate_from:", 15)) {
|
|
/* skip */;
|
|
} else if (!strncmp(str, "unbindable", 11))
|
|
mi->flags |= MS_UNBINDABLE;
|
|
else {
|
|
pr_err("Unknown option [%s]\n", str);
|
|
return -1;
|
|
}
|
|
|
|
str = end + 1;
|
|
}
|
|
|
|
*off = end - istr + 1;
|
|
return 0;
|
|
}
|
|
|
|
/*
 * mountinfo contains mangled paths: space, tab and backslash are emitted as
 * the octal escapes \040, \011 and \134. Decode those three escapes in place.
 * Any other backslash sequence is left untouched, and a decoded backslash is
 * never re-interpreted as the start of another escape.
 */
static void cure_path(char *path)
{
	const char *src;
	char *dst;

	/* fast path: nothing to decode */
	src = strchr(path, '\\');
	if (src == NULL)
		return;

	/* Everything before the first backslash is already in its final place. */
	dst = path + (src - path);

	while (*src) {
		char decoded;

		if (strncmp(src, "\\040", 4) == 0)
			decoded = ' ';
		else if (strncmp(src, "\\011", 4) == 0)
			decoded = '\t';
		else if (strncmp(src, "\\134", 4) == 0)
			decoded = '\\';
		else {
			*dst++ = *src++;
			continue;
		}

		*dst++ = decoded;
		src += 4;
	}

	*dst = 0;
}
|
|
|
|
static int parse_mountinfo_ent(char *str, struct mount_info *new, char **fsname)
|
|
{
|
|
unsigned int kmaj, kmin;
|
|
int ret, n, len;
|
|
char *sub, *opt = NULL;
|
|
char link_path[PATH_MAX];
|
|
|
|
new->mountpoint = xmalloc(PATH_MAX);
|
|
if (new->mountpoint == NULL)
|
|
goto err;
|
|
|
|
new->mountpoint[0] = '.';
|
|
ret = sscanf(str, "%i %i %u:%u %ms %s %ms %n", &new->mnt_id, &new->parent_mnt_id, &kmaj, &kmin, &new->root,
|
|
new->mountpoint + 1, &opt, &n);
|
|
if (ret != 7)
|
|
goto err;
|
|
|
|
cure_path(new->mountpoint);
|
|
cure_path(new->root);
|
|
|
|
len = strlen(new->root);
|
|
if (len >= PATH_MAX - 1) {
|
|
pr_err("new root path (%s) exceeds %d\n", new->root, PATH_MAX);
|
|
goto err;
|
|
}
|
|
strcpy(link_path, new->root);
|
|
if (strip_deleted(link_path, len)) {
|
|
strcpy(new->root, link_path);
|
|
new->deleted = true;
|
|
}
|
|
|
|
new->mountpoint = xrealloc(new->mountpoint, strlen(new->mountpoint) + 1);
|
|
if (!new->mountpoint)
|
|
goto err;
|
|
new->ns_mountpoint = new->mountpoint;
|
|
new->is_ns_root = is_root(new->ns_mountpoint + 1);
|
|
|
|
new->s_dev = new->s_dev_rt = MKKDEV(kmaj, kmin);
|
|
new->flags = 0;
|
|
if (parse_mnt_flags(opt, &new->flags))
|
|
goto err;
|
|
|
|
free(opt); /* we are going to reallocate/reuse this buffer */
|
|
opt = NULL;
|
|
|
|
str += n;
|
|
if (parse_mnt_opt(str, new, &n))
|
|
goto err;
|
|
|
|
str += n;
|
|
ret = sscanf(str, "%ms %ms %ms", fsname, &new->source, &opt);
|
|
if (ret == 2) {
|
|
/* src may be empty */
|
|
opt = new->source;
|
|
new->source = xstrdup("");
|
|
if (new->source == NULL)
|
|
goto err;
|
|
} else if (ret != 3)
|
|
goto err;
|
|
|
|
cure_path(new->source);
|
|
|
|
new->fsname = xstrdup(*fsname);
|
|
if (!new->fsname)
|
|
goto err;
|
|
|
|
/*
|
|
* The kernel reports "subtypes" sometimes and the valid
|
|
* type-vs-subtype delimiter is the dot symbol. We disregard
|
|
* any subtypes for the purpose of finding the fstype.
|
|
*/
|
|
sub = strchr(*fsname, '.');
|
|
if (sub)
|
|
*sub = 0;
|
|
|
|
new->fstype = find_fstype_by_name(*fsname);
|
|
|
|
new->options = xmalloc(strlen(opt) + 1);
|
|
if (!new->options)
|
|
goto err;
|
|
|
|
if (parse_sb_opt(opt, &new->sb_flags, new->options))
|
|
goto err;
|
|
|
|
ret = 0;
|
|
ret:
|
|
xfree(opt);
|
|
return ret;
|
|
err:
|
|
ret = -1;
|
|
goto ret;
|
|
}
|
|
|
|
/* Mountpoints registered by add_skip_mount() and checked by should_skip_mount(). */
static LIST_HEAD(skip_mount_list);
|
|
|
|
struct str_node {
|
|
struct list_head node;
|
|
char string[];
|
|
};
|
|
|
|
bool add_skip_mount(const char *mountpoint)
|
|
{
|
|
struct str_node *skip = xmalloc(sizeof(struct str_node) + strlen(mountpoint) + 1);
|
|
if (!skip)
|
|
return false;
|
|
|
|
strcpy(skip->string, mountpoint);
|
|
list_add(&skip->node, &skip_mount_list);
|
|
return true;
|
|
}
|
|
|
|
static bool should_skip_mount(char *mountpoint)
|
|
{
|
|
struct str_node *pos;
|
|
|
|
list_for_each_entry(pos, &skip_mount_list, node) {
|
|
if (is_same_path(mountpoint, pos->string))
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
int parse_timens_offsets(struct timespec *boff, struct timespec *moff)
|
|
{
|
|
int exit_code = -1;
|
|
FILE *f;
|
|
|
|
f = fopen_proc(PROC_SELF, "timens_offsets");
|
|
if (!f) {
|
|
pr_perror("Unable to open /proc/self/timens_offsets");
|
|
return exit_code;
|
|
}
|
|
while (fgets(buf, BUF_SIZE, f)) {
|
|
int64_t sec, nsec;
|
|
char clockid[10];
|
|
|
|
if (sscanf(buf, "%9s %" PRId64 " %" PRId64 "\n", clockid, &sec, &nsec) != 3) {
|
|
pr_err("Unable to parse: %s\n", buf);
|
|
goto out;
|
|
}
|
|
clockid[sizeof(clockid) - 1] = 0;
|
|
if (strcmp(clockid, "monotonic") == 0 || strcmp(clockid, __stringify(CLOCK_MONOTONIC)) == 0) {
|
|
moff->tv_sec = sec;
|
|
moff->tv_nsec = nsec;
|
|
continue;
|
|
}
|
|
if (strcmp(clockid, "boottime") == 0 || strcmp(clockid, __stringify(CLOCK_BOOTTIME)) == 0) {
|
|
boff->tv_sec = sec;
|
|
boff->tv_nsec = nsec;
|
|
continue;
|
|
}
|
|
pr_err("Unknown clockid: %s\n", clockid);
|
|
goto out;
|
|
}
|
|
exit_code = 0;
|
|
out:
|
|
fclose(f);
|
|
return exit_code;
|
|
}
|
|
|
|
static int get_mountinfo_sdev_from_mntid(int mnt_id, unsigned int *sdev)
|
|
{
|
|
int exit_code = -1;
|
|
FILE *f;
|
|
|
|
f = fopen_proc(PROC_SELF, "mountinfo");
|
|
if (!f)
|
|
return -1;
|
|
|
|
while (fgets(buf, BUF_SIZE, f)) {
|
|
unsigned int kmaj, kmin;
|
|
int id;
|
|
|
|
if (sscanf(buf, "%i %*i %u:%u", &id, &kmaj, &kmin) != 3) {
|
|
pr_err("Failed to parse mountinfo line %s\n", buf);
|
|
goto err;
|
|
}
|
|
|
|
if (id == mnt_id) {
|
|
*sdev = MKKDEV(kmaj, kmin);
|
|
exit_code = 0;
|
|
break;
|
|
}
|
|
}
|
|
err:
|
|
fclose(f);
|
|
return exit_code;
|
|
}
|
|
|
|
/* This works even on btrfs where stat does not show right sdev */
|
|
int get_sdev_from_fd(int fd, unsigned int *sdev, bool parse_mountinfo)
|
|
{
|
|
struct mount_info *mi;
|
|
int ret, mnt_id;
|
|
|
|
ret = get_fd_mntid(fd, &mnt_id);
|
|
if (ret < 0)
|
|
return -1;
|
|
|
|
/* Simple case mnt_id is in dumped mntns */
|
|
mi = lookup_mnt_id(mnt_id);
|
|
if (mi) {
|
|
*sdev = mi->s_dev_rt;
|
|
return 0;
|
|
}
|
|
|
|
if (!parse_mountinfo)
|
|
return -1;
|
|
|
|
/* Complex case mnt_id is in mntns created by criu */
|
|
return get_mountinfo_sdev_from_mntid(mnt_id, sdev);
|
|
}
|
|
|
|
struct mount_info *parse_mountinfo(pid_t pid, struct ns_id *nsid, bool for_dump)
|
|
{
|
|
struct mount_info *list = NULL;
|
|
FILE *f;
|
|
|
|
f = fopen_proc(pid, "mountinfo");
|
|
if (!f)
|
|
return NULL;
|
|
|
|
while (fgets(buf, BUF_SIZE, f)) {
|
|
struct mount_info *new;
|
|
int ret = -1;
|
|
char *fsname = NULL;
|
|
|
|
new = mnt_entry_alloc(false);
|
|
if (!new)
|
|
goto end;
|
|
|
|
new->nsid = nsid;
|
|
|
|
ret = parse_mountinfo_ent(buf, new, &fsname);
|
|
if (ret < 0) {
|
|
pr_err("Bad format in %d mountinfo: '%s'\n", pid, buf);
|
|
goto end;
|
|
}
|
|
|
|
/*
|
|
* Drop this mountpoint early, so that lookup_mnt_id/etc will
|
|
* fail loudly at "dump" stage if an opened file or another mnt
|
|
* depends on this one.
|
|
*/
|
|
if (for_dump && should_skip_mount(new->ns_mountpoint)) {
|
|
pr_info("\tskip %s @ %s\n", fsname, new->ns_mountpoint);
|
|
mnt_entry_free(new);
|
|
new = NULL;
|
|
goto end;
|
|
}
|
|
|
|
pr_info("\ttype %s source %s mnt_id %d s_dev %#x %s @ %s flags %#x options %s\n", fsname, new->source,
|
|
new->mnt_id, new->s_dev, new->root, new->ns_mountpoint, new->flags, new->options);
|
|
|
|
if (new->fstype->parse) {
|
|
ret = new->fstype->parse(new);
|
|
if (ret < 0) {
|
|
pr_err("Failed to parse FS specific data on %s\n", service_mountpoint(new));
|
|
mnt_entry_free(new);
|
|
new = NULL;
|
|
goto end;
|
|
}
|
|
|
|
if (ret > 0) {
|
|
pr_info("\tskipping fs mounted at %s\n", service_mountpoint(new) + 1);
|
|
mnt_entry_free(new);
|
|
new = NULL;
|
|
ret = 0;
|
|
goto end;
|
|
}
|
|
}
|
|
end:
|
|
if (fsname)
|
|
free(fsname);
|
|
|
|
if (new)
|
|
mntinfo_add_list_before(&list, new);
|
|
|
|
if (ret)
|
|
goto err;
|
|
}
|
|
out:
|
|
fclose(f);
|
|
return list;
|
|
|
|
err:
|
|
while (list) {
|
|
struct mount_info *next = list->next;
|
|
mnt_entry_free(list);
|
|
list = next;
|
|
}
|
|
goto out;
|
|
}
|
|
|
|
/*
 * Convert one hexadecimal digit character (either case) to its value 0..15.
 * Any other character yields 0.
 */
static char nybble(const char n)
{
	if ('0' <= n && n <= '9')
		return n - '0';
	if ('A' <= n && n <= 'F')
		return n - 'A' + 10;
	if ('a' <= n && n <= 'f')
		return n - 'a' + 10;
	return 0;
}
|
|
|
|
/*
 * Decode the hex string @tok (leading spaces skipped) into the byte storage
 * behind @fh->handle. The storage is zeroed first; decoding stops at the end
 * of the string or when pb_repeated_size(fh, handle) bytes have been written.
 * An odd trailing digit is decoded as the high nibble of a final byte.
 */
static void parse_fhandle_encoded(char *tok, FhEntry *fh)
{
	char *bytes = (char *)fh->handle;
	int i;

	memzero(bytes, pb_repeated_size(fh, handle));

	while (*tok == ' ')
		tok++;

	for (i = 0; *tok && i < pb_repeated_size(fh, handle); tok += 2) {
		bytes[i++] = (nybble(tok[0]) << 4) | nybble(tok[1]);

		/* ran out of input in the middle of a byte */
		if (!tok[1])
			break;
	}
}
|
|
|
|
/*
 * Parse the timerfd part of an fdinfo file into @tfy. @str holds the
 * already-read "clockid:" line; the following four lines are pulled from @f.
 *
 * Format is
 * clockid: 0
 * ticks: 0
 * settime flags: 01
 * it_value: (0, 49406829)
 * it_interval: (1, 0)
 *
 * Returns 0 on success, -1 on a malformed line, a verify_timerfd() failure,
 * or when the file ends early.
 */
static int parse_timerfd(struct bfd *f, char *str, TimerfdEntry *tfy)
{
	if (sscanf(str, "clockid: %d", &tfy->clockid) != 1)
		return -1;

	if (verify_timerfd(tfy) < 0)
		return -1;

	str = breadline(f);
	if (IS_ERR_OR_NULL(str))
		goto nodata;
	if (sscanf(str, "ticks: %llu", (unsigned long long *)&tfy->ticks) != 1)
		return -1;

	str = breadline(f);
	if (IS_ERR_OR_NULL(str))
		goto nodata;
	if (sscanf(str, "settime flags: 0%o", &tfy->settime_flags) != 1)
		return -1;

	str = breadline(f);
	if (IS_ERR_OR_NULL(str))
		goto nodata;
	if (sscanf(str, "it_value: (%llu, %llu)", (unsigned long long *)&tfy->vsec,
		   (unsigned long long *)&tfy->vnsec) != 2)
		return -1;

	str = breadline(f);
	if (IS_ERR_OR_NULL(str))
		goto nodata;
	if (sscanf(str, "it_interval: (%llu, %llu)", (unsigned long long *)&tfy->isec,
		   (unsigned long long *)&tfy->insec) != 2)
		return -1;

	return 0;

nodata:
	pr_err("No data left in proc file while parsing timerfd\n");
	return -1;
}
|
|
|
|
typedef struct bpfmap_fmt {
|
|
char *fmt;
|
|
void *value;
|
|
/*
|
|
* If newer kernels are adding additional entries, these entries need
|
|
* to be marked as optional in the protobuf definition and the parsing
|
|
* must be able to ignore it if running on an older kernel.
|
|
*/
|
|
protobuf_c_boolean *optional;
|
|
} bpfmap_fmt;
|
|
|
|
/*
 * Parse the BPF map part of an fdinfo file into @bpf. @str holds the
 * already-read first line ("map_type:"); further lines are pulled from @f.
 *
 * Format is:
 *
 * uint32_t map_type
 * uint32_t key_size
 * uint32_t value_size
 * uint32_t max_entries
 * uint32_t map_flags
 * uint64_t map_extra
 * uint64_t memlock
 * uint32_t map_id
 * boolean frozen
 *
 * Lines marked optional may be absent; their presence flag records whether
 * they were found. Returns 0 on success, -1 on a malformed mandatory line
 * or when the file ends early.
 */
static int parse_bpfmap(struct bfd *f, char *str, BpfmapFileEntry *bpf)
{
	/* This needs to be in the same order as in the fdinfo entry. */
	bpfmap_fmt map[] = {
		{ "map_type: %u", &bpf->map_type, NULL },
		{ "key_size: %u", &bpf->key_size, NULL },
		{ "value_size: %u", &bpf->value_size, NULL },
		{ "max_entries: %u", &bpf->max_entries, NULL },
		{ "map_flags: %" PRIx32 "", &bpf->map_flags, NULL },
		{ "map_extra: %" PRIx64 "", &bpf->map_extra, &bpf->has_map_extra },
		{ "memlock: %" PRIu64 "", &bpf->memlock, NULL },
		{ "map_id: %u", &bpf->map_id, NULL },
		{ "frozen: %d", &bpf->frozen, NULL },
	};

	size_t n = sizeof(map) / sizeof(map[0]);
	size_t i;

	for (i = 0; i < n; i++) {
		if (sscanf(str, map[i].fmt, map[i].value) != 1) {
			if (map[i].optional) {
				/*
				 * Optional line missing: record that and try
				 * the same input line against the next entry.
				 */
				*map[i].optional = false;
				continue;
			}
			return -1;
		}

		/* Record that the optional field is really present. */
		if (map[i].optional)
			*map[i].optional = true;

		if (i == n - 1)
			break;

		str = breadline(f);
		if (IS_ERR_OR_NULL(str)) {
			pr_err("No data left in proc file while parsing bpfmap\n");
			return -1;
		}
	}

	if (bpf->has_map_extra && bpf->map_extra)
		pr_warn("Non-zero value for fdinfo map_extra entry found. This will not be restored.\n");

	return 0;
}
|
|
|
|
/*
 * True when the line @str starts with the key @field immediately followed by
 * ':'. @field must be a string literal: it is concatenated with ":" and
 * sizeof(field) makes the comparison cover the key plus the colon.
 */
#define fdinfo_field(str, field) (!strncmp((str), field ":", sizeof(field)))
|
|
|
|
/* Defined further down; declared here because parse_fdinfo_pid_s() uses it for "lock:" lines. */
static int parse_file_lock_buf(char *buf, struct file_lock *fl, bool is_blocked);
|
|
static int parse_fdinfo_pid_s(int pid, int fd, int type, void *arg)
|
|
{
|
|
struct bfd f;
|
|
char *str;
|
|
bool entry_met = false;
|
|
int ret, exit_code = -1;
|
|
|
|
f.fd = open_proc(pid, "fdinfo/%d", fd);
|
|
if (f.fd < 0)
|
|
return -1;
|
|
|
|
if (bfdopenr(&f))
|
|
return -1;
|
|
|
|
while (1) {
|
|
str = breadline(&f);
|
|
if (!str)
|
|
break;
|
|
if (IS_ERR(str))
|
|
goto out;
|
|
|
|
if (fdinfo_field(str, "pos") || fdinfo_field(str, "flags") || fdinfo_field(str, "mnt_id")) {
|
|
unsigned long long val;
|
|
struct fdinfo_common *fdinfo = arg;
|
|
|
|
if (type != FD_TYPES__UND)
|
|
continue;
|
|
ret = sscanf(str, "%*s %lli", &val);
|
|
if (ret != 1)
|
|
goto parse_err;
|
|
|
|
if (fdinfo_field(str, "pos"))
|
|
fdinfo->pos = val;
|
|
else if (fdinfo_field(str, "flags"))
|
|
fdinfo->flags = val;
|
|
else if (fdinfo_field(str, "mnt_id"))
|
|
fdinfo->mnt_id = val;
|
|
|
|
entry_met = true;
|
|
continue;
|
|
}
|
|
|
|
if (fdinfo_field(str, "lock")) {
|
|
struct file_lock *fl;
|
|
struct fdinfo_common *fdinfo = arg;
|
|
char *flock_status = str + sizeof("lock:\t") - 1;
|
|
|
|
if (type != FD_TYPES__UND)
|
|
continue;
|
|
|
|
/*
|
|
* The lock status can be empty when the owner of the
|
|
* lock is invisible from our PID namespace.
|
|
* This unfortunate behavior is fixed in kernels v4.19
|
|
* and up (see commit 1cf8e5de40).
|
|
*/
|
|
if (flock_status[0] == '\0')
|
|
continue;
|
|
|
|
fl = alloc_file_lock();
|
|
if (!fl) {
|
|
pr_perror("Alloc file lock failed!");
|
|
goto out;
|
|
}
|
|
|
|
if (parse_file_lock_buf(flock_status, fl, 0)) {
|
|
xfree(fl);
|
|
goto parse_err;
|
|
}
|
|
|
|
pr_info("lockinfo: %lld:%d %x %d %02x:%02x:%ld %lld %s\n", fl->fl_id, fl->fl_kind, fl->fl_ltype,
|
|
fl->fl_owner, fl->maj, fl->min, fl->i_no, fl->start, fl->end);
|
|
|
|
if (fl->fl_kind == FL_UNKNOWN) {
|
|
pr_err("Unknown file lock!\n");
|
|
xfree(fl);
|
|
goto out;
|
|
}
|
|
|
|
fl->real_owner = fdinfo->owner;
|
|
fl->fl_holder = pid;
|
|
fl->owners_fd = fd;
|
|
list_add_tail(&fl->list, &file_lock_list);
|
|
}
|
|
|
|
if (type == FD_TYPES__UND)
|
|
continue;
|
|
|
|
if (fdinfo_field(str, "eventfd-count")) {
|
|
EventfdFileEntry *efd = arg;
|
|
|
|
if (type != FD_TYPES__EVENTFD)
|
|
goto parse_err;
|
|
ret = sscanf(str, "eventfd-count: %" PRIx64, &efd->counter);
|
|
if (ret != 1)
|
|
goto parse_err;
|
|
|
|
entry_met = true;
|
|
continue;
|
|
}
|
|
if (fdinfo_field(str, "clockid")) {
|
|
TimerfdEntry *tfe = arg;
|
|
|
|
if (type != FD_TYPES__TIMERFD)
|
|
goto parse_err;
|
|
ret = parse_timerfd(&f, str, tfe);
|
|
if (ret)
|
|
goto parse_err;
|
|
|
|
entry_met = true;
|
|
continue;
|
|
}
|
|
if (fdinfo_field(str, "tfd")) {
|
|
EventpollFileEntry *epfe = arg;
|
|
EventpollTfdEntry *e;
|
|
int i;
|
|
|
|
if (type != FD_TYPES__EVENTPOLL)
|
|
goto parse_err;
|
|
|
|
e = xmalloc(sizeof(EventpollTfdEntry));
|
|
if (!e)
|
|
goto out;
|
|
|
|
eventpoll_tfd_entry__init(e);
|
|
|
|
ret = sscanf(str,
|
|
"tfd: %d events: %x data: %llx"
|
|
" pos:%lli ino:%lx sdev:%x",
|
|
&e->tfd, &e->events, (long long *)&e->data, (long long *)&e->pos,
|
|
(long *)&e->inode, &e->dev);
|
|
if (ret < 3 || ret > 6) {
|
|
eventpoll_tfd_entry__free_unpacked(e, NULL);
|
|
goto parse_err;
|
|
} else if (ret == 3) {
|
|
e->has_dev = false;
|
|
e->has_inode = false;
|
|
e->has_pos = false;
|
|
} else if (ret == 6) {
|
|
e->has_dev = true;
|
|
e->has_inode = true;
|
|
e->has_pos = true;
|
|
} else if (ret < 6) {
|
|
eventpoll_tfd_entry__free_unpacked(e, NULL);
|
|
goto parse_err;
|
|
}
|
|
|
|
i = epfe->n_tfd++;
|
|
if (xrealloc_safe(&epfe->tfd, epfe->n_tfd * sizeof(EventpollTfdEntry *)))
|
|
goto out;
|
|
|
|
epfe->tfd[i] = e;
|
|
entry_met = true;
|
|
continue;
|
|
}
|
|
if (fdinfo_field(str, "sigmask")) {
|
|
SignalfdEntry *sfd = arg;
|
|
|
|
if (type != FD_TYPES__SIGNALFD)
|
|
goto parse_err;
|
|
ret = sscanf(str, "sigmask: %llx", (unsigned long long *)&sfd->sigmask);
|
|
if (ret != 1)
|
|
goto parse_err;
|
|
|
|
entry_met = true;
|
|
continue;
|
|
}
|
|
if (fdinfo_field(str, "fanotify flags")) {
|
|
FanotifyFileEntry *fe = arg;
|
|
|
|
if (type != FD_TYPES__FANOTIFY)
|
|
goto parse_err;
|
|
|
|
ret = sscanf(str, "fanotify flags:%x event-flags:%x", &fe->faflags, &fe->evflags);
|
|
if (ret != 2)
|
|
goto parse_err;
|
|
entry_met = true;
|
|
continue;
|
|
}
|
|
if (fdinfo_field(str, "fanotify ino")) {
|
|
void *buf, *ob;
|
|
FanotifyFileEntry *fe = arg;
|
|
FanotifyMarkEntry *me;
|
|
int hoff = 0, i;
|
|
|
|
if (type != FD_TYPES__FANOTIFY)
|
|
goto parse_err;
|
|
|
|
ob = buf = xmalloc(sizeof(FanotifyMarkEntry) + sizeof(FanotifyInodeMarkEntry) +
|
|
sizeof(FhEntry) + FH_ENTRY_SIZES__min_entries * sizeof(uint64_t));
|
|
if (!buf)
|
|
goto out;
|
|
|
|
me = xptr_pull(&buf, FanotifyMarkEntry);
|
|
fanotify_mark_entry__init(me);
|
|
me->ie = xptr_pull(&buf, FanotifyInodeMarkEntry);
|
|
fanotify_inode_mark_entry__init(me->ie);
|
|
me->ie->f_handle = xptr_pull(&buf, FhEntry);
|
|
fh_entry__init(me->ie->f_handle);
|
|
me->ie->f_handle->n_handle = FH_ENTRY_SIZES__min_entries;
|
|
me->ie->f_handle->handle = xptr_pull_s(&buf, FH_ENTRY_SIZES__min_entries * sizeof(uint64_t));
|
|
|
|
ret = sscanf(str,
|
|
"fanotify ino:%" PRIx64 " sdev:%x mflags:%x mask:%x ignored_mask:%x "
|
|
"fhandle-bytes:%x fhandle-type:%x f_handle: %n",
|
|
&me->ie->i_ino, &me->s_dev, &me->mflags, &me->mask, &me->ignored_mask,
|
|
&me->ie->f_handle->bytes, &me->ie->f_handle->type, &hoff);
|
|
if (ret != 7 || hoff == 0) {
|
|
xfree(ob);
|
|
goto parse_err;
|
|
}
|
|
|
|
parse_fhandle_encoded(str + hoff, me->ie->f_handle);
|
|
me->type = MARK_TYPE__INODE;
|
|
|
|
i = fe->n_mark++;
|
|
if (xrealloc_safe(&fe->mark, fe->n_mark * sizeof(FanotifyMarkEntry *))) {
|
|
xfree(ob);
|
|
goto out;
|
|
}
|
|
|
|
fe->mark[i] = me;
|
|
entry_met = true;
|
|
continue;
|
|
}
|
|
if (fdinfo_field(str, "fanotify mnt_id")) {
|
|
void *buf, *ob;
|
|
FanotifyFileEntry *fe = arg;
|
|
FanotifyMarkEntry *me;
|
|
int i;
|
|
|
|
if (type != FD_TYPES__FANOTIFY)
|
|
goto parse_err;
|
|
|
|
ob = buf = xmalloc(sizeof(FanotifyMarkEntry) + sizeof(FanotifyMountMarkEntry));
|
|
if (!buf)
|
|
goto out;
|
|
|
|
me = xptr_pull(&buf, FanotifyMarkEntry);
|
|
fanotify_mark_entry__init(me);
|
|
me->me = xptr_pull(&buf, FanotifyMountMarkEntry);
|
|
fanotify_mount_mark_entry__init(me->me);
|
|
|
|
ret = sscanf(str, "fanotify mnt_id:%x mflags:%x mask:%x ignored_mask:%x", &me->me->mnt_id,
|
|
&me->mflags, &me->mask, &me->ignored_mask);
|
|
if (ret != 4) {
|
|
xfree(ob);
|
|
goto parse_err;
|
|
}
|
|
|
|
me->type = MARK_TYPE__MOUNT;
|
|
|
|
i = fe->n_mark++;
|
|
if (xrealloc_safe(&fe->mark, fe->n_mark * sizeof(FanotifyMarkEntry *))) {
|
|
xfree(ob);
|
|
goto out;
|
|
}
|
|
|
|
fe->mark[i] = me;
|
|
entry_met = true;
|
|
continue;
|
|
}
|
|
if (fdinfo_field(str, "inotify wd")) {
|
|
void *buf, *ob;
|
|
InotifyFileEntry *ie = arg;
|
|
InotifyWdEntry *ify;
|
|
int hoff, i;
|
|
|
|
if (type != FD_TYPES__INOTIFY)
|
|
goto parse_err;
|
|
|
|
ob = buf = xmalloc(sizeof(InotifyWdEntry) + sizeof(FhEntry) +
|
|
FH_ENTRY_SIZES__min_entries * sizeof(uint64_t));
|
|
if (!buf)
|
|
goto out;
|
|
|
|
ify = xptr_pull(&buf, InotifyWdEntry);
|
|
inotify_wd_entry__init(ify);
|
|
ify->f_handle = xptr_pull(&buf, FhEntry);
|
|
fh_entry__init(ify->f_handle);
|
|
ify->f_handle->n_handle = FH_ENTRY_SIZES__min_entries;
|
|
ify->f_handle->handle = xptr_pull_s(&buf, FH_ENTRY_SIZES__min_entries * sizeof(uint64_t));
|
|
|
|
ret = sscanf(str,
|
|
"inotify wd:%x ino:%" PRIx64 " sdev:%x "
|
|
"mask:%x ignored_mask:%x "
|
|
"fhandle-bytes:%x fhandle-type:%x "
|
|
"f_handle: %n",
|
|
&ify->wd, &ify->i_ino, &ify->s_dev, &ify->mask, &ify->ignored_mask,
|
|
&ify->f_handle->bytes, &ify->f_handle->type, &hoff);
|
|
if (ret != 7) {
|
|
xfree(ob);
|
|
goto parse_err;
|
|
}
|
|
|
|
parse_fhandle_encoded(str + hoff, ify->f_handle);
|
|
|
|
i = ie->n_wd++;
|
|
if (xrealloc_safe(&ie->wd, ie->n_wd * sizeof(InotifyWdEntry *))) {
|
|
xfree(ob);
|
|
goto out;
|
|
}
|
|
|
|
ie->wd[i] = ify;
|
|
entry_met = true;
|
|
continue;
|
|
}
|
|
if (fdinfo_field(str, "map_type")) {
|
|
BpfmapFileEntry *bpf = arg;
|
|
if (type != FD_TYPES__BPFMAP)
|
|
goto parse_err;
|
|
|
|
ret = parse_bpfmap(&f, str, bpf);
|
|
if (ret)
|
|
goto parse_err;
|
|
|
|
entry_met = true;
|
|
continue;
|
|
}
|
|
}
|
|
|
|
exit_code = 0;
|
|
if (entry_met)
|
|
goto out;
|
|
/*
|
|
* An eventpoll/inotify file may have no target fds set thus
|
|
* resulting in no tfd: lines in proc. This is normal.
|
|
*/
|
|
if (type == FD_TYPES__EVENTPOLL || type == FD_TYPES__INOTIFY)
|
|
goto out;
|
|
|
|
pr_err("No records of type %d found in fdinfo file\n", type);
|
|
parse_err:
|
|
exit_code = -1;
|
|
pr_perror("%s: error parsing [%s] for %d", __func__, str, type);
|
|
out:
|
|
bclose(&f);
|
|
return exit_code;
|
|
}
|
|
|
|
/*
 * Parse /proc/@pid/fdinfo/@fd for records of @type into @arg.
 * Public wrapper around parse_fdinfo_pid_s(); returns its result.
 */
int parse_fdinfo_pid(int pid, int fd, int type, void *arg)
{
	int res = parse_fdinfo_pid_s(pid, fd, type, arg);

	return res;
}
|
|
|
|
int parse_fdinfo(int fd, int type, void *arg)
|
|
{
|
|
return parse_fdinfo_pid_s(PROC_SELF, fd, type, arg);
|
|
}
|
|
|
|
int get_fd_mntid(int fd, int *mnt_id)
|
|
{
|
|
struct fdinfo_common fdinfo = { .mnt_id = -1 };
|
|
|
|
if (parse_fdinfo(fd, FD_TYPES__UND, &fdinfo))
|
|
return -1;
|
|
|
|
*mnt_id = fdinfo.mnt_id;
|
|
return 0;
|
|
}
|
|
|
|
static int parse_file_lock_buf(char *buf, struct file_lock *fl, bool is_blocked)
|
|
{
|
|
int num;
|
|
char fl_flag[10], fl_type[15], fl_option[10];
|
|
|
|
if (is_blocked) {
|
|
num = sscanf(buf, "%lld: -> %s %s %s %d %x:%x:%ld %lld %s", &fl->fl_id, fl_flag, fl_type, fl_option,
|
|
&fl->fl_owner, &fl->maj, &fl->min, &fl->i_no, &fl->start, fl->end);
|
|
} else {
|
|
num = sscanf(buf, "%lld:%s %s %s %d %x:%x:%ld %lld %s", &fl->fl_id, fl_flag, fl_type, fl_option,
|
|
&fl->fl_owner, &fl->maj, &fl->min, &fl->i_no, &fl->start, fl->end);
|
|
}
|
|
|
|
if (num < 10) {
|
|
pr_err("Invalid file lock info (%d): %s\n", num, buf);
|
|
return -1;
|
|
}
|
|
|
|
if (!strcmp(fl_flag, "POSIX"))
|
|
fl->fl_kind = FL_POSIX;
|
|
else if (!strcmp(fl_flag, "FLOCK"))
|
|
fl->fl_kind = FL_FLOCK;
|
|
else if (!strcmp(fl_flag, "OFDLCK"))
|
|
fl->fl_kind = FL_OFD;
|
|
else if (!strcmp(fl_flag, "LEASE"))
|
|
fl->fl_kind = FL_LEASE;
|
|
else
|
|
fl->fl_kind = FL_UNKNOWN;
|
|
|
|
if (fl->fl_kind == FL_LEASE && !strcmp(fl_type, "BREAKING")) {
|
|
fl->fl_ltype |= LEASE_BREAKING;
|
|
}
|
|
|
|
if (!strcmp(fl_type, "MSNFS")) {
|
|
fl->fl_ltype |= LOCK_MAND;
|
|
|
|
if (!strcmp(fl_option, "READ")) {
|
|
fl->fl_ltype |= LOCK_READ;
|
|
} else if (!strcmp(fl_option, "RW")) {
|
|
fl->fl_ltype |= LOCK_RW;
|
|
} else if (!strcmp(fl_option, "WRITE")) {
|
|
fl->fl_ltype |= LOCK_WRITE;
|
|
} else {
|
|
pr_err("Unknown lock option!\n");
|
|
return -1;
|
|
}
|
|
} else {
|
|
if (!strcmp(fl_option, "UNLCK")) {
|
|
fl->fl_ltype |= F_UNLCK;
|
|
} else if (!strcmp(fl_option, "WRITE")) {
|
|
fl->fl_ltype |= F_WRLCK;
|
|
} else if (!strcmp(fl_option, "READ")) {
|
|
fl->fl_ltype |= F_RDLCK;
|
|
} else {
|
|
pr_err("Unknown lock option!\n");
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* True when pstree_item_by_real() finds an item for the real pid @pid. */
static bool pid_in_pstree(pid_t pid)
{
	if (pstree_item_by_real(pid) == NULL)
		return false;

	return true;
}
|
|
|
|
int parse_file_locks(void)
|
|
{
|
|
struct file_lock *fl;
|
|
|
|
FILE *fl_locks;
|
|
int exit_code = -1;
|
|
bool is_blocked;
|
|
|
|
if (kdat.has_fdinfo_lock)
|
|
return 0;
|
|
|
|
fl_locks = fopen_proc(PROC_GEN, "locks");
|
|
if (!fl_locks)
|
|
return -1;
|
|
|
|
while (fgets(buf, BUF_SIZE, fl_locks)) {
|
|
is_blocked = strstr(buf, "->") != NULL;
|
|
|
|
fl = alloc_file_lock();
|
|
if (!fl) {
|
|
pr_perror("Alloc file lock failed!");
|
|
goto err;
|
|
}
|
|
|
|
if (parse_file_lock_buf(buf, fl, is_blocked)) {
|
|
xfree(fl);
|
|
goto err;
|
|
}
|
|
|
|
pr_info("lockinfo: %lld:%d %x %d %02x:%02x:%ld %lld %s\n", fl->fl_id, fl->fl_kind, fl->fl_ltype,
|
|
fl->fl_owner, fl->maj, fl->min, fl->i_no, fl->start, fl->end);
|
|
|
|
if (fl->fl_kind == FL_UNKNOWN) {
|
|
pr_err("Unknown file lock: %s!\n", buf);
|
|
xfree(fl);
|
|
goto err;
|
|
}
|
|
|
|
if (is_blocked) {
|
|
/*
|
|
* All target processes are stopped in this moment and
|
|
* can't wait any locks.
|
|
*/
|
|
pr_debug("Skip blocked processes\n");
|
|
xfree(fl);
|
|
continue;
|
|
}
|
|
|
|
if ((fl->fl_kind == FL_POSIX) && !pid_in_pstree(fl->fl_owner)) {
|
|
/*
|
|
* We only care about tasks which are taken
|
|
* into dump, so we only collect file locks
|
|
* belong to these tasks.
|
|
*/
|
|
xfree(fl);
|
|
continue;
|
|
}
|
|
|
|
list_add_tail(&fl->list, &file_lock_list);
|
|
}
|
|
|
|
exit_code = 0;
|
|
err:
|
|
fclose(fl_locks);
|
|
return exit_code;
|
|
}
|
|
|
|
void free_posix_timers(struct proc_posix_timers_stat *st)
|
|
{
|
|
while (!list_empty(&st->timers)) {
|
|
struct proc_posix_timer *timer;
|
|
timer = list_first_entry(&st->timers, struct proc_posix_timer, list);
|
|
list_del(&timer->list);
|
|
xfree(timer);
|
|
}
|
|
}
|
|
|
|
int parse_posix_timers(pid_t pid, struct proc_posix_timers_stat *args)
|
|
{
|
|
int exit_code = -1;
|
|
int pid_t;
|
|
int i = 0;
|
|
|
|
struct bfd f;
|
|
char *s;
|
|
char sigpid[7];
|
|
char tidpid[4];
|
|
|
|
struct proc_posix_timer *timer = NULL;
|
|
|
|
INIT_LIST_HEAD(&args->timers);
|
|
args->timer_n = 0;
|
|
|
|
f.fd = open_proc(pid, "timers");
|
|
if (f.fd < 0)
|
|
return -1;
|
|
|
|
if (bfdopenr(&f))
|
|
return -1;
|
|
|
|
while (1) {
|
|
char pbuf[17]; /* 16 + eol */
|
|
|
|
s = breadline(&f);
|
|
if (!s)
|
|
break;
|
|
if (IS_ERR(s))
|
|
goto err;
|
|
|
|
switch (i % 4) {
|
|
case 0:
|
|
timer = xzalloc(sizeof(struct proc_posix_timer));
|
|
if (timer == NULL)
|
|
goto err;
|
|
|
|
if (sscanf(s, "ID: %ld", &timer->spt.it_id) != 1)
|
|
goto err;
|
|
break;
|
|
case 1:
|
|
if (sscanf(s, "signal: %d/%16s", &timer->spt.si_signo, pbuf) != 2)
|
|
goto err;
|
|
break;
|
|
case 2:
|
|
if (sscanf(s, "notify: %6[a-z]/%3[a-z].%d\n", sigpid, tidpid, &pid_t) != 3)
|
|
goto err;
|
|
break;
|
|
case 3:
|
|
if (sscanf(s, "ClockID: %d\n", &timer->spt.clock_id) != 1)
|
|
goto err;
|
|
|
|
timer->spt.sival_ptr = NULL;
|
|
if (sscanf(pbuf, "%p", &timer->spt.sival_ptr) != 1 && strcmp(pbuf, "(null)")) {
|
|
pr_err("Unable to parse '%s'\n", pbuf);
|
|
goto err;
|
|
}
|
|
|
|
if (tidpid[0] == 't') {
|
|
timer->spt.it_sigev_notify = SIGEV_THREAD_ID;
|
|
timer->spt.notify_thread_id = pid_t;
|
|
} else {
|
|
switch (sigpid[0]) {
|
|
case 's':
|
|
timer->spt.it_sigev_notify = SIGEV_SIGNAL;
|
|
break;
|
|
case 't':
|
|
timer->spt.it_sigev_notify = SIGEV_THREAD;
|
|
break;
|
|
default:
|
|
timer->spt.it_sigev_notify = SIGEV_NONE;
|
|
break;
|
|
}
|
|
}
|
|
|
|
list_add(&timer->list, &args->timers);
|
|
timer = NULL;
|
|
args->timer_n++;
|
|
break;
|
|
}
|
|
i++;
|
|
}
|
|
|
|
exit_code = 0;
|
|
out:
|
|
bclose(&f);
|
|
return exit_code;
|
|
err:
|
|
xfree(timer);
|
|
free_posix_timers(args);
|
|
pr_perror("Parse error in posix timers proc file!");
|
|
goto out;
|
|
}
|
|
|
|
int parse_threads(int pid, struct pid **_t, int *_n)
|
|
{
|
|
struct dirent *de;
|
|
DIR *dir;
|
|
struct pid *t = NULL;
|
|
int nr = 1;
|
|
|
|
if (*_t)
|
|
t = *_t;
|
|
|
|
dir = opendir_proc(pid, "task");
|
|
if (!dir)
|
|
return -1;
|
|
|
|
while ((de = readdir(dir))) {
|
|
struct pid *tmp;
|
|
|
|
/* We expect numbers only here */
|
|
if (de->d_name[0] == '.')
|
|
continue;
|
|
|
|
if (*_t == NULL) {
|
|
tmp = xrealloc(t, nr * sizeof(struct pid));
|
|
if (!tmp) {
|
|
xfree(t);
|
|
closedir(dir);
|
|
return -1;
|
|
}
|
|
t = tmp;
|
|
t[nr - 1].ns[0].virt = -1;
|
|
}
|
|
t[nr - 1].real = atoi(de->d_name);
|
|
t[nr - 1].state = TASK_THREAD;
|
|
nr++;
|
|
}
|
|
|
|
closedir(dir);
|
|
|
|
if (*_t == NULL) {
|
|
*_t = t;
|
|
*_n = nr - 1;
|
|
} else
|
|
BUG_ON(nr - 1 != *_n);
|
|
|
|
return 0;
|
|
}
|
|
|
|
int parse_cgroup_file(FILE *f, struct list_head *retl, unsigned int *n)
|
|
{
|
|
while (fgets(buf, BUF_SIZE, f)) {
|
|
struct cg_ctl *ncc, *cc;
|
|
char *name, *path = NULL, *e;
|
|
|
|
ncc = xmalloc(sizeof(*cc));
|
|
if (!ncc)
|
|
goto err;
|
|
|
|
/*
|
|
* Typical output (':' is a separator here)
|
|
*
|
|
* 4:cpu,cpuacct:/
|
|
* 3:cpuset:/
|
|
* 2:name=systemd:/user.slice/user-1000.slice/session-1.scope
|
|
*/
|
|
name = strchr(buf, ':');
|
|
if (!name) {
|
|
pr_err("Failed parsing cgroup %s\n", buf);
|
|
xfree(ncc);
|
|
goto err;
|
|
}
|
|
path = strchr(++name, ':');
|
|
if (!path) {
|
|
pr_err("Failed parsing cgroup %s\n", buf);
|
|
xfree(ncc);
|
|
goto err;
|
|
}
|
|
e = strchr(name, '\n');
|
|
*path++ = '\0';
|
|
if (e)
|
|
*e = '\0';
|
|
|
|
/*
|
|
* Controllers and their props might be
|
|
* configured the way some of them are
|
|
* not taken into the image for migration
|
|
* sake or container specifics.
|
|
*/
|
|
if (cgp_should_skip_controller(name)) {
|
|
pr_debug("cg-prop: Skipping controller %s\n", name);
|
|
xfree(ncc);
|
|
continue;
|
|
}
|
|
|
|
ncc->name = xstrdup(name);
|
|
ncc->path = xstrdup(path);
|
|
ncc->cgns_prefix = 0;
|
|
if (!ncc->name || !ncc->path) {
|
|
xfree(ncc->name);
|
|
xfree(ncc->path);
|
|
xfree(ncc);
|
|
goto err;
|
|
}
|
|
|
|
list_for_each_entry(cc, retl, l)
|
|
if (strcmp(cc->name, name) >= 0)
|
|
break;
|
|
|
|
list_add_tail(&ncc->l, &cc->l);
|
|
(*n)++;
|
|
}
|
|
|
|
return 0;
|
|
|
|
err:
|
|
put_ctls(retl);
|
|
return -1;
|
|
}
|
|
|
|
int parse_thread_cgroup(int pid, int tid, struct parasite_dump_cgroup_args *args, struct list_head *retl,
|
|
unsigned int *n)
|
|
{
|
|
FILE *f;
|
|
int ret;
|
|
LIST_HEAD(internal);
|
|
unsigned int n_internal = 0;
|
|
struct cg_ctl *intern, *ext;
|
|
|
|
f = fopen_proc(pid, "task/%d/cgroup", tid);
|
|
if (!f)
|
|
return -1;
|
|
|
|
ret = parse_cgroup_file(f, retl, n);
|
|
fclose(f);
|
|
if (ret < 0)
|
|
return -1;
|
|
|
|
/* No parasite args, we're dumping criu's cg set, so we don't need to
|
|
* try and parse the "internal" cgroup set to find namespace
|
|
* boundaries.
|
|
*/
|
|
if (!args)
|
|
return 0;
|
|
|
|
f = fmemopen(args->contents, strlen(args->contents), "r");
|
|
if (!f) {
|
|
pr_perror("couldn't fmemopen cgroup buffer %s", args->contents);
|
|
return -1;
|
|
}
|
|
|
|
ret = parse_cgroup_file(f, &internal, &n_internal);
|
|
fclose(f);
|
|
if (ret < 0) {
|
|
pr_err("couldn't parse internal cgroup file\n");
|
|
return -1;
|
|
}
|
|
|
|
/* Here's where we actually compute the cgns prefix. Consider a task
|
|
* in /foo/bar which has unshared its namespace at /foo. The internal
|
|
* path is /bar, but the external path is /foo/bar, and the cgns
|
|
* prefix is /foo. The algorithm is:
|
|
*
|
|
* // no cg ns unshare in this case
|
|
* if (internal == external)
|
|
* continue;
|
|
* idx = find_suffix_pos(external, internal)
|
|
* cgns_prefix = external[:idx]
|
|
*/
|
|
list_for_each_entry(intern, &internal, l) {
|
|
list_for_each_entry(ext, retl, l) {
|
|
char *pos;
|
|
|
|
if (strcmp(ext->name, intern->name))
|
|
continue;
|
|
|
|
/* If the cgroup namespace was unshared at / (or there
|
|
* is no cgroup namespace relative to criu), the paths
|
|
* are equal and we don't need to set a prefix.
|
|
*/
|
|
if (!strcmp(ext->path, intern->path))
|
|
continue;
|
|
|
|
/* +1 here to chop off the leading / */
|
|
pos = ext->path + strlen(ext->path) - strlen(intern->path + 1);
|
|
if (strcmp(pos, intern->path + 1)) {
|
|
pr_err("invalid cgroup configuration, %s is not a suffix of %s\n", intern->path,
|
|
ext->path);
|
|
ret = -1;
|
|
goto out;
|
|
}
|
|
|
|
ext->cgns_prefix = pos - ext->path;
|
|
if (ext->path[ext->cgns_prefix - 1] == '/')
|
|
ext->cgns_prefix--;
|
|
}
|
|
}
|
|
|
|
out:
|
|
put_ctls(&internal);
|
|
return ret;
|
|
}
|
|
|
|
void put_ctls(struct list_head *l)
|
|
{
|
|
struct cg_ctl *c, *n;
|
|
|
|
list_for_each_entry_safe(c, n, l, l) {
|
|
xfree(c->name);
|
|
xfree(c->path);
|
|
xfree(c);
|
|
}
|
|
INIT_LIST_HEAD(l);
|
|
}
|
|
|
|
/* Parse and create all the real controllers. This does not include things with
|
|
* the "name=" prefix, e.g. systemd.
|
|
*/
|
|
int collect_controllers(struct list_head *cgroups, unsigned int *n_cgroups)
|
|
{
|
|
int exit_code = -1;
|
|
FILE *f;
|
|
|
|
f = fopen_proc(PROC_SELF, "cgroup");
|
|
if (f == NULL)
|
|
return -1;
|
|
|
|
while (fgets(buf, BUF_SIZE, f)) {
|
|
struct cg_controller *nc = NULL;
|
|
char *controllers, *off;
|
|
|
|
controllers = strchr(buf, ':');
|
|
if (!controllers) {
|
|
pr_err("Unable to parse \"%s\"\n", buf);
|
|
goto err;
|
|
}
|
|
controllers++;
|
|
|
|
off = strchr(controllers, ':');
|
|
if (!off) {
|
|
pr_err("Unable to parse \"%s\"\n", buf);
|
|
goto err;
|
|
}
|
|
*off = '\0';
|
|
|
|
if (cgp_should_skip_controller(controllers)) {
|
|
pr_debug("cg-prop: Skipping controller %s\n", controllers);
|
|
continue;
|
|
}
|
|
|
|
while (1) {
|
|
off = strchr(controllers, ',');
|
|
if (off)
|
|
*off = '\0';
|
|
|
|
if (!strncmp("name=", controllers, 5))
|
|
goto skip;
|
|
|
|
if (!nc) {
|
|
nc = new_controller(controllers);
|
|
if (!nc)
|
|
goto err;
|
|
list_add_tail(&nc->l, cgroups);
|
|
(*n_cgroups)++;
|
|
} else {
|
|
void *m;
|
|
char *n;
|
|
|
|
nc->n_controllers++;
|
|
m = xrealloc(nc->controllers, sizeof(char *) * nc->n_controllers);
|
|
if (!m)
|
|
goto err;
|
|
|
|
nc->controllers = m;
|
|
|
|
n = xstrdup(controllers);
|
|
if (!n)
|
|
goto err;
|
|
|
|
nc->controllers[nc->n_controllers - 1] = n;
|
|
}
|
|
|
|
skip:
|
|
if (!off)
|
|
break;
|
|
controllers = off + 1;
|
|
}
|
|
}
|
|
|
|
exit_code = 0;
|
|
err:
|
|
fclose(f);
|
|
return exit_code;
|
|
}
|
|
|
|
/*
|
|
* If an OverlayFS mountpoint is found in the mountinfo table,
|
|
* we enable opts.overlayfs, which is a workaround for the
|
|
* OverlayFS Kernel bug.
|
|
*
|
|
* See fixup_overlayfs for details.
|
|
*/
|
|
int overlayfs_parse(struct mount_info *new)
|
|
{
|
|
opts.overlayfs = true;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* AUFS callback function to "fix up" the root pathname.
|
|
* See sysfs_parse.c for details.
|
|
*/
|
|
int aufs_parse(struct mount_info *new)
|
|
{
|
|
int ret = 0;
|
|
|
|
if (!strcmp(new->ns_mountpoint, "./")) {
|
|
opts.aufs = true;
|
|
ret = parse_aufs_branches(new);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
int parse_children(pid_t pid, pid_t **_c, int *_n)
|
|
{
|
|
pid_t *ch = NULL;
|
|
int nr = 0;
|
|
DIR *dir;
|
|
struct dirent *de;
|
|
struct bfd f;
|
|
|
|
dir = opendir_proc(pid, "task");
|
|
if (dir == NULL)
|
|
return -1;
|
|
|
|
while ((de = readdir(dir))) {
|
|
char *pos, *end;
|
|
|
|
if (dir_dots(de))
|
|
continue;
|
|
|
|
f.fd = open_proc(pid, "task/%s/children", de->d_name);
|
|
if (f.fd < 0)
|
|
goto err;
|
|
|
|
if (bfdopenr(&f))
|
|
goto err;
|
|
|
|
while (1) {
|
|
pid_t val, *tmp;
|
|
|
|
pos = breadchr(&f, ' ');
|
|
if (IS_ERR(pos))
|
|
goto err_close;
|
|
if (pos == NULL)
|
|
break;
|
|
|
|
val = strtol(pos, &end, 0);
|
|
|
|
if (*end != 0 && *end != ' ') {
|
|
pr_err("Unable to parse %s\n", end);
|
|
goto err_close;
|
|
}
|
|
|
|
tmp = xrealloc(ch, (nr + 1) * sizeof(pid_t));
|
|
if (!tmp)
|
|
goto err_close;
|
|
|
|
ch = tmp;
|
|
ch[nr] = val;
|
|
nr++;
|
|
}
|
|
bclose(&f);
|
|
}
|
|
|
|
*_c = ch;
|
|
*_n = nr;
|
|
|
|
closedir(dir);
|
|
return 0;
|
|
err_close:
|
|
bclose(&f);
|
|
err:
|
|
closedir(dir);
|
|
xfree(ch);
|
|
return -1;
|
|
}
|
|
|
|
#define CSEC_PER_SEC 100
|
|
|
|
int parse_uptime(uint64_t *upt)
|
|
{
|
|
unsigned long sec, csec;
|
|
FILE *f;
|
|
|
|
f = fopen("/proc/uptime", "r");
|
|
if (!f) {
|
|
pr_perror("Failed to fopen /proc/uptime");
|
|
return -1;
|
|
}
|
|
|
|
if (fscanf(f, "%lu.%2lu", &sec, &csec) != 2) {
|
|
pr_perror("Failed to parse /proc/uptime");
|
|
fclose(f);
|
|
return -1;
|
|
}
|
|
|
|
*upt = sec * USEC_PER_SEC + csec * (USEC_PER_SEC / CSEC_PER_SEC);
|
|
|
|
fclose(f);
|
|
return 0;
|
|
}
|