mirror of
https://github.com/checkpoint-restore/criu
synced 2025-08-29 13:28:27 +00:00
kdat: Handle pagemaps with zeroed pfns
Recent kernels allow an unprivileged user to read the /proc pagemap file, but zero out the PFNs in it. Support this mode for user dumps. https://github.com/xemul/criu/issues/101 Signed-off-by: Pavel Emelyanov <xemul@virtuozzo.com> Acked-by: Andrew Vagin <avagin@virtuozzo.com>
This commit is contained in:
parent
40184b9797
commit
d7684252c8
@ -16,6 +16,13 @@ extern int kerndat_get_dirty_track(void);
|
|||||||
extern int kerndat_fdinfo_has_lock(void);
|
extern int kerndat_fdinfo_has_lock(void);
|
||||||
extern int kerndat_loginuid(bool only_dump);
|
extern int kerndat_loginuid(bool only_dump);
|
||||||
|
|
||||||
|
/* How much of the proc pagemap file is usable; the value is kept in kdat.pmap. */
enum pagemap_func {
|
||||||
|
PM_UNKNOWN, /* first enumerator; presumably the not-yet-probed state -- TODO confirm */
|
||||||
|
PM_DISABLED, /* /proc/pid/pagemap doesn't open (user mode) */
|
||||||
|
PM_FLAGS_ONLY, /* pagemap zeroes pfn part (user mode) */
|
||||||
|
PM_FULL, /* pagemap entries carry a non-zero pfn */
|
||||||
|
};
|
||||||
|
|
||||||
struct kerndat_s {
|
struct kerndat_s {
|
||||||
dev_t shmem_dev;
|
dev_t shmem_dev;
|
||||||
int tcp_max_rshare;
|
int tcp_max_rshare;
|
||||||
@ -27,6 +34,7 @@ struct kerndat_s {
|
|||||||
unsigned long task_size;
|
unsigned long task_size;
|
||||||
bool ipv6;
|
bool ipv6;
|
||||||
bool has_loginuid;
|
bool has_loginuid;
|
||||||
|
enum pagemap_func pmap;
|
||||||
};
|
};
|
||||||
|
|
||||||
extern struct kerndat_s kdat;
|
extern struct kerndat_s kdat;
|
||||||
|
50
kerndat.c
50
kerndat.c
@ -41,6 +41,42 @@ struct kerndat_s kdat = {
|
|||||||
.tcp_max_rshare = 87380,
|
.tcp_max_rshare = 87380,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static int check_pagemap(void)
|
||||||
|
{
|
||||||
|
int ret, fd;
|
||||||
|
u64 pfn = 0;
|
||||||
|
|
||||||
|
fd = __open_proc(PROC_SELF, EPERM, O_RDONLY, "pagemap");
|
||||||
|
if (fd < 0) {
|
||||||
|
if (errno == EPERM) {
|
||||||
|
pr_info("Pagemap disabled");
|
||||||
|
kdat.pmap = PM_DISABLED;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Get the PFN of some present page. Stack is here, so try it :) */
|
||||||
|
ret = pread(fd, &pfn, sizeof(pfn), (((unsigned long)&ret) / page_size()) * sizeof(pfn));
|
||||||
|
if (ret != sizeof(pfn)) {
|
||||||
|
pr_perror("Can't read pagemap");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
close(fd);
|
||||||
|
|
||||||
|
if ((pfn & PME_PFRAME_MASK) == 0) {
|
||||||
|
pr_info("Pagemap provides flags only\n");
|
||||||
|
kdat.pmap = PM_FLAGS_ONLY;
|
||||||
|
} else {
|
||||||
|
pr_info("Pagemap is fully functional\n");
|
||||||
|
kdat.pmap = PM_FULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Anonymous shared mappings are backed by hidden tmpfs
|
* Anonymous shared mappings are backed by hidden tmpfs
|
||||||
* mount. Find out its dev to distinguish such mappings
|
* mount. Find out its dev to distinguish such mappings
|
||||||
@ -322,13 +358,15 @@ static int init_zero_page_pfn()
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (kdat.pmap != PM_FULL) {
|
||||||
|
pr_info("Zero page detection failed, optimization turns off.\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
ret = vaddr_to_pfn((unsigned long)addr, &kdat.zero_page_pfn);
|
ret = vaddr_to_pfn((unsigned long)addr, &kdat.zero_page_pfn);
|
||||||
munmap(addr, PAGE_SIZE);
|
munmap(addr, PAGE_SIZE);
|
||||||
|
|
||||||
if (ret == 1) {
|
if (kdat.zero_page_pfn == 0)
|
||||||
pr_info("Zero page detection failed, optimization turns off.\n");
|
|
||||||
ret = 0;
|
|
||||||
} else if (kdat.zero_page_pfn == 0)
|
|
||||||
ret = -1;
|
ret = -1;
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
@ -456,6 +494,8 @@ int kerndat_init(void)
|
|||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
|
ret = check_pagemap();
|
||||||
|
if (!ret)
|
||||||
ret = kerndat_get_shmemdev();
|
ret = kerndat_get_shmemdev();
|
||||||
if (!ret)
|
if (!ret)
|
||||||
ret = kerndat_get_dirty_track();
|
ret = kerndat_get_dirty_track();
|
||||||
@ -487,6 +527,8 @@ int kerndat_init_rst(void)
|
|||||||
* not available inside namespaces.
|
* not available inside namespaces.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
ret = check_pagemap();
|
||||||
|
if (!ret)
|
||||||
ret = tcp_read_sysctl_limits();
|
ret = tcp_read_sysctl_limits();
|
||||||
if (!ret)
|
if (!ret)
|
||||||
ret = get_last_cap();
|
ret = get_last_cap();
|
||||||
|
@ -46,13 +46,14 @@ int pmc_init(pmc_t *pmc, pid_t pid, const struct list_head *vma_head, size_t siz
|
|||||||
if (!pmc->map)
|
if (!pmc->map)
|
||||||
goto err;
|
goto err;
|
||||||
|
|
||||||
pmc->fd = __open_proc(pid, EPERM, O_RDONLY, "pagemap");
|
if (kdat.pmap == PM_DISABLED) {
|
||||||
if (pmc->fd < 0) {
|
pmc->fd = -1;
|
||||||
if (errno != EPERM)
|
|
||||||
goto err;
|
|
||||||
|
|
||||||
pr_warn("No pagemap for %d available, "
|
pr_warn("No pagemap for %d available, "
|
||||||
"switching to greedy mode\n", pid);
|
"switching to greedy mode\n", pid);
|
||||||
|
} else {
|
||||||
|
pmc->fd = open_proc(pid, "pagemap");
|
||||||
|
if (pmc->fd < 0)
|
||||||
|
goto err;
|
||||||
}
|
}
|
||||||
|
|
||||||
pr_debug("created for pid %d (takes %zu bytes)\n", pid, pmc->map_len);
|
pr_debug("created for pid %d (takes %zu bytes)\n", pid, pmc->map_len);
|
||||||
|
4
util.c
4
util.c
@ -763,9 +763,9 @@ int vaddr_to_pfn(unsigned long vaddr, u64 *pfn)
|
|||||||
int fd, ret = -1;
|
int fd, ret = -1;
|
||||||
off_t off;
|
off_t off;
|
||||||
|
|
||||||
fd = __open_proc(getpid(), EPERM, O_RDONLY, "pagemap");
|
fd = open_proc(getpid(), "pagemap");
|
||||||
if (fd < 0)
|
if (fd < 0)
|
||||||
return errno == EPERM ? 1 : -1;
|
return -1;
|
||||||
|
|
||||||
off = (vaddr / page_size()) * sizeof(u64);
|
off = (vaddr / page_size()) * sizeof(u64);
|
||||||
ret = pread(fd, pfn, sizeof(*pfn), off);
|
ret = pread(fd, pfn, sizeof(*pfn), off);
|
||||||
|
18
vdso.c
18
vdso.c
@ -43,20 +43,19 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
|
|||||||
struct vma_area *proxy_vdso_marked = NULL;
|
struct vma_area *proxy_vdso_marked = NULL;
|
||||||
struct vma_area *proxy_vvar_marked = NULL;
|
struct vma_area *proxy_vvar_marked = NULL;
|
||||||
struct parasite_vdso_vma_entry *args;
|
struct parasite_vdso_vma_entry *args;
|
||||||
int fd, ret, exit_code = -1;
|
int fd = -1, ret, exit_code = -1;
|
||||||
u64 pfn = VDSO_BAD_PFN;
|
u64 pfn = VDSO_BAD_PFN;
|
||||||
struct vma_area *vma;
|
struct vma_area *vma;
|
||||||
off_t off;
|
off_t off;
|
||||||
|
|
||||||
args = parasite_args(ctl, struct parasite_vdso_vma_entry);
|
args = parasite_args(ctl, struct parasite_vdso_vma_entry);
|
||||||
fd = __open_proc(pid, EPERM, O_RDONLY, "pagemap");
|
if (kdat.pmap == PM_FULL) {
|
||||||
if (fd < 0) {
|
BUG_ON(vdso_pfn == VDSO_BAD_PFN);
|
||||||
if (errno == EPERM) {
|
fd = open_proc(pid, "pagemap");
|
||||||
pr_info("Pagemap is unavailable, trying a slow way\n");
|
if (fd < 0)
|
||||||
} else
|
|
||||||
return -1;
|
return -1;
|
||||||
} else
|
} else
|
||||||
BUG_ON(vdso_pfn == VDSO_BAD_PFN);
|
pr_info("Pagemap is unavailable, trying a slow way\n");
|
||||||
|
|
||||||
list_for_each_entry(vma, &vma_area_list->h, list) {
|
list_for_each_entry(vma, &vma_area_list->h, list) {
|
||||||
if (!vma_area_is(vma, VMA_AREA_REGULAR))
|
if (!vma_area_is(vma, VMA_AREA_REGULAR))
|
||||||
@ -311,8 +310,11 @@ int vdso_init(void)
|
|||||||
{
|
{
|
||||||
if (vdso_fill_self_symtable(&vdso_sym_rt))
|
if (vdso_fill_self_symtable(&vdso_sym_rt))
|
||||||
return -1;
|
return -1;
|
||||||
if (vaddr_to_pfn(vdso_sym_rt.vma_start, &vdso_pfn) != 0)
|
|
||||||
|
if (kdat.pmap != PM_FULL)
|
||||||
pr_info("VDSO detection turned off\n");
|
pr_info("VDSO detection turned off\n");
|
||||||
|
else if (vaddr_to_pfn(vdso_sym_rt.vma_start, &vdso_pfn))
|
||||||
|
return -1;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user