diff --git a/include/kerndat.h b/include/kerndat.h
index 23d91044d..a02d15bc1 100644
--- a/include/kerndat.h
+++ b/include/kerndat.h
@@ -16,6 +16,13 @@ extern int kerndat_get_dirty_track(void);
 extern int kerndat_fdinfo_has_lock(void);
 extern int kerndat_loginuid(bool only_dump);
 
+enum pagemap_func {
+	PM_UNKNOWN,
+	PM_DISABLED,	/* /proc/pid/pagemap doesn't open (user mode) */
+	PM_FLAGS_ONLY,	/* pagemap zeroes pfn part (user mode) */
+	PM_FULL,
+};
+
 struct kerndat_s {
 	dev_t shmem_dev;
 	int tcp_max_rshare;
@@ -27,6 +34,7 @@ struct kerndat_s {
 	unsigned long task_size;
 	bool ipv6;
 	bool has_loginuid;
+	enum pagemap_func pmap;
 };
 
 extern struct kerndat_s kdat;
diff --git a/kerndat.c b/kerndat.c
index a7c72a2f2..696701d4f 100644
--- a/kerndat.c
+++ b/kerndat.c
@@ -41,6 +41,48 @@ struct kerndat_s kdat = {
 	.tcp_max_rshare = 87380,
 };
 
+/*
+ * Probe /proc/self/pagemap and record in kdat.pmap whether it cannot be
+ * opened (EPERM), reports flags only (PFN bits read as zero), or is full.
+ * Returns 0 when the state was determined, -1 on any other failure.
+ */
+static int check_pagemap(void)
+{
+	int ret, fd;
+	u64 pfn = 0;
+
+	fd = __open_proc(PROC_SELF, EPERM, O_RDONLY, "pagemap");
+	if (fd < 0) {
+		if (errno == EPERM) {
+			pr_info("Pagemap disabled\n");
+			kdat.pmap = PM_DISABLED;
+			return 0;
+		}
+
+		return -1;
+	}
+
+	/* Get the PFN of some present page. Stack is here, so try it :) */
+	ret = pread(fd, &pfn, sizeof(pfn), (((unsigned long)&ret) / page_size()) * sizeof(pfn));
+	if (ret != sizeof(pfn)) {
+		pr_perror("Can't read pagemap");
+		close(fd);
+		return -1;
+	}
+
+	close(fd);
+
+	if ((pfn & PME_PFRAME_MASK) == 0) {
+		pr_info("Pagemap provides flags only\n");
+		kdat.pmap = PM_FLAGS_ONLY;
+	} else {
+		pr_info("Pagemap is fully functional\n");
+		kdat.pmap = PM_FULL;
+	}
+
+	return 0;
+}
+
 /*
  * Anonymous shared mappings are backed by hidden tmpfs
  * mount. Find out its dev to distinguish such mappings
@@ -322,13 +364,16 @@ static int init_zero_page_pfn()
 		return -1;
 	}
 
+	if (kdat.pmap != PM_FULL) {
+		munmap(addr, PAGE_SIZE);
+		pr_info("Zero page detection failed, optimization turns off.\n");
+		return 0;
+	}
+
 	ret = vaddr_to_pfn((unsigned long)addr, &kdat.zero_page_pfn);
 	munmap(addr, PAGE_SIZE);
 
-	if (ret == 1) {
-		pr_info("Zero page detection failed, optimization turns off.\n");
-		ret = 0;
-	} else if (kdat.zero_page_pfn == 0)
+	if (kdat.zero_page_pfn == 0)
 		ret = -1;
 
 	return ret;
@@ -456,7 +501,9 @@ int kerndat_init(void)
 {
 	int ret;
 
-	ret = kerndat_get_shmemdev();
+	ret = check_pagemap();
+	if (!ret)
+		ret = kerndat_get_shmemdev();
 	if (!ret)
 		ret = kerndat_get_dirty_track();
 	if (!ret)
@@ -487,7 +534,9 @@ int kerndat_init_rst(void)
 	 * not available inside namespaces.
 	 */
 
-	ret = tcp_read_sysctl_limits();
+	ret = check_pagemap();
+	if (!ret)
+		ret = tcp_read_sysctl_limits();
 	if (!ret)
 		ret = get_last_cap();
 	if (!ret)
diff --git a/pagemap-cache.c b/pagemap-cache.c
index 54205868c..c2e467b67 100644
--- a/pagemap-cache.c
+++ b/pagemap-cache.c
@@ -46,13 +46,14 @@ int pmc_init(pmc_t *pmc, pid_t pid, const struct list_head *vma_head, size_t siz
 	if (!pmc->map)
 		goto err;
 
-	pmc->fd = __open_proc(pid, EPERM, O_RDONLY, "pagemap");
-	if (pmc->fd < 0) {
-		if (errno != EPERM)
-			goto err;
-
+	if (kdat.pmap == PM_DISABLED) {
+		pmc->fd = -1;
 		pr_warn("No pagemap for %d available, "
 			"switching to greedy mode\n", pid);
+	} else {
+		pmc->fd = open_proc(pid, "pagemap");
+		if (pmc->fd < 0)
+			goto err;
 	}
 
 	pr_debug("created for pid %d (takes %zu bytes)\n", pid, pmc->map_len);
diff --git a/util.c b/util.c
index 87642f850..241670cb9 100644
--- a/util.c
+++ b/util.c
@@ -763,9 +763,9 @@ int vaddr_to_pfn(unsigned long vaddr, u64 *pfn)
 	int fd, ret = -1;
 	off_t off;
 
-	fd = __open_proc(getpid(), EPERM, O_RDONLY, "pagemap");
+	fd = open_proc(getpid(), "pagemap");
 	if (fd < 0)
-		return errno == EPERM ? 1 : -1;
+		return -1;
 
 	off = (vaddr / page_size()) * sizeof(u64);
 	ret = pread(fd, pfn, sizeof(*pfn), off);
diff --git a/vdso.c b/vdso.c
index a6a9b3087..c547cf53a 100644
--- a/vdso.c
+++ b/vdso.c
@@ -43,20 +43,19 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
 	struct vma_area *proxy_vdso_marked = NULL;
 	struct vma_area *proxy_vvar_marked = NULL;
 	struct parasite_vdso_vma_entry *args;
-	int fd, ret, exit_code = -1;
+	int fd = -1, ret, exit_code = -1;
 	u64 pfn = VDSO_BAD_PFN;
 	struct vma_area *vma;
 	off_t off;
 
 	args = parasite_args(ctl, struct parasite_vdso_vma_entry);
-	fd = __open_proc(pid, EPERM, O_RDONLY, "pagemap");
-	if (fd < 0) {
-		if (errno == EPERM) {
-			pr_info("Pagemap is unavailable, trying a slow way\n");
-		} else
+	if (kdat.pmap == PM_FULL) {
+		BUG_ON(vdso_pfn == VDSO_BAD_PFN);
+		fd = open_proc(pid, "pagemap");
+		if (fd < 0)
 			return -1;
 	} else
-		BUG_ON(vdso_pfn == VDSO_BAD_PFN);
+		pr_info("Pagemap is unavailable, trying a slow way\n");
 
 	list_for_each_entry(vma, &vma_area_list->h, list) {
 		if (!vma_area_is(vma, VMA_AREA_REGULAR))
@@ -311,8 +310,11 @@ int vdso_init(void)
 {
 	if (vdso_fill_self_symtable(&vdso_sym_rt))
 		return -1;
-	if (vaddr_to_pfn(vdso_sym_rt.vma_start, &vdso_pfn) != 0)
+
+	if (kdat.pmap != PM_FULL)
 		pr_info("VDSO detection turned off\n");
+	else if (vaddr_to_pfn(vdso_sym_rt.vma_start, &vdso_pfn))
+		return -1;
 
 	return 0;
 }