mirror of
https://github.com/checkpoint-restore/criu
synced 2025-08-30 22:05:36 +00:00
lazy-pages: fix memory corruption when combining pre-dump with lazy pages
When we combine pre-dump with lazy pages, we populate a part of a memory region with data that was saved during the pre-dump. Afterwards, the region is registered with userfaultfd and we expect to get page faults for the parts of the region that were not yet populated. However, khugepaged collapses the pages and the page faults we would expect do not occur.

To mitigate this problem we temporarily disable THP for the restored process, up to the point when we register all the memory regions with userfaultfd.

https://lists.openvz.org/pipermail/criu/2017-May/037728.html

Reported-by: Adrian Reber <areber@redhat.com>
Acked-by: Pavel Emelyanov <xemul@virtuozzo.com>
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Andrei Vagin <avagin@virtuozzo.com>
This commit is contained in:
committed by
Andrei Vagin
parent
bbbd597b41
commit
9d45bb3987
@@ -2555,6 +2555,9 @@ static int prepare_mm(pid_t pid, struct task_restore_args *args)
|
||||
goto out;
|
||||
|
||||
args->fd_exe_link = exe_fd;
|
||||
|
||||
args->has_thp_enabled = rsti(current)->has_thp_enabled;
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
return ret;
|
||||
|
@@ -127,6 +127,11 @@ static inline unsigned long pagemap_len(PagemapEntry *pe)
|
||||
return pe->nr_pages * PAGE_SIZE;
|
||||
}
|
||||
|
||||
static inline bool page_read_has_parent(struct page_read *pr)
|
||||
{
|
||||
return pr->parent != NULL;
|
||||
}
|
||||
|
||||
/* Pagemap flags */
|
||||
#define PE_PARENT (1 << 0) /* pages are in parent snapshot */
|
||||
#define PE_LAZY (1 << 1) /* pages can be lazily restored */
|
||||
|
@@ -118,6 +118,7 @@ struct task_restore_args {
|
||||
struct timeval logstart;
|
||||
|
||||
int uffd;
|
||||
bool has_thp_enabled;
|
||||
|
||||
/* threads restoration */
|
||||
int nr_threads; /* number of threads */
|
||||
|
@@ -61,6 +61,8 @@ struct rst_info {
|
||||
*/
|
||||
bool has_seccomp;
|
||||
|
||||
bool has_thp_enabled;
|
||||
|
||||
void *breakpoint;
|
||||
};
|
||||
|
||||
|
38
criu/mem.c
38
criu/mem.c
@@ -4,6 +4,7 @@
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/prctl.h>
|
||||
|
||||
#include "types.h"
|
||||
#include "cr_options.h"
|
||||
@@ -27,6 +28,7 @@
|
||||
#include "files-reg.h"
|
||||
#include "pagemap-cache.h"
|
||||
#include "fault-injection.h"
|
||||
#include "prctl.h"
|
||||
#include <compel/compel.h>
|
||||
|
||||
#include "protobuf.h"
|
||||
@@ -1036,6 +1038,38 @@ err_addr:
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int maybe_disable_thp(struct pstree_item *t, struct page_read *pr)
|
||||
{
|
||||
struct _MmEntry *mm = rsti(t)->mm;
|
||||
|
||||
/*
|
||||
* There is no need to disable it if the page read doesn't
|
||||
* have parent. In this case VMA will be empty until
|
||||
* userfaultfd_register, so there would be no pages to
|
||||
* collapse. And, once we register the VMA with uffd,
|
||||
* khugepaged will skip it.
|
||||
*/
|
||||
if (!(opts.lazy_pages && page_read_has_parent(pr)))
|
||||
return 0;
|
||||
|
||||
if (!kdat.has_thp_disable)
|
||||
pr_warn("Disabling transparent huge pages. "
|
||||
"It may affect performance!\n");
|
||||
|
||||
/*
|
||||
* temporarily disable THP to avoid collapse of pages
|
||||
* in the areas that will be monitored by uffd
|
||||
*/
|
||||
if (prctl(PR_SET_THP_DISABLE, 1, 0, 0, 0)) {
|
||||
pr_perror("Cannot disable THP");
|
||||
return -1;
|
||||
}
|
||||
if (!(mm->has_thp_disabled && mm->thp_disabled))
|
||||
rsti(t)->has_thp_enabled = true;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int prepare_mappings(struct pstree_item *t)
|
||||
{
|
||||
int ret = 0;
|
||||
@@ -1067,6 +1101,9 @@ int prepare_mappings(struct pstree_item *t)
|
||||
if (ret <= 0)
|
||||
return -1;
|
||||
|
||||
if (maybe_disable_thp(t, &pr))
|
||||
return -1;
|
||||
|
||||
pr.advance(&pr); /* shift to the 1st iovec */
|
||||
|
||||
ret = premap_priv_vmas(t, vmas, &addr, &pr);
|
||||
@@ -1216,4 +1253,3 @@ int prepare_vmas(struct pstree_item *t, struct task_restore_args *ta)
|
||||
|
||||
return prepare_vma_ios(t, ta);
|
||||
}
|
||||
|
||||
|
@@ -1244,6 +1244,14 @@ long __export_restore_task(struct task_restore_args *args)
|
||||
}
|
||||
|
||||
if (args->uffd > -1) {
|
||||
/* re-enable THP if we disabled it previously */
|
||||
if (args->has_thp_enabled) {
|
||||
if (sys_prctl(PR_SET_THP_DISABLE, 0, 0, 0, 0)) {
|
||||
pr_err("Cannot re-enable THP\n");
|
||||
goto core_restore_end;
|
||||
}
|
||||
}
|
||||
|
||||
pr_debug("lazy-pages: closing uffd %d\n", args->uffd);
|
||||
/*
|
||||
* All userfaultfd configuration has finished at this point.
|
||||
|
Reference in New Issue
Block a user