diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index fc27dd4fa..bbe0bc670 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -2555,6 +2555,9 @@ static int prepare_mm(pid_t pid, struct task_restore_args *args)
 		goto out;
 
 	args->fd_exe_link = exe_fd;
+
+	args->has_thp_enabled = rsti(current)->has_thp_enabled; /* read by the restorer */
+
 	ret = 0;
 out:
 	return ret;
diff --git a/criu/include/pagemap.h b/criu/include/pagemap.h
index d0a28b9f1..800768308 100644
--- a/criu/include/pagemap.h
+++ b/criu/include/pagemap.h
@@ -127,6 +127,11 @@ static inline unsigned long pagemap_len(PagemapEntry *pe)
 	return pe->nr_pages * PAGE_SIZE;
 }
 
+static inline bool page_read_has_parent(struct page_read *pr)
+{
+	return pr->parent != NULL; /* true when pr->parent is set */
+}
+
 /* Pagemap flags */
 #define PE_PARENT (1 << 0) /* pages are in parent snapshot */
 #define PE_LAZY (1 << 1) /* pages can be lazily restored */
diff --git a/criu/include/restorer.h b/criu/include/restorer.h
index 57b53e849..cad8cdf7b 100644
--- a/criu/include/restorer.h
+++ b/criu/include/restorer.h
@@ -118,6 +118,7 @@ struct task_restore_args {
 	struct timeval logstart;
 
 	int uffd;
+	bool has_thp_enabled; /* restorer must re-enable THP (copied from rst_info) */
 
 	/* threads restoration */
 	int nr_threads; /* number of threads */
diff --git a/criu/include/rst_info.h b/criu/include/rst_info.h
index c3dbe2dd3..336c08b6b 100644
--- a/criu/include/rst_info.h
+++ b/criu/include/rst_info.h
@@ -61,6 +61,8 @@ struct rst_info {
 	 */
 	bool has_seccomp;
 
+	bool has_thp_enabled; /* THP was disabled for restore and must be turned back on */
+
 	void *breakpoint;
 };
 
diff --git a/criu/mem.c b/criu/mem.c
index a6ef995db..dbeffe12b 100644
--- a/criu/mem.c
+++ b/criu/mem.c
@@ -4,6 +4,7 @@
 #include
 #include
 #include
+#include /* NOTE(review): header name lost in extraction - presumably <sys/prctl.h>; confirm */
 
 #include "types.h"
 #include "cr_options.h"
@@ -27,6 +28,7 @@
 #include "files-reg.h"
 #include "pagemap-cache.h"
 #include "fault-injection.h"
+#include "prctl.h"
 #include
 
 #include "protobuf.h"
@@ -1036,6 +1038,38 @@ err_addr:
 	return -1;
 }
 
+static int maybe_disable_thp(struct pstree_item *t, struct page_read *pr)
+{
+	struct _MmEntry *mm = rsti(t)->mm;
+
+	/*
+	 * THP only has to be disabled for a lazy restore whose page read
+	 * has a parent. Without a parent the VMA will be empty until
+	 * userfaultfd_register, so there would be no pages to collapse.
+	 * And, once we register the VMA with uffd, khugepaged will
+	 * skip it. In that case return 0 without touching THP.
+	 */
+	if (!(opts.lazy_pages && page_read_has_parent(pr)))
+		return 0;
+
+	if (!kdat.has_thp_disable)
+		pr_warn("Disabling transparent huge pages. "
+			"It may affect performance!\n");
+
+	/*
+	 * Temporarily disable THP to avoid collapse of pages in the
+	 * areas that will be monitored by uffd. Returns -1 on failure.
+	 */
+	if (prctl(PR_SET_THP_DISABLE, 1, 0, 0, 0)) {
+		pr_perror("Cannot disable THP");
+		return -1;
+	}
+	if (!(mm->has_thp_disabled && mm->thp_disabled)) /* image does not record THP as disabled */
+		rsti(t)->has_thp_enabled = true; /* so ask the restorer to re-enable it */
+
+	return 0;
+}
+
 int prepare_mappings(struct pstree_item *t)
 {
 	int ret = 0;
@@ -1067,6 +1101,9 @@ int prepare_mappings(struct pstree_item *t)
 	if (ret <= 0)
 		return -1;
 
+	if (maybe_disable_thp(t, &pr)) /* must run before the private VMAs are premapped */
+		return -1;
+
 	pr.advance(&pr); /* shift to the 1st iovec */
 
 	ret = premap_priv_vmas(t, vmas, &addr, &pr);
@@ -1216,4 +1253,3 @@ int prepare_vmas(struct pstree_item *t, struct task_restore_args *ta)
 
 	return prepare_vma_ios(t, ta);
 }
-
diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
index d478aa5bd..6e19ca356 100644
--- a/criu/pie/restorer.c
+++ b/criu/pie/restorer.c
@@ -1244,6 +1244,14 @@ long __export_restore_task(struct task_restore_args *args)
 	}
 
 	if (args->uffd > -1) {
+		/* re-enable THP if it was disabled while preparing the mappings */
+		if (args->has_thp_enabled) {
+			if (sys_prctl(PR_SET_THP_DISABLE, 0, 0, 0, 0)) {
+				pr_err("Cannot re-enable THP\n");
+				goto core_restore_end; /* abort the restore */
+			}
+		}
+
 		pr_debug("lazy-pages: closing uffd %d\n", args->uffd);
 		/*
 		 * All userfaultfd configuration has finished at this point.