From e0ea21ad5ecafadad653f46a0ed8cbef69c7b883 Mon Sep 17 00:00:00 2001 From: Abhishek Dubey Date: Thu, 3 Oct 2019 19:39:19 +0530 Subject: [PATCH] Handling iov generation for non-PROT_READ regions Skip iov-generation for regions not having PROT_READ, since process_vm_readv syscall can't process them during "read" pre-dump. Handle random order of "read" & "splice" pre-dumps. Signed-off-by: Abhishek Dubey Signed-off-by: Andrei Vagin --- criu/cr-dump.c | 5 ++++ criu/mem.c | 56 ++++++++++++++++++++++++++++++++++++++++-- images/inventory.proto | 1 + 3 files changed, 60 insertions(+), 2 deletions(-) diff --git a/criu/cr-dump.c b/criu/cr-dump.c index fcbe816e8..dd5b62dd0 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -1485,6 +1485,9 @@ static int cr_pre_dump_finish(int status) if (ret) goto err; + he.has_pre_dump_mode = true; + he.pre_dump_mode = opts.pre_dump_mode; + pstree_switch_state(root_item, TASK_ALIVE); timing_stop(TIME_FROZEN); @@ -1914,6 +1917,8 @@ int cr_dump_tasks(pid_t pid) if (ret) goto err; + he.has_pre_dump_mode = false; + ret = write_img_inventory(&he); if (ret) goto err; diff --git a/criu/mem.c b/criu/mem.c index 911b9d21c..a5de23755 100644 --- a/criu/mem.c +++ b/criu/mem.c @@ -351,7 +351,8 @@ static int generate_vma_iovs(struct pstree_item *item, struct vma_area *vma, struct page_pipe *pp, struct page_xfer *xfer, struct parasite_dump_pages_args *args, struct parasite_ctl *ctl, pmc_t *pmc, - bool has_parent, bool pre_dump) + bool has_parent, bool pre_dump, + int parent_predump_mode) { u64 off = 0; u64 *map; @@ -361,6 +362,52 @@ static int generate_vma_iovs(struct pstree_item *item, struct vma_area *vma, !vma_area_is(vma, VMA_ANON_SHARED)) return 0; + /* + * To facilitate any combination of pre-dump modes to run after + * one another, we need to take extra care as discussed below. + * + * The SPLICE mode pre-dump, processes all type of memory regions, + * whereas READ mode pre-dump skips processing those memory regions + * which lacks PROT_READ flag. + * + * Now on mixing pre-dump modes: + * If SPLICE mode follows SPLICE mode : no issue + * -> everything dumped both the times + * + * If READ mode follows READ mode : no issue + * -> non-PROT_READ skipped both the time + * + * If READ mode follows SPLICE mode : no issue + * -> everything dumped at first, + * the non-PROT_READ skipped later + * + * If SPLICE mode follows READ mode : Need special care + * + * If READ pre-dump happens first, then it has skipped processing + * non-PROT_READ regions. Following SPLICE pre-dump expects pagemap + * entries for all mappings in parent pagemap, but last READ mode + * pre-dump cycle has skipped processing & pagemap generation for + * non-PROT_READ regions. So SPLICE mode throws error of missing + * pagemap entry for encountered non-PROT_READ mapping. + * + * To resolve this, the pre-dump-mode is stored in current pre-dump's + * inventoy file. This pre-dump mode is read back from this file + * (present in parent pre-dump dir) as parent-pre-dump-mode during + * next pre-dump. + * + * If parent-pre-dump-mode and next-pre-dump-mode are in READ-mode -> + * SPLICE-mode order, then SPLICE mode doesn't expect mappings for + * non-PROT_READ regions in parent-image and marks "has_parent=false". + */ + + if (!(vma->e->prot & PROT_READ)) { + if (opts.pre_dump_mode == PRE_DUMP_READ && pre_dump) + return 0; + if ((parent_predump_mode == PRE_DUMP_READ && + opts.pre_dump_mode == PRE_DUMP_SPLICE) || !pre_dump) + has_parent = false; + } + if (vma_entry_is(vma->e, VMA_AREA_AIORING)) { if (pre_dump) return 0; @@ -406,6 +453,7 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, unsigned long pmc_size; int possible_pid_reuse = 0; bool has_parent; + int parent_predump_mode = -1; pr_info("\n"); pr_info("Dumping pages (type: %d pid: %d)\n", CR_FD_PAGES, item->pid->real); @@ -472,9 +520,13 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, */ args->off = 0; has_parent = !!xfer.parent && !possible_pid_reuse; + if(mdc->parent_ie) + parent_predump_mode = mdc->parent_ie->pre_dump_mode; + list_for_each_entry(vma_area, &vma_area_list->h, list) { ret = generate_vma_iovs(item, vma_area, pp, &xfer, args, ctl, - &pmc, has_parent, mdc->pre_dump); + &pmc, has_parent, mdc->pre_dump, + parent_predump_mode); if (ret < 0) goto out_xfer; } diff --git a/images/inventory.proto b/images/inventory.proto index 7bc2b0c02..d1438e8c8 100644 --- a/images/inventory.proto +++ b/images/inventory.proto @@ -16,4 +16,5 @@ message inventory_entry { optional uint32 root_cg_set = 5; optional lsmtype lsmtype = 6; optional uint64 dump_uptime = 8; + optional uint32 pre_dump_mode = 9; }