diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h index 710451bb2..e367ebd95 100644 --- a/arch/x86/include/asm/vdso.h +++ b/arch/x86/include/asm/vdso.h @@ -10,6 +10,7 @@ struct parasite_ctl; struct vm_area_list; #define VDSO_PROT (PROT_READ | PROT_EXEC) +#define VVAR_PROT (PROT_READ) #define VDSO_BAD_ADDR (-1ul) #define VVAR_BAD_ADDR VDSO_BAD_ADDR @@ -68,6 +69,10 @@ static inline unsigned long vdso_vma_size(struct vdso_symtable *t) return t->vma_end - t->vma_start; } +static inline unsigned long vvar_vma_size(struct vdso_symtable *t) +{ + return t->vvar_end - t->vvar_start; +} /* * Special mark which allows to identify runtime vdso where * calls from proxy vdso are redirected. This mark usually @@ -142,7 +147,9 @@ extern u64 vdso_pfn; extern int vdso_init(void); extern int vdso_remap(char *who, unsigned long from, unsigned long to, size_t size); extern int vdso_fill_symtable(char *mem, size_t size, struct vdso_symtable *t); -extern int vdso_proxify(char *who, struct vdso_symtable *sym_rt, VmaEntry *vma_entry, unsigned long vdso_rt_parked_at); +extern int vdso_proxify(char *who, struct vdso_symtable *sym_rt, + VmaEntry *vdso_vma, VmaEntry *vvar_vma, + unsigned long vdso_rt_parked_at); extern int vdso_redirect_calls(void *base_to, void *base_from, struct vdso_symtable *to, struct vdso_symtable *from); extern int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid, diff --git a/arch/x86/vdso-pie.c b/arch/x86/vdso-pie.c index 967108d8f..327cad950 100644 --- a/arch/x86/vdso-pie.c +++ b/arch/x86/vdso-pie.c @@ -15,6 +15,7 @@ #include "compiler.h" #include "syscall.h" +#include "image.h" #include "vdso.h" #include "vma.h" #include "log.h" @@ -261,44 +262,84 @@ int vdso_remap(char *who, unsigned long from, unsigned long to, size_t size) return 0; } -int vdso_proxify(char *who, struct vdso_symtable *sym_rt, VmaEntry *vma, unsigned long vdso_rt_parked_at) +int vdso_proxify(char *who, struct vdso_symtable *sym_rt, + VmaEntry *vdso_vma, VmaEntry 
*vvar_vma, + unsigned long vdso_rt_parked_at) { struct vdso_symtable s = VDSO_SYMTABLE_INIT; - size_t size = vma_entry_len(vma); - bool remap_rt = true; + size_t size = vma_entry_len(vdso_vma); + bool remap_rt = false; + + /* + * vDSO mark overwrites Elf program header of proxy vDSO thus + * it must never ever be greater in size. + */ + BUILD_BUG_ON(sizeof(struct vdso_mark) > sizeof(Elf64_Phdr)); /* * Find symbols in dumpee vdso. */ - if (vdso_fill_symtable((void *)vma->start, size, &s)) + if (vdso_fill_symtable((void *)vdso_vma->start, size, &s)) return -1; + /* + * Try to figure out if the vDSO in image has the same symbols + * as run time vDSO, if yes we might try to reuse runtime vDSO + * instead of one in image. + * + * In case if VVAR area is present at least it must have same + * size as dumped one for inplace remap. + */ if (size == vdso_vma_size(sym_rt)) { - int i; + size_t i; for (i = 0; i < ARRAY_SIZE(s.symbols); i++) { - if (s.symbols[i].offset != sym_rt->symbols[i].offset) { - remap_rt = false; + if (s.symbols[i].offset != sym_rt->symbols[i].offset) break; } + if (i == ARRAY_SIZE(s.symbols)) { + remap_rt = true; + + if (vvar_vma && sym_rt->vvar_start != VVAR_BAD_ADDR) + remap_rt = (vvar_vma_size(sym_rt) == vma_entry_len(vvar_vma)); } - } else - remap_rt = false; + } /* * Easy case -- the vdso from image has same offsets and size * as runtime, so we simply remap runtime vdso to dumpee position - * without generating any proxy. + * without generating any proxy. Note we may remap VVAR vdso as + * well which might not yet been mapped by a caller code. So + * drop VMA_AREA_REGULAR from it and caller would not touch it + * anymore. 
*/ if (remap_rt) { - pr_info("Runtime vdso matches dumpee, remap inplace\n"); + unsigned long vvar_rt_parked_at = VVAR_BAD_ADDR; + int ret = 0; - if (sys_munmap((void *)vma->start, size)) { + pr_info("Runtime vdso/vvar matches dumpee, remap inplace\n"); + + if (sys_munmap((void *)vdso_vma->start, vma_entry_len(vdso_vma))) { pr_err("Failed to unmap %s\n", who); return -1; } - return vdso_remap(who, vdso_rt_parked_at, vma->start, size); + if (vvar_vma) { + if (sys_munmap((void *)vvar_vma->start, vma_entry_len(vvar_vma))) { + pr_err("Failed to unmap %s\n", who); + return -1; + } + + vvar_rt_parked_at = ALIGN(vdso_vma_size(sym_rt), PAGE_SIZE); + vvar_rt_parked_at += vdso_rt_parked_at; + + ret = vdso_remap(who, vvar_rt_parked_at, vvar_vma->start, vma_entry_len(vvar_vma)); + vvar_vma->status &= ~VMA_AREA_REGULAR; + } + + ret |= vdso_remap(who, vdso_rt_parked_at, vdso_vma->start, vma_entry_len(vdso_vma)); + + return ret; } /* @@ -309,7 +350,7 @@ int vdso_proxify(char *who, struct vdso_symtable *sym_rt, VmaEntry *vma, unsigne pr_info("Runtime vdso mismatches dumpee, generate proxy\n"); if (vdso_redirect_calls((void *)vdso_rt_parked_at, - (void *)vma->start, + (void *)vdso_vma->start, sym_rt, &s)) { pr_err("Failed to proxify dumpee contents\n"); return -1; @@ -321,7 +362,7 @@ int vdso_proxify(char *who, struct vdso_symtable *sym_rt, VmaEntry *vma, unsigne * it's auto-generated every new session if proxy required. */ sys_mprotect((void *)vdso_rt_parked_at, vdso_vma_size(sym_rt), PROT_WRITE); - vdso_put_mark((void *)vdso_rt_parked_at, vma->start, VVAR_BAD_ADDR); + vdso_put_mark((void *)vdso_rt_parked_at, vdso_vma->start, vvar_vma ? 
vvar_vma->start : VVAR_BAD_ADDR); sys_mprotect((void *)vdso_rt_parked_at, vdso_vma_size(sym_rt), VDSO_PROT); return 0; } diff --git a/arch/x86/vdso.c b/arch/x86/vdso.c index 6a158ffd2..e7c3842c2 100644 --- a/arch/x86/vdso.c +++ b/arch/x86/vdso.c @@ -30,15 +30,19 @@ struct vdso_symtable vdso_sym_rt = VDSO_SYMTABLE_INIT; u64 vdso_pfn = VDSO_BAD_PFN; /* - * Find out proxy vdso vma and drop it from the list. Also - * fix vdso status on vmas if wrong status found. + * The VMAs list might have proxy vdso/vvar areas left + * from previous dump/restore cycle so we need to detect + * them and eliminated from the VMAs list, they will be + * generated again on restore if needed. */ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid, struct vm_area_list *vma_area_list) { - unsigned long proxy_addr = VDSO_BAD_ADDR; + unsigned long proxy_vdso_addr = VDSO_BAD_ADDR; + unsigned long proxy_vvar_addr = VVAR_BAD_ADDR; + struct vma_area *proxy_vdso_marked = NULL; + struct vma_area *proxy_vvar_marked = NULL; struct parasite_vdso_vma_entry *args; - struct vma_area *marked = NULL; struct vma_area *vma; int fd, ret = -1; off_t off; @@ -56,6 +60,23 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid, if (vma_area_is(vma, VMA_FILE_SHARED) || vma_area_is(vma, VMA_FILE_PRIVATE)) continue; + /* + * It might be possible VVAR area from marked + * vDSO zone, we need to detect it earlier than + * VDSO_PROT test because VVAR_PROT is a subset + * of it but don't yield continue here, + * sigh... what a mess. + */ + BUILD_BUG_ON(!(VDSO_PROT & VVAR_PROT)); + + if ((vma->e->prot & VVAR_PROT) == VVAR_PROT) { + if (proxy_vvar_addr != VVAR_BAD_ADDR && + proxy_vvar_addr == vma->e->start) { + BUG_ON(proxy_vvar_marked); + proxy_vvar_marked = vma; + continue; + } + } if ((vma->e->prot & VDSO_PROT) != VDSO_PROT) continue; @@ -81,13 +102,19 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid, } /* - * Defer handling marked vdso. 
+ * Defer handling marked vdso until we walked over + * all vmas and restore potentially remapped vDSO + * area status. */ if (unlikely(args->is_marked)) { - BUG_ON(args->proxy_vdso_addr == VDSO_BAD_ADDR); - BUG_ON(marked); - marked = vma; - proxy_addr = args->proxy_vdso_addr; + if (proxy_vdso_marked) { + pr_err("Ow! Second vdso mark detected!\n"); + ret = -1; + goto err; + } + proxy_vdso_marked = vma; + proxy_vdso_addr = args->proxy_vdso_addr; + proxy_vvar_addr = args->proxy_vvar_addr; continue; } @@ -103,17 +130,21 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid, BUG_ON(!pfn); /* - * Set proper VMA statuses. + * Setup proper VMA status. Note starting with 3.16 + * the [vdso]/[vvar] marks are reported correctly + * even when they are remapped into a new place, + * but only since that particular version of the + * kernel! */ if (pfn == vdso_pfn) { if (!vma_area_is(vma, VMA_AREA_VDSO)) { - pr_debug("vdso: Restore status by pfn at %lx\n", + pr_debug("vdso: Restore vDSO status by pfn at %lx\n", (long)vma->e->start); vma->e->status |= VMA_AREA_VDSO; } } else { - if (vma_area_is(vma, VMA_AREA_VDSO)) { - pr_debug("vdso: Drop mishinted status at %lx\n", + if (unlikely(vma_area_is(vma, VMA_AREA_VDSO))) { + pr_debug("vdso: Drop mishinted vDSO status at %lx\n", (long)vma->e->start); vma->e->status &= ~VMA_AREA_VDSO; } @@ -124,28 +155,40 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid, * There is marked vdso, it means such vdso is autogenerated * and must be dropped from vma list. */ - if (marked) { - pr_debug("vdso: Found marked at %lx (proxy at %lx)\n", - (long)marked->e->start, (long)proxy_addr); + if (proxy_vdso_marked) { + pr_debug("vdso: Found marked at %lx (proxy vDSO at %lx VVAR at %lx)\n", + (long)proxy_vdso_marked->e->start, + (long)proxy_vdso_addr, (long)proxy_vvar_addr); /* - * Don't forget to restore the proxy vdso status, since - * it's being not recognized by the kernel as vdso. 
+ * Don't forget to restore the proxy vdso/vvar status, since + * it's unknown to the kernel. */ list_for_each_entry(vma, &vma_area_list->h, list) { - if (vma->e->start == proxy_addr) { + if (vma->e->start == proxy_vdso_addr) { vma->e->status |= VMA_AREA_REGULAR | VMA_AREA_VDSO; - pr_debug("vdso: Restore proxy status at %lx\n", + pr_debug("vdso: Restore proxy vDSO status at %lx\n", + (long)vma->e->start); + } else if (vma->e->start == proxy_vvar_addr) { + vma->e->status |= VMA_AREA_REGULAR | VMA_AREA_VVAR; + pr_debug("vdso: Restore proxy VVAR status at %lx\n", (long)vma->e->start); - break; } } pr_debug("vdso: Droppping marked vdso at %lx\n", - (long)vma->e->start); - list_del(&marked->list); - xfree(marked); + (long)proxy_vdso_marked->e->start); + list_del(&proxy_vdso_marked->list); + xfree(proxy_vdso_marked); vma_area_list->nr--; + + if (proxy_vvar_marked) { + pr_debug("vdso: Droppping marked vvar at %lx\n", + (long)proxy_vvar_marked->e->start); + list_del(&proxy_vvar_marked->list); + xfree(proxy_vvar_marked); + vma_area_list->nr--; + } } ret = 0; err: @@ -169,24 +212,74 @@ static int vdso_fill_self_symtable(struct vdso_symtable *s) while (fgets(buf, sizeof(buf), maps)) { unsigned long start, end; + char *has_vdso, *has_vvar; - if (strstr(buf, "[vdso]") == NULL) + has_vdso = strstr(buf, "[vdso]"); + if (!has_vdso) + has_vvar = strstr(buf, "[vvar]"); + else + has_vvar = NULL; + + if (!has_vdso && !has_vvar) continue; ret = sscanf(buf, "%lx-%lx", &start, &end); if (ret != 2) { ret = -1; - pr_err("Can't find vDSO bounds\n"); - break; + pr_err("Can't find vDSO/VVAR bounds\n"); + goto err; } - s->vma_start = start; - s->vma_end = end; + if (has_vdso) { + if (s->vma_start != VDSO_BAD_ADDR) { + pr_err("Got second vDSO entry\n"); + ret = -1; + goto err; + } + s->vma_start = start; + s->vma_end = end; - ret = vdso_fill_symtable((void *)start, end - start, s); - break; + ret = vdso_fill_symtable((void *)start, end - start, s); + if (ret) + goto err; + } else { + if 
(s->vvar_start != VVAR_BAD_ADDR) { + pr_err("Got second VVAR entry\n"); + ret = -1; + goto err; + } + s->vvar_start = start; + s->vvar_end = end; + } } + /* + * Validate its structure -- for new vDSO format the + * structure must be like + * + * 7fff1f5fd000-7fff1f5fe000 r-xp 00000000 00:00 0 [vdso] + * 7fff1f5fe000-7fff1f600000 r--p 00000000 00:00 0 [vvar] + */ + ret = 0; + if (s->vma_start != VDSO_BAD_ADDR) { + if (s->vvar_start != VVAR_BAD_ADDR) { + if (s->vma_end != s->vvar_start) { + ret = -1; + pr_err("Unexpected rt vDSO area bounds\n"); + goto err; + } + } + } else { + ret = -1; + pr_err("Can't find rt vDSO\n"); + goto err; + } + + pr_debug("rt [vdso] %lx-%lx [vvar] %lx-%lx\n", + s->vma_start, s->vma_end, + s->vvar_start, s->vvar_end); + +err: fclose(maps); return ret; } diff --git a/cr-restore.c b/cr-restore.c index 37985d7c2..573b989b4 100644 --- a/cr-restore.c +++ b/cr-restore.c @@ -2377,12 +2377,14 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core) #ifdef CONFIG_VDSO /* - * Figure out how much memory runtime vdso will need. + * Figure out how much memory runtime vdso and vvar will need. 
*/ vdso_rt_vma_size = vdso_vma_size(&vdso_sym_rt); if (vdso_rt_vma_size) { vdso_rt_delta = ALIGN(restore_bootstrap_len, PAGE_SIZE) - restore_bootstrap_len; vdso_rt_size = vdso_rt_vma_size + vdso_rt_delta; + if (vvar_vma_size(&vdso_sym_rt)) + vdso_rt_size += ALIGN(vvar_vma_size(&vdso_sym_rt), PAGE_SIZE); } restore_bootstrap_len += vdso_rt_size; diff --git a/include/vdso.h b/include/vdso.h index c0725a3bf..50549d566 100644 --- a/include/vdso.h +++ b/include/vdso.h @@ -16,7 +16,8 @@ #define parasite_fixup_vdso(ctl, pid, vma_area_list) (0) #define vdso_vma_size(t) (0) #define vdso_remap(who, from, to, size) (0) -#define vdso_proxify(who, sym_rt, vma, vdso_rt_parked_at) (0) +#define vdso_proxify(who, sym_rt, vdso_vma, \ + vvar_vma, vdso_rt_parked_at) (0) #endif /* CONFIG_VDSO */ diff --git a/pie/restorer.c b/pie/restorer.c index fb0a9cfa8..27e69293f 100644 --- a/pie/restorer.c +++ b/pie/restorer.c @@ -686,6 +686,12 @@ long __export_restore_task(struct task_restore_args *args) args->vdso_rt_parked_at, vdso_vma_size(&args->vdso_sym_rt))) goto core_restore_end; + if (args->vdso_sym_rt.vvar_start != VVAR_BAD_ADDR) { + if (vdso_remap("rt-vvar", args->vdso_sym_rt.vvar_start, + args->vdso_rt_parked_at + vdso_vma_size(&args->vdso_sym_rt), + vvar_vma_size(&args->vdso_sym_rt))) + goto core_restore_end; + } #endif if (unmap_old_vmas((void *)args->premmapped_addr, args->premmapped_len, @@ -713,8 +719,17 @@ long __export_restore_task(struct task_restore_args *args) goto core_restore_end; #ifdef CONFIG_VDSO if (vma_entry_is(vma_entry, VMA_AREA_VDSO)) { + VmaEntry *vma_vvar; + + if (i + 1 < args->nr_vmas) { + vma_vvar = args->tgt_vmas + i + 1; + if (!vma_entry_is(vma_vvar, VMA_AREA_VVAR)) + vma_vvar = NULL; + } else + vma_vvar = NULL; if (vdso_proxify("left dumpee", &args->vdso_sym_rt, - vma_entry, args->vdso_rt_parked_at)) + vma_entry, vma_vvar, + args->vdso_rt_parked_at)) goto core_restore_end; } #endif @@ -741,8 +756,17 @@ long __export_restore_task(struct task_restore_args *args) 
goto core_restore_end; #ifdef CONFIG_VDSO if (vma_entry_is(vma_entry, VMA_AREA_VDSO)) { + VmaEntry *vma_vvar; + + if (i + 1 < args->nr_vmas) { + vma_vvar = args->tgt_vmas + i + 1; + if (!vma_entry_is(vma_vvar, VMA_AREA_VVAR)) + vma_vvar = NULL; + } else + vma_vvar = NULL; if (vdso_proxify("right dumpee", &args->vdso_sym_rt, - vma_entry, args->vdso_rt_parked_at)) + vma_entry, vma_vvar, + args->vdso_rt_parked_at)) goto core_restore_end; } #endif diff --git a/proc_parse.c b/proc_parse.c index c6076d796..ed0e444d8 100644 --- a/proc_parse.c +++ b/proc_parse.c @@ -138,8 +138,16 @@ static int parse_vmflags(char *buf, struct vma_area *vma_area) vma_area->e->madv |= (1ul << MADV_NOHUGEPAGE); /* vmsplice doesn't work for VM_IO and VM_PFNMAP mappings. */ - if (_vmflag_match(tok, "io") || _vmflag_match(tok, "pf")) - vma_area->e->status |= VMA_UNSUPP; + if (_vmflag_match(tok, "io") || _vmflag_match(tok, "pf")) { +#ifdef CONFIG_VDSO + /* + * VVAR area mapped by the kernel as + * VM_IO | VM_PFNMAP| VM_DONTEXPAND | VM_DONTDUMP + */ + if (!vma_area_is(vma_area, VMA_AREA_VVAR)) +#endif + vma_area->e->status |= VMA_UNSUPP; + } /* * Anything else is just ignored. @@ -408,6 +416,15 @@ int parse_smaps(pid_t pid, struct vm_area_list *vma_area_list, bool use_map_file #else pr_warn_once("Found vDSO area without support\n"); goto err; +#endif + } else if (strstr(buf, "[vvar]")) { +#ifdef CONFIG_VDSO + vma_area->e->status |= VMA_AREA_REGULAR; + if ((vma_area->e->prot & VVAR_PROT) == VVAR_PROT) + vma_area->e->status |= VMA_AREA_VVAR; +#else + pr_warn_once("Found VVAR area without support\n"); + goto err; #endif } else if (strstr(buf, "[heap]")) { vma_area->e->status |= VMA_AREA_REGULAR | VMA_AREA_HEAP;