2013-04-15 13:24:32 +04:00
|
|
|
#include <unistd.h>
|
|
|
|
#include <fcntl.h>
|
2013-04-15 13:02:09 +04:00
|
|
|
#include <stdio.h>
|
|
|
|
#include <sys/stat.h>
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <sys/mman.h>
|
2013-04-23 23:22:25 +04:00
|
|
|
#include <errno.h>
|
2013-04-15 13:02:09 +04:00
|
|
|
|
|
|
|
#include "log.h"
|
2014-01-30 14:31:39 +04:00
|
|
|
#include "bug.h"
|
2013-04-15 13:02:09 +04:00
|
|
|
#include "kerndat.h"
|
2014-04-02 11:12:00 +04:00
|
|
|
#include "fs-magic.h"
|
2013-05-17 16:13:49 +04:00
|
|
|
#include "mem.h"
|
2013-10-04 16:18:24 +04:00
|
|
|
#include "compiler.h"
|
|
|
|
#include "sysctl.h"
|
2014-10-14 13:10:53 +04:00
|
|
|
#include "syscall.h"
|
2013-04-15 13:02:09 +04:00
|
|
|
#include "asm/types.h"
|
2014-01-16 14:43:00 +04:00
|
|
|
#include "cr_options.h"
|
2014-01-30 14:34:27 +04:00
|
|
|
#include "util.h"
|
2013-04-15 13:02:09 +04:00
|
|
|
|
2014-11-10 10:47:42 +04:00
|
|
|
/*
 * Run-time gathered kernel facts. Fields not initialized here start
 * out zeroed; the TCP queue limits get conservative 2MB/3MB defaults
 * which tcp_read_sysctl_limits() may lower on restore.
 */
struct kerndat_s kdat = {
	.tcp_max_wshare = 2U << 20,	/* 2MB */
	.tcp_max_rshare = 3U << 20,	/* 3MB */
};
|
2013-04-15 13:02:09 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Anonymous shared mappings are backed by hidden tmpfs
|
|
|
|
* mount. Find out its dev to distinguish such mappings
|
|
|
|
* from real tmpfs files maps.
|
|
|
|
*/
|
|
|
|
|
|
|
|
static int kerndat_get_shmemdev(void)
|
|
|
|
{
|
|
|
|
void *map;
|
|
|
|
char maps[128];
|
|
|
|
struct stat buf;
|
|
|
|
|
|
|
|
map = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
|
|
|
|
MAP_SHARED | MAP_ANONYMOUS, 0, 0);
|
|
|
|
if (map == MAP_FAILED) {
|
2013-08-09 15:37:35 +04:00
|
|
|
pr_perror("Can't mmap memory for shmemdev test");
|
2013-04-15 13:02:09 +04:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
sprintf(maps, "/proc/self/map_files/%lx-%lx",
|
|
|
|
(unsigned long)map, (unsigned long)map + PAGE_SIZE);
|
|
|
|
if (stat(maps, &buf) < 0) {
|
2013-05-04 22:03:08 +04:00
|
|
|
munmap(map, PAGE_SIZE);
|
2013-08-09 15:37:35 +04:00
|
|
|
pr_perror("Can't stat self map_files");
|
2013-04-15 13:02:09 +04:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
munmap(map, PAGE_SIZE);
|
|
|
|
|
2014-11-10 10:47:42 +04:00
|
|
|
kdat.shmem_dev = buf.st_dev;
|
|
|
|
pr_info("Found anon-shmem device at %"PRIx64"\n", kdat.shmem_dev);
|
2013-04-15 13:02:09 +04:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-11-10 10:48:23 +04:00
|
|
|
static dev_t get_host_dev(unsigned int which)
|
2014-04-02 11:12:00 +04:00
|
|
|
{
|
2014-11-10 10:48:23 +04:00
|
|
|
static struct kst {
|
2014-10-20 21:01:02 +04:00
|
|
|
const char *name;
|
|
|
|
const char *path;
|
|
|
|
unsigned int magic;
|
2014-11-10 10:48:23 +04:00
|
|
|
dev_t fs_dev;
|
2014-10-20 21:01:02 +04:00
|
|
|
} kstat[KERNDAT_FS_STAT_MAX] = {
|
|
|
|
[KERNDAT_FS_STAT_DEVPTS] = {
|
|
|
|
.name = "devpts",
|
|
|
|
.path = "/dev/pts",
|
|
|
|
.magic = DEVPTS_SUPER_MAGIC,
|
|
|
|
},
|
|
|
|
[KERNDAT_FS_STAT_DEVTMPFS] = {
|
|
|
|
.name = "devtmpfs",
|
|
|
|
.path = "/dev",
|
|
|
|
.magic = TMPFS_MAGIC,
|
|
|
|
},
|
|
|
|
};
|
2014-04-02 11:12:00 +04:00
|
|
|
|
2014-10-20 21:01:02 +04:00
|
|
|
if (which >= KERNDAT_FS_STAT_MAX) {
|
|
|
|
pr_err("Wrong fs type %u passed\n", which);
|
2014-11-10 10:48:23 +04:00
|
|
|
return 0;
|
2014-10-20 21:01:02 +04:00
|
|
|
}
|
|
|
|
|
2014-11-10 10:48:23 +04:00
|
|
|
if (kstat[which].fs_dev == 0) {
|
|
|
|
struct statfs fst;
|
|
|
|
struct stat st;
|
|
|
|
|
|
|
|
if (statfs(kstat[which].path, &fst)) {
|
|
|
|
pr_perror("Unable to statefs %s", kstat[which].path);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* XXX: If the fs we need is not there, it still
|
|
|
|
* may mean that it's virtualized, but just not
|
|
|
|
* mounted on the host.
|
|
|
|
*/
|
|
|
|
|
|
|
|
if (fst.f_type != kstat[which].magic) {
|
|
|
|
pr_err("%s isn't mount on the host\n", kstat[which].name);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (stat(kstat[which].path, &st)) {
|
|
|
|
pr_perror("Unable to stat %s", kstat[which].path);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
BUG_ON(st.st_dev == 0);
|
|
|
|
kstat[which].fs_dev = st.st_dev;
|
2014-04-02 11:12:00 +04:00
|
|
|
}
|
|
|
|
|
2014-11-10 10:48:23 +04:00
|
|
|
return kstat[which].fs_dev;
|
2014-04-02 11:12:00 +04:00
|
|
|
}
|
|
|
|
|
2014-11-10 10:47:56 +04:00
|
|
|
/*
 * Tell whether device @kdev (kernel encoding) is NOT the host device
 * backing the @which filesystem: 0 = same as host, 1 = virtualized,
 * -1 = host device could not be determined.
 */
int kerndat_fs_virtualized(unsigned int which, u32 kdev)
{
	dev_t host_dev = get_host_dev(which);

	if (!host_dev)
		return -1;

	if (kdev_to_odev(kdev) == host_dev)
		return 0;

	return 1;
}
|
|
|
|
|
2013-04-15 13:24:32 +04:00
|
|
|
/*
|
2013-07-02 09:51:33 +04:00
|
|
|
* Check whether pagemap reports soft dirty bit. Kernel has
|
2013-04-15 13:24:32 +04:00
|
|
|
* this functionality under CONFIG_MEM_SOFT_DIRTY option.
|
|
|
|
*/
|
|
|
|
|
|
|
|
int kerndat_get_dirty_track(void)
|
|
|
|
{
|
|
|
|
char *map;
|
|
|
|
int pm2;
|
|
|
|
u64 pmap = 0;
|
2013-04-16 21:51:27 +04:00
|
|
|
int ret = -1;
|
2013-04-15 13:24:32 +04:00
|
|
|
|
|
|
|
map = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
|
|
|
|
MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
|
|
|
|
if (map == MAP_FAILED) {
|
2013-08-09 15:37:35 +04:00
|
|
|
pr_perror("Can't mmap memory for pagemap test");
|
2013-04-16 21:51:27 +04:00
|
|
|
return ret;
|
2013-04-15 13:24:32 +04:00
|
|
|
}
|
|
|
|
|
2013-07-02 09:51:33 +04:00
|
|
|
/*
|
|
|
|
* Kernel shows soft-dirty bits only if this soft-dirty
|
|
|
|
* was at least once re-set. (this is to be removed in
|
|
|
|
* a couple of kernel releases)
|
|
|
|
*/
|
|
|
|
do_task_reset_dirty_track(getpid());
|
|
|
|
pm2 = open("/proc/self/pagemap", O_RDONLY);
|
2013-04-15 13:24:32 +04:00
|
|
|
if (pm2 < 0) {
|
2013-07-02 09:51:33 +04:00
|
|
|
pr_perror("Can't open pagemap file");
|
2013-04-23 23:22:25 +04:00
|
|
|
munmap(map, PAGE_SIZE);
|
2013-04-16 21:51:27 +04:00
|
|
|
return ret;
|
2013-04-15 13:24:32 +04:00
|
|
|
}
|
|
|
|
|
2013-05-17 16:13:49 +04:00
|
|
|
map[0] = '\0';
|
|
|
|
|
2013-04-15 13:24:32 +04:00
|
|
|
lseek(pm2, (unsigned long)map / PAGE_SIZE * sizeof(u64), SEEK_SET);
|
2013-04-16 21:51:27 +04:00
|
|
|
ret = read(pm2, &pmap, sizeof(pmap));
|
2013-07-02 09:52:22 +04:00
|
|
|
if (ret < 0)
|
2013-04-16 21:51:27 +04:00
|
|
|
pr_perror("Read pmap err!");
|
|
|
|
|
2013-04-15 13:24:32 +04:00
|
|
|
close(pm2);
|
|
|
|
munmap(map, PAGE_SIZE);
|
|
|
|
|
|
|
|
if (pmap & PME_SOFT_DIRTY) {
|
|
|
|
pr_info("Dirty track supported on kernel\n");
|
2014-11-10 10:47:42 +04:00
|
|
|
kdat.has_dirty_track = true;
|
2014-01-16 14:43:00 +04:00
|
|
|
} else {
|
2013-07-02 09:15:50 +04:00
|
|
|
pr_info("Dirty tracking support is OFF\n");
|
2014-01-16 14:43:00 +04:00
|
|
|
if (opts.track_mem) {
|
|
|
|
pr_err("Tracking memory is not available\n");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
2013-04-15 13:24:32 +04:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-10-04 16:18:24 +04:00
|
|
|
/*
|
|
|
|
* Strictly speaking, if there is a machine with huge amount
|
|
|
|
* of memory, we're allowed to send up to 4M and read up to
|
|
|
|
* 6M of tcp data at once. But we will figure out precise size
|
|
|
|
* of a limit a bit later when restore starts.
|
|
|
|
*
|
|
|
|
* Meanwhile set it up to 2M and 3M, which is safe enough to
|
|
|
|
* proceed without errors.
|
|
|
|
*/
|
|
|
|
|
|
|
|
static int tcp_read_sysctl_limits(void)
|
|
|
|
{
|
|
|
|
u32 vect[2][3] = { };
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
struct sysctl_req req[] = {
|
|
|
|
{ "net/ipv4/tcp_wmem", &vect[0], CTL_U32A(ARRAY_SIZE(vect[0])) },
|
|
|
|
{ "net/ipv4/tcp_rmem", &vect[1], CTL_U32A(ARRAY_SIZE(vect[1])) },
|
|
|
|
{ },
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Lets figure out which exactly amount of memory is
|
|
|
|
* availabe for send/read queues on restore.
|
|
|
|
*/
|
|
|
|
ret = sysctl_op(req, CTL_READ);
|
2013-10-11 17:26:34 +04:00
|
|
|
if (ret) {
|
|
|
|
pr_warn("TCP mem sysctls are not available. Using defaults.\n");
|
|
|
|
goto out;
|
|
|
|
}
|
2013-10-04 16:18:24 +04:00
|
|
|
|
2014-11-10 10:47:42 +04:00
|
|
|
kdat.tcp_max_wshare = min(kdat.tcp_max_wshare, (int)vect[0][2]);
|
|
|
|
kdat.tcp_max_rshare = min(kdat.tcp_max_rshare, (int)vect[1][2]);
|
2013-10-04 16:18:24 +04:00
|
|
|
|
2014-11-10 10:47:42 +04:00
|
|
|
if (kdat.tcp_max_wshare < 128 || kdat.tcp_max_rshare < 128)
|
2013-10-04 16:18:24 +04:00
|
|
|
pr_warn("The memory limits for TCP queues are suspiciously small\n");
|
2013-10-11 17:26:34 +04:00
|
|
|
out:
|
2014-11-10 10:47:42 +04:00
|
|
|
pr_debug("TCP queue memory limits are %d:%d\n", kdat.tcp_max_wshare, kdat.tcp_max_rshare);
|
2013-10-04 16:18:24 +04:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-01-30 14:31:39 +04:00
|
|
|
/* The page frame number (PFN) is constant for the zero page */
|
|
|
|
static int init_zero_page_pfn()
|
|
|
|
{
|
|
|
|
void *addr;
|
2014-01-30 23:34:53 +04:00
|
|
|
int ret;
|
2014-01-30 14:31:39 +04:00
|
|
|
|
|
|
|
addr = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
|
|
|
if (addr == MAP_FAILED) {
|
|
|
|
pr_perror("Unable to map zero page");
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (*((int *) addr) != 0) {
|
|
|
|
BUG();
|
2014-01-30 23:34:53 +04:00
|
|
|
return -1;
|
2014-01-30 14:31:39 +04:00
|
|
|
}
|
|
|
|
|
2014-11-10 10:47:42 +04:00
|
|
|
ret = vaddr_to_pfn((unsigned long)addr, &kdat.zero_page_pfn);
|
2014-01-30 23:34:53 +04:00
|
|
|
munmap(addr, PAGE_SIZE);
|
2014-01-30 14:31:39 +04:00
|
|
|
|
2014-11-10 10:47:42 +04:00
|
|
|
if (kdat.zero_page_pfn == 0)
|
2014-01-30 23:34:53 +04:00
|
|
|
ret = -1;
|
2014-01-30 14:31:39 +04:00
|
|
|
|
2014-01-30 23:34:53 +04:00
|
|
|
return ret;
|
2014-01-30 14:31:39 +04:00
|
|
|
}
|
|
|
|
|
2014-08-09 09:22:00 +04:00
|
|
|
int get_last_cap(void)
|
|
|
|
{
|
|
|
|
struct sysctl_req req[] = {
|
2014-11-10 10:47:42 +04:00
|
|
|
{ "kernel/cap_last_cap", &kdat.last_cap, CTL_U32 },
|
2014-08-09 09:22:00 +04:00
|
|
|
{ },
|
|
|
|
};
|
|
|
|
|
|
|
|
return sysctl_op(req, CTL_READ);
|
|
|
|
}
|
|
|
|
|
2014-10-14 13:10:53 +04:00
|
|
|
static bool kerndat_has_memfd_create(void)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
ret = sys_memfd_create(NULL, 0);
|
|
|
|
|
|
|
|
if (ret == -ENOSYS)
|
2014-11-10 10:47:42 +04:00
|
|
|
kdat.has_memfd = false;
|
2014-10-14 13:10:53 +04:00
|
|
|
else if (ret == -EFAULT)
|
2014-11-10 10:47:42 +04:00
|
|
|
kdat.has_memfd = true;
|
2014-10-14 13:10:53 +04:00
|
|
|
else {
|
|
|
|
pr_err("Unexpected error %d from memfd_create(NULL, 0)\n", ret);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-04-15 13:02:09 +04:00
|
|
|
/*
 * Gather kernel facts needed on the dump side. Probes run in order;
 * the first failure short-circuits and its code is returned.
 */
int kerndat_init(void)
{
	int ret;

	if ((ret = kerndat_get_shmemdev()))
		return ret;
	if ((ret = kerndat_get_dirty_track()))
		return ret;
	if ((ret = init_zero_page_pfn()))
		return ret;

	return get_last_cap();
}
|
2013-10-11 17:38:57 +04:00
|
|
|
|
|
|
|
/*
 * Gather kernel facts needed on the restore side. Probes run in
 * order; the first failure short-circuits and its code is returned.
 */
int kerndat_init_rst(void)
{
	int ret;

	/*
	 * Read TCP sysctls before anything else,
	 * since the limits we're interested in are
	 * not available inside namespaces.
	 */
	if ((ret = tcp_read_sysctl_limits()))
		return ret;
	if ((ret = get_last_cap()))
		return ret;

	return kerndat_has_memfd_create();
}
|