2012-12-24 15:36:14 +04:00
|
|
|
#ifndef __CR_IMAGE_H__
|
|
|
|
#define __CR_IMAGE_H__
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2013-04-01 19:27:34 +04:00
|
|
|
#include <stdbool.h>
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2013-04-01 19:27:34 +04:00
|
|
|
#include "compiler.h"
|
2013-11-06 17:21:13 +04:00
|
|
|
#include "servicefd.h"
|
2013-05-28 22:12:06 +04:00
|
|
|
#include "image-desc.h"
|
2014-03-14 17:57:00 +04:00
|
|
|
#include "fcntl.h"
|
2013-03-05 02:24:30 +04:00
|
|
|
#include "magic.h"
|
2014-09-29 12:50:13 +04:00
|
|
|
#include "bfd.h"
|
|
|
|
#include "bug.h"
|
2012-08-02 08:26:43 +04:00
|
|
|
|
2015-04-30 09:57:49 +03:00
|
|
|
/* Page size used for images: 64K when built for ppc64, 4K otherwise. */
#if defined(_ARCH_PPC64)
# define PAGE_IMAGE_SIZE	65536
#else
# define PAGE_IMAGE_SIZE	4096
#endif /* _ARCH_PPC64 */

/* NOTE(review): presumably per-page kind markers -- confirm against users. */
#define PAGE_RSS	1
#define PAGE_ANON	2
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2012-04-13 17:54:36 +04:00
|
|
|
/*
 * Top bit set in the tgt id means we've remapped
 * to a ghost file.
 *
 * The mask is built from an unsigned 1: shifting a signed 1 into
 * bit 31 overflows int, which is undefined behavior in C.
 */
#define REMAP_GHOST	(1U << 31)
|
|
|
|
|
2013-09-10 12:24:11 +04:00
|
|
|
/*
|
|
|
|
* By default, when dumping a unix socket, we should dump its peer
|
|
|
|
* as well. Which in turn means, we should dump the task(s) that have
|
|
|
|
* this peer opened.
|
|
|
|
*
|
|
|
|
* Sometimes, we can break this rule and dump only one end of the
|
|
|
|
* unix sockets pair, and on restore time connect() this end back to
|
|
|
|
* its peer.
|
|
|
|
*
|
|
|
|
* So, to resolve this situation we mark the peers we don't dump
|
|
|
|
* as "external" and require the --ext-unix-sk option.
|
|
|
|
*/
|
|
|
|
|
2012-04-19 11:48:00 +04:00
|
|
|
/* Unix socket flags; every flag occupies its own bit. */
#define USK_EXTERN	0x1	/* bit 0 */
#define USK_SERVICE	0x2	/* bit 1 */
#define USK_CALLBACK	0x4	/* bit 2 */
#define USK_INHERIT	0x8	/* bit 3 */
|
2012-04-19 11:48:00 +04:00
|
|
|
|
2015-02-25 18:42:00 +03:00
|
|
|
/*
|
|
|
|
* VMA_AREA status:
|
|
|
|
*
|
|
|
|
* - none
|
|
|
|
* VmaEntry is just allocated and has not been used
|
|
|
|
* for anything yet
|
|
|
|
* - regular
|
|
|
|
* VmaEntry represent some memory area which should be
|
|
|
|
* dumped and restored; this is a general sign that we
|
|
|
|
* should not skip the area content from processing in
|
|
|
|
* compare with special areas such as vsyscall
|
|
|
|
* - stack
|
|
|
|
* the memory area is used in application stack so we
|
|
|
|
* should be careful about guard page here
|
|
|
|
* - vsyscall
|
|
|
|
* special memory area injected into the task memory
|
|
|
|
* space by the kernel itself, represent virtual syscall
|
|
|
|
* implementation and it is specific to every kernel version,
|
|
|
|
* its contents should not be dumped ever
|
|
|
|
* - vdso,vvar
|
|
|
|
* the vDSO area, it might require additional memory
|
|
|
|
* contents modification especially when tasks are
|
|
|
|
* migrating between different kernel versions
|
|
|
|
* - heap
|
|
|
|
* "heap" area in application, currently for information only
|
|
|
|
* - file private
|
|
|
|
* stands for privately memory mapped files
|
|
|
|
* - file shared
|
|
|
|
* stands for shared memory mapped files
|
|
|
|
* - anon shared
|
|
|
|
* represent shared anonymous memory areas
|
|
|
|
* - anon private
|
|
|
|
* represent private anonymous memory areas
|
|
|
|
* - SysV IPC
|
|
|
|
* IPC shared memory area
|
|
|
|
* - socket
|
|
|
|
* memory map for socket
|
|
|
|
* - AIO ring
|
|
|
|
* memory area serves AIO buffers
|
|
|
|
* - unsupported
|
|
|
|
* stands for any unknown memory areas, usually means
|
|
|
|
* we don't know how to work with it and should stop
|
|
|
|
* processing exiting with error; while the rest of bits
|
|
|
|
* are part of image ABI, this particular one must never
|
|
|
|
* be used in image.
|
|
|
|
*/
|
2011-11-15 13:37:17 +04:00
|
|
|
/*
 * VMA status bits. Each status is a single bit so that several can be
 * combined in one status word; VMA_AREA_NONE is the all-zero value.
 * No name is assigned to bit 4 in this header.
 */
#define VMA_AREA_NONE		(0 << 0)
#define VMA_AREA_REGULAR	(1 << 0)
#define VMA_AREA_STACK		(1 << 1)
#define VMA_AREA_VSYSCALL	(1 << 2)
#define VMA_AREA_VDSO		(1 << 3)
#define VMA_AREA_HEAP		(1 << 5)

#define VMA_FILE_PRIVATE	(1 << 6)
#define VMA_FILE_SHARED		(1 << 7)
#define VMA_ANON_SHARED		(1 << 8)
#define VMA_ANON_PRIVATE	(1 << 9)

#define VMA_AREA_SYSVIPC	(1 << 10)
#define VMA_AREA_SOCKET		(1 << 11)
#define VMA_AREA_VVAR		(1 << 12)
#define VMA_AREA_AIORING	(1 << 13)

/*
 * Bit 31 is built from an unsigned 1: shifting a signed 1 into the
 * sign bit overflows int, which is undefined behavior in C.
 */
#define VMA_UNSUPP		(1U << 31)
|
2014-05-19 23:02:00 +04:00
|
|
|
|
2012-01-27 21:43:32 +04:00
|
|
|
/* NOTE(review): presumably the number of 32-bit words per capability set -- confirm. */
#define CR_CAP_SIZE 2
|
|
|
|
|
2011-10-01 13:24:34 +04:00
|
|
|
/* NOTE(review): presumably mirrors the kernel's task comm buffer length -- confirm. */
#define TASK_COMM_LEN 16
|
2011-09-23 12:00:45 +04:00
|
|
|
|
crtools: Rewrite task/threads stopping engine is back
This commit brings the former "Rewrite task/threads stopping engine"
commit back. Handling it separately is too complex so better try
to handle it in-place.
Note some tests might fault, it's expected.
---
Stopping tasks with STOP and proceeding with SEIZE is actually excessive --
the SEIZE is enough. Moreover, just killing a task with STOP is also racy,
since task should be given some time to come to sleep before its proc
can be parsed.
Rewrite all this code to SEIZE task and all its threads from the very beginning.
With this we can distinguish stopped task state and migrate it properly (not
supported now, need to implement).
This thing however has one BIG problem -- after we SEIZE-d a task we should
seize
its threads, but we should do it in a loop -- reading /proc/pid/task and
seizing
them again and again, until the contents of this dir stops changing (not done
now).
Besides, after we seized a task and all its threads we cannot scan its children
list once -- task can get reparented to init and any task's child can call clone
with CLONE_PARENT flag thus repopulating the children list of the already seized
task (not done also)
This patch is ugly, yes, but splitting it doesn't help to review it much, sorry
:(
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
2012-02-01 19:45:31 +04:00
|
|
|
/*
 * Task state values. They are consecutive numbers, so they cannot be
 * OR-ed together as independent flags: TASK_STOPPED (3) has the same
 * bit pattern as TASK_ALIVE | TASK_DEAD.
 */
#define TASK_ALIVE	1
#define TASK_DEAD	2
#define TASK_STOPPED	3
#define TASK_HELPER	4
|
2012-01-22 20:24:04 +04:00
|
|
|
|
2013-11-05 12:33:04 +04:00
|
|
|
/* NOTE(review): presumably the name of a link to the parent image directory -- confirm. */
#define CR_PARENT_LINK "parent"
|
|
|
|
|
2013-01-11 18:16:25 +04:00
|
|
|
/* Boolean flags declared here and defined in another translation unit. */
extern bool fdinfo_per_id, ns_per_id, img_common_magic;
|
2013-01-11 18:16:25 +04:00
|
|
|
|
2015-04-08 16:37:28 +03:00
|
|
|
/*
 * Image open modes, expressed in terms of the regular open(2) flags.
 * O_NOBUF and O_SERVICE are aliases of O_DIRECT and O_DIRECTORY.
 */
#define O_NOBUF		(O_DIRECT)
#define O_SERVICE	(O_DIRECTORY)
#define O_DUMP		(O_TRUNC | O_CREAT | O_WRONLY)
#define O_SHOW		(O_NOBUF | O_RDONLY)
#define O_RSTR		(O_RDONLY)
|
2013-11-06 14:34:30 +04:00
|
|
|
|
2014-09-29 12:48:53 +04:00
|
|
|
struct cr_img {
|
2015-03-10 21:47:27 +03:00
|
|
|
union {
|
|
|
|
struct bfd _x;
|
|
|
|
struct {
|
|
|
|
int fd; /* should be first to coincide with _x.fd */
|
|
|
|
int type;
|
|
|
|
unsigned long oflags;
|
|
|
|
char *path;
|
|
|
|
};
|
|
|
|
};
|
2014-09-29 12:48:53 +04:00
|
|
|
};
|
|
|
|
|
2015-03-06 18:02:43 +03:00
|
|
|
/*
 * Negative sentinel values compared against an image's descriptor by
 * empty_image() and lazy_image().
 */
#define EMPTY_IMG_FD	(-404)
#define LAZY_IMG_FD	(-505)
|
2015-03-06 18:02:43 +03:00
|
|
|
|
|
|
|
static inline bool empty_image(struct cr_img *img)
|
|
|
|
{
|
|
|
|
return img && img->_x.fd == EMPTY_IMG_FD;
|
|
|
|
}
|
|
|
|
|
2015-03-10 21:47:27 +03:00
|
|
|
static inline bool lazy_image(struct cr_img *img)
|
|
|
|
{
|
|
|
|
return img->_x.fd == LAZY_IMG_FD;
|
|
|
|
}
|
|
|
|
|
2015-03-16 16:51:00 +03:00
|
|
|
extern int open_image_lazy(struct cr_img *img);
|
|
|
|
|
2014-09-29 12:48:53 +04:00
|
|
|
static inline int img_raw_fd(struct cr_img *img)
|
|
|
|
{
|
2015-03-16 16:51:00 +03:00
|
|
|
if (lazy_image(img) && open_image_lazy(img))
|
|
|
|
return -1;
|
|
|
|
|
2014-09-29 12:50:13 +04:00
|
|
|
BUG_ON(bfd_buffered(&img->_x));
|
|
|
|
return img->_x.fd;
|
2014-09-29 12:48:53 +04:00
|
|
|
}
|
|
|
|
|
2013-11-15 23:04:27 +04:00
|
|
|
/* Open and close the image directory; implemented outside this header. */
int open_image_dir(char *dir);
void close_image_dir(void);
|
|
|
|
|
2014-09-29 12:48:53 +04:00
|
|
|
/* Open an image of the given type relative to the directory descriptor @dfd. */
struct cr_img *open_image_at(int dfd, int type, unsigned long flags, ...);

/* Shorthand for open_image_at() with -1 passed as the directory descriptor. */
#define open_image(typ, flags, ...) open_image_at(-1, typ, flags, ##__VA_ARGS__)

int open_image_lazy(struct cr_img *img);

struct cr_img *open_pages_image(unsigned long flags, struct cr_img *pmi);
struct cr_img *open_pages_image_at(int dfd, unsigned long flags, struct cr_img *pmi);

void up_page_ids_base(void);
|
2013-11-06 14:34:30 +04:00
|
|
|
|
2014-09-29 12:48:53 +04:00
|
|
|
struct cr_img *img_from_fd(int fd); /* for cr-show mostly */

/*
 * Buffer-level image I/O.
 *
 * The write_img(), read_img_eof() and read_img() wrappers pass
 * sizeof(*(ptr)) as the size, so @ptr must point to a complete object
 * type (not void * and not a decayed array of unknown length).
 */
int write_img_buf(struct cr_img *img, const void *ptr, int size);
#define write_img(img, ptr)	write_img_buf((img), (ptr), sizeof(*(ptr)))

int read_img_buf_eof(struct cr_img *img, void *ptr, int size);
#define read_img_eof(img, ptr)	read_img_buf_eof((img), (ptr), sizeof(*(ptr)))

int read_img_buf(struct cr_img *img, void *ptr, int size);
#define read_img(img, ptr)	read_img_buf((img), (ptr), sizeof(*(ptr)))

int read_img_str(struct cr_img *img, char **pstr, int size);

void close_image(struct cr_img *img);
|
2014-09-29 12:47:21 +04:00
|
|
|
|
2012-12-25 22:40:24 +04:00
|
|
|
#endif /* __CR_IMAGE_H__ */
|