2012-04-28 17:29:14 +04:00
|
|
|
#include <netinet/tcp.h>
|
2012-04-28 17:46:12 +04:00
|
|
|
#include <sys/ioctl.h>
|
|
|
|
#include <linux/sockios.h>
|
|
|
|
#include <unistd.h>
|
2012-05-29 20:11:00 +04:00
|
|
|
#include <stdlib.h>
|
2012-09-17 20:02:57 +04:00
|
|
|
#include <sys/mman.h>
|
|
|
|
#include <string.h>
|
2012-04-28 17:46:12 +04:00
|
|
|
|
2013-11-06 17:21:11 +04:00
|
|
|
#include "cr_options.h"
|
2012-04-28 17:46:12 +04:00
|
|
|
#include "util.h"
|
|
|
|
#include "list.h"
|
|
|
|
#include "log.h"
|
2013-01-09 17:02:47 +04:00
|
|
|
#include "asm/types.h"
|
2012-04-28 17:46:12 +04:00
|
|
|
#include "files.h"
|
2012-04-28 17:29:14 +04:00
|
|
|
#include "sockets.h"
|
|
|
|
#include "sk-inet.h"
|
2012-04-28 17:46:12 +04:00
|
|
|
#include "netfilter.h"
|
|
|
|
#include "image.h"
|
2013-01-17 18:14:55 +04:00
|
|
|
#include "namespaces.h"
|
2013-07-05 15:04:57 +04:00
|
|
|
#include "xmalloc.h"
|
2013-05-20 16:02:14 +04:00
|
|
|
#include "config.h"
|
2013-09-19 12:16:07 +04:00
|
|
|
#include "cr-show.h"
|
2013-10-04 16:18:24 +04:00
|
|
|
#include "kerndat.h"
|
2015-06-25 15:36:44 +03:00
|
|
|
#include "rst-malloc.h"
|
2012-04-28 17:46:12 +04:00
|
|
|
|
2012-07-13 21:05:00 +04:00
|
|
|
#include "protobuf.h"
|
|
|
|
#include "protobuf/tcp-stream.pb-c.h"
|
|
|
|
|
2015-01-12 15:57:00 +03:00
|
|
|
#ifndef SIOCOUTQNSD
|
|
|
|
/* MAO - Define SIOCOUTQNSD ioctl if we don't have it */
|
|
|
|
#define SIOCOUTQNSD 0x894B
|
|
|
|
#endif
|
|
|
|
|
2013-05-20 16:02:14 +04:00
|
|
|
#ifndef CONFIG_HAS_TCP_REPAIR
|
|
|
|
/*
|
|
|
|
* It's been reported that both tcp_repair_opt
|
|
|
|
* and TCP_ enum already shipped in netinet/tcp.h
|
|
|
|
* system header by some distros thus we need a
|
|
|
|
* test if we can use predefined ones or provide
|
|
|
|
* our own.
|
|
|
|
*/
|
2012-04-28 17:46:12 +04:00
|
|
|
struct tcp_repair_opt {
|
|
|
|
u32 opt_code;
|
|
|
|
u32 opt_val;
|
|
|
|
};
|
|
|
|
|
|
|
|
enum {
|
|
|
|
TCP_NO_QUEUE,
|
|
|
|
TCP_RECV_QUEUE,
|
|
|
|
TCP_SEND_QUEUE,
|
|
|
|
TCP_QUEUES_NR,
|
|
|
|
};
|
2013-05-20 16:02:14 +04:00
|
|
|
#endif
|
2012-04-28 17:46:12 +04:00
|
|
|
|
2013-02-14 20:27:39 +04:00
|
|
|
#ifndef TCP_TIMESTAMP
|
|
|
|
#define TCP_TIMESTAMP 24
|
|
|
|
#endif
|
|
|
|
|
2012-04-28 17:46:12 +04:00
|
|
|
#ifndef TCPOPT_SACK_PERM
|
|
|
|
#define TCPOPT_SACK_PERM TCPOPT_SACK_PERMITTED
|
|
|
|
#endif
|
|
|
|
|
2012-09-17 20:05:32 +04:00
|
|
|
/* Dump side: inet_sk_desc entries (linked via ->rlist) whose sockets were put into repair mode */
static LIST_HEAD(cpt_tcp_repair_sockets);
|
|
|
|
/* Restore side: inet_sk_info entries (linked via ->rlist) registered by tcp_locked_conn_add() */
static LIST_HEAD(rst_tcp_repair_sockets);
|
2012-04-28 17:46:12 +04:00
|
|
|
|
|
|
|
static int tcp_repair_on(int fd)
|
|
|
|
{
|
|
|
|
int ret, aux = 1;
|
|
|
|
|
|
|
|
ret = setsockopt(fd, SOL_TCP, TCP_REPAIR, &aux, sizeof(aux));
|
|
|
|
if (ret < 0)
|
2012-05-03 15:21:37 +04:00
|
|
|
pr_perror("Can't turn TCP repair mode ON");
|
2012-04-28 17:46:12 +04:00
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2012-11-20 17:17:30 +04:00
|
|
|
static int refresh_inet_sk(struct inet_sk_desc *sk)
|
|
|
|
{
|
|
|
|
int size;
|
2012-11-20 17:17:51 +04:00
|
|
|
struct tcp_info info;
|
|
|
|
|
|
|
|
if (dump_opt(sk->rfd, SOL_TCP, TCP_INFO, &info)) {
|
2013-04-12 13:00:05 -07:00
|
|
|
pr_perror("Failed to obtain TCP_INFO");
|
2012-11-20 17:17:51 +04:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (info.tcpi_state) {
|
|
|
|
case TCP_ESTABLISHED:
|
|
|
|
case TCP_CLOSE:
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
pr_err("Unknown state %d\n", sk->state);
|
|
|
|
return -1;
|
|
|
|
}
|
2012-11-20 17:17:30 +04:00
|
|
|
|
|
|
|
if (ioctl(sk->rfd, SIOCOUTQ, &size) == -1) {
|
|
|
|
pr_perror("Unable to get size of snd queue");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
sk->wqlen = size;
|
|
|
|
|
2013-11-14 01:01:56 +04:00
|
|
|
if (ioctl(sk->rfd, SIOCOUTQNSD, &size) == -1) {
|
|
|
|
pr_perror("Unable to get size of unsent data");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
sk->uwqlen = size;
|
|
|
|
|
2012-11-20 17:17:30 +04:00
|
|
|
if (ioctl(sk->rfd, SIOCINQ, &size) == -1) {
|
|
|
|
pr_perror("Unable to get size of recv queue");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
sk->rqlen = size;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-04-28 17:46:12 +04:00
|
|
|
static int tcp_repair_establised(int fd, struct inet_sk_desc *sk)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
pr_info("\tTurning repair on for socket %x\n", sk->sd.ino);
|
|
|
|
/*
|
2013-05-09 10:58:04 -07:00
|
|
|
* Keep the socket open in criu till the very end. In
|
2012-04-28 17:46:12 +04:00
|
|
|
* case we close this fd after one task fd dumping and
|
|
|
|
* fail we'll have to turn repair mode off
|
|
|
|
*/
|
|
|
|
sk->rfd = dup(fd);
|
|
|
|
if (sk->rfd < 0) {
|
|
|
|
pr_perror("Can't save socket fd for repair");
|
|
|
|
goto err1;
|
|
|
|
}
|
|
|
|
|
2014-04-21 18:23:22 +04:00
|
|
|
if (!(root_ns_mask & CLONE_NEWNET)) {
|
2012-09-17 20:05:49 +04:00
|
|
|
ret = nf_lock_connection(sk);
|
|
|
|
if (ret < 0)
|
|
|
|
goto err2;
|
|
|
|
}
|
2012-04-28 17:46:12 +04:00
|
|
|
|
|
|
|
ret = tcp_repair_on(sk->rfd);
|
|
|
|
if (ret < 0)
|
|
|
|
goto err3;
|
|
|
|
|
2012-09-17 20:05:32 +04:00
|
|
|
list_add_tail(&sk->rlist, &cpt_tcp_repair_sockets);
|
2012-11-20 17:17:30 +04:00
|
|
|
|
|
|
|
ret = refresh_inet_sk(sk);
|
|
|
|
if (ret < 0)
|
|
|
|
goto err1;
|
|
|
|
|
2012-04-28 17:46:12 +04:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
err3:
|
2014-04-21 18:23:22 +04:00
|
|
|
if (!(root_ns_mask & CLONE_NEWNET))
|
2012-09-17 20:05:49 +04:00
|
|
|
nf_unlock_connection(sk);
|
2012-04-28 17:46:12 +04:00
|
|
|
err2:
|
|
|
|
close(sk->rfd);
|
|
|
|
err1:
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void tcp_unlock_one(struct inet_sk_desc *sk)
|
|
|
|
{
|
2012-05-18 15:39:00 +04:00
|
|
|
int ret;
|
2012-04-28 17:46:12 +04:00
|
|
|
|
|
|
|
list_del(&sk->rlist);
|
|
|
|
|
2014-04-21 18:23:22 +04:00
|
|
|
if (!(root_ns_mask & CLONE_NEWNET)) {
|
2013-10-31 20:12:55 +04:00
|
|
|
ret = nf_unlock_connection(sk);
|
|
|
|
if (ret < 0)
|
|
|
|
pr_perror("Failed to unlock TCP connection");
|
|
|
|
}
|
2012-04-28 17:46:12 +04:00
|
|
|
|
|
|
|
tcp_repair_off(sk->rfd);
|
2013-07-16 14:48:24 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* tcp_repair_off modifies SO_REUSEADDR so
|
|
|
|
* don't forget to restore original value.
|
|
|
|
*/
|
|
|
|
restore_opt(sk->rfd, SOL_SOCKET, SO_REUSEADDR, &sk->cpt_reuseaddr);
|
|
|
|
|
2012-04-28 17:46:12 +04:00
|
|
|
close(sk->rfd);
|
|
|
|
}
|
2012-04-28 17:29:14 +04:00
|
|
|
|
2012-09-17 20:07:03 +04:00
|
|
|
void cpt_unlock_tcp_connections(void)
|
2012-04-28 17:38:46 +04:00
|
|
|
{
|
2012-04-28 17:46:12 +04:00
|
|
|
struct inet_sk_desc *sk, *n;
|
|
|
|
|
2012-09-17 20:05:32 +04:00
|
|
|
list_for_each_entry_safe(sk, n, &cpt_tcp_repair_sockets, rlist)
|
2012-04-28 17:46:12 +04:00
|
|
|
tcp_unlock_one(sk);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * TCP queues sequences and their relations to the code below
 *
 *       output queue
 * net <----------------------------- sk
 *        ^       ^       ^    seq >>
 *        snd_una snd_nxt write_seq
 *
 *                     input  queue
 * net -----------------------------> sk
 *     << seq   ^       ^
 *              rcv_nxt copied_seq
 *
 *
 * inq_len  = rcv_nxt - copied_seq = SIOCINQ
 * outq_len = write_seq - snd_una  = SIOCOUTQ
 * inq_seq  = rcv_nxt
 * outq_seq = write_seq
 *
 * On restore the kernel moves the sequence we configure with setsockopt
 * forward as the queue data is put back, thus restore_tcp_seqs first
 * rewinds each sequence by the corresponding _len value (seq - len).
 *
 */
|
|
|
|
|
|
|
|
/*
 * Fetch the sequence number and the contents of one repair queue.
 *
 * @sk:       socket in repair mode
 * @queue_id: queue to select with TCP_REPAIR_QUEUE
 * @seq:      receives the TCP_QUEUE_SEQ value
 * @len:      number of bytes expected in the queue
 * @bufp:     receives a buffer with the peeked data, or NULL when
 *            @len is 0; it is not written on failure
 *
 * Returns 0 on success, -1 on failure.
 */
static int tcp_stream_get_queue(int sk, int queue_id,
		u32 *seq, u32 len, char **bufp)
{
	int qid = queue_id;
	int nread;
	socklen_t optlen;
	char *data = NULL;

	pr_debug("\tSet repair queue %d\n", queue_id);
	if (setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &qid, sizeof(qid)) < 0)
		goto sockopt_failed;

	pr_debug("\tGet queue seq\n");
	optlen = sizeof(*seq);
	if (getsockopt(sk, SOL_TCP, TCP_QUEUE_SEQ, seq, &optlen) < 0)
		goto sockopt_failed;

	pr_info("\t`- seq %u len %u\n", *seq, len);

	if (len) {
		/*
		 * One extra byte is requested so that a queue holding
		 * more than len bytes is detected as a mismatch.
		 */
		data = xmalloc(len + 1);
		if (!data)
			return -1;

		pr_debug("\tReading queue (%d bytes)\n", len);
		nread = recv(sk, data, len + 1, MSG_PEEK | MSG_DONTWAIT);
		if (nread != len) {
			pr_perror("\trecv failed (%d, want %d, errno %d)", nread, len, errno);
			xfree(data);
			return -1;
		}
	}

	*bufp = data;
	return 0;

sockopt_failed:
	pr_perror("\tsockopt failed");
	return -1;
}
|
|
|
|
|
2012-07-13 21:05:00 +04:00
|
|
|
/*
 * Collect the TCP options of @sk into @tse: the TCP_MAXSEG value, the
 * option mask reported by TCP_INFO, both window scale factors (when
 * TCPI_OPT_WSCALE is set) and the TCP_TIMESTAMP value (when
 * TCPI_OPT_TIMESTAMPS is set).
 *
 * Returns 0 on success, -1 if any getsockopt() call fails.
 */
static int tcp_stream_get_options(int sk, TcpStreamEntry *tse)
{
	struct tcp_info ti;
	socklen_t optlen;
	int tstamp;

	optlen = sizeof(ti);
	if (getsockopt(sk, SOL_TCP, TCP_INFO, &ti, &optlen) < 0)
		goto sockopt_failed;

	optlen = sizeof(tse->mss_clamp);
	if (getsockopt(sk, SOL_TCP, TCP_MAXSEG, &tse->mss_clamp, &optlen) < 0)
		goto sockopt_failed;

	tse->opt_mask = ti.tcpi_options;

	if (ti.tcpi_options & TCPI_OPT_WSCALE) {
		tse->snd_wscale = ti.tcpi_snd_wscale;
		tse->rcv_wscale = ti.tcpi_rcv_wscale;
		tse->has_rcv_wscale = true;
	}

	if (ti.tcpi_options & TCPI_OPT_TIMESTAMPS) {
		optlen = sizeof(tstamp);
		if (getsockopt(sk, SOL_TCP, TCP_TIMESTAMP, &tstamp, &optlen) < 0)
			goto sockopt_failed;

		tse->has_timestamp = true;
		tse->timestamp = tstamp;
	}

	pr_info("\toptions: mss_clamp %x wscale %x tstamp %d sack %d\n",
			(int)tse->mss_clamp,
			ti.tcpi_options & TCPI_OPT_WSCALE ? (int)tse->snd_wscale : -1,
			!!(ti.tcpi_options & TCPI_OPT_TIMESTAMPS),
			!!(ti.tcpi_options & TCPI_OPT_SACK));

	return 0;

sockopt_failed:
	pr_perror("\tsockopt failed");
	return -1;
}
|
|
|
|
|
|
|
|
static int dump_tcp_conn_state(struct inet_sk_desc *sk)
|
|
|
|
{
|
2014-09-29 12:48:53 +04:00
|
|
|
int ret, aux;
|
|
|
|
struct cr_img *img;
|
2012-07-13 21:05:00 +04:00
|
|
|
TcpStreamEntry tse = TCP_STREAM_ENTRY__INIT;
|
2012-04-28 17:46:12 +04:00
|
|
|
char *in_buf, *out_buf;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Read queue
|
|
|
|
*/
|
|
|
|
|
|
|
|
pr_info("Reading inq for socket\n");
|
|
|
|
tse.inq_len = sk->rqlen;
|
|
|
|
ret = tcp_stream_get_queue(sk->rfd, TCP_RECV_QUEUE,
|
|
|
|
&tse.inq_seq, tse.inq_len, &in_buf);
|
|
|
|
if (ret < 0)
|
|
|
|
goto err_in;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Write queue
|
|
|
|
*/
|
|
|
|
|
|
|
|
pr_info("Reading outq for socket\n");
|
|
|
|
tse.outq_len = sk->wqlen;
|
2013-11-14 01:01:56 +04:00
|
|
|
tse.unsq_len = sk->uwqlen;
|
|
|
|
tse.has_unsq_len = true;
|
2012-04-28 17:46:12 +04:00
|
|
|
ret = tcp_stream_get_queue(sk->rfd, TCP_SEND_QUEUE,
|
|
|
|
&tse.outq_seq, tse.outq_len, &out_buf);
|
|
|
|
if (ret < 0)
|
|
|
|
goto err_out;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Initial options
|
|
|
|
*/
|
|
|
|
|
2013-04-12 13:00:05 -07:00
|
|
|
pr_info("Reading options for socket\n");
|
2012-04-28 17:46:12 +04:00
|
|
|
ret = tcp_stream_get_options(sk->rfd, &tse);
|
|
|
|
if (ret < 0)
|
|
|
|
goto err_opt;
|
|
|
|
|
2013-09-25 11:43:02 +04:00
|
|
|
/*
|
|
|
|
* TCP socket options
|
|
|
|
*/
|
|
|
|
|
|
|
|
if (dump_opt(sk->rfd, SOL_TCP, TCP_NODELAY, &aux))
|
|
|
|
goto err_opt;
|
|
|
|
|
|
|
|
if (aux) {
|
|
|
|
tse.has_nodelay = true;
|
|
|
|
tse.nodelay = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (dump_opt(sk->rfd, SOL_TCP, TCP_CORK, &aux))
|
|
|
|
goto err_opt;
|
|
|
|
|
|
|
|
if (aux) {
|
|
|
|
tse.has_cork = true;
|
|
|
|
tse.cork = true;
|
|
|
|
}
|
|
|
|
|
2012-04-28 17:46:12 +04:00
|
|
|
/*
|
|
|
|
* Push the stuff to image
|
|
|
|
*/
|
|
|
|
|
2014-09-29 12:48:53 +04:00
|
|
|
img = open_image(CR_FD_TCP_STREAM, O_DUMP, sk->sd.ino);
|
|
|
|
if (!img)
|
2012-04-28 17:46:12 +04:00
|
|
|
goto err_img;
|
|
|
|
|
2014-09-29 12:48:53 +04:00
|
|
|
ret = pb_write_one(img, &tse, PB_TCP_STREAM);
|
2012-04-28 17:46:12 +04:00
|
|
|
if (ret < 0)
|
|
|
|
goto err_iw;
|
|
|
|
|
|
|
|
if (in_buf) {
|
2014-09-29 12:48:53 +04:00
|
|
|
ret = write_img_buf(img, in_buf, tse.inq_len);
|
2012-04-28 17:46:12 +04:00
|
|
|
if (ret < 0)
|
|
|
|
goto err_iw;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (out_buf) {
|
2014-09-29 12:48:53 +04:00
|
|
|
ret = write_img_buf(img, out_buf, tse.outq_len);
|
2012-04-28 17:46:12 +04:00
|
|
|
if (ret < 0)
|
|
|
|
goto err_iw;
|
|
|
|
}
|
|
|
|
|
|
|
|
pr_info("Done\n");
|
|
|
|
err_iw:
|
2014-09-29 12:48:53 +04:00
|
|
|
close_image(img);
|
2012-04-28 17:46:12 +04:00
|
|
|
err_img:
|
|
|
|
err_opt:
|
|
|
|
xfree(out_buf);
|
|
|
|
err_out:
|
|
|
|
xfree(in_buf);
|
|
|
|
err_in:
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
int dump_one_tcp(int fd, struct inet_sk_desc *sk)
|
|
|
|
{
|
2012-10-30 22:25:57 +03:00
|
|
|
if (sk->state != TCP_ESTABLISHED)
|
|
|
|
return 0;
|
|
|
|
|
2012-04-28 17:46:12 +04:00
|
|
|
pr_info("Dumping TCP connection\n");
|
|
|
|
|
|
|
|
if (tcp_repair_establised(fd, sk))
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
if (dump_tcp_conn_state(sk))
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Socket is left in repair mode, so that at the end it's just
|
|
|
|
* closed and the connection is silently terminated
|
|
|
|
*/
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Select repair queue @queue on socket @sk and set its sequence
 * number to @seq.
 *
 * Returns 0 on success, -1 if either setsockopt() call fails.
 */
static int set_tcp_queue_seq(int sk, int queue, u32 seq)
{
	int rc;

	pr_debug("\tSetting %d queue seq to %u\n", queue, seq);

	rc = setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &queue, sizeof(queue));
	if (rc < 0) {
		pr_perror("Can't set repair queue");
		return -1;
	}

	rc = setsockopt(sk, SOL_TCP, TCP_QUEUE_SEQ, &seq, sizeof(seq));
	if (rc < 0) {
		pr_perror("Can't set queue seq");
		return -1;
	}

	return 0;
}
|
|
|
|
|
2012-07-13 21:05:00 +04:00
|
|
|
/*
 * Set the sequence numbers of the receive and the send queue of @sk.
 * Each one is the value stored in @tse minus the stored length of the
 * respective queue.
 *
 * Returns 0 on success, -1 on failure.
 */
static int restore_tcp_seqs(int sk, TcpStreamEntry *tse)
{
	u32 inq_start = tse->inq_seq - tse->inq_len;
	u32 outq_start = tse->outq_seq - tse->outq_len;

	if (set_tcp_queue_seq(sk, TCP_RECV_QUEUE, inq_start) ||
	    set_tcp_queue_seq(sk, TCP_SEND_QUEUE, outq_start))
		return -1;

	return 0;
}
|
|
|
|
|
2014-09-29 12:48:53 +04:00
|
|
|
/*
 * Read @len bytes from @img and push them into socket @sk with
 * send(), repeating until everything is written. For TCP_RECV_QUEUE a
 * single send() is limited to kdat.tcp_max_rshare bytes.
 *
 * Returns 0 on success, -1 on failure.
 */
static int __send_tcp_queue(int sk, int queue, u32 len, struct cr_img *img)
{
	int sent, done;
	int result = -1;
	char *data;

	data = xmalloc(len);
	if (!data)
		return -1;

	if (read_img_buf(img, data, len) < 0)
		goto out;

	for (done = 0; len; done += sent, len -= sent) {
		int chunk;

		if (queue == TCP_RECV_QUEUE && len > kdat.tcp_max_rshare)
			chunk = kdat.tcp_max_rshare;
		else
			chunk = len;

		sent = send(sk, data + done, chunk, 0);
		if (sent <= 0) {
			pr_perror("Can't restore %d queue data (%d), want (%d:%d)",
					queue, sent, chunk, len);
			goto out;
		}
	}

	result = 0;
out:
	xfree(data);

	return result;
}
|
|
|
|
|
2014-09-29 12:48:53 +04:00
|
|
|
/*
 * Select repair queue @queue on @sk, then fill it with @len bytes
 * taken from @img.
 *
 * Returns 0 on success, -1 on failure.
 */
static int send_tcp_queue(int sk, int queue, u32 len, struct cr_img *img)
{
	int rc;

	pr_debug("\tRestoring TCP %d queue data %u bytes\n", queue, len);

	rc = setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &queue, sizeof(queue));
	if (rc < 0) {
		pr_perror("Can't set repair queue");
		return -1;
	}

	return __send_tcp_queue(sk, queue, len, img);
}
|
|
|
|
|
2015-02-16 13:18:32 +03:00
|
|
|
/*
 * Refill the queues of @sk from @img: the receive queue, the sent
 * part of the send queue (both in repair mode) and finally the unsent
 * part of the send queue, which is written with repair mode turned
 * off while @reuse_lock is held.
 *
 * Returns 0 on success, -1 on failure.
 */
static int restore_tcp_queues(int sk, TcpStreamEntry *tse, struct cr_img *img, mutex_t *reuse_lock)
{
	int result = -1;
	u32 nbytes;

	if (restore_prepare_socket(sk))
		return -1;

	nbytes = tse->inq_len;
	if (nbytes && send_tcp_queue(sk, TCP_RECV_QUEUE, nbytes, img))
		return -1;

	/*
	 * The write buffer consists of two parts: data that was sent
	 * but not yet acknowledged, and data that was never sent.
	 * The TCP stack has to know about the sent part, because
	 * acknowledgments may still arrive for it, so that part is
	 * restored in repair mode.
	 */
	nbytes = tse->outq_len - tse->unsq_len;
	if (nbytes && send_tcp_queue(sk, TCP_SEND_QUEUE, nbytes, img))
		return -1;

	/*
	 * The never-sent part has not been seen outside, so it is
	 * restored without any tricks, i.e. with repair mode off.
	 */
	nbytes = tse->unsq_len;

	mutex_lock(reuse_lock);
	tcp_repair_off(sk);

	if (nbytes && __send_tcp_queue(sk, TCP_SEND_QUEUE, nbytes, img))
		goto unlock;

	if (tcp_repair_on(sk))
		goto unlock;

	result = 0;
unlock:
	mutex_unlock(reuse_lock);

	return result;
}
|
|
|
|
|
2012-07-13 21:05:00 +04:00
|
|
|
/*
 * Re-apply the TCP options recorded in @tse to the socket @sk via
 * TCP_REPAIR_OPTIONS: SACK, window scaling and timestamps when their
 * bit is set in tse->opt_mask, plus the MSS clamp unconditionally.
 * The timestamp value itself is set afterwards with TCP_TIMESTAMP
 * when @tse carries one.
 *
 * Returns 0 on success, -1 on failure.
 */
static int restore_tcp_opts(int sk, TcpStreamEntry *tse)
{
	/* At most four entries are filled: SACK, WSCALE, TIMESTAMP, MSS */
	struct tcp_repair_opt opts[4];
	int onr = 0;

	pr_debug("\tRestoring TCP options\n");

	if (tse->opt_mask & TCPI_OPT_SACK) {
		pr_debug("\t\tWill turn SAK on\n");
		opts[onr++] = (struct tcp_repair_opt) {
			.opt_code = TCPOPT_SACK_PERM,
			.opt_val = 0,
		};
	}

	if (tse->opt_mask & TCPI_OPT_WSCALE) {
		pr_debug("\t\tWill set snd_wscale to %u\n", tse->snd_wscale);
		pr_debug("\t\tWill set rcv_wscale to %u\n", tse->rcv_wscale);
		/* Both scale factors are packed into one value */
		opts[onr++] = (struct tcp_repair_opt) {
			.opt_code = TCPOPT_WINDOW,
			.opt_val = tse->snd_wscale + (tse->rcv_wscale << 16),
		};
	}

	if (tse->opt_mask & TCPI_OPT_TIMESTAMPS) {
		pr_debug("\t\tWill turn timestamps on\n");
		opts[onr++] = (struct tcp_repair_opt) {
			.opt_code = TCPOPT_TIMESTAMP,
			.opt_val = 0,
		};
	}

	pr_debug("Will set mss clamp to %u\n", tse->mss_clamp);
	opts[onr++] = (struct tcp_repair_opt) {
		.opt_code = TCPOPT_MAXSEG,
		.opt_val = tse->mss_clamp,
	};

	if (setsockopt(sk, SOL_TCP, TCP_REPAIR_OPTIONS,
				opts, onr * sizeof(struct tcp_repair_opt)) < 0) {
		pr_perror("Can't repair options");
		return -1;
	}

	if (!tse->has_timestamp)
		return 0;

	if (setsockopt(sk, SOL_TCP, TCP_TIMESTAMP,
				&tse->timestamp, sizeof(tse->timestamp)) < 0) {
		pr_perror("Can't set timestamp");
		return -1;
	}

	return 0;
}
|
|
|
|
|
|
|
|
static int restore_tcp_conn_state(int sk, struct inet_sk_info *ii)
|
|
|
|
{
|
2014-09-29 12:48:53 +04:00
|
|
|
int aux;
|
|
|
|
struct cr_img *img;
|
2012-07-13 21:05:00 +04:00
|
|
|
TcpStreamEntry *tse;
|
2012-04-28 17:46:12 +04:00
|
|
|
|
2012-07-13 21:05:00 +04:00
|
|
|
pr_info("Restoring TCP connection id %x ino %x\n", ii->ie->id, ii->ie->ino);
|
2012-04-28 17:46:12 +04:00
|
|
|
|
2014-09-29 12:48:53 +04:00
|
|
|
img = open_image(CR_FD_TCP_STREAM, O_RSTR, ii->ie->ino);
|
|
|
|
if (!img)
|
2012-04-28 17:46:12 +04:00
|
|
|
goto err;
|
|
|
|
|
2014-09-29 12:48:53 +04:00
|
|
|
if (pb_read_one(img, &tse, PB_TCP_STREAM) < 0)
|
2012-04-28 17:46:12 +04:00
|
|
|
goto err_c;
|
|
|
|
|
2012-07-13 21:05:00 +04:00
|
|
|
if (restore_tcp_seqs(sk, tse))
|
2012-04-28 17:46:12 +04:00
|
|
|
goto err_c;
|
|
|
|
|
|
|
|
if (inet_bind(sk, ii))
|
|
|
|
goto err_c;
|
|
|
|
|
|
|
|
if (inet_connect(sk, ii))
|
|
|
|
goto err_c;
|
|
|
|
|
2012-07-13 21:05:00 +04:00
|
|
|
if (restore_tcp_opts(sk, tse))
|
2012-04-28 17:46:12 +04:00
|
|
|
goto err_c;
|
|
|
|
|
2015-02-16 13:18:32 +03:00
|
|
|
if (restore_tcp_queues(sk, tse, img, inet_get_reuseaddr_lock(ii)))
|
2012-04-28 17:46:12 +04:00
|
|
|
goto err_c;
|
|
|
|
|
2013-09-25 11:43:02 +04:00
|
|
|
if (tse->has_nodelay && tse->nodelay) {
|
|
|
|
aux = 1;
|
|
|
|
if (restore_opt(sk, SOL_TCP, TCP_NODELAY, &aux))
|
|
|
|
goto err_c;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (tse->has_cork && tse->cork) {
|
|
|
|
aux = 1;
|
|
|
|
if (restore_opt(sk, SOL_TCP, TCP_CORK, &aux))
|
|
|
|
goto err_c;
|
|
|
|
}
|
|
|
|
|
2012-07-13 21:05:00 +04:00
|
|
|
tcp_stream_entry__free_unpacked(tse, NULL);
|
2014-09-29 12:48:53 +04:00
|
|
|
close_image(img);
|
2012-04-28 17:46:12 +04:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
err_c:
|
2012-07-13 21:05:00 +04:00
|
|
|
tcp_stream_entry__free_unpacked(tse, NULL);
|
2014-09-29 12:48:53 +04:00
|
|
|
close_image(img);
|
2012-04-28 17:46:12 +04:00
|
|
|
err:
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2015-06-25 15:36:44 +03:00
|
|
|
/* rst_mem_cpos(RM_PRIVATE) value recorded by rst_tcp_socks_prep() before it allocates its entries */
unsigned long rst_tcp_socks_cpos;
|
|
|
|
/* Number of struct rst_tcp_sock entries allocated by rst_tcp_socks_prep() */
unsigned int rst_tcp_socks_nr = 0;
|
2012-09-17 20:02:57 +04:00
|
|
|
|
2015-06-25 15:36:44 +03:00
|
|
|
int rst_tcp_socks_prep(void)
|
2012-09-17 20:02:57 +04:00
|
|
|
{
|
2015-06-25 15:36:44 +03:00
|
|
|
struct inet_sk_info *ii;
|
2012-09-17 20:02:57 +04:00
|
|
|
|
2015-06-25 15:36:44 +03:00
|
|
|
rst_tcp_socks_cpos = rst_mem_cpos(RM_PRIVATE);
|
|
|
|
list_for_each_entry(ii, &rst_tcp_repair_sockets, rlist) {
|
|
|
|
struct rst_tcp_sock *rs;
|
2012-09-17 20:02:57 +04:00
|
|
|
|
2015-06-25 15:36:44 +03:00
|
|
|
rs = rst_mem_alloc(sizeof(*rs), RM_PRIVATE);
|
|
|
|
if (!rs)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
rs->sk = ii->sk_fd;
|
|
|
|
rs->reuseaddr = ii->ie->opts->reuseaddr;
|
|
|
|
rst_tcp_socks_nr++;
|
|
|
|
}
|
2013-07-05 15:04:57 +04:00
|
|
|
|
2012-09-17 20:02:57 +04:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-04-28 17:46:12 +04:00
|
|
|
/*
 * Restore one TCP connection on socket @fd: enter repair mode, then
 * rebuild the connection state described by @ii.
 *
 * Returns 0 on success, -1 on failure.
 */
int restore_one_tcp(int fd, struct inet_sk_info *ii)
{
	pr_info("Restoring TCP connection\n");

	if (tcp_repair_on(fd) || restore_tcp_conn_state(fd, ii))
		return -1;

	return 0;
}
|
|
|
|
|
|
|
|
void tcp_locked_conn_add(struct inet_sk_info *ii)
|
|
|
|
{
|
2012-09-17 20:05:32 +04:00
|
|
|
list_add_tail(&ii->rlist, &rst_tcp_repair_sockets);
|
2015-06-25 15:36:44 +03:00
|
|
|
ii->sk_fd = -1;
|
2012-04-28 17:38:46 +04:00
|
|
|
}
|
|
|
|
|
2012-09-17 20:07:03 +04:00
|
|
|
void rst_unlock_tcp_connections(void)
|
2012-04-28 17:38:46 +04:00
|
|
|
{
|
2012-04-28 17:46:12 +04:00
|
|
|
struct inet_sk_info *ii;
|
|
|
|
|
2013-10-31 20:12:55 +04:00
|
|
|
/* Network will be unlocked by network-unlock scripts */
|
2014-04-21 18:23:22 +04:00
|
|
|
if (root_ns_mask & CLONE_NEWNET)
|
2013-10-31 20:12:55 +04:00
|
|
|
return;
|
|
|
|
|
2012-09-17 20:05:32 +04:00
|
|
|
list_for_each_entry(ii, &rst_tcp_repair_sockets, rlist)
|
2012-04-28 17:46:12 +04:00
|
|
|
nf_unlock_connection_info(ii);
|
2012-04-28 17:38:46 +04:00
|
|
|
}
|
|
|
|
|
2013-02-14 20:27:55 +04:00
|
|
|
int check_tcp(void)
|
2012-05-03 15:21:37 +04:00
|
|
|
{
|
2013-02-14 20:27:55 +04:00
|
|
|
socklen_t optlen;
|
2012-05-03 15:21:37 +04:00
|
|
|
int sk, ret;
|
2013-02-14 20:27:55 +04:00
|
|
|
int val;
|
2012-05-03 15:21:37 +04:00
|
|
|
|
|
|
|
sk = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
|
|
|
|
if (sk < 0) {
|
2013-05-02 22:44:24 +04:00
|
|
|
pr_perror("Can't create TCP socket :(");
|
2012-05-03 15:21:37 +04:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = tcp_repair_on(sk);
|
2013-02-14 20:27:55 +04:00
|
|
|
if (ret)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
optlen = sizeof(val);
|
|
|
|
ret = getsockopt(sk, SOL_TCP, TCP_TIMESTAMP, &val, &optlen);
|
|
|
|
if (ret)
|
|
|
|
pr_perror("Can't get TCP_TIMESTAMP");
|
|
|
|
|
|
|
|
out:
|
2012-05-03 15:21:37 +04:00
|
|
|
close(sk);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
2013-09-19 12:16:07 +04:00
|
|
|
|
2014-09-29 12:48:53 +04:00
|
|
|
void show_tcp_stream(struct cr_img *img, void *obj)
|
2013-09-19 12:16:07 +04:00
|
|
|
{
|
|
|
|
TcpStreamEntry *e = obj;
|
|
|
|
if (opts.show_pages_content) {
|
|
|
|
pr_msg("In-queue:");
|
2014-09-29 12:48:53 +04:00
|
|
|
print_image_data(img, e->inq_len, 1);
|
2013-09-19 12:16:07 +04:00
|
|
|
pr_msg("Out-queue:");
|
2014-09-29 12:48:53 +04:00
|
|
|
print_image_data(img, e->outq_len, 1);
|
2013-09-19 12:16:07 +04:00
|
|
|
}
|
|
|
|
}
|