From 1cb75c0b1ead6f9ec415ea3c0372e77b4fa6c7c6 Mon Sep 17 00:00:00 2001 From: Juntong Deng Date: Sun, 19 May 2024 12:49:18 +0100 Subject: [PATCH] sk-tcp: Move TCP socket options from TcpStreamEntry to TcpOptsEntry Currently, some of the TCP socket option information is stored in the TcpStreamEntry, but the information in the TcpStreamEntry is only restored after the TCP socket has established a connection, which results in these TCP socket options not being restored for unconnected TCP sockets. This commit moves the TCP socket options from TcpStreamEntry to TcpOptsEntry and adds dump_tcp_opts() and restore_tcp_opts() for dumping and restoring TCP socket options. Signed-off-by: Juntong Deng --- criu/include/sk-inet.h | 3 +++ criu/sk-inet.c | 18 +++++++++++++- criu/sk-tcp.c | 55 +++++++++++++++++++++++++---------------- images/sk-inet.proto | 2 ++ images/tcp-stream.proto | 6 +++++ 5 files changed, 62 insertions(+), 22 deletions(-) diff --git a/criu/include/sk-inet.h b/criu/include/sk-inet.h index b3a70fb27..69ee8589e 100644 --- a/criu/include/sk-inet.h +++ b/criu/include/sk-inet.h @@ -87,6 +87,9 @@ extern void cpt_unlock_tcp_connections(void); extern int dump_one_tcp(int sk, struct inet_sk_desc *sd, SkOptsEntry *soe); extern int restore_one_tcp(int sk, struct inet_sk_info *si); +extern int dump_tcp_opts(int sk, TcpOptsEntry *toe); +extern int restore_tcp_opts(int sk, TcpOptsEntry *toe); + #define SK_EST_PARAM "tcp-established" #define SK_INFLIGHT_PARAM "skip-in-flight" #define SK_CLOSE_PARAM "tcp-close" diff --git a/criu/sk-inet.c b/criu/sk-inet.c index a6a767c73..92f53e569 100644 --- a/criu/sk-inet.c +++ b/criu/sk-inet.c @@ -454,6 +454,7 @@ static int do_dump_one_inet_fd(int lfd, u32 id, const struct fd_parms *p, int fa IpOptsEntry ipopts = IP_OPTS_ENTRY__INIT; IpOptsRawEntry ipopts_raw = IP_OPTS_RAW_ENTRY__INIT; SkOptsEntry skopts = SK_OPTS_ENTRY__INIT; + TcpOptsEntry tcpopts = TCP_OPTS_ENTRY__INIT; int ret = -1, err = -1, proto, aux, type; ret = do_dump_opt(lfd, 
SOL_SOCKET, SO_PROTOCOL, &proto, sizeof(proto)); @@ -521,6 +522,7 @@ static int do_dump_one_inet_fd(int lfd, u32 id, const struct fd_parms *p, int fa ie.opts = &skopts; ie.ip_opts = &ipopts; ie.ip_opts->raw = &ipopts_raw; + ie.tcp_opts = &tcpopts; ie.n_src_addr = PB_ALEN_INET; ie.n_dst_addr = PB_ALEN_INET; @@ -581,9 +583,20 @@ static int do_dump_one_inet_fd(int lfd, u32 id, const struct fd_parms *p, int fa switch (proto) { case IPPROTO_TCP: - err = (type != SOCK_RAW) ? dump_one_tcp(lfd, sk, &skopts) : 0; if (sk->shutdown) sk_encode_shutdown(&ie, sk->shutdown); + + if (type == SOCK_RAW) { + err = 0; + } else { + err = dump_tcp_opts(lfd, &tcpopts); + if (err < 0) + goto err; + + err = dump_one_tcp(lfd, sk, &skopts); + if (err < 0) + goto err; + } break; case IPPROTO_UDP: case IPPROTO_UDPLITE: @@ -939,6 +952,9 @@ done: if (restore_socket_opts(sk, ie->opts)) goto err; + if (ie->proto == IPPROTO_TCP && restore_tcp_opts(sk, ie->tcp_opts)) + goto err; + if (ie->has_shutdown && (ie->proto == IPPROTO_UDP || ie->proto == IPPROTO_UDPLITE || ie->proto == IPPROTO_TCP)) { if (shutdown(sk, sk_decode_shutdown(ie->shutdown))) { diff --git a/criu/sk-tcp.c b/criu/sk-tcp.c index b8d9ba46e..f80a4cb9c 100644 --- a/criu/sk-tcp.c +++ b/criu/sk-tcp.c @@ -136,7 +136,7 @@ static int dump_tcp_conn_state(struct inet_sk_desc *sk) { struct libsoccr_sk *socr = sk->priv; int exit_code = -1; - int ret, aux; + int ret; struct cr_img *img; TcpStreamEntry tse = TCP_STREAM_ENTRY__INIT; char *buf; @@ -186,26 +186,6 @@ static int dump_tcp_conn_state(struct inet_sk_desc *sk) tse.rcv_wup = data.rcv_wup; } - /* - * TCP socket options - */ - - if (dump_opt(sk->rfd, SOL_TCP, TCP_NODELAY, &aux)) - goto err; - - if (aux) { - tse.has_nodelay = true; - tse.nodelay = true; - } - - if (dump_opt(sk->rfd, SOL_TCP, TCP_CORK, &aux)) - goto err; - - if (aux) { - tse.has_cork = true; - tse.cork = true; - } - /* * Push the stuff to image */ @@ -243,6 +223,19 @@ err: return exit_code; } +int dump_tcp_opts(int fd, 
TcpOptsEntry *toe) +{ + int ret = 0; + + ret |= dump_opt(fd, SOL_TCP, TCP_NODELAY, &toe->nodelay); + ret |= dump_opt(fd, SOL_TCP, TCP_CORK, &toe->cork); + + toe->has_nodelay = !!toe->nodelay; + toe->has_cork = !!toe->cork; + + return ret; +} + int dump_one_tcp(int fd, struct inet_sk_desc *sk, SkOptsEntry *soe) { soe->has_tcp_keepcnt = true; @@ -396,6 +389,11 @@ static int restore_tcp_conn_state(int sk, struct libsoccr_sk *socr, struct inet_ if (libsoccr_restore(socr, &data, sizeof(data))) goto err_c; + /* + * Restoring TCP socket options in TcpStreamEntry is + * for backward compatibility only, newer versions + * of CRIU use TcpOptsEntry. + */ if (tse->has_nodelay && tse->nodelay) { aux = 1; if (restore_opt(sk, SOL_TCP, TCP_NODELAY, &aux)) @@ -448,6 +446,21 @@ int prepare_tcp_socks(struct task_restore_args *ta) return 0; } +int restore_tcp_opts(int sk, TcpOptsEntry *toe) +{ + int ret = 0; + + if(!toe) + return ret; + + if (toe->has_nodelay) + ret |= restore_opt(sk, SOL_TCP, TCP_NODELAY, &toe->nodelay); + if (toe->has_cork) + ret |= restore_opt(sk, SOL_TCP, TCP_CORK, &toe->cork); + + return ret; +} + int restore_one_tcp(int fd, struct inet_sk_info *ii) { struct libsoccr_sk *sk; diff --git a/images/sk-inet.proto b/images/sk-inet.proto index 03a679e7f..2c709e018 100644 --- a/images/sk-inet.proto +++ b/images/sk-inet.proto @@ -5,6 +5,7 @@ syntax = "proto2"; import "opts.proto"; import "fown.proto"; import "sk-opts.proto"; +import "tcp-stream.proto"; message ip_opts_raw_entry { optional bool hdrincl = 1; @@ -56,4 +57,5 @@ message inet_sk_entry { optional string ifname = 17; optional uint32 ns_id = 18; optional sk_shutdown shutdown = 19; + optional tcp_opts_entry tcp_opts = 20; } diff --git a/images/tcp-stream.proto b/images/tcp-stream.proto index c2244ba3b..4f85282e2 100644 --- a/images/tcp-stream.proto +++ b/images/tcp-stream.proto @@ -4,6 +4,11 @@ syntax = "proto2"; import "opts.proto"; +message tcp_opts_entry { + optional bool cork = 1; + optional bool nodelay = 2; 
+} + message tcp_stream_entry { required uint32 inq_len = 1; required uint32 inq_seq = 2; @@ -16,6 +21,7 @@ message tcp_stream_entry { optional uint32 rcv_wscale = 8; optional uint32 timestamp = 9; + /* These two are deprecated, use tcp_opts_entry instead */ optional bool cork = 10; optional bool nodelay = 11;