net: Port samsung MPTCP modifications from SM-N986B
- MultiPath TCP (MPTCP) is an effort towards enabling the simultaneous use of several IP-addresses/interfaces by a modification of TCP that presents a regular TCP interface to applications, while in fact spreading data across several subflows. Benefits of this include better resource utilization, better throughput and smoother reaction to failures. Change-Id: I50e8cbda93ed133fb6cb937b49b5d23879f92270 Signed-off-by: UtsavBalar1231 <utsavbalar1231@gmail.com>
This commit is contained in:
parent
ea8506a15b
commit
98a694aafb
@ -378,6 +378,7 @@ gen_headers_out_arm = [
|
|||||||
"linux/mmtimer.h",
|
"linux/mmtimer.h",
|
||||||
"linux/module.h",
|
"linux/module.h",
|
||||||
"linux/mpls.h",
|
"linux/mpls.h",
|
||||||
|
"linux/mptcp.h",
|
||||||
"linux/mpls_iptunnel.h",
|
"linux/mpls_iptunnel.h",
|
||||||
"linux/mqueue.h",
|
"linux/mqueue.h",
|
||||||
"linux/mroute.h",
|
"linux/mroute.h",
|
||||||
|
@ -373,6 +373,7 @@ gen_headers_out_arm64 = [
|
|||||||
"linux/module.h",
|
"linux/module.h",
|
||||||
"linux/mpls.h",
|
"linux/mpls.h",
|
||||||
"linux/mpls_iptunnel.h",
|
"linux/mpls_iptunnel.h",
|
||||||
|
"linux/mptcp.h",
|
||||||
"linux/mqueue.h",
|
"linux/mqueue.h",
|
||||||
"linux/mroute.h",
|
"linux/mroute.h",
|
||||||
"linux/mroute6.h",
|
"linux/mroute6.h",
|
||||||
|
@ -696,7 +696,11 @@ struct sk_buff {
|
|||||||
* want to keep them across layers you have to do a skb_clone()
|
* want to keep them across layers you have to do a skb_clone()
|
||||||
* first. This is owned by whoever has the skb queued ATM.
|
* first. This is owned by whoever has the skb queued ATM.
|
||||||
*/
|
*/
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
char cb[80] __aligned(8);
|
||||||
|
#else
|
||||||
char cb[48] __aligned(8);
|
char cb[48] __aligned(8);
|
||||||
|
#endif
|
||||||
|
|
||||||
union {
|
union {
|
||||||
struct {
|
struct {
|
||||||
|
@ -58,7 +58,11 @@ static inline unsigned int tcp_optlen(const struct sk_buff *skb)
|
|||||||
/* TCP Fast Open */
|
/* TCP Fast Open */
|
||||||
#define TCP_FASTOPEN_COOKIE_MIN 4 /* Min Fast Open Cookie size in bytes */
|
#define TCP_FASTOPEN_COOKIE_MIN 4 /* Min Fast Open Cookie size in bytes */
|
||||||
#define TCP_FASTOPEN_COOKIE_MAX 16 /* Max Fast Open Cookie size in bytes */
|
#define TCP_FASTOPEN_COOKIE_MAX 16 /* Max Fast Open Cookie size in bytes */
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
#define TCP_FASTOPEN_COOKIE_SIZE 4 /* the size employed by this impl for MPTCP. */
|
||||||
|
#else
|
||||||
#define TCP_FASTOPEN_COOKIE_SIZE 8 /* the size employed by this impl. */
|
#define TCP_FASTOPEN_COOKIE_SIZE 8 /* the size employed by this impl. */
|
||||||
|
#endif
|
||||||
|
|
||||||
/* TCP Fast Open Cookie as stored in memory */
|
/* TCP Fast Open Cookie as stored in memory */
|
||||||
struct tcp_fastopen_cookie {
|
struct tcp_fastopen_cookie {
|
||||||
@ -83,6 +87,56 @@ struct tcp_sack_block {
|
|||||||
u32 end_seq;
|
u32 end_seq;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
struct tcp_out_options {
|
||||||
|
u16 options; /* bit field of OPTION_* */
|
||||||
|
u16 mss; /* 0 to disable */
|
||||||
|
u8 ws; /* window scale, 0 to disable */
|
||||||
|
u8 num_sack_blocks;/* number of SACK blocks to include */
|
||||||
|
u8 hash_size; /* bytes in hash_location */
|
||||||
|
__u8 *hash_location; /* temporary pointer, overloaded */
|
||||||
|
__u32 tsval, tsecr; /* need to include OPTION_TS */
|
||||||
|
struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */
|
||||||
|
u16 mptcp_options; /* bit field of MPTCP related OPTION_* */
|
||||||
|
u8 dss_csum:1, /* dss-checksum required? */
|
||||||
|
add_addr_v4:1,
|
||||||
|
add_addr_v6:1,
|
||||||
|
mptcp_ver:4;
|
||||||
|
|
||||||
|
union {
|
||||||
|
struct {
|
||||||
|
__u64 sender_key; /* sender's key for mptcp */
|
||||||
|
__u64 receiver_key; /* receiver's key for mptcp */
|
||||||
|
} mp_capable;
|
||||||
|
|
||||||
|
struct {
|
||||||
|
__u64 sender_truncated_mac;
|
||||||
|
__u32 sender_nonce;
|
||||||
|
/* random number of the sender */
|
||||||
|
__u32 token; /* token for mptcp */
|
||||||
|
u8 low_prio:1;
|
||||||
|
} mp_join_syns;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct {
|
||||||
|
__u64 trunc_mac;
|
||||||
|
struct in_addr addr;
|
||||||
|
u16 port;
|
||||||
|
u8 addr_id;
|
||||||
|
} add_addr4;
|
||||||
|
|
||||||
|
struct {
|
||||||
|
__u64 trunc_mac;
|
||||||
|
struct in6_addr addr;
|
||||||
|
u16 port;
|
||||||
|
u8 addr_id;
|
||||||
|
} add_addr6;
|
||||||
|
|
||||||
|
u16 remove_addrs; /* list of address id */
|
||||||
|
u8 addr_id; /* address id (mp_join or add_address) */
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
/*These are used to set the sack_ok field in struct tcp_options_received */
|
/*These are used to set the sack_ok field in struct tcp_options_received */
|
||||||
#define TCP_SACK_SEEN (1 << 0) /*1 = peer is SACK capable, */
|
#define TCP_SACK_SEEN (1 << 0) /*1 = peer is SACK capable, */
|
||||||
#define TCP_DSACK_SEEN (1 << 2) /*1 = DSACK was received from peer*/
|
#define TCP_DSACK_SEEN (1 << 2) /*1 = DSACK was received from peer*/
|
||||||
@ -106,6 +160,11 @@ struct tcp_options_received {
|
|||||||
u16 mss_clamp; /* Maximal mss, negotiated at connection setup */
|
u16 mss_clamp; /* Maximal mss, negotiated at connection setup */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
struct mptcp_cb;
|
||||||
|
struct mptcp_tcp_sock;
|
||||||
|
#endif
|
||||||
|
|
||||||
static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
|
static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
|
||||||
{
|
{
|
||||||
rx_opt->tstamp_ok = rx_opt->sack_ok = 0;
|
rx_opt->tstamp_ok = rx_opt->sack_ok = 0;
|
||||||
@ -144,6 +203,10 @@ static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
|
|||||||
return (struct tcp_request_sock *)req;
|
return (struct tcp_request_sock *)req;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
struct tcp_md5sig_key;
|
||||||
|
#endif
|
||||||
|
|
||||||
struct tcp_sock {
|
struct tcp_sock {
|
||||||
/* inet_connection_sock has to be the first member of tcp_sock */
|
/* inet_connection_sock has to be the first member of tcp_sock */
|
||||||
struct inet_connection_sock inet_conn;
|
struct inet_connection_sock inet_conn;
|
||||||
@ -401,6 +464,43 @@ struct tcp_sock {
|
|||||||
*/
|
*/
|
||||||
struct request_sock *fastopen_rsk;
|
struct request_sock *fastopen_rsk;
|
||||||
u32 *saved_syn;
|
u32 *saved_syn;
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
/* MPTCP/TCP-specific callbacks */
|
||||||
|
const struct tcp_sock_ops *ops;
|
||||||
|
|
||||||
|
struct mptcp_cb *mpcb;
|
||||||
|
struct sock *meta_sk;
|
||||||
|
/* We keep these flags even if CONFIG_MPTCP is not checked, because
|
||||||
|
* it allows checking MPTCP capability just by checking the mpc flag,
|
||||||
|
* rather than adding ifdefs everywhere.
|
||||||
|
*/
|
||||||
|
u32 mpc:1, /* Other end is multipath capable */
|
||||||
|
inside_tk_table:1, /* Is the tcp_sock inside the token-table? */
|
||||||
|
send_mp_fclose:1,
|
||||||
|
request_mptcp:1, /* Did we send out an MP_CAPABLE?
|
||||||
|
* (this speeds up mptcp_doit() in tcp_recvmsg)
|
||||||
|
*/
|
||||||
|
pf:1, /* Potentially Failed state: when this flag is set, we
|
||||||
|
* stop using the subflow
|
||||||
|
*/
|
||||||
|
mp_killed:1, /* Killed with a tcp_done in mptcp? */
|
||||||
|
is_master_sk:1,
|
||||||
|
close_it:1, /* Must close socket in mptcp_data_ready? */
|
||||||
|
closing:1,
|
||||||
|
mptcp_ver:4,
|
||||||
|
mptcp_sched_setsockopt:1,
|
||||||
|
mptcp_pm_setsockopt:1,
|
||||||
|
record_master_info:1,
|
||||||
|
tcp_disconnect:1;
|
||||||
|
struct mptcp_tcp_sock *mptcp;
|
||||||
|
#define MPTCP_SCHED_NAME_MAX 16
|
||||||
|
#define MPTCP_PM_NAME_MAX 16
|
||||||
|
struct hlist_nulls_node tk_table;
|
||||||
|
u32 mptcp_loc_token;
|
||||||
|
u64 mptcp_loc_key;
|
||||||
|
char mptcp_sched_name[MPTCP_SCHED_NAME_MAX];
|
||||||
|
char mptcp_pm_name[MPTCP_PM_NAME_MAX];
|
||||||
|
#endif /* CONFIG_MPTCP */
|
||||||
};
|
};
|
||||||
|
|
||||||
enum tsq_enum {
|
enum tsq_enum {
|
||||||
@ -412,6 +512,10 @@ enum tsq_enum {
|
|||||||
TCP_MTU_REDUCED_DEFERRED, /* tcp_v{4|6}_err() could not call
|
TCP_MTU_REDUCED_DEFERRED, /* tcp_v{4|6}_err() could not call
|
||||||
* tcp_v{4|6}_mtu_reduced()
|
* tcp_v{4|6}_mtu_reduced()
|
||||||
*/
|
*/
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
MPTCP_PATH_MANAGER_DEFERRED, /* MPTCP deferred creation of new subflows */
|
||||||
|
MPTCP_SUB_DEFERRED, /* A subflow got deferred - process them */
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
enum tsq_flags {
|
enum tsq_flags {
|
||||||
@ -421,6 +525,10 @@ enum tsq_flags {
|
|||||||
TCPF_WRITE_TIMER_DEFERRED = (1UL << TCP_WRITE_TIMER_DEFERRED),
|
TCPF_WRITE_TIMER_DEFERRED = (1UL << TCP_WRITE_TIMER_DEFERRED),
|
||||||
TCPF_DELACK_TIMER_DEFERRED = (1UL << TCP_DELACK_TIMER_DEFERRED),
|
TCPF_DELACK_TIMER_DEFERRED = (1UL << TCP_DELACK_TIMER_DEFERRED),
|
||||||
TCPF_MTU_REDUCED_DEFERRED = (1UL << TCP_MTU_REDUCED_DEFERRED),
|
TCPF_MTU_REDUCED_DEFERRED = (1UL << TCP_MTU_REDUCED_DEFERRED),
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
TCPF_PATH_MANAGER_DEFERRED = (1UL << MPTCP_PATH_MANAGER_DEFERRED),
|
||||||
|
TCPF_SUB_DEFERRED = (1UL << MPTCP_SUB_DEFERRED),
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline struct tcp_sock *tcp_sk(const struct sock *sk)
|
static inline struct tcp_sock *tcp_sk(const struct sock *sk)
|
||||||
@ -443,6 +551,9 @@ struct tcp_timewait_sock {
|
|||||||
#ifdef CONFIG_TCP_MD5SIG
|
#ifdef CONFIG_TCP_MD5SIG
|
||||||
struct tcp_md5sig_key *tw_md5_key;
|
struct tcp_md5sig_key *tw_md5_key;
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
struct mptcp_tw *mptcp_tw;
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk)
|
static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk)
|
||||||
|
@ -2,6 +2,10 @@
|
|||||||
#ifndef _INET_COMMON_H
|
#ifndef _INET_COMMON_H
|
||||||
#define _INET_COMMON_H
|
#define _INET_COMMON_H
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
#include <net/sock.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
extern const struct proto_ops inet_stream_ops;
|
extern const struct proto_ops inet_stream_ops;
|
||||||
extern const struct proto_ops inet_dgram_ops;
|
extern const struct proto_ops inet_dgram_ops;
|
||||||
|
|
||||||
@ -14,6 +18,11 @@ struct sock;
|
|||||||
struct sockaddr;
|
struct sockaddr;
|
||||||
struct socket;
|
struct socket;
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
int inet_create(struct net *net, struct socket *sock, int protocol, int kern);
|
||||||
|
int inet6_create(struct net *net, struct socket *sock, int protocol, int kern);
|
||||||
|
#endif
|
||||||
|
|
||||||
int inet_release(struct socket *sock);
|
int inet_release(struct socket *sock);
|
||||||
int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
|
int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
|
||||||
int addr_len, int flags);
|
int addr_len, int flags);
|
||||||
|
@ -29,6 +29,9 @@
|
|||||||
|
|
||||||
struct inet_bind_bucket;
|
struct inet_bind_bucket;
|
||||||
struct tcp_congestion_ops;
|
struct tcp_congestion_ops;
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
struct tcp_options_received;
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Pointers to address related TCP functions
|
* Pointers to address related TCP functions
|
||||||
|
@ -83,6 +83,20 @@ struct inet_request_sock {
|
|||||||
#define ireq_state req.__req_common.skc_state
|
#define ireq_state req.__req_common.skc_state
|
||||||
#define ireq_family req.__req_common.skc_family
|
#define ireq_family req.__req_common.skc_family
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
u32 snd_wscale : 4,
|
||||||
|
rcv_wscale : 4,
|
||||||
|
tstamp_ok : 1,
|
||||||
|
sack_ok : 1,
|
||||||
|
wscale_ok : 1,
|
||||||
|
ecn_ok : 1,
|
||||||
|
acked : 1,
|
||||||
|
no_srccheck: 1,
|
||||||
|
mptcp_rqsk : 1,
|
||||||
|
saw_mpc : 1,
|
||||||
|
smc_ok : 1;
|
||||||
|
u32 ir_mark;
|
||||||
|
#else
|
||||||
u16 snd_wscale : 4,
|
u16 snd_wscale : 4,
|
||||||
rcv_wscale : 4,
|
rcv_wscale : 4,
|
||||||
tstamp_ok : 1,
|
tstamp_ok : 1,
|
||||||
@ -93,6 +107,7 @@ struct inet_request_sock {
|
|||||||
no_srccheck: 1,
|
no_srccheck: 1,
|
||||||
smc_ok : 1;
|
smc_ok : 1;
|
||||||
u32 ir_mark;
|
u32 ir_mark;
|
||||||
|
#endif
|
||||||
union {
|
union {
|
||||||
struct ip_options_rcu __rcu *ireq_opt;
|
struct ip_options_rcu __rcu *ireq_opt;
|
||||||
#if IS_ENABLED(CONFIG_IPV6)
|
#if IS_ENABLED(CONFIG_IPV6)
|
||||||
|
1497
include/net/mptcp.h
Executable file
1497
include/net/mptcp.h
Executable file
File diff suppressed because it is too large
Load Diff
76
include/net/mptcp_v4.h
Executable file
76
include/net/mptcp_v4.h
Executable file
@ -0,0 +1,76 @@
|
|||||||
|
/*
|
||||||
|
* MPTCP implementation
|
||||||
|
*
|
||||||
|
* Initial Design & Implementation:
|
||||||
|
* Sébastien Barré <sebastien.barre@uclouvain.be>
|
||||||
|
*
|
||||||
|
* Current Maintainer & Author:
|
||||||
|
* Christoph Paasch <christoph.paasch@uclouvain.be>
|
||||||
|
*
|
||||||
|
* Additional authors:
|
||||||
|
* Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
|
||||||
|
* Gregory Detal <gregory.detal@uclouvain.be>
|
||||||
|
* Fabien Duchêne <fabien.duchene@uclouvain.be>
|
||||||
|
* Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
|
||||||
|
* Lavkesh Lahngir <lavkesh51@gmail.com>
|
||||||
|
* Andreas Ripke <ripke@neclab.eu>
|
||||||
|
* Vlad Dogaru <vlad.dogaru@intel.com>
|
||||||
|
* Octavian Purdila <octavian.purdila@intel.com>
|
||||||
|
* John Ronan <jronan@tssg.org>
|
||||||
|
* Catalin Nicutar <catalin.nicutar@gmail.com>
|
||||||
|
* Brandon Heller <brandonh@stanford.edu>
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public License
|
||||||
|
* as published by the Free Software Foundation; either version
|
||||||
|
* 2 of the License, or (at your option) any later version.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MPTCP_V4_H_
|
||||||
|
#define MPTCP_V4_H_
|
||||||
|
|
||||||
|
|
||||||
|
#include <linux/in.h>
|
||||||
|
#include <linux/skbuff.h>
|
||||||
|
#include <net/mptcp.h>
|
||||||
|
#include <net/request_sock.h>
|
||||||
|
#include <net/sock.h>
|
||||||
|
|
||||||
|
extern struct request_sock_ops mptcp_request_sock_ops;
|
||||||
|
extern const struct inet_connection_sock_af_ops mptcp_v4_specific;
|
||||||
|
extern struct tcp_request_sock_ops mptcp_request_sock_ipv4_ops;
|
||||||
|
extern struct tcp_request_sock_ops mptcp_join_request_sock_ipv4_ops;
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
|
||||||
|
int mptcp_v4_do_rcv(struct sock *meta_sk, struct sk_buff *skb);
|
||||||
|
struct sock *mptcp_v4_search_req(const __be16 rport, const __be32 raddr,
|
||||||
|
const __be32 laddr, const struct net *net);
|
||||||
|
int __mptcp_init4_subsockets(struct sock *meta_sk, const struct mptcp_loc4 *loc,
|
||||||
|
__be16 sport, struct mptcp_rem4 *rem,
|
||||||
|
struct sock **subsk);
|
||||||
|
int mptcp_pm_v4_init(void);
|
||||||
|
void mptcp_pm_v4_undo(void);
|
||||||
|
u32 mptcp_v4_get_nonce(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport);
|
||||||
|
u64 mptcp_v4_get_key(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
|
||||||
|
u32 seed);
|
||||||
|
|
||||||
|
static inline int mptcp_init4_subsockets(struct sock *meta_sk,
|
||||||
|
const struct mptcp_loc4 *loc,
|
||||||
|
struct mptcp_rem4 *rem)
|
||||||
|
{
|
||||||
|
return __mptcp_init4_subsockets(meta_sk, loc, 0, rem, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
static inline int mptcp_v4_do_rcv(const struct sock *meta_sk,
|
||||||
|
const struct sk_buff *skb)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* CONFIG_MPTCP */
|
||||||
|
|
||||||
|
#endif /* MPTCP_V4_H_ */
|
77
include/net/mptcp_v6.h
Executable file
77
include/net/mptcp_v6.h
Executable file
@ -0,0 +1,77 @@
|
|||||||
|
/*
|
||||||
|
* MPTCP implementation
|
||||||
|
*
|
||||||
|
* Initial Design & Implementation:
|
||||||
|
* Sébastien Barré <sebastien.barre@uclouvain.be>
|
||||||
|
*
|
||||||
|
* Current Maintainer & Author:
|
||||||
|
* Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
|
||||||
|
*
|
||||||
|
* Additional authors:
|
||||||
|
* Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
|
||||||
|
* Gregory Detal <gregory.detal@uclouvain.be>
|
||||||
|
* Fabien Duchêne <fabien.duchene@uclouvain.be>
|
||||||
|
* Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
|
||||||
|
* Lavkesh Lahngir <lavkesh51@gmail.com>
|
||||||
|
* Andreas Ripke <ripke@neclab.eu>
|
||||||
|
* Vlad Dogaru <vlad.dogaru@intel.com>
|
||||||
|
* Octavian Purdila <octavian.purdila@intel.com>
|
||||||
|
* John Ronan <jronan@tssg.org>
|
||||||
|
* Catalin Nicutar <catalin.nicutar@gmail.com>
|
||||||
|
* Brandon Heller <brandonh@stanford.edu>
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public License
|
||||||
|
* as published by the Free Software Foundation; either version
|
||||||
|
* 2 of the License, or (at your option) any later version.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _MPTCP_V6_H
|
||||||
|
#define _MPTCP_V6_H
|
||||||
|
|
||||||
|
#include <linux/in6.h>
|
||||||
|
#include <net/if_inet6.h>
|
||||||
|
|
||||||
|
#include <net/mptcp.h>
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
extern const struct inet_connection_sock_af_ops mptcp_v6_mapped;
|
||||||
|
extern const struct inet_connection_sock_af_ops mptcp_v6_specific;
|
||||||
|
extern struct request_sock_ops mptcp6_request_sock_ops;
|
||||||
|
extern struct tcp_request_sock_ops mptcp_request_sock_ipv6_ops;
|
||||||
|
extern struct tcp_request_sock_ops mptcp_join_request_sock_ipv6_ops;
|
||||||
|
|
||||||
|
int mptcp_v6_do_rcv(struct sock *meta_sk, struct sk_buff *skb);
|
||||||
|
struct sock *mptcp_v6_search_req(const __be16 rport, const struct in6_addr *raddr,
|
||||||
|
const struct in6_addr *laddr, const struct net *net);
|
||||||
|
int __mptcp_init6_subsockets(struct sock *meta_sk, const struct mptcp_loc6 *loc,
|
||||||
|
__be16 sport, struct mptcp_rem6 *rem,
|
||||||
|
struct sock **subsk);
|
||||||
|
int mptcp_pm_v6_init(void);
|
||||||
|
void mptcp_pm_v6_undo(void);
|
||||||
|
__u32 mptcp_v6_get_nonce(const __be32 *saddr, const __be32 *daddr,
|
||||||
|
__be16 sport, __be16 dport);
|
||||||
|
u64 mptcp_v6_get_key(const __be32 *saddr, const __be32 *daddr,
|
||||||
|
__be16 sport, __be16 dport, u32 seed);
|
||||||
|
|
||||||
|
static inline int mptcp_init6_subsockets(struct sock *meta_sk,
|
||||||
|
const struct mptcp_loc6 *loc,
|
||||||
|
struct mptcp_rem6 *rem)
|
||||||
|
{
|
||||||
|
return __mptcp_init6_subsockets(meta_sk, loc, 0, rem, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
#else /* CONFIG_MPTCP */
|
||||||
|
|
||||||
|
#define mptcp_v6_mapped ipv6_mapped
|
||||||
|
|
||||||
|
static inline int mptcp_v6_do_rcv(struct sock *meta_sk, struct sk_buff *skb)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* CONFIG_MPTCP */
|
||||||
|
|
||||||
|
#endif /* _MPTCP_V6_H */
|
@ -19,6 +19,9 @@
|
|||||||
#include <net/netns/packet.h>
|
#include <net/netns/packet.h>
|
||||||
#include <net/netns/ipv4.h>
|
#include <net/netns/ipv4.h>
|
||||||
#include <net/netns/ipv6.h>
|
#include <net/netns/ipv6.h>
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
#include <net/netns/mptcp.h>
|
||||||
|
#endif
|
||||||
#include <net/netns/ieee802154_6lowpan.h>
|
#include <net/netns/ieee802154_6lowpan.h>
|
||||||
#include <net/netns/sctp.h>
|
#include <net/netns/sctp.h>
|
||||||
#include <net/netns/dccp.h>
|
#include <net/netns/dccp.h>
|
||||||
@ -110,6 +113,9 @@ struct net {
|
|||||||
#if IS_ENABLED(CONFIG_IPV6)
|
#if IS_ENABLED(CONFIG_IPV6)
|
||||||
struct netns_ipv6 ipv6;
|
struct netns_ipv6 ipv6;
|
||||||
#endif
|
#endif
|
||||||
|
#if IS_ENABLED(CONFIG_MPTCP)
|
||||||
|
struct netns_mptcp mptcp;
|
||||||
|
#endif
|
||||||
#if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN)
|
#if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN)
|
||||||
struct netns_ieee802154_lowpan ieee802154_lowpan;
|
struct netns_ieee802154_lowpan ieee802154_lowpan;
|
||||||
#endif
|
#endif
|
||||||
|
52
include/net/netns/mptcp.h
Executable file
52
include/net/netns/mptcp.h
Executable file
@ -0,0 +1,52 @@
|
|||||||
|
/*
|
||||||
|
* MPTCP implementation - MPTCP namespace
|
||||||
|
*
|
||||||
|
* Initial Design & Implementation:
|
||||||
|
* Sébastien Barré <sebastien.barre@uclouvain.be>
|
||||||
|
*
|
||||||
|
* Current Maintainer:
|
||||||
|
* Christoph Paasch <christoph.paasch@uclouvain.be>
|
||||||
|
*
|
||||||
|
* Additional authors:
|
||||||
|
* Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
|
||||||
|
* Gregory Detal <gregory.detal@uclouvain.be>
|
||||||
|
* Fabien Duchêne <fabien.duchene@uclouvain.be>
|
||||||
|
* Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
|
||||||
|
* Lavkesh Lahngir <lavkesh51@gmail.com>
|
||||||
|
* Andreas Ripke <ripke@neclab.eu>
|
||||||
|
* Vlad Dogaru <vlad.dogaru@intel.com>
|
||||||
|
* Octavian Purdila <octavian.purdila@intel.com>
|
||||||
|
* John Ronan <jronan@tssg.org>
|
||||||
|
* Catalin Nicutar <catalin.nicutar@gmail.com>
|
||||||
|
* Brandon Heller <brandonh@stanford.edu>
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public License
|
||||||
|
* as published by the Free Software Foundation; either version
|
||||||
|
* 2 of the License, or (at your option) any later version.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __NETNS_MPTCP_H__
|
||||||
|
#define __NETNS_MPTCP_H__
|
||||||
|
|
||||||
|
#include <linux/compiler.h>
|
||||||
|
|
||||||
|
enum {
|
||||||
|
MPTCP_PM_FULLMESH = 0,
|
||||||
|
MPTCP_PM_MAX
|
||||||
|
};
|
||||||
|
|
||||||
|
struct mptcp_mib;
|
||||||
|
|
||||||
|
struct netns_mptcp {
|
||||||
|
DEFINE_SNMP_STAT(struct mptcp_mib, mptcp_statistics);
|
||||||
|
|
||||||
|
#ifdef CONFIG_PROC_FS
|
||||||
|
struct proc_dir_entry *proc_net_mptcp;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
void *path_managers[MPTCP_PM_MAX];
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif /* __NETNS_MPTCP_H__ */
|
@ -835,6 +835,9 @@ enum sock_flags {
|
|||||||
SOCK_SELECT_ERR_QUEUE, /* Wake select on error queue */
|
SOCK_SELECT_ERR_QUEUE, /* Wake select on error queue */
|
||||||
SOCK_RCU_FREE, /* wait rcu grace period in sk_destruct() */
|
SOCK_RCU_FREE, /* wait rcu grace period in sk_destruct() */
|
||||||
SOCK_TXTIME,
|
SOCK_TXTIME,
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
SOCK_MPTCP, /* MPTCP set on this socket */
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
|
#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
|
||||||
@ -1143,6 +1146,10 @@ struct proto {
|
|||||||
void (*rehash)(struct sock *sk);
|
void (*rehash)(struct sock *sk);
|
||||||
int (*get_port)(struct sock *sk, unsigned short snum);
|
int (*get_port)(struct sock *sk, unsigned short snum);
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
void (*clear_sk)(struct sock *sk, int size);
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Keeping track of sockets in use */
|
/* Keeping track of sockets in use */
|
||||||
#ifdef CONFIG_PROC_FS
|
#ifdef CONFIG_PROC_FS
|
||||||
unsigned int inuse_idx;
|
unsigned int inuse_idx;
|
||||||
|
@ -188,6 +188,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
|
|||||||
#define TCPOPT_SACK 5 /* SACK Block */
|
#define TCPOPT_SACK 5 /* SACK Block */
|
||||||
#define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */
|
#define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */
|
||||||
#define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */
|
#define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
#define TCPOPT_MPTCP 30
|
||||||
|
#endif
|
||||||
#define TCPOPT_FASTOPEN 34 /* Fast open (RFC7413) */
|
#define TCPOPT_FASTOPEN 34 /* Fast open (RFC7413) */
|
||||||
#define TCPOPT_EXP 254 /* Experimental */
|
#define TCPOPT_EXP 254 /* Experimental */
|
||||||
/* Magic number to be after the option value for sharing TCP
|
/* Magic number to be after the option value for sharing TCP
|
||||||
@ -244,6 +247,33 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
|
|||||||
*/
|
*/
|
||||||
#define TFO_SERVER_WO_SOCKOPT1 0x400
|
#define TFO_SERVER_WO_SOCKOPT1 0x400
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
/* Flags from tcp_input.c for tcp_ack */
|
||||||
|
#define FLAG_DATA 0x01 /* Incoming frame contained data. */
|
||||||
|
#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
|
||||||
|
#define FLAG_DATA_ACKED 0x04 /* This ACK acknowledged new data. */
|
||||||
|
#define FLAG_RETRANS_DATA_ACKED 0x08 /* "" "" some of which was retransmitted. */
|
||||||
|
#define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */
|
||||||
|
#define FLAG_DATA_SACKED 0x20 /* New SACK. */
|
||||||
|
#define FLAG_ECE 0x40 /* ECE in this ACK */
|
||||||
|
#define FLAG_LOST_RETRANS 0x80 /* This ACK marks some retransmission lost */
|
||||||
|
#define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/
|
||||||
|
#define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */
|
||||||
|
#define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
|
||||||
|
#define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */
|
||||||
|
#define FLAG_SET_XMIT_TIMER 0x1000 /* Set TLP or RTO timer */
|
||||||
|
#define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */
|
||||||
|
#define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */
|
||||||
|
#define FLAG_NO_CHALLENGE_ACK 0x8000 /* do not call tcp_send_challenge_ack() */
|
||||||
|
#define FLAG_ACK_MAYBE_DELAYED 0x10000 /* Likely a delayed ACK */
|
||||||
|
|
||||||
|
#define MPTCP_FLAG_DATA_ACKED 0x20000
|
||||||
|
|
||||||
|
#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
|
||||||
|
#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
|
||||||
|
#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE|FLAG_DSACKING_ACK)
|
||||||
|
#define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED)
|
||||||
|
#endif
|
||||||
|
|
||||||
/* sysctl variables for tcp */
|
/* sysctl variables for tcp */
|
||||||
extern int sysctl_tcp_max_orphans;
|
extern int sysctl_tcp_max_orphans;
|
||||||
@ -321,6 +351,98 @@ extern struct proto tcp_prot;
|
|||||||
#define TCP_DEC_STATS(net, field) SNMP_DEC_STATS((net)->mib.tcp_statistics, field)
|
#define TCP_DEC_STATS(net, field) SNMP_DEC_STATS((net)->mib.tcp_statistics, field)
|
||||||
#define TCP_ADD_STATS(net, field, val) SNMP_ADD_STATS((net)->mib.tcp_statistics, field, val)
|
#define TCP_ADD_STATS(net, field, val) SNMP_ADD_STATS((net)->mib.tcp_statistics, field, val)
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
/**** START - Exports needed for MPTCP ****/
|
||||||
|
extern const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops;
|
||||||
|
extern const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops;
|
||||||
|
|
||||||
|
struct mptcp_options_received;
|
||||||
|
|
||||||
|
void tcp_cleanup_rbuf(struct sock *sk, int copied);
|
||||||
|
void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited);
|
||||||
|
int tcp_close_state(struct sock *sk);
|
||||||
|
void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now,
|
||||||
|
const struct sk_buff *skb);
|
||||||
|
int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib);
|
||||||
|
void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb);
|
||||||
|
int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
|
||||||
|
gfp_t gfp_mask);
|
||||||
|
unsigned int tcp_mss_split_point(const struct sock *sk,
|
||||||
|
const struct sk_buff *skb,
|
||||||
|
unsigned int mss_now,
|
||||||
|
unsigned int max_segs,
|
||||||
|
int nonagle);
|
||||||
|
bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb,
|
||||||
|
unsigned int cur_mss, int nonagle);
|
||||||
|
bool tcp_snd_wnd_test(const struct tcp_sock *tp, const struct sk_buff *skb,
|
||||||
|
unsigned int cur_mss);
|
||||||
|
unsigned int tcp_cwnd_test(const struct tcp_sock *tp, const struct sk_buff *skb);
|
||||||
|
int tcp_init_tso_segs(struct sk_buff *skb, unsigned int mss_now);
|
||||||
|
int __pskb_trim_head(struct sk_buff *skb, int len);
|
||||||
|
void tcp_queue_skb(struct sock *sk, struct sk_buff *skb);
|
||||||
|
void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags);
|
||||||
|
void tcp_reset(struct sock *sk);
|
||||||
|
bool tcp_may_update_window(const struct tcp_sock *tp, const u32 ack,
|
||||||
|
const u32 ack_seq, const u32 nwin);
|
||||||
|
bool tcp_urg_mode(const struct tcp_sock *tp);
|
||||||
|
void tcp_ack_probe(struct sock *sk);
|
||||||
|
void tcp_rearm_rto(struct sock *sk);
|
||||||
|
int tcp_write_timeout(struct sock *sk);
|
||||||
|
bool retransmits_timed_out(struct sock *sk,
|
||||||
|
unsigned int boundary,
|
||||||
|
unsigned int timeout);
|
||||||
|
void tcp_write_err(struct sock *sk);
|
||||||
|
void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr);
|
||||||
|
void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb);
|
||||||
|
void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now);
|
||||||
|
|
||||||
|
void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
|
||||||
|
struct request_sock *req);
|
||||||
|
void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb);
|
||||||
|
struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb);
|
||||||
|
void tcp_v4_reqsk_destructor(struct request_sock *req);
|
||||||
|
|
||||||
|
void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
|
||||||
|
struct request_sock *req);
|
||||||
|
void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
|
||||||
|
struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb);
|
||||||
|
int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
|
||||||
|
int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
|
||||||
|
void tcp_v6_destroy_sock(struct sock *sk);
|
||||||
|
void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb);
|
||||||
|
void tcp_v6_hash(struct sock *sk);
|
||||||
|
struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb);
|
||||||
|
struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
|
||||||
|
struct request_sock *req,
|
||||||
|
struct dst_entry *dst,
|
||||||
|
struct request_sock *req_unhash,
|
||||||
|
bool *own_req);
|
||||||
|
void tcp_v6_reqsk_destructor(struct request_sock *req);
|
||||||
|
|
||||||
|
unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
|
||||||
|
int large_allowed);
|
||||||
|
u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb);
|
||||||
|
|
||||||
|
void skb_clone_fraglist(struct sk_buff *skb);
|
||||||
|
|
||||||
|
void inet_twsk_free(struct inet_timewait_sock *tw);
|
||||||
|
int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb);
|
||||||
|
/* These states need RST on ABORT according to RFC793 */
|
||||||
|
static inline bool tcp_need_reset(int state)
|
||||||
|
{
|
||||||
|
return (1 << state) &
|
||||||
|
(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 |
|
||||||
|
TCPF_FIN_WAIT2 | TCPF_SYN_RECV);
|
||||||
|
}
|
||||||
|
|
||||||
|
int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,
|
||||||
|
bool *fragstolen);
|
||||||
|
void tcp_ofo_queue(struct sock *sk);
|
||||||
|
void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb);
|
||||||
|
int linear_payload_sz(bool first_skb);
|
||||||
|
/**** END - Exports needed for MPTCP ****/
|
||||||
|
#endif
|
||||||
|
|
||||||
void tcp_tasklet_init(void);
|
void tcp_tasklet_init(void);
|
||||||
|
|
||||||
void tcp_v4_err(struct sk_buff *skb, u32);
|
void tcp_v4_err(struct sk_buff *skb, u32);
|
||||||
@ -428,7 +550,14 @@ int tcp_mmap(struct file *file, struct socket *sock,
|
|||||||
struct vm_area_struct *vma);
|
struct vm_area_struct *vma);
|
||||||
void tcp_parse_options(const struct net *net, const struct sk_buff *skb,
|
void tcp_parse_options(const struct net *net, const struct sk_buff *skb,
|
||||||
struct tcp_options_received *opt_rx,
|
struct tcp_options_received *opt_rx,
|
||||||
int estab, struct tcp_fastopen_cookie *foc);
|
#ifdef CONFIG_MPTCP
|
||||||
|
struct mptcp_options_received *mopt_rx,
|
||||||
|
#endif
|
||||||
|
int estab, struct tcp_fastopen_cookie *foc
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
, struct tcp_sock *tp
|
||||||
|
#endif
|
||||||
|
);
|
||||||
const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);
|
const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -437,6 +566,9 @@ const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);
|
|||||||
|
|
||||||
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb);
|
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb);
|
||||||
void tcp_v4_mtu_reduced(struct sock *sk);
|
void tcp_v4_mtu_reduced(struct sock *sk);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
void tcp_v6_mtu_reduced(struct sock *sk);
|
||||||
|
#endif
|
||||||
void tcp_req_err(struct sock *sk, u32 seq, bool abort);
|
void tcp_req_err(struct sock *sk, u32 seq, bool abort);
|
||||||
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
|
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
|
||||||
struct sock *tcp_create_openreq_child(const struct sock *sk,
|
struct sock *tcp_create_openreq_child(const struct sock *sk,
|
||||||
@ -554,7 +686,12 @@ static inline u32 tcp_cookie_time(void)
|
|||||||
|
|
||||||
u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th,
|
u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th,
|
||||||
u16 *mssp);
|
u16 *mssp);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
__u32 cookie_v4_init_sequence(struct request_sock *req, const struct sock *sk,
|
||||||
|
const struct sk_buff *skb, __u16 *mss);
|
||||||
|
#else
|
||||||
__u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mss);
|
__u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mss);
|
||||||
|
#endif
|
||||||
u64 cookie_init_timestamp(struct request_sock *req);
|
u64 cookie_init_timestamp(struct request_sock *req);
|
||||||
bool cookie_timestamp_decode(const struct net *net,
|
bool cookie_timestamp_decode(const struct net *net,
|
||||||
struct tcp_options_received *opt);
|
struct tcp_options_received *opt);
|
||||||
@ -568,8 +705,13 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb);
|
|||||||
|
|
||||||
u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph,
|
u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph,
|
||||||
const struct tcphdr *th, u16 *mssp);
|
const struct tcphdr *th, u16 *mssp);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
__u32 cookie_v6_init_sequence(struct request_sock *req, const struct sock *sk,
|
||||||
|
const struct sk_buff *skb, __u16 *mss);
|
||||||
|
#else
|
||||||
__u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mss);
|
__u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mss);
|
||||||
#endif
|
#endif
|
||||||
|
#endif
|
||||||
/* tcp_output.c */
|
/* tcp_output.c */
|
||||||
|
|
||||||
void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
|
void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
|
||||||
@ -604,10 +746,20 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto);
|
|||||||
void tcp_skb_collapse_tstamp(struct sk_buff *skb,
|
void tcp_skb_collapse_tstamp(struct sk_buff *skb,
|
||||||
const struct sk_buff *next_skb);
|
const struct sk_buff *next_skb);
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
u16 tcp_select_window(struct sock *sk);
|
||||||
|
bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
|
||||||
|
int push_one, gfp_t gfp);
|
||||||
|
#endif
|
||||||
|
|
||||||
/* tcp_input.c */
|
/* tcp_input.c */
|
||||||
void tcp_rearm_rto(struct sock *sk);
|
void tcp_rearm_rto(struct sock *sk);
|
||||||
void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req);
|
void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req);
|
||||||
void tcp_reset(struct sock *sk);
|
void tcp_reset(struct sock *sk);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
void tcp_set_rto(struct sock *sk);
|
||||||
|
bool tcp_should_expand_sndbuf(const struct sock *sk);
|
||||||
|
#endif
|
||||||
void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb);
|
void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb);
|
||||||
void tcp_fin(struct sock *sk);
|
void tcp_fin(struct sock *sk);
|
||||||
void tcp_check_space(struct sock *sk);
|
void tcp_check_space(struct sock *sk);
|
||||||
@ -652,7 +804,11 @@ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* tcp.c */
|
/* tcp.c */
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
void tcp_get_info(struct sock *, struct tcp_info *, bool no_lock);
|
||||||
|
#else
|
||||||
void tcp_get_info(struct sock *, struct tcp_info *);
|
void tcp_get_info(struct sock *, struct tcp_info *);
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Read 'sendfile()'-style from a TCP socket */
|
/* Read 'sendfile()'-style from a TCP socket */
|
||||||
int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
|
int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
|
||||||
@ -840,6 +996,12 @@ struct tcp_skb_cb {
|
|||||||
u16 tcp_gso_size;
|
u16 tcp_gso_size;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
__u8 mptcp_flags; /* flags for the MPTCP layer */
|
||||||
|
__u8 dss_off; /* Number of 4-byte words until
|
||||||
|
* seq-number
|
||||||
|
*/
|
||||||
|
#endif
|
||||||
__u8 tcp_flags; /* TCP header flags. (tcp[13]) */
|
__u8 tcp_flags; /* TCP header flags. (tcp[13]) */
|
||||||
|
|
||||||
__u8 sacked; /* State flags for SACK. */
|
__u8 sacked; /* State flags for SACK. */
|
||||||
@ -858,6 +1020,12 @@ struct tcp_skb_cb {
|
|||||||
has_rxtstamp:1, /* SKB has a RX timestamp */
|
has_rxtstamp:1, /* SKB has a RX timestamp */
|
||||||
unused:5;
|
unused:5;
|
||||||
__u32 ack_seq; /* Sequence number ACK'd */
|
__u32 ack_seq; /* Sequence number ACK'd */
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
union { /* For MPTCP outgoing frames */
|
||||||
|
__u32 path_mask; /* paths that tried to send this skb */
|
||||||
|
__u32 dss[6]; /* DSS options */
|
||||||
|
};
|
||||||
|
#endif
|
||||||
union {
|
union {
|
||||||
struct {
|
struct {
|
||||||
/* There is space for up to 24 bytes */
|
/* There is space for up to 24 bytes */
|
||||||
@ -1381,6 +1549,19 @@ static inline int tcp_win_from_space(const struct sock *sk, int space)
|
|||||||
space - (space>>tcp_adv_win_scale);
|
space - (space>>tcp_adv_win_scale);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
extern struct static_key mptcp_static_key;
|
||||||
|
static inline bool mptcp(const struct tcp_sock *tp)
|
||||||
|
{
|
||||||
|
return static_key_false(&mptcp_static_key) && tp->mpc;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
static inline bool mptcp(const struct tcp_sock *tp)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Note: caller must be prepared to deal with negative returns */
|
/* Note: caller must be prepared to deal with negative returns */
|
||||||
static inline int tcp_space(const struct sock *sk)
|
static inline int tcp_space(const struct sock *sk)
|
||||||
{
|
{
|
||||||
@ -1932,6 +2113,32 @@ struct tcp_sock_af_ops {
|
|||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
/* TCP/MPTCP-specific functions */
|
||||||
|
struct tcp_sock_ops {
|
||||||
|
u32 (*__select_window)(struct sock *sk);
|
||||||
|
u16 (*select_window)(struct sock *sk);
|
||||||
|
void (*select_initial_window)(const struct sock *sk, int __space,
|
||||||
|
__u32 mss, __u32 *rcv_wnd,
|
||||||
|
__u32 *window_clamp, int wscale_ok,
|
||||||
|
__u8 *rcv_wscale, __u32 init_rcv_wnd);
|
||||||
|
int (*select_size)(const struct sock *sk, bool first_skb, bool zc);
|
||||||
|
void (*init_buffer_space)(struct sock *sk);
|
||||||
|
void (*set_rto)(struct sock *sk);
|
||||||
|
bool (*should_expand_sndbuf)(const struct sock *sk);
|
||||||
|
void (*send_fin)(struct sock *sk);
|
||||||
|
bool (*write_xmit)(struct sock *sk, unsigned int mss_now, int nonagle,
|
||||||
|
int push_one, gfp_t gfp);
|
||||||
|
void (*send_active_reset)(struct sock *sk, gfp_t priority);
|
||||||
|
int (*write_wakeup)(struct sock *sk, int mib);
|
||||||
|
void (*retransmit_timer)(struct sock *sk);
|
||||||
|
void (*time_wait)(struct sock *sk, int state, int timeo);
|
||||||
|
void (*cleanup_rbuf)(struct sock *sk, int copied);
|
||||||
|
void (*cwnd_validate)(struct sock *sk, bool is_cwnd_limited);
|
||||||
|
};
|
||||||
|
extern const struct tcp_sock_ops tcp_specific;
|
||||||
|
#endif
|
||||||
|
|
||||||
struct tcp_request_sock_ops {
|
struct tcp_request_sock_ops {
|
||||||
u16 mss_clamp;
|
u16 mss_clamp;
|
||||||
#ifdef CONFIG_TCP_MD5SIG
|
#ifdef CONFIG_TCP_MD5SIG
|
||||||
@ -1942,12 +2149,26 @@ struct tcp_request_sock_ops {
|
|||||||
const struct sock *sk,
|
const struct sock *sk,
|
||||||
const struct sk_buff *skb);
|
const struct sk_buff *skb);
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
int (*init_req)(struct request_sock *req,
|
||||||
|
const struct sock *sk_listener,
|
||||||
|
struct sk_buff *skb,
|
||||||
|
bool want_cookie);
|
||||||
|
#else
|
||||||
void (*init_req)(struct request_sock *req,
|
void (*init_req)(struct request_sock *req,
|
||||||
const struct sock *sk_listener,
|
const struct sock *sk_listener,
|
||||||
struct sk_buff *skb);
|
struct sk_buff *skb);
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef CONFIG_SYN_COOKIES
|
#ifdef CONFIG_SYN_COOKIES
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
__u32 (*cookie_init_seq)(struct request_sock *req, const struct sock *sk,
|
||||||
|
const struct sk_buff *skb, __u16 *mss);
|
||||||
|
#else
|
||||||
__u32 (*cookie_init_seq)(const struct sk_buff *skb,
|
__u32 (*cookie_init_seq)(const struct sk_buff *skb,
|
||||||
__u16 *mss);
|
__u16 *mss);
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
struct dst_entry *(*route_req)(const struct sock *sk, struct flowi *fl,
|
struct dst_entry *(*route_req)(const struct sock *sk, struct flowi *fl,
|
||||||
const struct request_sock *req);
|
const struct request_sock *req);
|
||||||
@ -1965,18 +2186,36 @@ extern const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops;
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef CONFIG_SYN_COOKIES
|
#ifdef CONFIG_SYN_COOKIES
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops,
|
static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops,
|
||||||
|
struct request_sock *req,
|
||||||
const struct sock *sk, struct sk_buff *skb,
|
const struct sock *sk, struct sk_buff *skb,
|
||||||
__u16 *mss)
|
__u16 *mss)
|
||||||
{
|
|
||||||
tcp_synq_overflow(sk);
|
|
||||||
__NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
|
|
||||||
return ops->cookie_init_seq(skb, mss);
|
|
||||||
}
|
|
||||||
#else
|
#else
|
||||||
static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops,
|
static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops,
|
||||||
const struct sock *sk, struct sk_buff *skb,
|
const struct sock *sk, struct sk_buff *skb,
|
||||||
__u16 *mss)
|
__u16 *mss)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
tcp_synq_overflow(sk);
|
||||||
|
__NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
return ops->cookie_init_seq(req, sk, skb, mss);
|
||||||
|
#else
|
||||||
|
return ops->cookie_init_seq(skb, mss);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops,
|
||||||
|
struct request_sock *req,
|
||||||
|
const struct sock *sk, struct sk_buff *skb,
|
||||||
|
__u16 *mss)
|
||||||
|
#else
|
||||||
|
static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops,
|
||||||
|
const struct sock *sk, struct sk_buff *skb,
|
||||||
|
__u16 *mss)
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -26,6 +26,9 @@ enum {
|
|||||||
TCP_LISTEN,
|
TCP_LISTEN,
|
||||||
TCP_CLOSING, /* Now a valid state */
|
TCP_CLOSING, /* Now a valid state */
|
||||||
TCP_NEW_SYN_RECV,
|
TCP_NEW_SYN_RECV,
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
TCP_RST_WAIT,
|
||||||
|
#endif
|
||||||
|
|
||||||
TCP_MAX_STATES /* Leave at the end! */
|
TCP_MAX_STATES /* Leave at the end! */
|
||||||
};
|
};
|
||||||
@ -47,6 +50,9 @@ enum {
|
|||||||
TCPF_LISTEN = (1 << TCP_LISTEN),
|
TCPF_LISTEN = (1 << TCP_LISTEN),
|
||||||
TCPF_CLOSING = (1 << TCP_CLOSING),
|
TCPF_CLOSING = (1 << TCP_CLOSING),
|
||||||
TCPF_NEW_SYN_RECV = (1 << TCP_NEW_SYN_RECV),
|
TCPF_NEW_SYN_RECV = (1 << TCP_NEW_SYN_RECV),
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
TCPF_RST_WAIT = (1 << TCP_RST_WAIT),
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* _LINUX_TCP_STATES_H */
|
#endif /* _LINUX_TCP_STATES_H */
|
||||||
|
@ -58,6 +58,10 @@ ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp, __u16 srcp,
|
|||||||
|
|
||||||
/* address family specific functions */
|
/* address family specific functions */
|
||||||
extern const struct inet_connection_sock_af_ops ipv4_specific;
|
extern const struct inet_connection_sock_af_ops ipv4_specific;
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
extern const struct inet_connection_sock_af_ops ipv6_mapped;
|
||||||
|
extern const struct inet_connection_sock_af_ops ipv6_specific;
|
||||||
|
#endif
|
||||||
|
|
||||||
void inet6_destroy_sock(struct sock *sk);
|
void inet6_destroy_sock(struct sock *sk);
|
||||||
|
|
||||||
|
@ -10,6 +10,9 @@
|
|||||||
#include <linux/tracepoint.h>
|
#include <linux/tracepoint.h>
|
||||||
#include <net/ipv6.h>
|
#include <net/ipv6.h>
|
||||||
#include <net/tcp.h>
|
#include <net/tcp.h>
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
#include <net/mptcp.h>
|
||||||
|
#endif
|
||||||
#include <linux/sock_diag.h>
|
#include <linux/sock_diag.h>
|
||||||
|
|
||||||
#define TP_STORE_V4MAPPED(__entry, saddr, daddr) \
|
#define TP_STORE_V4MAPPED(__entry, saddr, daddr) \
|
||||||
@ -178,6 +181,15 @@ DEFINE_EVENT(tcp_event_sk, tcp_rcv_space_adjust,
|
|||||||
TP_ARGS(sk)
|
TP_ARGS(sk)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
DEFINE_EVENT(tcp_event_sk_skb, mptcp_retransmit,
|
||||||
|
|
||||||
|
TP_PROTO(const struct sock *sk, const struct sk_buff *skb),
|
||||||
|
|
||||||
|
TP_ARGS(sk, skb)
|
||||||
|
);
|
||||||
|
#endif
|
||||||
|
|
||||||
TRACE_EVENT(tcp_retransmit_synack,
|
TRACE_EVENT(tcp_retransmit_synack,
|
||||||
|
|
||||||
TP_PROTO(const struct sock *sk, const struct request_sock *req),
|
TP_PROTO(const struct sock *sk, const struct request_sock *req),
|
||||||
@ -245,6 +257,9 @@ TRACE_EVENT(tcp_probe,
|
|||||||
__field(__u32, srtt)
|
__field(__u32, srtt)
|
||||||
__field(__u32, rcv_wnd)
|
__field(__u32, rcv_wnd)
|
||||||
__field(__u64, sock_cookie)
|
__field(__u64, sock_cookie)
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
__field(__u8, mptcp)
|
||||||
|
#endif
|
||||||
),
|
),
|
||||||
|
|
||||||
TP_fast_assign(
|
TP_fast_assign(
|
||||||
@ -271,13 +286,25 @@ TRACE_EVENT(tcp_probe,
|
|||||||
__entry->ssthresh = tcp_current_ssthresh(sk);
|
__entry->ssthresh = tcp_current_ssthresh(sk);
|
||||||
__entry->srtt = tp->srtt_us >> 3;
|
__entry->srtt = tp->srtt_us >> 3;
|
||||||
__entry->sock_cookie = sock_gen_cookie(sk);
|
__entry->sock_cookie = sock_gen_cookie(sk);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
__entry->mptcp = mptcp(tp) ? tp->mptcp->path_index : 0;
|
||||||
|
#endif
|
||||||
),
|
),
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
TP_printk("src=%pISpc dest=%pISpc mark=%#x data_len=%d snd_nxt=%#x snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u sock_cookie=%llx mptcp=%d",
|
||||||
|
__entry->saddr, __entry->daddr, __entry->mark,
|
||||||
|
__entry->data_len, __entry->snd_nxt, __entry->snd_una,
|
||||||
|
__entry->snd_cwnd, __entry->ssthresh, __entry->snd_wnd,
|
||||||
|
__entry->srtt, __entry->rcv_wnd, __entry->sock_cookie,
|
||||||
|
__entry->mptcp)
|
||||||
|
#else
|
||||||
TP_printk("src=%pISpc dest=%pISpc mark=%#x data_len=%d snd_nxt=%#x snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u sock_cookie=%llx",
|
TP_printk("src=%pISpc dest=%pISpc mark=%#x data_len=%d snd_nxt=%#x snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u sock_cookie=%llx",
|
||||||
__entry->saddr, __entry->daddr, __entry->mark,
|
__entry->saddr, __entry->daddr, __entry->mark,
|
||||||
__entry->data_len, __entry->snd_nxt, __entry->snd_una,
|
__entry->data_len, __entry->snd_nxt, __entry->snd_una,
|
||||||
__entry->snd_cwnd, __entry->ssthresh, __entry->snd_wnd,
|
__entry->snd_cwnd, __entry->ssthresh, __entry->snd_wnd,
|
||||||
__entry->srtt, __entry->rcv_wnd, __entry->sock_cookie)
|
__entry->srtt, __entry->rcv_wnd, __entry->sock_cookie)
|
||||||
|
#endif
|
||||||
);
|
);
|
||||||
|
|
||||||
#endif /* _TRACE_TCP_H */
|
#endif /* _TRACE_TCP_H */
|
||||||
|
@ -2740,6 +2740,9 @@ enum {
|
|||||||
BPF_TCP_LISTEN,
|
BPF_TCP_LISTEN,
|
||||||
BPF_TCP_CLOSING, /* Now a valid state */
|
BPF_TCP_CLOSING, /* Now a valid state */
|
||||||
BPF_TCP_NEW_SYN_RECV,
|
BPF_TCP_NEW_SYN_RECV,
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
BPF_TCP_RST_WAIT,
|
||||||
|
#endif
|
||||||
|
|
||||||
BPF_TCP_MAX_STATES /* Leave at the end! */
|
BPF_TCP_MAX_STATES /* Leave at the end! */
|
||||||
};
|
};
|
||||||
|
@ -132,6 +132,11 @@ enum net_device_flags {
|
|||||||
#define IFF_ECHO IFF_ECHO
|
#define IFF_ECHO IFF_ECHO
|
||||||
#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO */
|
#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO */
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
#define IFF_NOMULTIPATH 0x80000 /* Disable for MPTCP */
|
||||||
|
#define IFF_MPBACKUP 0x100000 /* Use as backup path for MPTCP */
|
||||||
|
#endif
|
||||||
|
|
||||||
#define IFF_VOLATILE (IFF_LOOPBACK|IFF_POINTOPOINT|IFF_BROADCAST|IFF_ECHO|\
|
#define IFF_VOLATILE (IFF_LOOPBACK|IFF_POINTOPOINT|IFF_BROADCAST|IFF_ECHO|\
|
||||||
IFF_MASTER|IFF_SLAVE|IFF_RUNNING|IFF_LOWER_UP|IFF_DORMANT)
|
IFF_MASTER|IFF_SLAVE|IFF_RUNNING|IFF_LOWER_UP|IFF_DORMANT)
|
||||||
|
|
||||||
|
149
include/uapi/linux/mptcp.h
Executable file
149
include/uapi/linux/mptcp.h
Executable file
@ -0,0 +1,149 @@
|
|||||||
|
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||||
|
/*
|
||||||
|
* Netlink API for Multipath TCP
|
||||||
|
*
|
||||||
|
* Author: Gregory Detal <gregory.detal@tessares.net>
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public License
|
||||||
|
* as published by the Free Software Foundation; either version
|
||||||
|
* 2 of the License, or (at your option) any later version.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _LINUX_MPTCP_H
|
||||||
|
#define _LINUX_MPTCP_H
|
||||||
|
|
||||||
|
#define MPTCP_GENL_NAME "mptcp"
|
||||||
|
#define MPTCP_GENL_EV_GRP_NAME "mptcp_events"
|
||||||
|
#define MPTCP_GENL_CMD_GRP_NAME "mptcp_commands"
|
||||||
|
#define MPTCP_GENL_VER 0x1
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ATTR types defined for MPTCP
|
||||||
|
*/
|
||||||
|
enum {
|
||||||
|
MPTCP_ATTR_UNSPEC = 0,
|
||||||
|
|
||||||
|
MPTCP_ATTR_TOKEN, /* u32 */
|
||||||
|
MPTCP_ATTR_FAMILY, /* u16 */
|
||||||
|
MPTCP_ATTR_LOC_ID, /* u8 */
|
||||||
|
MPTCP_ATTR_REM_ID, /* u8 */
|
||||||
|
MPTCP_ATTR_SADDR4, /* u32 */
|
||||||
|
MPTCP_ATTR_SADDR6, /* struct in6_addr */
|
||||||
|
MPTCP_ATTR_DADDR4, /* u32 */
|
||||||
|
MPTCP_ATTR_DADDR6, /* struct in6_addr */
|
||||||
|
MPTCP_ATTR_SPORT, /* u16 */
|
||||||
|
MPTCP_ATTR_DPORT, /* u16 */
|
||||||
|
MPTCP_ATTR_BACKUP, /* u8 */
|
||||||
|
MPTCP_ATTR_ERROR, /* u8 */
|
||||||
|
MPTCP_ATTR_FLAGS, /* u16 */
|
||||||
|
MPTCP_ATTR_TIMEOUT, /* u32 */
|
||||||
|
MPTCP_ATTR_IF_IDX, /* s32 */
|
||||||
|
|
||||||
|
__MPTCP_ATTR_AFTER_LAST
|
||||||
|
};
|
||||||
|
|
||||||
|
#define MPTCP_ATTR_MAX (__MPTCP_ATTR_AFTER_LAST - 1)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Events generated by MPTCP:
|
||||||
|
* - MPTCP_EVENT_CREATED: token, family, saddr4 | saddr6, daddr4 | daddr6,
|
||||||
|
* sport, dport
|
||||||
|
* A new connection has been created. It is the good time to allocate
|
||||||
|
* memory and send ADD_ADDR if needed. Depending on the traffic-patterns
|
||||||
|
* it can take a long time until the MPTCP_EVENT_ESTABLISHED is sent.
|
||||||
|
*
|
||||||
|
* - MPTCP_EVENT_ESTABLISHED: token, family, saddr4 | saddr6, daddr4 | daddr6,
|
||||||
|
* sport, dport
|
||||||
|
* A connection is established (can start new subflows).
|
||||||
|
*
|
||||||
|
* - MPTCP_EVENT_CLOSED: token
|
||||||
|
* A connection has stopped.
|
||||||
|
*
|
||||||
|
* - MPTCP_EVENT_ANNOUNCED: token, rem_id, family, daddr4 | daddr6 [, dport]
|
||||||
|
* A new address has been announced by the peer.
|
||||||
|
*
|
||||||
|
* - MPTCP_EVENT_REMOVED: token, rem_id
|
||||||
|
* An address has been lost by the peer.
|
||||||
|
*
|
||||||
|
* - MPTCP_EVENT_SUB_ESTABLISHED: token, family, saddr4 | saddr6,
|
||||||
|
* daddr4 | daddr6, sport, dport, backup,
|
||||||
|
* if_idx [, error]
|
||||||
|
* A new subflow has been established. 'error' should not be set.
|
||||||
|
*
|
||||||
|
* - MPTCP_EVENT_SUB_CLOSED: token, family, saddr4 | saddr6, daddr4 | daddr6,
|
||||||
|
* sport, dport, backup, if_idx [, error]
|
||||||
|
* A subflow has been closed. An error (copy of sk_err) could be set if an
|
||||||
|
* error has been detected for this subflow.
|
||||||
|
*
|
||||||
|
* - MPTCP_EVENT_SUB_PRIORITY: token, family, saddr4 | saddr6, daddr4 | daddr6,
|
||||||
|
* sport, dport, backup, if_idx [, error]
|
||||||
|
* The priority of a subflow has changed. 'error' should not be set.
|
||||||
|
*
|
||||||
|
* Commands for MPTCP:
|
||||||
|
* - MPTCP_CMD_ANNOUNCE: token, loc_id, family, saddr4 | saddr6 [, sport]
|
||||||
|
* Announce a new address to the peer.
|
||||||
|
*
|
||||||
|
* - MPTCP_CMD_REMOVE: token, loc_id
|
||||||
|
* Announce that an address has been lost to the peer.
|
||||||
|
*
|
||||||
|
* - MPTCP_CMD_SUB_CREATE: token, family, loc_id, rem_id, [saddr4 | saddr6,
|
||||||
|
* daddr4 | daddr6, dport [, sport, backup, if_idx]]
|
||||||
|
* Create a new subflow.
|
||||||
|
*
|
||||||
|
* - MPTCP_CMD_SUB_DESTROY: token, family, saddr4 | saddr6, daddr4 | daddr6,
|
||||||
|
* sport, dport
|
||||||
|
* Close a subflow.
|
||||||
|
*
|
||||||
|
* - MPTCP_CMD_SUB_PRIORITY: token, family, saddr4 | saddr6, daddr4 | daddr6,
|
||||||
|
* sport, dport, backup
|
||||||
|
* Change the priority of a subflow.
|
||||||
|
*
|
||||||
|
* - MPTCP_CMD_SET_FILTER: flags
|
||||||
|
* Set the filter on events. Set MPTCPF_* flags to only receive specific
|
||||||
|
* events. Default is to receive all events.
|
||||||
|
*
|
||||||
|
* - MPTCP_CMD_EXIST: token
|
||||||
|
* Check if this token is linked to an existing socket.
|
||||||
|
*/
|
||||||
|
enum {
|
||||||
|
MPTCP_CMD_UNSPEC = 0,
|
||||||
|
|
||||||
|
MPTCP_EVENT_CREATED,
|
||||||
|
MPTCP_EVENT_ESTABLISHED,
|
||||||
|
MPTCP_EVENT_CLOSED,
|
||||||
|
|
||||||
|
MPTCP_CMD_ANNOUNCE,
|
||||||
|
MPTCP_CMD_REMOVE,
|
||||||
|
MPTCP_EVENT_ANNOUNCED,
|
||||||
|
MPTCP_EVENT_REMOVED,
|
||||||
|
|
||||||
|
MPTCP_CMD_SUB_CREATE,
|
||||||
|
MPTCP_CMD_SUB_DESTROY,
|
||||||
|
MPTCP_EVENT_SUB_ESTABLISHED,
|
||||||
|
MPTCP_EVENT_SUB_CLOSED,
|
||||||
|
|
||||||
|
MPTCP_CMD_SUB_PRIORITY,
|
||||||
|
MPTCP_EVENT_SUB_PRIORITY,
|
||||||
|
|
||||||
|
MPTCP_CMD_SET_FILTER,
|
||||||
|
|
||||||
|
MPTCP_CMD_EXIST,
|
||||||
|
|
||||||
|
__MPTCP_CMD_AFTER_LAST
|
||||||
|
};
|
||||||
|
|
||||||
|
#define MPTCP_CMD_MAX (__MPTCP_CMD_AFTER_LAST - 1)
|
||||||
|
|
||||||
|
enum {
|
||||||
|
MPTCPF_EVENT_CREATED = (1 << 1),
|
||||||
|
MPTCPF_EVENT_ESTABLISHED = (1 << 2),
|
||||||
|
MPTCPF_EVENT_CLOSED = (1 << 3),
|
||||||
|
MPTCPF_EVENT_ANNOUNCED = (1 << 4),
|
||||||
|
MPTCPF_EVENT_REMOVED = (1 << 5),
|
||||||
|
MPTCPF_EVENT_SUB_ESTABLISHED = (1 << 6),
|
||||||
|
MPTCPF_EVENT_SUB_CLOSED = (1 << 7),
|
||||||
|
MPTCPF_EVENT_SUB_PRIORITY = (1 << 8),
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif /* _LINUX_MPTCP_H */
|
@ -18,7 +18,17 @@
|
|||||||
#ifndef _UAPI_LINUX_TCP_H
|
#ifndef _UAPI_LINUX_TCP_H
|
||||||
#define _UAPI_LINUX_TCP_H
|
#define _UAPI_LINUX_TCP_H
|
||||||
|
|
||||||
|
#ifndef CONFIG_MPTCP
|
||||||
#include <linux/types.h>
|
#include <linux/types.h>
|
||||||
|
#endif
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
#ifndef __KERNEL__
|
||||||
|
#include <sys/socket.h>
|
||||||
|
#endif
|
||||||
|
#include <linux/in.h>
|
||||||
|
#include <linux/in6.h>
|
||||||
|
#include <linux/types.h>
|
||||||
|
#endif
|
||||||
#include <asm/byteorder.h>
|
#include <asm/byteorder.h>
|
||||||
#include <linux/socket.h>
|
#include <linux/socket.h>
|
||||||
|
|
||||||
@ -130,6 +140,14 @@ enum {
|
|||||||
#define TCP_REPAIR_ON 1
|
#define TCP_REPAIR_ON 1
|
||||||
#define TCP_REPAIR_OFF 0
|
#define TCP_REPAIR_OFF 0
|
||||||
#define TCP_REPAIR_OFF_NO_WP -1 /* Turn off without window probes */
|
#define TCP_REPAIR_OFF_NO_WP -1 /* Turn off without window probes */
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
#define MPTCP_ENABLED 42
|
||||||
|
#define MPTCP_SCHEDULER 43
|
||||||
|
#define MPTCP_PATH_MANAGER 44
|
||||||
|
#define MPTCP_INFO 45
|
||||||
|
|
||||||
|
#define MPTCP_INFO_FLAG_SAVE_MASTER 0x01
|
||||||
|
#endif
|
||||||
|
|
||||||
struct tcp_repair_opt {
|
struct tcp_repair_opt {
|
||||||
__u32 opt_code;
|
__u32 opt_code;
|
||||||
@ -268,6 +286,55 @@ enum {
|
|||||||
TCP_NLA_REORD_SEEN, /* reordering events seen */
|
TCP_NLA_REORD_SEEN, /* reordering events seen */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
struct mptcp_meta_info {
|
||||||
|
__u8 mptcpi_state;
|
||||||
|
__u8 mptcpi_retransmits;
|
||||||
|
__u8 mptcpi_probes;
|
||||||
|
__u8 mptcpi_backoff;
|
||||||
|
|
||||||
|
__u32 mptcpi_rto;
|
||||||
|
__u32 mptcpi_unacked;
|
||||||
|
|
||||||
|
/* Times. */
|
||||||
|
__u32 mptcpi_last_data_sent;
|
||||||
|
__u32 mptcpi_last_data_recv;
|
||||||
|
__u32 mptcpi_last_ack_recv;
|
||||||
|
|
||||||
|
__u32 mptcpi_total_retrans;
|
||||||
|
|
||||||
|
__u64 mptcpi_bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked */
|
||||||
|
__u64 mptcpi_bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived */
|
||||||
|
};
|
||||||
|
|
||||||
|
struct mptcp_sub_info {
|
||||||
|
union {
|
||||||
|
struct sockaddr src;
|
||||||
|
struct sockaddr_in src_v4;
|
||||||
|
struct sockaddr_in6 src_v6;
|
||||||
|
};
|
||||||
|
|
||||||
|
union {
|
||||||
|
struct sockaddr dst;
|
||||||
|
struct sockaddr_in dst_v4;
|
||||||
|
struct sockaddr_in6 dst_v6;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
struct mptcp_info {
|
||||||
|
__u32 tcp_info_len; /* Length of each struct tcp_info in subflows pointer */
|
||||||
|
__u32 sub_len; /* Total length of memory pointed to by subflows pointer */
|
||||||
|
__u32 meta_len; /* Length of memory pointed to by meta_info */
|
||||||
|
__u32 sub_info_len; /* Length of each struct mptcp_sub_info in subflow_info pointer */
|
||||||
|
__u32 total_sub_info_len; /* Total length of memory pointed to by subflow_info */
|
||||||
|
|
||||||
|
struct mptcp_meta_info *meta_info;
|
||||||
|
struct tcp_info *initial;
|
||||||
|
struct tcp_info *subflows; /* Pointer to array of tcp_info structs */
|
||||||
|
struct mptcp_sub_info *subflow_info;
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
/* for TCP_MD5SIG socket option */
|
/* for TCP_MD5SIG socket option */
|
||||||
#define TCP_MD5SIG_MAXKEYLEN 80
|
#define TCP_MD5SIG_MAXKEYLEN 80
|
||||||
|
|
||||||
|
@ -89,6 +89,9 @@ if INET
|
|||||||
source "net/ipv4/Kconfig"
|
source "net/ipv4/Kconfig"
|
||||||
source "net/ipv6/Kconfig"
|
source "net/ipv6/Kconfig"
|
||||||
source "net/netlabel/Kconfig"
|
source "net/netlabel/Kconfig"
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
source "net/mptcp/Kconfig"
|
||||||
|
#endif
|
||||||
|
|
||||||
endif # if INET
|
endif # if INET
|
||||||
|
|
||||||
|
@ -20,6 +20,7 @@ obj-$(CONFIG_TLS) += tls/
|
|||||||
obj-$(CONFIG_XFRM) += xfrm/
|
obj-$(CONFIG_XFRM) += xfrm/
|
||||||
obj-$(CONFIG_UNIX_SCM) += unix/
|
obj-$(CONFIG_UNIX_SCM) += unix/
|
||||||
obj-$(CONFIG_NET) += ipv6/
|
obj-$(CONFIG_NET) += ipv6/
|
||||||
|
obj-$(CONFIG_MPTCP) += mptcp/
|
||||||
obj-$(CONFIG_BPFILTER) += bpfilter/
|
obj-$(CONFIG_BPFILTER) += bpfilter/
|
||||||
obj-$(CONFIG_PACKET) += packet/
|
obj-$(CONFIG_PACKET) += packet/
|
||||||
obj-$(CONFIG_NET_KEY) += key/
|
obj-$(CONFIG_NET_KEY) += key/
|
||||||
|
@ -7686,7 +7686,11 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags)
|
|||||||
|
|
||||||
dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
|
dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
|
||||||
IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
|
IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
|
||||||
IFF_AUTOMEDIA)) |
|
IFF_AUTOMEDIA
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
| IFF_NOMULTIPATH | IFF_MPBACKUP
|
||||||
|
#endif
|
||||||
|
)) |
|
||||||
(dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
|
(dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
|
||||||
IFF_ALLMULTI));
|
IFF_ALLMULTI));
|
||||||
|
|
||||||
|
@ -548,7 +548,10 @@ static inline void skb_drop_fraglist(struct sk_buff *skb)
|
|||||||
skb_drop_list(&skb_shinfo(skb)->frag_list);
|
skb_drop_list(&skb_shinfo(skb)->frag_list);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void skb_clone_fraglist(struct sk_buff *skb)
|
#ifndef CONFIG_MPTCP
|
||||||
|
static
|
||||||
|
#endif
|
||||||
|
void skb_clone_fraglist(struct sk_buff *skb)
|
||||||
{
|
{
|
||||||
struct sk_buff *list;
|
struct sk_buff *list;
|
||||||
|
|
||||||
|
@ -140,6 +140,11 @@
|
|||||||
|
|
||||||
#include <trace/events/sock.h>
|
#include <trace/events/sock.h>
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
#include <net/mptcp.h>
|
||||||
|
#include <net/inet_common.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include <net/tcp.h>
|
#include <net/tcp.h>
|
||||||
#include <net/busy_poll.h>
|
#include <net/busy_poll.h>
|
||||||
|
|
||||||
@ -405,9 +410,15 @@ int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
|
|||||||
struct sk_buff_head *list = &sk->sk_receive_queue;
|
struct sk_buff_head *list = &sk->sk_receive_queue;
|
||||||
|
|
||||||
if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
|
if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
|
||||||
atomic_inc(&sk->sk_drops);
|
if (sk->sk_rcvbuf < sysctl_rmem_max) {
|
||||||
trace_sock_rcvqueue_full(sk, skb);
|
/* increase sk_rcvbuf twice */
|
||||||
return -ENOMEM;
|
sk->sk_rcvbuf = min(sk->sk_rcvbuf * 2, (int)sysctl_rmem_max);
|
||||||
|
}
|
||||||
|
if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
|
||||||
|
atomic_inc(&sk->sk_drops);
|
||||||
|
trace_sock_rcvqueue_full(sk, skb);
|
||||||
|
return -ENOMEM;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
|
if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
|
||||||
@ -1429,6 +1440,23 @@ lenout:
|
|||||||
*/
|
*/
|
||||||
static inline void sock_lock_init(struct sock *sk)
|
static inline void sock_lock_init(struct sock *sk)
|
||||||
{
|
{
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
/* Reclassify the lock-class for subflows */
|
||||||
|
if (sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP)
|
||||||
|
if (mptcp(tcp_sk(sk)) || tcp_sk(sk)->is_master_sk) {
|
||||||
|
sock_lock_init_class_and_name(sk, meta_slock_key_name,
|
||||||
|
&meta_slock_key,
|
||||||
|
meta_key_name,
|
||||||
|
&meta_key);
|
||||||
|
|
||||||
|
/* We don't yet have the mptcp-point.
|
||||||
|
* Thus we still need inet_sock_destruct
|
||||||
|
*/
|
||||||
|
sk->sk_destruct = inet_sock_destruct;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
if (sk->sk_kern_sock)
|
if (sk->sk_kern_sock)
|
||||||
sock_lock_init_class_and_name(
|
sock_lock_init_class_and_name(
|
||||||
sk,
|
sk,
|
||||||
@ -1478,7 +1506,16 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
|
|||||||
if (!sk)
|
if (!sk)
|
||||||
return sk;
|
return sk;
|
||||||
if (want_init_on_alloc(priority))
|
if (want_init_on_alloc(priority))
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
{
|
||||||
|
if (prot->clear_sk)
|
||||||
|
prot->clear_sk(sk, prot->obj_size);
|
||||||
|
else
|
||||||
|
#endif
|
||||||
sk_prot_clear_nulls(sk, prot->obj_size);
|
sk_prot_clear_nulls(sk, prot->obj_size);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
}
|
||||||
|
#endif
|
||||||
} else
|
} else
|
||||||
sk = kmalloc(prot->obj_size, priority);
|
sk = kmalloc(prot->obj_size, priority);
|
||||||
|
|
||||||
@ -1708,6 +1745,9 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
|
|||||||
atomic_set(&newsk->sk_zckey, 0);
|
atomic_set(&newsk->sk_zckey, 0);
|
||||||
|
|
||||||
sock_reset_flag(newsk, SOCK_DONE);
|
sock_reset_flag(newsk, SOCK_DONE);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
sock_reset_flag(newsk, SOCK_MPTCP);
|
||||||
|
#endif
|
||||||
|
|
||||||
/* sk->sk_memcg will be populated at accept() time */
|
/* sk->sk_memcg will be populated at accept() time */
|
||||||
newsk->sk_memcg = NULL;
|
newsk->sk_memcg = NULL;
|
||||||
|
@ -681,6 +681,51 @@ config TCP_CONG_BBR
|
|||||||
bufferbloat, policers, or AQM schemes that do not provide a delay
|
bufferbloat, policers, or AQM schemes that do not provide a delay
|
||||||
signal. It requires the fq ("Fair Queue") pacing packet scheduler.
|
signal. It requires the fq ("Fair Queue") pacing packet scheduler.
|
||||||
|
|
||||||
|
config TCP_CONG_LIA
|
||||||
|
tristate "MPTCP Linked Increase"
|
||||||
|
depends on MPTCP
|
||||||
|
default n
|
||||||
|
---help---
|
||||||
|
MultiPath TCP Linked Increase Congestion Control
|
||||||
|
To enable it, just put 'lia' in tcp_congestion_control
|
||||||
|
|
||||||
|
config TCP_CONG_OLIA
|
||||||
|
tristate "MPTCP Opportunistic Linked Increase"
|
||||||
|
depends on MPTCP
|
||||||
|
default n
|
||||||
|
---help---
|
||||||
|
MultiPath TCP Opportunistic Linked Increase Congestion Control
|
||||||
|
To enable it, just put 'olia' in tcp_congestion_control
|
||||||
|
|
||||||
|
config TCP_CONG_WVEGAS
|
||||||
|
tristate "MPTCP WVEGAS CONGESTION CONTROL"
|
||||||
|
depends on MPTCP
|
||||||
|
default n
|
||||||
|
---help---
|
||||||
|
wVegas congestion control for MPTCP
|
||||||
|
To enable it, just put 'wvegas' in tcp_congestion_control
|
||||||
|
|
||||||
|
config TCP_CONG_BALIA
|
||||||
|
tristate "MPTCP BALIA CONGESTION CONTROL"
|
||||||
|
depends on MPTCP
|
||||||
|
default n
|
||||||
|
---help---
|
||||||
|
Multipath TCP Balanced Linked Adaptation Congestion Control
|
||||||
|
To enable it, just put 'balia' in tcp_congestion_control
|
||||||
|
|
||||||
|
config TCP_CONG_MCTCPDESYNC
|
||||||
|
tristate "DESYNCHRONIZED MCTCP CONGESTION CONTROL (EXPERIMENTAL)"
|
||||||
|
depends on MPTCP
|
||||||
|
default n
|
||||||
|
---help---
|
||||||
|
Desynchronized MultiChannel TCP Congestion Control. This is experimental
|
||||||
|
code that only supports single path and must have set mptcp_ndiffports
|
||||||
|
larger than one.
|
||||||
|
To enable it, just put 'mctcpdesync' in tcp_congestion_control
|
||||||
|
For further details see:
|
||||||
|
http://ieeexplore.ieee.org/abstract/document/6911722/
|
||||||
|
https://doi.org/10.1016/j.comcom.2015.07.010
|
||||||
|
|
||||||
choice
|
choice
|
||||||
prompt "Default TCP congestion control"
|
prompt "Default TCP congestion control"
|
||||||
default DEFAULT_CUBIC
|
default DEFAULT_CUBIC
|
||||||
@ -718,6 +763,21 @@ choice
|
|||||||
config DEFAULT_BBR
|
config DEFAULT_BBR
|
||||||
bool "BBR" if TCP_CONG_BBR=y
|
bool "BBR" if TCP_CONG_BBR=y
|
||||||
|
|
||||||
|
config DEFAULT_LIA
|
||||||
|
bool "Lia" if TCP_CONG_LIA=y
|
||||||
|
|
||||||
|
config DEFAULT_OLIA
|
||||||
|
bool "Olia" if TCP_CONG_OLIA=y
|
||||||
|
|
||||||
|
config DEFAULT_WVEGAS
|
||||||
|
bool "Wvegas" if TCP_CONG_WVEGAS=y
|
||||||
|
|
||||||
|
config DEFAULT_BALIA
|
||||||
|
bool "Balia" if TCP_CONG_BALIA=y
|
||||||
|
|
||||||
|
config DEFAULT_MCTCPDESYNC
|
||||||
|
bool "Mctcpdesync (EXPERIMENTAL)" if TCP_CONG_MCTCPDESYNC=y
|
||||||
|
|
||||||
config DEFAULT_RENO
|
config DEFAULT_RENO
|
||||||
bool "Reno"
|
bool "Reno"
|
||||||
endchoice
|
endchoice
|
||||||
@ -738,6 +798,10 @@ config DEFAULT_TCP_CONG
|
|||||||
default "vegas" if DEFAULT_VEGAS
|
default "vegas" if DEFAULT_VEGAS
|
||||||
default "westwood" if DEFAULT_WESTWOOD
|
default "westwood" if DEFAULT_WESTWOOD
|
||||||
default "veno" if DEFAULT_VENO
|
default "veno" if DEFAULT_VENO
|
||||||
|
default "lia" if DEFAULT_LIA
|
||||||
|
default "olia" if DEFAULT_OLIA
|
||||||
|
default "wvegas" if DEFAULT_WVEGAS
|
||||||
|
default "balia" if DEFAULT_BALIA
|
||||||
default "reno" if DEFAULT_RENO
|
default "reno" if DEFAULT_RENO
|
||||||
default "dctcp" if DEFAULT_DCTCP
|
default "dctcp" if DEFAULT_DCTCP
|
||||||
default "cdg" if DEFAULT_CDG
|
default "cdg" if DEFAULT_CDG
|
||||||
|
@ -105,6 +105,9 @@
|
|||||||
#include <net/ip_fib.h>
|
#include <net/ip_fib.h>
|
||||||
#include <net/inet_connection_sock.h>
|
#include <net/inet_connection_sock.h>
|
||||||
#include <net/tcp.h>
|
#include <net/tcp.h>
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
#include <net/mptcp.h>
|
||||||
|
#endif
|
||||||
#include <net/udp.h>
|
#include <net/udp.h>
|
||||||
#include <net/udplite.h>
|
#include <net/udplite.h>
|
||||||
#include <net/ping.h>
|
#include <net/ping.h>
|
||||||
@ -121,6 +124,9 @@
|
|||||||
#include <linux/mroute.h>
|
#include <linux/mroute.h>
|
||||||
#endif
|
#endif
|
||||||
#include <net/l3mdev.h>
|
#include <net/l3mdev.h>
|
||||||
|
#ifdef CONFIG_NET_ANALYTICS
|
||||||
|
#include <net/analytics.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include <trace/events/sock.h>
|
#include <trace/events/sock.h>
|
||||||
|
|
||||||
@ -167,6 +173,11 @@ void inet_sock_destruct(struct sock *sk)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (sock_flag(sk, SOCK_MPTCP))
|
||||||
|
mptcp_disable_static_key();
|
||||||
|
#endif
|
||||||
|
|
||||||
WARN_ON(atomic_read(&sk->sk_rmem_alloc));
|
WARN_ON(atomic_read(&sk->sk_rmem_alloc));
|
||||||
WARN_ON(refcount_read(&sk->sk_wmem_alloc));
|
WARN_ON(refcount_read(&sk->sk_wmem_alloc));
|
||||||
WARN_ON(sk->sk_wmem_queued);
|
WARN_ON(sk->sk_wmem_queued);
|
||||||
@ -261,8 +272,12 @@ EXPORT_SYMBOL(inet_listen);
|
|||||||
* Create an inet socket.
|
* Create an inet socket.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
int inet_create(struct net *net, struct socket *sock, int protocol, int kern)
|
||||||
|
#else
|
||||||
static int inet_create(struct net *net, struct socket *sock, int protocol,
|
static int inet_create(struct net *net, struct socket *sock, int protocol,
|
||||||
int kern)
|
int kern)
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
struct sock *sk;
|
struct sock *sk;
|
||||||
struct inet_protosw *answer;
|
struct inet_protosw *answer;
|
||||||
@ -761,6 +776,24 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags,
|
|||||||
lock_sock(sk2);
|
lock_sock(sk2);
|
||||||
|
|
||||||
sock_rps_record_flow(sk2);
|
sock_rps_record_flow(sk2);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (sk2->sk_protocol == IPPROTO_TCP && mptcp(tcp_sk(sk2))) {
|
||||||
|
struct mptcp_tcp_sock *mptcp;
|
||||||
|
|
||||||
|
mptcp_for_each_sub(tcp_sk(sk2)->mpcb, mptcp) {
|
||||||
|
sock_rps_record_flow(mptcp_to_sock(mptcp));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (tcp_sk(sk2)->mpcb->master_sk) {
|
||||||
|
struct sock *sk_it = tcp_sk(sk2)->mpcb->master_sk;
|
||||||
|
|
||||||
|
write_lock_bh(&sk_it->sk_callback_lock);
|
||||||
|
sk_it->sk_wq = newsock->wq;
|
||||||
|
sk_it->sk_socket = newsock;
|
||||||
|
write_unlock_bh(&sk_it->sk_callback_lock);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
WARN_ON(!((1 << sk2->sk_state) &
|
WARN_ON(!((1 << sk2->sk_state) &
|
||||||
(TCPF_ESTABLISHED | TCPF_SYN_RECV |
|
(TCPF_ESTABLISHED | TCPF_SYN_RECV |
|
||||||
TCPF_CLOSE_WAIT | TCPF_CLOSE)));
|
TCPF_CLOSE_WAIT | TCPF_CLOSE)));
|
||||||
@ -809,6 +842,9 @@ EXPORT_SYMBOL(inet_getname);
|
|||||||
int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
|
int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
|
||||||
{
|
{
|
||||||
struct sock *sk = sock->sk;
|
struct sock *sk = sock->sk;
|
||||||
|
#ifdef CONFIG_NET_ANALYTICS
|
||||||
|
int err;
|
||||||
|
#endif
|
||||||
|
|
||||||
sock_rps_record_flow(sk);
|
sock_rps_record_flow(sk);
|
||||||
|
|
||||||
@ -817,7 +853,14 @@ int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
|
|||||||
inet_autobind(sk))
|
inet_autobind(sk))
|
||||||
return -EAGAIN;
|
return -EAGAIN;
|
||||||
|
|
||||||
|
#ifdef CONFIG_NET_ANALYTICS
|
||||||
|
err = sk->sk_prot->sendmsg(sk, msg, size);
|
||||||
|
net_usr_tx(sk, err);
|
||||||
|
|
||||||
|
return err;
|
||||||
|
#else
|
||||||
return sk->sk_prot->sendmsg(sk, msg, size);
|
return sk->sk_prot->sendmsg(sk, msg, size);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(inet_sendmsg);
|
EXPORT_SYMBOL(inet_sendmsg);
|
||||||
|
|
||||||
@ -853,6 +896,11 @@ int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
|
|||||||
flags & ~MSG_DONTWAIT, &addr_len);
|
flags & ~MSG_DONTWAIT, &addr_len);
|
||||||
if (err >= 0)
|
if (err >= 0)
|
||||||
msg->msg_namelen = addr_len;
|
msg->msg_namelen = addr_len;
|
||||||
|
|
||||||
|
#ifdef CONFIG_NET_ANALYTICS
|
||||||
|
net_usr_rx(sk, err);
|
||||||
|
#endif
|
||||||
|
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(inet_recvmsg);
|
EXPORT_SYMBOL(inet_recvmsg);
|
||||||
@ -1967,6 +2015,10 @@ static int __init inet_init(void)
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
ip_init();
|
ip_init();
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
/* We must initialize MPTCP before TCP. */
|
||||||
|
mptcp_init();
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Initialise per-cpu ipv4 mibs */
|
/* Initialise per-cpu ipv4 mibs */
|
||||||
if (init_ipv4_mibs())
|
if (init_ipv4_mibs())
|
||||||
|
@ -23,6 +23,9 @@
|
|||||||
#include <net/route.h>
|
#include <net/route.h>
|
||||||
#include <net/tcp_states.h>
|
#include <net/tcp_states.h>
|
||||||
#include <net/xfrm.h>
|
#include <net/xfrm.h>
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
#include <net/mptcp.h>
|
||||||
|
#endif
|
||||||
#include <net/tcp.h>
|
#include <net/tcp.h>
|
||||||
#include <net/sock_reuseport.h>
|
#include <net/sock_reuseport.h>
|
||||||
#include <net/addrconf.h>
|
#include <net/addrconf.h>
|
||||||
@ -735,9 +738,18 @@ static void reqsk_timer_handler(struct timer_list *t)
|
|||||||
int max_retries, thresh;
|
int max_retries, thresh;
|
||||||
u8 defer_accept;
|
u8 defer_accept;
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (!is_meta_sk(sk_listener) && inet_sk_state_load(sk_listener) != TCP_LISTEN)
|
||||||
|
#else
|
||||||
if (inet_sk_state_load(sk_listener) != TCP_LISTEN)
|
if (inet_sk_state_load(sk_listener) != TCP_LISTEN)
|
||||||
|
#endif
|
||||||
goto drop;
|
goto drop;
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (is_meta_sk(sk_listener) && !mptcp_can_new_subflow(sk_listener))
|
||||||
|
goto drop;
|
||||||
|
#endif
|
||||||
|
|
||||||
max_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries;
|
max_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries;
|
||||||
thresh = max_retries;
|
thresh = max_retries;
|
||||||
/* Normally all the openreqs are young and become mature
|
/* Normally all the openreqs are young and become mature
|
||||||
@ -1028,6 +1040,16 @@ void inet_csk_listen_stop(struct sock *sk)
|
|||||||
*/
|
*/
|
||||||
while ((req = reqsk_queue_remove(queue, sk)) != NULL) {
|
while ((req = reqsk_queue_remove(queue, sk)) != NULL) {
|
||||||
struct sock *child = req->sk;
|
struct sock *child = req->sk;
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
bool mutex_taken = false;
|
||||||
|
struct mptcp_cb *mpcb = tcp_sk(child)->mpcb;
|
||||||
|
|
||||||
|
if (is_meta_sk(child)) {
|
||||||
|
WARN_ON(refcount_inc_not_zero(&mpcb->mpcb_refcnt) == 0);
|
||||||
|
mutex_lock(&mpcb->mpcb_mutex);
|
||||||
|
mutex_taken = true;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
local_bh_disable();
|
local_bh_disable();
|
||||||
bh_lock_sock(child);
|
bh_lock_sock(child);
|
||||||
@ -1038,6 +1060,12 @@ void inet_csk_listen_stop(struct sock *sk)
|
|||||||
reqsk_put(req);
|
reqsk_put(req);
|
||||||
bh_unlock_sock(child);
|
bh_unlock_sock(child);
|
||||||
local_bh_enable();
|
local_bh_enable();
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (mutex_taken) {
|
||||||
|
mutex_unlock(&mpcb->mpcb_mutex);
|
||||||
|
mptcp_mpcb_put(mpcb);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
sock_put(child);
|
sock_put(child);
|
||||||
|
|
||||||
cond_resched();
|
cond_resched();
|
||||||
|
@ -43,6 +43,9 @@
|
|||||||
#include <net/transp_v6.h>
|
#include <net/transp_v6.h>
|
||||||
#endif
|
#endif
|
||||||
#include <net/ip_fib.h>
|
#include <net/ip_fib.h>
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
#include <net/mptcp.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include <linux/errqueue.h>
|
#include <linux/errqueue.h>
|
||||||
#include <linux/uaccess.h>
|
#include <linux/uaccess.h>
|
||||||
@ -343,6 +346,10 @@ int ip_ra_control(struct sock *sk, unsigned char on,
|
|||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
|
new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (on && !new_ra)
|
||||||
|
return -ENOMEM;
|
||||||
|
#endif
|
||||||
|
|
||||||
mutex_lock(&net->ipv4.ra_mutex);
|
mutex_lock(&net->ipv4.ra_mutex);
|
||||||
for (rap = &net->ipv4.ra_chain;
|
for (rap = &net->ipv4.ra_chain;
|
||||||
@ -655,7 +662,11 @@ static int do_ip_setsockopt(struct sock *sk, int level,
|
|||||||
break;
|
break;
|
||||||
old = rcu_dereference_protected(inet->inet_opt,
|
old = rcu_dereference_protected(inet->inet_opt,
|
||||||
lockdep_sock_is_held(sk));
|
lockdep_sock_is_held(sk));
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (inet->is_icsk && !is_meta_sk(sk)) {
|
||||||
|
#else
|
||||||
if (inet->is_icsk) {
|
if (inet->is_icsk) {
|
||||||
|
#endif
|
||||||
struct inet_connection_sock *icsk = inet_csk(sk);
|
struct inet_connection_sock *icsk = inet_csk(sk);
|
||||||
#if IS_ENABLED(CONFIG_IPV6)
|
#if IS_ENABLED(CONFIG_IPV6)
|
||||||
if (sk->sk_family == PF_INET ||
|
if (sk->sk_family == PF_INET ||
|
||||||
@ -749,6 +760,23 @@ static int do_ip_setsockopt(struct sock *sk, int level,
|
|||||||
inet->tos = val;
|
inet->tos = val;
|
||||||
sk->sk_priority = rt_tos2priority(val);
|
sk->sk_priority = rt_tos2priority(val);
|
||||||
sk_dst_reset(sk);
|
sk_dst_reset(sk);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
|
||||||
|
/* Update TOS on mptcp subflow */
|
||||||
|
if (is_meta_sk(sk)) {
|
||||||
|
struct mptcp_tcp_sock *mptcp;
|
||||||
|
|
||||||
|
mptcp_for_each_sub(tcp_sk(sk)->mpcb, mptcp) {
|
||||||
|
struct sock *sk_it = mptcp_to_sock(mptcp);
|
||||||
|
|
||||||
|
if (inet_sk(sk_it)->tos != inet_sk(sk)->tos) {
|
||||||
|
inet_sk(sk_it)->tos = inet_sk(sk)->tos;
|
||||||
|
sk_it->sk_priority = sk->sk_priority;
|
||||||
|
sk_dst_reset(sk_it);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case IP_TTL:
|
case IP_TTL:
|
||||||
|
@ -16,6 +16,10 @@
|
|||||||
#include <linux/siphash.h>
|
#include <linux/siphash.h>
|
||||||
#include <linux/kernel.h>
|
#include <linux/kernel.h>
|
||||||
#include <linux/export.h>
|
#include <linux/export.h>
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
#include <net/mptcp.h>
|
||||||
|
#include <net/mptcp_v4.h>
|
||||||
|
#endif
|
||||||
#include <net/secure_seq.h>
|
#include <net/secure_seq.h>
|
||||||
#include <net/tcp.h>
|
#include <net/tcp.h>
|
||||||
#include <net/route.h>
|
#include <net/route.h>
|
||||||
@ -179,7 +183,12 @@ u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th,
|
|||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(__cookie_v4_init_sequence);
|
EXPORT_SYMBOL_GPL(__cookie_v4_init_sequence);
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
__u32 cookie_v4_init_sequence(struct request_sock *req, const struct sock *sk,
|
||||||
|
const struct sk_buff *skb, __u16 *mssp)
|
||||||
|
#else
|
||||||
__u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mssp)
|
__u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mssp)
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
const struct iphdr *iph = ip_hdr(skb);
|
const struct iphdr *iph = ip_hdr(skb);
|
||||||
const struct tcphdr *th = tcp_hdr(skb);
|
const struct tcphdr *th = tcp_hdr(skb);
|
||||||
@ -209,9 +218,27 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
|
|||||||
struct inet_connection_sock *icsk = inet_csk(sk);
|
struct inet_connection_sock *icsk = inet_csk(sk);
|
||||||
struct sock *child;
|
struct sock *child;
|
||||||
bool own_req;
|
bool own_req;
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
int ret;
|
||||||
|
#endif
|
||||||
|
|
||||||
child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst,
|
child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst,
|
||||||
NULL, &own_req);
|
NULL, &own_req);
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (!child)
|
||||||
|
goto listen_overflow;
|
||||||
|
|
||||||
|
ret = mptcp_check_req_master(sk, child, req, skb, 0, tsoff);
|
||||||
|
if (ret < 0)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
if (!ret)
|
||||||
|
return tcp_sk(child)->mpcb->master_sk;
|
||||||
|
|
||||||
|
listen_overflow:
|
||||||
|
#endif
|
||||||
|
|
||||||
if (child) {
|
if (child) {
|
||||||
refcount_set(&req->rsk_refcnt, 1);
|
refcount_set(&req->rsk_refcnt, 1);
|
||||||
tcp_sk(child)->tsoffset = tsoff;
|
tcp_sk(child)->tsoffset = tsoff;
|
||||||
@ -289,6 +316,9 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
|
|||||||
{
|
{
|
||||||
struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt;
|
struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt;
|
||||||
struct tcp_options_received tcp_opt;
|
struct tcp_options_received tcp_opt;
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
struct mptcp_options_received mopt;
|
||||||
|
#endif
|
||||||
struct inet_request_sock *ireq;
|
struct inet_request_sock *ireq;
|
||||||
struct tcp_request_sock *treq;
|
struct tcp_request_sock *treq;
|
||||||
struct tcp_sock *tp = tcp_sk(sk);
|
struct tcp_sock *tp = tcp_sk(sk);
|
||||||
@ -318,7 +348,12 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
|
|||||||
|
|
||||||
/* check for timestamp cookie support */
|
/* check for timestamp cookie support */
|
||||||
memset(&tcp_opt, 0, sizeof(tcp_opt));
|
memset(&tcp_opt, 0, sizeof(tcp_opt));
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
mptcp_init_mp_opt(&mopt);
|
||||||
|
tcp_parse_options(sock_net(sk), skb, &tcp_opt, &mopt, 0, NULL, NULL);
|
||||||
|
#else
|
||||||
tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL);
|
tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL);
|
||||||
|
#endif
|
||||||
|
|
||||||
if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
|
if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
|
||||||
tsoff = secure_tcp_ts_off(sock_net(sk),
|
tsoff = secure_tcp_ts_off(sock_net(sk),
|
||||||
@ -331,7 +366,12 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
|
|||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
ret = NULL;
|
ret = NULL;
|
||||||
req = inet_reqsk_alloc(&tcp_request_sock_ops, sk, false); /* for safety */
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (mopt.saw_mpc)
|
||||||
|
req = inet_reqsk_alloc(&mptcp_request_sock_ops, sk, false); /* for safety */
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
req = inet_reqsk_alloc(&tcp_request_sock_ops, sk, false); /* for safety */
|
||||||
if (!req)
|
if (!req)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
@ -352,6 +392,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
|
|||||||
ireq->sack_ok = tcp_opt.sack_ok;
|
ireq->sack_ok = tcp_opt.sack_ok;
|
||||||
ireq->wscale_ok = tcp_opt.wscale_ok;
|
ireq->wscale_ok = tcp_opt.wscale_ok;
|
||||||
ireq->tstamp_ok = tcp_opt.saw_tstamp;
|
ireq->tstamp_ok = tcp_opt.saw_tstamp;
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
ireq->mptcp_rqsk = 0;
|
||||||
|
ireq->saw_mpc = 0;
|
||||||
|
#endif
|
||||||
req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
|
req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
|
||||||
treq->snt_synack = 0;
|
treq->snt_synack = 0;
|
||||||
treq->tfo_listener = false;
|
treq->tfo_listener = false;
|
||||||
@ -360,6 +404,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
|
|||||||
|
|
||||||
ireq->ir_iif = inet_request_bound_dev_if(sk, skb);
|
ireq->ir_iif = inet_request_bound_dev_if(sk, skb);
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (mopt.saw_mpc)
|
||||||
|
mptcp_cookies_reqsk_init(req, &mopt, skb);
|
||||||
|
#endif
|
||||||
/* We throwed the options of the initial SYN away, so we hope
|
/* We throwed the options of the initial SYN away, so we hope
|
||||||
* the ACK carries the same options again (see RFC1122 4.2.3.8)
|
* the ACK carries the same options again (see RFC1122 4.2.3.8)
|
||||||
*/
|
*/
|
||||||
@ -398,10 +446,18 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
|
|||||||
(req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0))
|
(req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0))
|
||||||
req->rsk_window_clamp = full_space;
|
req->rsk_window_clamp = full_space;
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
tp->ops->select_initial_window(sk, full_space, req->mss,
|
||||||
|
&req->rsk_rcv_wnd, &req->rsk_window_clamp,
|
||||||
|
ireq->wscale_ok, &rcv_wscale,
|
||||||
|
dst_metric(&rt->dst, RTAX_INITRWND));
|
||||||
|
|
||||||
|
#else
|
||||||
tcp_select_initial_window(sk, full_space, req->mss,
|
tcp_select_initial_window(sk, full_space, req->mss,
|
||||||
&req->rsk_rcv_wnd, &req->rsk_window_clamp,
|
&req->rsk_rcv_wnd, &req->rsk_window_clamp,
|
||||||
ireq->wscale_ok, &rcv_wscale,
|
ireq->wscale_ok, &rcv_wscale,
|
||||||
dst_metric(&rt->dst, RTAX_INITRWND));
|
dst_metric(&rt->dst, RTAX_INITRWND));
|
||||||
|
#endif
|
||||||
|
|
||||||
ireq->rcv_wscale = rcv_wscale;
|
ireq->rcv_wscale = rcv_wscale;
|
||||||
ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), &rt->dst);
|
ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), &rt->dst);
|
||||||
|
444
net/ipv4/tcp.c
444
net/ipv4/tcp.c
@ -274,6 +274,9 @@
|
|||||||
|
|
||||||
#include <net/icmp.h>
|
#include <net/icmp.h>
|
||||||
#include <net/inet_common.h>
|
#include <net/inet_common.h>
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
#include <net/mptcp.h>
|
||||||
|
#endif
|
||||||
#include <net/tcp.h>
|
#include <net/tcp.h>
|
||||||
#include <net/xfrm.h>
|
#include <net/xfrm.h>
|
||||||
#include <net/ip.h>
|
#include <net/ip.h>
|
||||||
@ -404,6 +407,30 @@ static u64 tcp_compute_delivery_rate(const struct tcp_sock *tp)
|
|||||||
return rate64;
|
return rate64;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
static int select_size(const struct sock *sk, bool first_skb, bool zc);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
const struct tcp_sock_ops tcp_specific = {
|
||||||
|
.__select_window = __tcp_select_window,
|
||||||
|
.select_window = tcp_select_window,
|
||||||
|
.select_initial_window = tcp_select_initial_window,
|
||||||
|
.select_size = select_size,
|
||||||
|
.init_buffer_space = tcp_init_buffer_space,
|
||||||
|
.set_rto = tcp_set_rto,
|
||||||
|
.should_expand_sndbuf = tcp_should_expand_sndbuf,
|
||||||
|
.send_fin = tcp_send_fin,
|
||||||
|
.write_xmit = tcp_write_xmit,
|
||||||
|
.send_active_reset = tcp_send_active_reset,
|
||||||
|
.write_wakeup = tcp_write_wakeup,
|
||||||
|
.retransmit_timer = tcp_retransmit_timer,
|
||||||
|
.time_wait = tcp_time_wait,
|
||||||
|
.cleanup_rbuf = tcp_cleanup_rbuf,
|
||||||
|
.cwnd_validate = tcp_cwnd_validate,
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Address-family independent initialization for a tcp_sock.
|
/* Address-family independent initialization for a tcp_sock.
|
||||||
*
|
*
|
||||||
* NOTE: A lot of things set to zero explicitly by call to
|
* NOTE: A lot of things set to zero explicitly by call to
|
||||||
@ -457,6 +484,12 @@ void tcp_init_sock(struct sock *sk)
|
|||||||
sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1];
|
sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1];
|
||||||
sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
|
sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
tp->ops = &tcp_specific;
|
||||||
|
|
||||||
|
/* Initialize MPTCP-specific stuff and function-pointers */
|
||||||
|
mptcp_init_tcp_sock(sk);
|
||||||
|
#endif
|
||||||
sk_sockets_allocated_inc(sk);
|
sk_sockets_allocated_inc(sk);
|
||||||
sk->sk_route_forced_caps = NETIF_F_GSO;
|
sk->sk_route_forced_caps = NETIF_F_GSO;
|
||||||
}
|
}
|
||||||
@ -471,7 +504,11 @@ void tcp_init_transfer(struct sock *sk, int bpf_op)
|
|||||||
tcp_init_metrics(sk);
|
tcp_init_metrics(sk);
|
||||||
tcp_call_bpf(sk, bpf_op, 0, NULL);
|
tcp_call_bpf(sk, bpf_op, 0, NULL);
|
||||||
tcp_init_congestion_control(sk);
|
tcp_init_congestion_control(sk);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
tcp_sk(sk)->ops->init_buffer_space(sk);
|
||||||
|
#else
|
||||||
tcp_init_buffer_space(sk);
|
tcp_init_buffer_space(sk);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
|
static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
|
||||||
@ -811,6 +848,16 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
|
|||||||
|
|
||||||
lock_sock(sk);
|
lock_sock(sk);
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (mptcp(tcp_sk(sk))) {
|
||||||
|
struct mptcp_tcp_sock *mptcp;
|
||||||
|
|
||||||
|
mptcp_for_each_sub(tcp_sk(sk)->mpcb, mptcp) {
|
||||||
|
sock_rps_record_flow(mptcp_to_sock(mptcp));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
timeo = sock_rcvtimeo(sk, sock->file->f_flags & O_NONBLOCK);
|
timeo = sock_rcvtimeo(sk, sock->file->f_flags & O_NONBLOCK);
|
||||||
while (tss.len) {
|
while (tss.len) {
|
||||||
ret = __tcp_splice_read(sk, &tss);
|
ret = __tcp_splice_read(sk, &tss);
|
||||||
@ -914,8 +961,11 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
|
#ifndef CONFIG_MPTCP
|
||||||
int large_allowed)
|
static
|
||||||
|
#endif
|
||||||
|
unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
|
||||||
|
int large_allowed)
|
||||||
{
|
{
|
||||||
struct tcp_sock *tp = tcp_sk(sk);
|
struct tcp_sock *tp = tcp_sk(sk);
|
||||||
u32 new_size_goal, size_goal;
|
u32 new_size_goal, size_goal;
|
||||||
@ -943,8 +993,17 @@ static int tcp_send_mss(struct sock *sk, int *size_goal, int flags)
|
|||||||
{
|
{
|
||||||
int mss_now;
|
int mss_now;
|
||||||
|
|
||||||
mss_now = tcp_current_mss(sk);
|
#ifdef CONFIG_MPTCP
|
||||||
*size_goal = tcp_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB));
|
if (mptcp(tcp_sk(sk))) {
|
||||||
|
mss_now = mptcp_current_mss(sk);
|
||||||
|
*size_goal = mptcp_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB));
|
||||||
|
} else {
|
||||||
|
#endif
|
||||||
|
mss_now = tcp_current_mss(sk);
|
||||||
|
*size_goal = tcp_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB));
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
return mss_now;
|
return mss_now;
|
||||||
}
|
}
|
||||||
@ -979,12 +1038,39 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
|
|||||||
* is fully established.
|
* is fully established.
|
||||||
*/
|
*/
|
||||||
if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) &&
|
if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) &&
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
!tcp_passive_fastopen(mptcp(tp) && tp->mpcb->master_sk ?
|
||||||
|
tp->mpcb->master_sk : sk)) {
|
||||||
|
#else
|
||||||
!tcp_passive_fastopen(sk)) {
|
!tcp_passive_fastopen(sk)) {
|
||||||
|
#endif
|
||||||
err = sk_stream_wait_connect(sk, &timeo);
|
err = sk_stream_wait_connect(sk, &timeo);
|
||||||
if (err != 0)
|
if (err != 0)
|
||||||
goto out_err;
|
goto out_err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (mptcp(tp)) {
|
||||||
|
struct mptcp_tcp_sock *mptcp;
|
||||||
|
|
||||||
|
/* We must check this with socket-lock hold because we iterate
|
||||||
|
* over the subflows.
|
||||||
|
*/
|
||||||
|
if (!mptcp_can_sendpage(sk)) {
|
||||||
|
ssize_t ret;
|
||||||
|
|
||||||
|
release_sock(sk);
|
||||||
|
ret = sock_no_sendpage(sk->sk_socket, page, offset,
|
||||||
|
size, flags);
|
||||||
|
lock_sock(sk);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
mptcp_for_each_sub(tp->mpcb, mptcp) {
|
||||||
|
sock_rps_record_flow(mptcp_to_sock(mptcp));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
|
sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
|
||||||
|
|
||||||
mss_now = tcp_send_mss(sk, &size_goal, flags);
|
mss_now = tcp_send_mss(sk, &size_goal, flags);
|
||||||
@ -1103,7 +1189,12 @@ EXPORT_SYMBOL_GPL(do_tcp_sendpages);
|
|||||||
int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
|
int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
|
||||||
size_t size, int flags)
|
size_t size, int flags)
|
||||||
{
|
{
|
||||||
|
/* If MPTCP is enabled, we check it later after establishment */
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (!mptcp(tcp_sk(sk)) && !(sk->sk_route_caps & NETIF_F_SG))
|
||||||
|
#else
|
||||||
if (!(sk->sk_route_caps & NETIF_F_SG))
|
if (!(sk->sk_route_caps & NETIF_F_SG))
|
||||||
|
#endif
|
||||||
return sock_no_sendpage_locked(sk, page, offset, size, flags);
|
return sock_no_sendpage_locked(sk, page, offset, size, flags);
|
||||||
|
|
||||||
tcp_rate_check_app_limited(sk); /* is sending application-limited? */
|
tcp_rate_check_app_limited(sk); /* is sending application-limited? */
|
||||||
@ -1135,14 +1226,21 @@ EXPORT_SYMBOL(tcp_sendpage);
|
|||||||
* This also speeds up tso_fragment(), since it wont fallback
|
* This also speeds up tso_fragment(), since it wont fallback
|
||||||
* to tcp_fragment().
|
* to tcp_fragment().
|
||||||
*/
|
*/
|
||||||
static int linear_payload_sz(bool first_skb)
|
#ifndef CONFIG_MPTCP
|
||||||
|
static
|
||||||
|
#endif
|
||||||
|
int linear_payload_sz(bool first_skb)
|
||||||
{
|
{
|
||||||
if (first_skb)
|
if (first_skb)
|
||||||
return SKB_WITH_OVERHEAD(2048 - MAX_TCP_HEADER);
|
return SKB_WITH_OVERHEAD(2048 - MAX_TCP_HEADER);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int select_size(bool first_skb, bool zc)
|
#ifdef CONFIG_MPTCP
|
||||||
|
static int select_size(const struct sock *sk, bool first_skb, bool zc)
|
||||||
|
#else
|
||||||
|
int select_size(bool first_skb, bool zc)
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
if (zc)
|
if (zc)
|
||||||
return 0;
|
return 0;
|
||||||
@ -1253,12 +1351,27 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
|
|||||||
* is fully established.
|
* is fully established.
|
||||||
*/
|
*/
|
||||||
if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) &&
|
if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) &&
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
!tcp_passive_fastopen(mptcp(tp) && tp->mpcb->master_sk ?
|
||||||
|
tp->mpcb->master_sk : sk)) {
|
||||||
|
#else
|
||||||
!tcp_passive_fastopen(sk)) {
|
!tcp_passive_fastopen(sk)) {
|
||||||
|
#endif
|
||||||
err = sk_stream_wait_connect(sk, &timeo);
|
err = sk_stream_wait_connect(sk, &timeo);
|
||||||
if (err != 0)
|
if (err != 0)
|
||||||
goto do_error;
|
goto do_error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (mptcp(tp)) {
|
||||||
|
struct mptcp_tcp_sock *mptcp;
|
||||||
|
|
||||||
|
mptcp_for_each_sub(tp->mpcb, mptcp) {
|
||||||
|
sock_rps_record_flow(mptcp_to_sock(mptcp));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
if (unlikely(tp->repair)) {
|
if (unlikely(tp->repair)) {
|
||||||
if (tp->repair_queue == TCP_RECV_QUEUE) {
|
if (tp->repair_queue == TCP_RECV_QUEUE) {
|
||||||
copied = tcp_send_rcvq(sk, msg, size);
|
copied = tcp_send_rcvq(sk, msg, size);
|
||||||
@ -1314,7 +1427,11 @@ new_segment:
|
|||||||
goto restart;
|
goto restart;
|
||||||
}
|
}
|
||||||
first_skb = tcp_rtx_and_write_queues_empty(sk);
|
first_skb = tcp_rtx_and_write_queues_empty(sk);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
linear = tp->ops->select_size(sk, first_skb, zc);
|
||||||
|
#else
|
||||||
linear = select_size(first_skb, zc);
|
linear = select_size(first_skb, zc);
|
||||||
|
#endif
|
||||||
skb = sk_stream_alloc_skb(sk, linear, sk->sk_allocation,
|
skb = sk_stream_alloc_skb(sk, linear, sk->sk_allocation,
|
||||||
first_skb);
|
first_skb);
|
||||||
if (!skb)
|
if (!skb)
|
||||||
@ -1552,7 +1669,10 @@ static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len)
|
|||||||
* calculation of whether or not we must ACK for the sake of
|
* calculation of whether or not we must ACK for the sake of
|
||||||
* a window update.
|
* a window update.
|
||||||
*/
|
*/
|
||||||
static void tcp_cleanup_rbuf(struct sock *sk, int copied)
|
#ifndef CONFIG_MPTCP
|
||||||
|
static
|
||||||
|
#endif
|
||||||
|
void tcp_cleanup_rbuf(struct sock *sk, int copied)
|
||||||
{
|
{
|
||||||
struct tcp_sock *tp = tcp_sk(sk);
|
struct tcp_sock *tp = tcp_sk(sk);
|
||||||
bool time_to_ack = false;
|
bool time_to_ack = false;
|
||||||
@ -1598,7 +1718,11 @@ static void tcp_cleanup_rbuf(struct sock *sk, int copied)
|
|||||||
|
|
||||||
/* Optimize, __tcp_select_window() is not cheap. */
|
/* Optimize, __tcp_select_window() is not cheap. */
|
||||||
if (2*rcv_window_now <= tp->window_clamp) {
|
if (2*rcv_window_now <= tp->window_clamp) {
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
__u32 new_window = tp->ops->__select_window(sk);
|
||||||
|
#else
|
||||||
__u32 new_window = __tcp_select_window(sk);
|
__u32 new_window = __tcp_select_window(sk);
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Send ACK now, if this read freed lots of space
|
/* Send ACK now, if this read freed lots of space
|
||||||
* in our buffer. Certainly, new_window is new window.
|
* in our buffer. Certainly, new_window is new window.
|
||||||
@ -1716,7 +1840,11 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
|
|||||||
/* Clean up data we have read: This will do ACK frames. */
|
/* Clean up data we have read: This will do ACK frames. */
|
||||||
if (copied > 0) {
|
if (copied > 0) {
|
||||||
tcp_recv_skb(sk, seq, &offset);
|
tcp_recv_skb(sk, seq, &offset);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
tp->ops->cleanup_rbuf(sk, copied);
|
||||||
|
#else
|
||||||
tcp_cleanup_rbuf(sk, copied);
|
tcp_cleanup_rbuf(sk, copied);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
return copied;
|
return copied;
|
||||||
}
|
}
|
||||||
@ -1974,6 +2102,16 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
|
|||||||
|
|
||||||
lock_sock(sk);
|
lock_sock(sk);
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (mptcp(tp)) {
|
||||||
|
struct mptcp_tcp_sock *mptcp;
|
||||||
|
|
||||||
|
mptcp_for_each_sub(tp->mpcb, mptcp) {
|
||||||
|
sock_rps_record_flow(mptcp_to_sock(mptcp));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
err = -ENOTCONN;
|
err = -ENOTCONN;
|
||||||
if (sk->sk_state == TCP_LISTEN)
|
if (sk->sk_state == TCP_LISTEN)
|
||||||
goto out;
|
goto out;
|
||||||
@ -2092,7 +2230,11 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
tp->ops->cleanup_rbuf(sk, copied);
|
||||||
|
#else
|
||||||
tcp_cleanup_rbuf(sk, copied);
|
tcp_cleanup_rbuf(sk, copied);
|
||||||
|
#endif
|
||||||
|
|
||||||
if (copied >= target) {
|
if (copied >= target) {
|
||||||
/* Do not sleep, just process backlog. */
|
/* Do not sleep, just process backlog. */
|
||||||
@ -2185,7 +2327,11 @@ skip_copy:
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
/* Clean up data we have read: This will do ACK frames. */
|
/* Clean up data we have read: This will do ACK frames. */
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
tp->ops->cleanup_rbuf(sk, copied);
|
||||||
|
#else
|
||||||
tcp_cleanup_rbuf(sk, copied);
|
tcp_cleanup_rbuf(sk, copied);
|
||||||
|
#endif
|
||||||
|
|
||||||
release_sock(sk);
|
release_sock(sk);
|
||||||
|
|
||||||
@ -2297,7 +2443,10 @@ static const unsigned char new_state[16] = {
|
|||||||
[TCP_NEW_SYN_RECV] = TCP_CLOSE, /* should not happen ! */
|
[TCP_NEW_SYN_RECV] = TCP_CLOSE, /* should not happen ! */
|
||||||
};
|
};
|
||||||
|
|
||||||
static int tcp_close_state(struct sock *sk)
|
#ifndef CONFIG_MPTCP
|
||||||
|
static
|
||||||
|
#endif
|
||||||
|
int tcp_close_state(struct sock *sk)
|
||||||
{
|
{
|
||||||
int next = (int)new_state[sk->sk_state];
|
int next = (int)new_state[sk->sk_state];
|
||||||
int ns = next & TCP_STATE_MASK;
|
int ns = next & TCP_STATE_MASK;
|
||||||
@ -2327,7 +2476,11 @@ void tcp_shutdown(struct sock *sk, int how)
|
|||||||
TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) {
|
TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) {
|
||||||
/* Clear out any half completed packets. FIN if needed. */
|
/* Clear out any half completed packets. FIN if needed. */
|
||||||
if (tcp_close_state(sk))
|
if (tcp_close_state(sk))
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
tcp_sk(sk)->ops->send_fin(sk);
|
||||||
|
#else
|
||||||
tcp_send_fin(sk);
|
tcp_send_fin(sk);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(tcp_shutdown);
|
EXPORT_SYMBOL(tcp_shutdown);
|
||||||
@ -2351,6 +2504,18 @@ void tcp_close(struct sock *sk, long timeout)
|
|||||||
struct sk_buff *skb;
|
struct sk_buff *skb;
|
||||||
int data_was_unread = 0;
|
int data_was_unread = 0;
|
||||||
int state;
|
int state;
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (is_meta_sk(sk)) {
|
||||||
|
/* TODO: Currently forcing timeout to 0 because
|
||||||
|
* sk_stream_wait_close will complain during lockdep because
|
||||||
|
* of the mpcb_mutex (circular lock dependency through
|
||||||
|
* inet_csk_listen_stop()).
|
||||||
|
* We should find a way to get rid of the mpcb_mutex.
|
||||||
|
*/
|
||||||
|
mptcp_close(sk, 0);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
lock_sock(sk);
|
lock_sock(sk);
|
||||||
sk->sk_shutdown = SHUTDOWN_MASK;
|
sk->sk_shutdown = SHUTDOWN_MASK;
|
||||||
@ -2396,7 +2561,11 @@ void tcp_close(struct sock *sk, long timeout)
|
|||||||
/* Unread data was tossed, zap the connection. */
|
/* Unread data was tossed, zap the connection. */
|
||||||
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
|
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
|
||||||
tcp_set_state(sk, TCP_CLOSE);
|
tcp_set_state(sk, TCP_CLOSE);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
tcp_sk(sk)->ops->send_active_reset(sk, sk->sk_allocation);
|
||||||
|
#else
|
||||||
tcp_send_active_reset(sk, sk->sk_allocation);
|
tcp_send_active_reset(sk, sk->sk_allocation);
|
||||||
|
#endif
|
||||||
} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
|
} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
|
||||||
/* Check zero linger _after_ checking for unread data. */
|
/* Check zero linger _after_ checking for unread data. */
|
||||||
sk->sk_prot->disconnect(sk, 0);
|
sk->sk_prot->disconnect(sk, 0);
|
||||||
@ -2470,7 +2639,11 @@ adjudge_to_death:
|
|||||||
struct tcp_sock *tp = tcp_sk(sk);
|
struct tcp_sock *tp = tcp_sk(sk);
|
||||||
if (tp->linger2 < 0) {
|
if (tp->linger2 < 0) {
|
||||||
tcp_set_state(sk, TCP_CLOSE);
|
tcp_set_state(sk, TCP_CLOSE);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
tp->ops->send_active_reset(sk, GFP_ATOMIC);
|
||||||
|
#else
|
||||||
tcp_send_active_reset(sk, GFP_ATOMIC);
|
tcp_send_active_reset(sk, GFP_ATOMIC);
|
||||||
|
#endif
|
||||||
__NET_INC_STATS(sock_net(sk),
|
__NET_INC_STATS(sock_net(sk),
|
||||||
LINUX_MIB_TCPABORTONLINGER);
|
LINUX_MIB_TCPABORTONLINGER);
|
||||||
} else {
|
} else {
|
||||||
@ -2480,7 +2653,12 @@ adjudge_to_death:
|
|||||||
inet_csk_reset_keepalive_timer(sk,
|
inet_csk_reset_keepalive_timer(sk,
|
||||||
tmo - TCP_TIMEWAIT_LEN);
|
tmo - TCP_TIMEWAIT_LEN);
|
||||||
} else {
|
} else {
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
tcp_sk(sk)->ops->time_wait(sk, TCP_FIN_WAIT2,
|
||||||
|
tmo);
|
||||||
|
#else
|
||||||
tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
|
tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
|
||||||
|
#endif
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2489,7 +2667,11 @@ adjudge_to_death:
|
|||||||
sk_mem_reclaim(sk);
|
sk_mem_reclaim(sk);
|
||||||
if (tcp_check_oom(sk, 0)) {
|
if (tcp_check_oom(sk, 0)) {
|
||||||
tcp_set_state(sk, TCP_CLOSE);
|
tcp_set_state(sk, TCP_CLOSE);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
tcp_sk(sk)->ops->send_active_reset(sk, GFP_ATOMIC);
|
||||||
|
#else
|
||||||
tcp_send_active_reset(sk, GFP_ATOMIC);
|
tcp_send_active_reset(sk, GFP_ATOMIC);
|
||||||
|
#endif
|
||||||
__NET_INC_STATS(sock_net(sk),
|
__NET_INC_STATS(sock_net(sk),
|
||||||
LINUX_MIB_TCPABORTONMEMORY);
|
LINUX_MIB_TCPABORTONMEMORY);
|
||||||
} else if (!check_net(sock_net(sk))) {
|
} else if (!check_net(sock_net(sk))) {
|
||||||
@ -2518,6 +2700,7 @@ out:
|
|||||||
}
|
}
|
||||||
EXPORT_SYMBOL(tcp_close);
|
EXPORT_SYMBOL(tcp_close);
|
||||||
|
|
||||||
|
#ifndef CONFIG_MPTCP
|
||||||
/* These states need RST on ABORT according to RFC793 */
|
/* These states need RST on ABORT according to RFC793 */
|
||||||
|
|
||||||
static inline bool tcp_need_reset(int state)
|
static inline bool tcp_need_reset(int state)
|
||||||
@ -2526,7 +2709,7 @@ static inline bool tcp_need_reset(int state)
|
|||||||
(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 |
|
(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 |
|
||||||
TCPF_FIN_WAIT2 | TCPF_SYN_RECV | TCPF_SYN_SENT);
|
TCPF_FIN_WAIT2 | TCPF_SYN_RECV | TCPF_SYN_SENT);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
static void tcp_rtx_queue_purge(struct sock *sk)
|
static void tcp_rtx_queue_purge(struct sock *sk)
|
||||||
{
|
{
|
||||||
struct rb_node *p = rb_first(&sk->tcp_rtx_queue);
|
struct rb_node *p = rb_first(&sk->tcp_rtx_queue);
|
||||||
@ -2547,7 +2730,11 @@ static void tcp_rtx_queue_purge(struct sock *sk)
|
|||||||
void tcp_write_queue_purge(struct sock *sk)
|
void tcp_write_queue_purge(struct sock *sk)
|
||||||
{
|
{
|
||||||
struct sk_buff *skb;
|
struct sk_buff *skb;
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (mptcp(tcp_sk(sk)) && !is_meta_sk(sk) &&
|
||||||
|
!tcp_rtx_and_write_queues_empty(sk))
|
||||||
|
mptcp_reinject_data(sk, 0);
|
||||||
|
#endif
|
||||||
tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
|
tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
|
||||||
while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
|
while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
|
||||||
tcp_skb_tsorted_anchor_cleanup(skb);
|
tcp_skb_tsorted_anchor_cleanup(skb);
|
||||||
@ -2586,7 +2773,11 @@ int tcp_disconnect(struct sock *sk, int flags)
|
|||||||
/* The last check adjusts for discrepancy of Linux wrt. RFC
|
/* The last check adjusts for discrepancy of Linux wrt. RFC
|
||||||
* states
|
* states
|
||||||
*/
|
*/
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
tp->ops->send_active_reset(sk, gfp_any());
|
||||||
|
#else
|
||||||
tcp_send_active_reset(sk, gfp_any());
|
tcp_send_active_reset(sk, gfp_any());
|
||||||
|
#endif
|
||||||
sk->sk_err = ECONNRESET;
|
sk->sk_err = ECONNRESET;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2603,6 +2794,15 @@ int tcp_disconnect(struct sock *sk, int flags)
|
|||||||
if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
|
if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
|
||||||
inet_reset_saddr(sk);
|
inet_reset_saddr(sk);
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (is_meta_sk(sk)) {
|
||||||
|
mptcp_disconnect(sk);
|
||||||
|
} else {
|
||||||
|
if (tp->inside_tk_table)
|
||||||
|
mptcp_hash_remove_bh(tp);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
sk->sk_shutdown = 0;
|
sk->sk_shutdown = 0;
|
||||||
sock_reset_flag(sk, SOCK_DONE);
|
sock_reset_flag(sk, SOCK_DONE);
|
||||||
tp->srtt_us = 0;
|
tp->srtt_us = 0;
|
||||||
@ -2669,8 +2869,13 @@ EXPORT_SYMBOL(tcp_disconnect);
|
|||||||
|
|
||||||
static inline bool tcp_can_repair_sock(const struct sock *sk)
|
static inline bool tcp_can_repair_sock(const struct sock *sk)
|
||||||
{
|
{
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
return ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN) &&
|
||||||
|
(sk->sk_state != TCP_LISTEN) && !sock_flag(sk, SOCK_MPTCP);
|
||||||
|
#else
|
||||||
return ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN) &&
|
return ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN) &&
|
||||||
(sk->sk_state != TCP_LISTEN);
|
(sk->sk_state != TCP_LISTEN);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static int tcp_repair_set_window(struct tcp_sock *tp, char __user *optbuf, int len)
|
static int tcp_repair_set_window(struct tcp_sock *tp, char __user *optbuf, int len)
|
||||||
@ -2816,6 +3021,61 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
|
|||||||
|
|
||||||
return tcp_fastopen_reset_cipher(net, sk, key, sizeof(key));
|
return tcp_fastopen_reset_cipher(net, sk, key, sizeof(key));
|
||||||
}
|
}
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
case MPTCP_SCHEDULER: {
|
||||||
|
char name[MPTCP_SCHED_NAME_MAX];
|
||||||
|
|
||||||
|
if (optlen < 1)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
/* Cannot be used if MPTCP is not used or we already have
|
||||||
|
* established an MPTCP-connection.
|
||||||
|
*/
|
||||||
|
if (mptcp_init_failed || !sysctl_mptcp_enabled ||
|
||||||
|
sk->sk_state != TCP_CLOSE)
|
||||||
|
return -EPERM;
|
||||||
|
|
||||||
|
val = strncpy_from_user(name, optval,
|
||||||
|
min_t(long, MPTCP_SCHED_NAME_MAX - 1,
|
||||||
|
optlen));
|
||||||
|
|
||||||
|
if (val < 0)
|
||||||
|
return -EFAULT;
|
||||||
|
name[val] = 0;
|
||||||
|
|
||||||
|
lock_sock(sk);
|
||||||
|
err = mptcp_set_scheduler(sk, name);
|
||||||
|
release_sock(sk);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
case MPTCP_PATH_MANAGER: {
|
||||||
|
char name[MPTCP_PM_NAME_MAX];
|
||||||
|
|
||||||
|
if (optlen < 1)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
/* Cannot be used if MPTCP is not used or we already have
|
||||||
|
* established an MPTCP-connection.
|
||||||
|
*/
|
||||||
|
if (mptcp_init_failed || !sysctl_mptcp_enabled ||
|
||||||
|
sk->sk_state != TCP_CLOSE)
|
||||||
|
return -EPERM;
|
||||||
|
|
||||||
|
val = strncpy_from_user(name, optval,
|
||||||
|
min_t(long, MPTCP_PM_NAME_MAX - 1,
|
||||||
|
optlen));
|
||||||
|
|
||||||
|
if (val < 0)
|
||||||
|
return -EFAULT;
|
||||||
|
name[val] = 0;
|
||||||
|
|
||||||
|
lock_sock(sk);
|
||||||
|
err = mptcp_set_path_manager(sk, name);
|
||||||
|
release_sock(sk);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
default:
|
default:
|
||||||
/* fallthru */
|
/* fallthru */
|
||||||
break;
|
break;
|
||||||
@ -3005,6 +3265,14 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case TCP_DEFER_ACCEPT:
|
case TCP_DEFER_ACCEPT:
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
/* An established MPTCP-connection (mptcp(tp) only returns true
|
||||||
|
* if the socket is established) should not use DEFER on new
|
||||||
|
* subflows.
|
||||||
|
*/
|
||||||
|
if (mptcp(tp))
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
/* Translate value in seconds to number of retransmits */
|
/* Translate value in seconds to number of retransmits */
|
||||||
icsk->icsk_accept_queue.rskq_defer_accept =
|
icsk->icsk_accept_queue.rskq_defer_accept =
|
||||||
secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
|
secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
|
||||||
@ -3032,7 +3300,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
|
|||||||
(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
|
(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
|
||||||
inet_csk_ack_scheduled(sk)) {
|
inet_csk_ack_scheduled(sk)) {
|
||||||
icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
|
icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
tp->ops->cleanup_rbuf(sk, 1);
|
||||||
|
#else
|
||||||
tcp_cleanup_rbuf(sk, 1);
|
tcp_cleanup_rbuf(sk, 1);
|
||||||
|
#endif
|
||||||
if (!(val & 1))
|
if (!(val & 1))
|
||||||
icsk->icsk_ack.pingpong = 1;
|
icsk->icsk_ack.pingpong = 1;
|
||||||
}
|
}
|
||||||
@ -3099,6 +3371,32 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
|
|||||||
tp->notsent_lowat = val;
|
tp->notsent_lowat = val;
|
||||||
sk->sk_write_space(sk);
|
sk->sk_write_space(sk);
|
||||||
break;
|
break;
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
case MPTCP_ENABLED:
|
||||||
|
if (mptcp_init_failed || !sysctl_mptcp_enabled ||
|
||||||
|
sk->sk_state != TCP_CLOSE
|
||||||
|
#ifdef CONFIG_TCP_MD5SIG
|
||||||
|
|| tp->md5sig_info
|
||||||
|
#endif
|
||||||
|
) {
|
||||||
|
err = -EPERM;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (val)
|
||||||
|
mptcp_enable_sock(sk);
|
||||||
|
else
|
||||||
|
mptcp_disable_sock(sk);
|
||||||
|
break;
|
||||||
|
case MPTCP_INFO:
|
||||||
|
if (mptcp_init_failed || !sysctl_mptcp_enabled) {
|
||||||
|
err = -EPERM;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
tp->record_master_info = !!(val & MPTCP_INFO_FLAG_SAVE_MASTER);
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
case TCP_INQ:
|
case TCP_INQ:
|
||||||
if (val > 1 || val < 0)
|
if (val > 1 || val < 0)
|
||||||
err = -EINVAL;
|
err = -EINVAL;
|
||||||
@ -3158,7 +3456,11 @@ static void tcp_get_info_chrono_stats(const struct tcp_sock *tp,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Return information about state of tcp endpoint in API format. */
|
/* Return information about state of tcp endpoint in API format. */
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
void tcp_get_info(struct sock *sk, struct tcp_info *info, bool no_lock)
|
||||||
|
#else
|
||||||
void tcp_get_info(struct sock *sk, struct tcp_info *info)
|
void tcp_get_info(struct sock *sk, struct tcp_info *info)
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */
|
const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */
|
||||||
const struct inet_connection_sock *icsk = inet_csk(sk);
|
const struct inet_connection_sock *icsk = inet_csk(sk);
|
||||||
@ -3195,7 +3497,12 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (!no_lock)
|
||||||
|
slow = lock_sock_fast(sk);
|
||||||
|
#else
|
||||||
slow = lock_sock_fast(sk);
|
slow = lock_sock_fast(sk);
|
||||||
|
#endif
|
||||||
|
|
||||||
info->tcpi_ca_state = icsk->icsk_ca_state;
|
info->tcpi_ca_state = icsk->icsk_ca_state;
|
||||||
info->tcpi_retransmits = icsk->icsk_retransmits;
|
info->tcpi_retransmits = icsk->icsk_retransmits;
|
||||||
@ -3269,7 +3576,12 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
|
|||||||
info->tcpi_bytes_retrans = tp->bytes_retrans;
|
info->tcpi_bytes_retrans = tp->bytes_retrans;
|
||||||
info->tcpi_dsack_dups = tp->dsack_dups;
|
info->tcpi_dsack_dups = tp->dsack_dups;
|
||||||
info->tcpi_reord_seen = tp->reord_seen;
|
info->tcpi_reord_seen = tp->reord_seen;
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (!no_lock)
|
||||||
|
unlock_sock_fast(sk, slow);
|
||||||
|
#else
|
||||||
unlock_sock_fast(sk, slow);
|
unlock_sock_fast(sk, slow);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(tcp_get_info);
|
EXPORT_SYMBOL_GPL(tcp_get_info);
|
||||||
|
|
||||||
@ -3414,7 +3726,11 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
|
|||||||
if (get_user(len, optlen))
|
if (get_user(len, optlen))
|
||||||
return -EFAULT;
|
return -EFAULT;
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
tcp_get_info(sk, &info, false);
|
||||||
|
#else
|
||||||
tcp_get_info(sk, &info);
|
tcp_get_info(sk, &info);
|
||||||
|
#endif
|
||||||
|
|
||||||
len = min_t(unsigned int, len, sizeof(info));
|
len = min_t(unsigned int, len, sizeof(info));
|
||||||
if (put_user(len, optlen))
|
if (put_user(len, optlen))
|
||||||
@ -3605,6 +3921,87 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
|
|||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
case MPTCP_SCHEDULER:
|
||||||
|
if (get_user(len, optlen))
|
||||||
|
return -EFAULT;
|
||||||
|
len = min_t(unsigned int, len, MPTCP_SCHED_NAME_MAX);
|
||||||
|
if (put_user(len, optlen))
|
||||||
|
return -EFAULT;
|
||||||
|
|
||||||
|
lock_sock(sk);
|
||||||
|
if (mptcp(tcp_sk(sk))) {
|
||||||
|
struct mptcp_cb *mpcb = tcp_sk(mptcp_meta_sk(sk))->mpcb;
|
||||||
|
|
||||||
|
if (copy_to_user(optval, mpcb->sched_ops->name, len)) {
|
||||||
|
release_sock(sk);
|
||||||
|
return -EFAULT;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (copy_to_user(optval, tcp_sk(sk)->mptcp_sched_name,
|
||||||
|
len)) {
|
||||||
|
release_sock(sk);
|
||||||
|
return -EFAULT;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
release_sock(sk);
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
case MPTCP_PATH_MANAGER:
|
||||||
|
if (get_user(len, optlen))
|
||||||
|
return -EFAULT;
|
||||||
|
len = min_t(unsigned int, len, MPTCP_PM_NAME_MAX);
|
||||||
|
if (put_user(len, optlen))
|
||||||
|
return -EFAULT;
|
||||||
|
|
||||||
|
lock_sock(sk);
|
||||||
|
if (mptcp(tcp_sk(sk))) {
|
||||||
|
struct mptcp_cb *mpcb = tcp_sk(mptcp_meta_sk(sk))->mpcb;
|
||||||
|
|
||||||
|
if (copy_to_user(optval, mpcb->pm_ops->name, len)) {
|
||||||
|
release_sock(sk);
|
||||||
|
return -EFAULT;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (copy_to_user(optval, tcp_sk(sk)->mptcp_pm_name,
|
||||||
|
len)) {
|
||||||
|
release_sock(sk);
|
||||||
|
return -EFAULT;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
release_sock(sk);
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
case MPTCP_ENABLED:
|
||||||
|
if (sk->sk_state != TCP_SYN_SENT)
|
||||||
|
val = mptcp(tp) ? 1 : 0;
|
||||||
|
else
|
||||||
|
val = sock_flag(sk, SOCK_MPTCP) ? 1 : 0;
|
||||||
|
break;
|
||||||
|
case MPTCP_INFO:
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
if (!mptcp(tp))
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
if (get_user(len, optlen))
|
||||||
|
return -EFAULT;
|
||||||
|
|
||||||
|
len = min_t(unsigned int, len, sizeof(struct mptcp_info));
|
||||||
|
|
||||||
|
lock_sock(sk);
|
||||||
|
ret = mptcp_get_info(sk, optval, len);
|
||||||
|
release_sock(sk);
|
||||||
|
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
if (put_user(len, optlen))
|
||||||
|
return -EFAULT;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#ifdef CONFIG_MMU
|
#ifdef CONFIG_MMU
|
||||||
case TCP_ZEROCOPY_RECEIVE: {
|
case TCP_ZEROCOPY_RECEIVE: {
|
||||||
struct tcp_zerocopy_receive zc;
|
struct tcp_zerocopy_receive zc;
|
||||||
@ -3807,6 +4204,9 @@ void tcp_done(struct sock *sk)
|
|||||||
if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
|
if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
|
||||||
TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
|
TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
//WARN_ON(sk->sk_state == TCP_CLOSE);
|
||||||
|
#endif
|
||||||
tcp_set_state(sk, TCP_CLOSE);
|
tcp_set_state(sk, TCP_CLOSE);
|
||||||
tcp_clear_xmit_timers(sk);
|
tcp_clear_xmit_timers(sk);
|
||||||
if (req)
|
if (req)
|
||||||
@ -3823,6 +4223,9 @@ EXPORT_SYMBOL_GPL(tcp_done);
|
|||||||
|
|
||||||
int tcp_abort(struct sock *sk, int err)
|
int tcp_abort(struct sock *sk, int err)
|
||||||
{
|
{
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
struct sock *meta_sk = mptcp(tcp_sk(sk)) ? mptcp_meta_sk(sk) : sk;
|
||||||
|
#endif
|
||||||
if (!sk_fullsock(sk)) {
|
if (!sk_fullsock(sk)) {
|
||||||
if (sk->sk_state == TCP_NEW_SYN_RECV) {
|
if (sk->sk_state == TCP_NEW_SYN_RECV) {
|
||||||
struct request_sock *req = inet_reqsk(sk);
|
struct request_sock *req = inet_reqsk(sk);
|
||||||
@ -3836,7 +4239,11 @@ int tcp_abort(struct sock *sk, int err)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Don't race with userspace socket closes such as tcp_close. */
|
/* Don't race with userspace socket closes such as tcp_close. */
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
lock_sock(meta_sk);
|
||||||
|
#else
|
||||||
lock_sock(sk);
|
lock_sock(sk);
|
||||||
|
#endif
|
||||||
|
|
||||||
if (sk->sk_state == TCP_LISTEN) {
|
if (sk->sk_state == TCP_LISTEN) {
|
||||||
tcp_set_state(sk, TCP_CLOSE);
|
tcp_set_state(sk, TCP_CLOSE);
|
||||||
@ -3845,22 +4252,39 @@ int tcp_abort(struct sock *sk, int err)
|
|||||||
|
|
||||||
/* Don't race with BH socket closes such as inet_csk_listen_stop. */
|
/* Don't race with BH socket closes such as inet_csk_listen_stop. */
|
||||||
local_bh_disable();
|
local_bh_disable();
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
bh_lock_sock(meta_sk);
|
||||||
|
#else
|
||||||
bh_lock_sock(sk);
|
bh_lock_sock(sk);
|
||||||
|
#endif
|
||||||
|
|
||||||
if (!sock_flag(sk, SOCK_DEAD)) {
|
if (!sock_flag(sk, SOCK_DEAD)) {
|
||||||
sk->sk_err = err;
|
sk->sk_err = err;
|
||||||
/* This barrier is coupled with smp_rmb() in tcp_poll() */
|
/* This barrier is coupled with smp_rmb() in tcp_poll() */
|
||||||
smp_wmb();
|
smp_wmb();
|
||||||
sk->sk_error_report(sk);
|
sk->sk_error_report(sk);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (tcp_need_reset(sk->sk_state))
|
||||||
|
tcp_sk(sk)->ops->send_active_reset(sk, GFP_ATOMIC);
|
||||||
|
#else
|
||||||
if (tcp_need_reset(sk->sk_state))
|
if (tcp_need_reset(sk->sk_state))
|
||||||
tcp_send_active_reset(sk, GFP_ATOMIC);
|
tcp_send_active_reset(sk, GFP_ATOMIC);
|
||||||
|
#endif
|
||||||
tcp_done(sk);
|
tcp_done(sk);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
bh_unlock_sock(meta_sk);
|
||||||
|
#else
|
||||||
bh_unlock_sock(sk);
|
bh_unlock_sock(sk);
|
||||||
|
#endif
|
||||||
local_bh_enable();
|
local_bh_enable();
|
||||||
tcp_write_queue_purge(sk);
|
tcp_write_queue_purge(sk);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
release_sock(meta_sk);
|
||||||
|
#else
|
||||||
release_sock(sk);
|
release_sock(sk);
|
||||||
|
#endif
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(tcp_abort);
|
EXPORT_SYMBOL_GPL(tcp_abort);
|
||||||
|
@ -34,8 +34,15 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
|
|||||||
READ_ONCE(tp->copied_seq), 0);
|
READ_ONCE(tp->copied_seq), 0);
|
||||||
r->idiag_wqueue = READ_ONCE(tp->write_seq) - tp->snd_una;
|
r->idiag_wqueue = READ_ONCE(tp->write_seq) - tp->snd_una;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (info)
|
||||||
|
tcp_get_info(sk, info, false);
|
||||||
|
#else
|
||||||
if (info)
|
if (info)
|
||||||
tcp_get_info(sk, info);
|
tcp_get_info(sk, info);
|
||||||
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_TCP_MD5SIG
|
#ifdef CONFIG_TCP_MD5SIG
|
||||||
|
@ -9,6 +9,9 @@
|
|||||||
#include <linux/rculist.h>
|
#include <linux/rculist.h>
|
||||||
#include <net/inetpeer.h>
|
#include <net/inetpeer.h>
|
||||||
#include <net/tcp.h>
|
#include <net/tcp.h>
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
#include <net/mptcp.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
void tcp_fastopen_init_key_once(struct net *net)
|
void tcp_fastopen_init_key_once(struct net *net)
|
||||||
{
|
{
|
||||||
@ -219,6 +222,10 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
|
|||||||
struct tcp_sock *tp;
|
struct tcp_sock *tp;
|
||||||
struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
|
struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
|
||||||
struct sock *child;
|
struct sock *child;
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
struct sock *meta_sk;
|
||||||
|
int ret;
|
||||||
|
#endif
|
||||||
bool own_req;
|
bool own_req;
|
||||||
|
|
||||||
req->num_retrans = 0;
|
req->num_retrans = 0;
|
||||||
@ -258,8 +265,10 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
|
|||||||
|
|
||||||
refcount_set(&req->rsk_refcnt, 2);
|
refcount_set(&req->rsk_refcnt, 2);
|
||||||
|
|
||||||
|
#ifndef CONFIG_MPTCP
|
||||||
/* Now finish processing the fastopen child socket. */
|
/* Now finish processing the fastopen child socket. */
|
||||||
tcp_init_transfer(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
|
tcp_init_transfer(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
|
||||||
|
#endif
|
||||||
|
|
||||||
tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
|
tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
|
||||||
|
|
||||||
@ -267,6 +276,20 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
|
|||||||
|
|
||||||
tcp_rsk(req)->rcv_nxt = tp->rcv_nxt;
|
tcp_rsk(req)->rcv_nxt = tp->rcv_nxt;
|
||||||
tp->rcv_wup = tp->rcv_nxt;
|
tp->rcv_wup = tp->rcv_nxt;
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
meta_sk = child;
|
||||||
|
ret = mptcp_check_req_fastopen(meta_sk, req);
|
||||||
|
if (ret < 0)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
if (ret == 0) {
|
||||||
|
child = tcp_sk(meta_sk)->mpcb->master_sk;
|
||||||
|
tp = tcp_sk(child);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Now finish processing the fastopen child socket. */
|
||||||
|
tcp_init_transfer(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
|
||||||
|
#endif
|
||||||
/* tcp_conn_request() is sending the SYNACK,
|
/* tcp_conn_request() is sending the SYNACK,
|
||||||
* and queues the child into listener accept queue.
|
* and queues the child into listener accept queue.
|
||||||
*/
|
*/
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -67,6 +67,10 @@
|
|||||||
#include <net/icmp.h>
|
#include <net/icmp.h>
|
||||||
#include <net/inet_hashtables.h>
|
#include <net/inet_hashtables.h>
|
||||||
#include <net/tcp.h>
|
#include <net/tcp.h>
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
#include <net/mptcp.h>
|
||||||
|
#include <net/mptcp_v4.h>
|
||||||
|
#endif
|
||||||
#include <net/transp_v6.h>
|
#include <net/transp_v6.h>
|
||||||
#include <net/ipv6.h>
|
#include <net/ipv6.h>
|
||||||
#include <net/inet_common.h>
|
#include <net/inet_common.h>
|
||||||
@ -436,6 +440,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
|
|||||||
const int type = icmp_hdr(icmp_skb)->type;
|
const int type = icmp_hdr(icmp_skb)->type;
|
||||||
const int code = icmp_hdr(icmp_skb)->code;
|
const int code = icmp_hdr(icmp_skb)->code;
|
||||||
struct sock *sk;
|
struct sock *sk;
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
struct sock *meta_sk;
|
||||||
|
#endif
|
||||||
struct sk_buff *skb;
|
struct sk_buff *skb;
|
||||||
struct request_sock *fastopen;
|
struct request_sock *fastopen;
|
||||||
u32 seq, snd_una;
|
u32 seq, snd_una;
|
||||||
@ -464,13 +471,27 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
|
|||||||
(code == ICMP_NET_UNREACH ||
|
(code == ICMP_NET_UNREACH ||
|
||||||
code == ICMP_HOST_UNREACH)));
|
code == ICMP_HOST_UNREACH)));
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
tp = tcp_sk(sk);
|
||||||
|
if (mptcp(tp))
|
||||||
|
meta_sk = mptcp_meta_sk(sk);
|
||||||
|
else
|
||||||
|
meta_sk = sk;
|
||||||
|
|
||||||
|
bh_lock_sock(meta_sk);
|
||||||
|
#else
|
||||||
bh_lock_sock(sk);
|
bh_lock_sock(sk);
|
||||||
|
#endif
|
||||||
/* If too many ICMPs get dropped on busy
|
/* If too many ICMPs get dropped on busy
|
||||||
* servers this needs to be solved differently.
|
* servers this needs to be solved differently.
|
||||||
* We do take care of PMTU discovery (RFC1191) special case :
|
* We do take care of PMTU discovery (RFC1191) special case :
|
||||||
* we can receive locally generated ICMP messages while socket is held.
|
* we can receive locally generated ICMP messages while socket is held.
|
||||||
*/
|
*/
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (sock_owned_by_user(meta_sk)) {
|
||||||
|
#else
|
||||||
if (sock_owned_by_user(sk)) {
|
if (sock_owned_by_user(sk)) {
|
||||||
|
#endif
|
||||||
if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
|
if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
|
||||||
__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
|
__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
|
||||||
}
|
}
|
||||||
@ -483,7 +504,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
|
|||||||
}
|
}
|
||||||
|
|
||||||
icsk = inet_csk(sk);
|
icsk = inet_csk(sk);
|
||||||
|
#ifndef CONFIG_MPTCP
|
||||||
tp = tcp_sk(sk);
|
tp = tcp_sk(sk);
|
||||||
|
#endif
|
||||||
/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
|
/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
|
||||||
fastopen = tp->fastopen_rsk;
|
fastopen = tp->fastopen_rsk;
|
||||||
snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
|
snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
|
||||||
@ -517,11 +540,19 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
|
|||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
WRITE_ONCE(tp->mtu_info, info);
|
WRITE_ONCE(tp->mtu_info, info);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (!sock_owned_by_user(meta_sk)) {
|
||||||
|
#else
|
||||||
if (!sock_owned_by_user(sk)) {
|
if (!sock_owned_by_user(sk)) {
|
||||||
|
#endif
|
||||||
tcp_v4_mtu_reduced(sk);
|
tcp_v4_mtu_reduced(sk);
|
||||||
} else {
|
} else {
|
||||||
if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &sk->sk_tsq_flags))
|
if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &sk->sk_tsq_flags))
|
||||||
sock_hold(sk);
|
sock_hold(sk);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (mptcp(tp))
|
||||||
|
mptcp_tsq_flags(sk);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
@ -535,7 +566,11 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
|
|||||||
!icsk->icsk_backoff || fastopen)
|
!icsk->icsk_backoff || fastopen)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (sock_owned_by_user(meta_sk))
|
||||||
|
#else
|
||||||
if (sock_owned_by_user(sk))
|
if (sock_owned_by_user(sk))
|
||||||
|
#endif
|
||||||
break;
|
break;
|
||||||
|
|
||||||
skb = tcp_rtx_queue_head(sk);
|
skb = tcp_rtx_queue_head(sk);
|
||||||
@ -558,7 +593,11 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
|
|||||||
} else {
|
} else {
|
||||||
/* RTO revert clocked out retransmission.
|
/* RTO revert clocked out retransmission.
|
||||||
* Will retransmit now */
|
* Will retransmit now */
|
||||||
tcp_retransmit_timer(sk);
|
#ifdef CONFIG_MPTCP
|
||||||
|
tcp_sk(sk)->ops->retransmit_timer(sk);
|
||||||
|
#else
|
||||||
|
tcp_retransmit_timer(sk);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
@ -578,7 +617,11 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
|
|||||||
if (fastopen && !fastopen->sk)
|
if (fastopen && !fastopen->sk)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (!sock_owned_by_user(meta_sk)) {
|
||||||
|
#else
|
||||||
if (!sock_owned_by_user(sk)) {
|
if (!sock_owned_by_user(sk)) {
|
||||||
|
#endif
|
||||||
sk->sk_err = err;
|
sk->sk_err = err;
|
||||||
|
|
||||||
sk->sk_error_report(sk);
|
sk->sk_error_report(sk);
|
||||||
@ -607,7 +650,11 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
inet = inet_sk(sk);
|
inet = inet_sk(sk);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (!sock_owned_by_user(meta_sk) && inet->recverr) {
|
||||||
|
#else
|
||||||
if (!sock_owned_by_user(sk) && inet->recverr) {
|
if (!sock_owned_by_user(sk) && inet->recverr) {
|
||||||
|
#endif
|
||||||
sk->sk_err = err;
|
sk->sk_err = err;
|
||||||
sk->sk_error_report(sk);
|
sk->sk_error_report(sk);
|
||||||
} else { /* Only an error on timeout */
|
} else { /* Only an error on timeout */
|
||||||
@ -615,7 +662,11 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
|
|||||||
}
|
}
|
||||||
|
|
||||||
out:
|
out:
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
bh_unlock_sock(meta_sk);
|
||||||
|
#else
|
||||||
bh_unlock_sock(sk);
|
bh_unlock_sock(sk);
|
||||||
|
#endif
|
||||||
sock_put(sk);
|
sock_put(sk);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -650,7 +701,10 @@ EXPORT_SYMBOL(tcp_v4_send_check);
|
|||||||
* Exception: precedence violation. We do not implement it in any case.
|
* Exception: precedence violation. We do not implement it in any case.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
|
#ifndef CONFIG_MPTCP
|
||||||
|
static
|
||||||
|
#endif
|
||||||
|
void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
|
||||||
{
|
{
|
||||||
const struct tcphdr *th = tcp_hdr(skb);
|
const struct tcphdr *th = tcp_hdr(skb);
|
||||||
struct {
|
struct {
|
||||||
@ -794,12 +848,19 @@ out:
|
|||||||
/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
|
/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
|
||||||
outside socket context is ugly, certainly. What can I do?
|
outside socket context is ugly, certainly. What can I do?
|
||||||
*/
|
*/
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
static void tcp_v4_send_ack(const struct sock *sk,
|
||||||
|
struct sk_buff *skb, u32 seq, u32 ack, u32 data_ack,
|
||||||
|
u32 win, u32 tsval, u32 tsecr, int oif,
|
||||||
|
struct tcp_md5sig_key *key,
|
||||||
|
int reply_flags, u8 tos, int mptcp)
|
||||||
|
#else
|
||||||
static void tcp_v4_send_ack(const struct sock *sk,
|
static void tcp_v4_send_ack(const struct sock *sk,
|
||||||
struct sk_buff *skb, u32 seq, u32 ack,
|
struct sk_buff *skb, u32 seq, u32 ack,
|
||||||
u32 win, u32 tsval, u32 tsecr, int oif,
|
u32 win, u32 tsval, u32 tsecr, int oif,
|
||||||
struct tcp_md5sig_key *key,
|
struct tcp_md5sig_key *key,
|
||||||
int reply_flags, u8 tos)
|
int reply_flags, u8 tos)
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
const struct tcphdr *th = tcp_hdr(skb);
|
const struct tcphdr *th = tcp_hdr(skb);
|
||||||
struct {
|
struct {
|
||||||
@ -807,6 +868,10 @@ static void tcp_v4_send_ack(const struct sock *sk,
|
|||||||
__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
|
__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
|
||||||
#ifdef CONFIG_TCP_MD5SIG
|
#ifdef CONFIG_TCP_MD5SIG
|
||||||
+ (TCPOLEN_MD5SIG_ALIGNED >> 2)
|
+ (TCPOLEN_MD5SIG_ALIGNED >> 2)
|
||||||
|
#endif
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
+ ((MPTCP_SUB_LEN_DSS >> 2) +
|
||||||
|
(MPTCP_SUB_LEN_ACK >> 2))
|
||||||
#endif
|
#endif
|
||||||
];
|
];
|
||||||
} rep;
|
} rep;
|
||||||
@ -853,6 +918,21 @@ static void tcp_v4_send_ack(const struct sock *sk,
|
|||||||
ip_hdr(skb)->daddr, &rep.th);
|
ip_hdr(skb)->daddr, &rep.th);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (mptcp) {
|
||||||
|
int offset = (tsecr) ? 3 : 0;
|
||||||
|
/* Construction of 32-bit data_ack */
|
||||||
|
rep.opt[offset++] = htonl((TCPOPT_MPTCP << 24) |
|
||||||
|
((MPTCP_SUB_LEN_DSS + MPTCP_SUB_LEN_ACK) << 16) |
|
||||||
|
(0x20 << 8) |
|
||||||
|
(0x01));
|
||||||
|
rep.opt[offset] = htonl(data_ack);
|
||||||
|
|
||||||
|
arg.iov[0].iov_len += MPTCP_SUB_LEN_DSS + MPTCP_SUB_LEN_ACK;
|
||||||
|
rep.th.doff = arg.iov[0].iov_len / 4;
|
||||||
|
}
|
||||||
|
#endif /* CONFIG_MPTCP */
|
||||||
|
|
||||||
arg.flags = reply_flags;
|
arg.flags = reply_flags;
|
||||||
arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
|
arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
|
||||||
ip_hdr(skb)->saddr, /* XXX */
|
ip_hdr(skb)->saddr, /* XXX */
|
||||||
@ -881,9 +961,20 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
|
|||||||
{
|
{
|
||||||
struct inet_timewait_sock *tw = inet_twsk(sk);
|
struct inet_timewait_sock *tw = inet_twsk(sk);
|
||||||
struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
|
struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
u32 data_ack = 0;
|
||||||
|
int mptcp = 0;
|
||||||
|
|
||||||
|
if (tcptw->mptcp_tw) {
|
||||||
|
data_ack = (u32)tcptw->mptcp_tw->rcv_nxt;
|
||||||
|
mptcp = 1;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
tcp_v4_send_ack(sk, skb,
|
tcp_v4_send_ack(sk, skb,
|
||||||
tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
|
tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
data_ack,
|
||||||
|
#endif
|
||||||
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
|
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
|
||||||
tcp_time_stamp_raw() + tcptw->tw_ts_offset,
|
tcp_time_stamp_raw() + tcptw->tw_ts_offset,
|
||||||
tcptw->tw_ts_recent,
|
tcptw->tw_ts_recent,
|
||||||
@ -891,19 +982,31 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
|
|||||||
tcp_twsk_md5_key(tcptw),
|
tcp_twsk_md5_key(tcptw),
|
||||||
tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
|
tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
|
||||||
tw->tw_tos
|
tw->tw_tos
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
, mptcp
|
||||||
|
#endif
|
||||||
);
|
);
|
||||||
|
|
||||||
inet_twsk_put(tw);
|
inet_twsk_put(tw);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
|
#ifndef CONFIG_MPTCP
|
||||||
struct request_sock *req)
|
static
|
||||||
|
#endif
|
||||||
|
void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
|
||||||
|
struct request_sock *req)
|
||||||
{
|
{
|
||||||
/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
|
/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
|
||||||
* sk->sk_state == TCP_SYN_RECV -> for Fast Open.
|
* sk->sk_state == TCP_SYN_RECV -> for Fast Open.
|
||||||
*/
|
*/
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
u32 seq = (sk->sk_state == TCP_LISTEN || is_meta_sk(sk)) ?
|
||||||
|
tcp_rsk(req)->snt_isn + 1 :
|
||||||
|
tcp_sk(sk)->snd_nxt;
|
||||||
|
#else
|
||||||
u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
|
u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
|
||||||
tcp_sk(sk)->snd_nxt;
|
tcp_sk(sk)->snd_nxt;
|
||||||
|
#endif
|
||||||
|
|
||||||
/* RFC 7323 2.3
|
/* RFC 7323 2.3
|
||||||
* The window field (SEG.WND) of every outgoing segment, with the
|
* The window field (SEG.WND) of every outgoing segment, with the
|
||||||
@ -912,6 +1015,9 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
|
|||||||
*/
|
*/
|
||||||
tcp_v4_send_ack(sk, skb, seq,
|
tcp_v4_send_ack(sk, skb, seq,
|
||||||
tcp_rsk(req)->rcv_nxt,
|
tcp_rsk(req)->rcv_nxt,
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
0,
|
||||||
|
#endif
|
||||||
req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
|
req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
|
||||||
tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
|
tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
|
||||||
req->ts_recent,
|
req->ts_recent,
|
||||||
@ -919,7 +1025,11 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
|
|||||||
tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr,
|
tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr,
|
||||||
AF_INET),
|
AF_INET),
|
||||||
inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
|
inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
|
||||||
ip_hdr(skb)->tos);
|
ip_hdr(skb)->tos
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
, 0
|
||||||
|
#endif
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -927,11 +1037,14 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
|
|||||||
* This still operates on a request_sock only, not on a big
|
* This still operates on a request_sock only, not on a big
|
||||||
* socket.
|
* socket.
|
||||||
*/
|
*/
|
||||||
static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
|
#ifndef CONFIG_MPTCP
|
||||||
struct flowi *fl,
|
static
|
||||||
struct request_sock *req,
|
#endif
|
||||||
struct tcp_fastopen_cookie *foc,
|
int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
|
||||||
enum tcp_synack_type synack_type)
|
struct flowi *fl,
|
||||||
|
struct request_sock *req,
|
||||||
|
struct tcp_fastopen_cookie *foc,
|
||||||
|
enum tcp_synack_type synack_type)
|
||||||
{
|
{
|
||||||
const struct inet_request_sock *ireq = inet_rsk(req);
|
const struct inet_request_sock *ireq = inet_rsk(req);
|
||||||
struct flowi4 fl4;
|
struct flowi4 fl4;
|
||||||
@ -961,7 +1074,10 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
|
|||||||
/*
|
/*
|
||||||
* IPv4 request_sock destructor.
|
* IPv4 request_sock destructor.
|
||||||
*/
|
*/
|
||||||
static void tcp_v4_reqsk_destructor(struct request_sock *req)
|
#ifndef CONFIG_MPTCP
|
||||||
|
static
|
||||||
|
#endif
|
||||||
|
void tcp_v4_reqsk_destructor(struct request_sock *req)
|
||||||
{
|
{
|
||||||
kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
|
kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
|
||||||
}
|
}
|
||||||
@ -1343,9 +1459,14 @@ static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
static int tcp_v4_init_req(struct request_sock *req, const struct sock *sk_listener,
|
||||||
|
struct sk_buff *skb, bool want_cookie)
|
||||||
|
#else
|
||||||
static void tcp_v4_init_req(struct request_sock *req,
|
static void tcp_v4_init_req(struct request_sock *req,
|
||||||
const struct sock *sk_listener,
|
const struct sock *sk_listener,
|
||||||
struct sk_buff *skb)
|
struct sk_buff *skb)
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
struct inet_request_sock *ireq = inet_rsk(req);
|
struct inet_request_sock *ireq = inet_rsk(req);
|
||||||
struct net *net = sock_net(sk_listener);
|
struct net *net = sock_net(sk_listener);
|
||||||
@ -1353,6 +1474,9 @@ static void tcp_v4_init_req(struct request_sock *req,
|
|||||||
sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
|
sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
|
||||||
sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
|
sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
|
||||||
RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb));
|
RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb));
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
return 0;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
|
static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
|
||||||
@ -1372,6 +1496,9 @@ struct request_sock_ops tcp_request_sock_ops __read_mostly = {
|
|||||||
.syn_ack_timeout = tcp_syn_ack_timeout,
|
.syn_ack_timeout = tcp_syn_ack_timeout,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#ifndef CONFIG_MPTCP
|
||||||
|
static
|
||||||
|
#endif
|
||||||
const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
|
const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
|
||||||
.mss_clamp = TCP_MSS_DEFAULT,
|
.mss_clamp = TCP_MSS_DEFAULT,
|
||||||
#ifdef CONFIG_TCP_MD5SIG
|
#ifdef CONFIG_TCP_MD5SIG
|
||||||
@ -1520,7 +1647,10 @@ put_and_exit:
|
|||||||
}
|
}
|
||||||
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
|
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
|
||||||
|
|
||||||
static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
|
#ifndef CONFIG_MPTCP
|
||||||
|
static
|
||||||
|
#endif
|
||||||
|
struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
|
||||||
{
|
{
|
||||||
#ifdef CONFIG_SYN_COOKIES
|
#ifdef CONFIG_SYN_COOKIES
|
||||||
const struct tcphdr *th = tcp_hdr(skb);
|
const struct tcphdr *th = tcp_hdr(skb);
|
||||||
@ -1542,6 +1672,10 @@ static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
|
|||||||
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
|
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
|
||||||
{
|
{
|
||||||
struct sock *rsk;
|
struct sock *rsk;
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (is_meta_sk(sk))
|
||||||
|
return mptcp_v4_do_rcv(sk, skb);
|
||||||
|
#endif
|
||||||
|
|
||||||
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
|
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
|
||||||
struct dst_entry *dst;
|
struct dst_entry *dst;
|
||||||
@ -1697,6 +1831,10 @@ static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
|
|||||||
TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
|
TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
|
||||||
skb->len - th->doff * 4);
|
skb->len - th->doff * 4);
|
||||||
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
|
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
TCP_SKB_CB(skb)->mptcp_flags = 0;
|
||||||
|
TCP_SKB_CB(skb)->dss_off = 0;
|
||||||
|
#endif
|
||||||
TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
|
TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
|
||||||
TCP_SKB_CB(skb)->tcp_tw_isn = 0;
|
TCP_SKB_CB(skb)->tcp_tw_isn = 0;
|
||||||
TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
|
TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
|
||||||
@ -1717,6 +1855,9 @@ int tcp_v4_rcv(struct sk_buff *skb)
|
|||||||
const struct tcphdr *th;
|
const struct tcphdr *th;
|
||||||
bool refcounted;
|
bool refcounted;
|
||||||
struct sock *sk;
|
struct sock *sk;
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
struct sock *meta_sk = NULL;
|
||||||
|
#endif
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
if (skb->pkt_type != PACKET_HOST)
|
if (skb->pkt_type != PACKET_HOST)
|
||||||
@ -1770,15 +1911,26 @@ process:
|
|||||||
reqsk_put(req);
|
reqsk_put(req);
|
||||||
goto csum_error;
|
goto csum_error;
|
||||||
}
|
}
|
||||||
if (unlikely(sk->sk_state != TCP_LISTEN)) {
|
if (unlikely(sk->sk_state != TCP_LISTEN
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
&& !is_meta_sk(sk)
|
||||||
|
#endif
|
||||||
|
)) {
|
||||||
inet_csk_reqsk_queue_drop_and_put(sk, req);
|
inet_csk_reqsk_queue_drop_and_put(sk, req);
|
||||||
goto lookup;
|
goto lookup;
|
||||||
}
|
}
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (unlikely(is_meta_sk(sk) && !mptcp_can_new_subflow(sk))) {
|
||||||
|
inet_csk_reqsk_queue_drop_and_put(sk, req);
|
||||||
|
goto lookup;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
/* We own a reference on the listener, increase it again
|
/* We own a reference on the listener, increase it again
|
||||||
* as we might lose it too soon.
|
* as we might lose it too soon.
|
||||||
*/
|
*/
|
||||||
sock_hold(sk);
|
sock_hold(sk);
|
||||||
refcounted = true;
|
refcounted = true;
|
||||||
|
|
||||||
nsk = NULL;
|
nsk = NULL;
|
||||||
if (!tcp_filter(sk, skb)) {
|
if (!tcp_filter(sk, skb)) {
|
||||||
th = (const struct tcphdr *)skb->data;
|
th = (const struct tcphdr *)skb->data;
|
||||||
@ -1839,15 +1991,38 @@ process:
|
|||||||
|
|
||||||
sk_incoming_cpu_update(sk);
|
sk_incoming_cpu_update(sk);
|
||||||
|
|
||||||
bh_lock_sock_nested(sk);
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (mptcp(tcp_sk(sk))) {
|
||||||
|
meta_sk = mptcp_meta_sk(sk);
|
||||||
|
|
||||||
|
bh_lock_sock_nested(meta_sk);
|
||||||
|
if (sock_owned_by_user(meta_sk))
|
||||||
|
mptcp_prepare_for_backlog(sk, skb);
|
||||||
|
} else {
|
||||||
|
meta_sk = sk;
|
||||||
|
#endif
|
||||||
|
bh_lock_sock_nested(sk);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
}
|
||||||
|
#endif
|
||||||
tcp_segs_in(tcp_sk(sk), skb);
|
tcp_segs_in(tcp_sk(sk), skb);
|
||||||
ret = 0;
|
ret = 0;
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (!sock_owned_by_user(meta_sk)) {
|
||||||
|
ret = tcp_v4_do_rcv(sk, skb);
|
||||||
|
} else if (tcp_add_backlog(meta_sk, skb)) {
|
||||||
|
#else
|
||||||
if (!sock_owned_by_user(sk)) {
|
if (!sock_owned_by_user(sk)) {
|
||||||
ret = tcp_v4_do_rcv(sk, skb);
|
ret = tcp_v4_do_rcv(sk, skb);
|
||||||
} else if (tcp_add_backlog(sk, skb)) {
|
} else if (tcp_add_backlog(sk, skb)) {
|
||||||
|
#endif
|
||||||
goto discard_and_relse;
|
goto discard_and_relse;
|
||||||
}
|
}
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
bh_unlock_sock(meta_sk);
|
||||||
|
#else
|
||||||
bh_unlock_sock(sk);
|
bh_unlock_sock(sk);
|
||||||
|
#endif
|
||||||
|
|
||||||
put_and_return:
|
put_and_return:
|
||||||
if (refcounted)
|
if (refcounted)
|
||||||
@ -1861,6 +2036,19 @@ no_tcp_socket:
|
|||||||
|
|
||||||
tcp_v4_fill_cb(skb, iph, th);
|
tcp_v4_fill_cb(skb, iph, th);
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (!sk && th->syn && !th->ack) {
|
||||||
|
int ret = mptcp_lookup_join(skb, NULL);
|
||||||
|
|
||||||
|
if (ret < 0) {
|
||||||
|
tcp_v4_send_reset(NULL, skb);
|
||||||
|
goto discard_it;
|
||||||
|
} else if (ret > 0) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
if (tcp_checksum_complete(skb)) {
|
if (tcp_checksum_complete(skb)) {
|
||||||
csum_error:
|
csum_error:
|
||||||
__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
|
__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
|
||||||
@ -1909,6 +2097,18 @@ do_time_wait:
|
|||||||
refcounted = false;
|
refcounted = false;
|
||||||
goto process;
|
goto process;
|
||||||
}
|
}
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (th->syn && !th->ack) {
|
||||||
|
int ret = mptcp_lookup_join(skb, inet_twsk(sk));
|
||||||
|
|
||||||
|
if (ret < 0) {
|
||||||
|
tcp_v4_send_reset(NULL, skb);
|
||||||
|
goto discard_it;
|
||||||
|
} else if (ret > 0) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
/* to ACK */
|
/* to ACK */
|
||||||
/* fall through */
|
/* fall through */
|
||||||
@ -1978,7 +2178,12 @@ static int tcp_v4_init_sock(struct sock *sk)
|
|||||||
|
|
||||||
tcp_init_sock(sk);
|
tcp_init_sock(sk);
|
||||||
|
|
||||||
icsk->icsk_af_ops = &ipv4_specific;
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (sock_flag(sk, SOCK_MPTCP))
|
||||||
|
icsk->icsk_af_ops = &mptcp_v4_specific;
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
icsk->icsk_af_ops = &ipv4_specific;
|
||||||
|
|
||||||
#ifdef CONFIG_TCP_MD5SIG
|
#ifdef CONFIG_TCP_MD5SIG
|
||||||
tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
|
tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
|
||||||
@ -1996,7 +2201,12 @@ void tcp_v4_destroy_sock(struct sock *sk)
|
|||||||
tcp_clear_xmit_timers(sk);
|
tcp_clear_xmit_timers(sk);
|
||||||
|
|
||||||
tcp_cleanup_congestion_control(sk);
|
tcp_cleanup_congestion_control(sk);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (mptcp(tp))
|
||||||
|
mptcp_destroy_sock(sk);
|
||||||
|
if (tp->inside_tk_table)
|
||||||
|
mptcp_hash_remove_bh(tp);
|
||||||
|
#endif
|
||||||
tcp_cleanup_ulp(sk);
|
tcp_cleanup_ulp(sk);
|
||||||
|
|
||||||
/* Cleanup up the write buffer. */
|
/* Cleanup up the write buffer. */
|
||||||
@ -2506,6 +2716,11 @@ struct proto tcp_prot = {
|
|||||||
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
|
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
|
||||||
.max_header = MAX_TCP_HEADER,
|
.max_header = MAX_TCP_HEADER,
|
||||||
.obj_size = sizeof(struct tcp_sock),
|
.obj_size = sizeof(struct tcp_sock),
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
.useroffset = offsetof(struct tcp_sock, mptcp_sched_name),
|
||||||
|
.usersize = sizeof_field(struct tcp_sock, mptcp_sched_name) +
|
||||||
|
sizeof_field(struct tcp_sock, mptcp_pm_name),
|
||||||
|
#endif
|
||||||
.slab_flags = SLAB_TYPESAFE_BY_RCU,
|
.slab_flags = SLAB_TYPESAFE_BY_RCU,
|
||||||
.twsk_prot = &tcp_timewait_sock_ops,
|
.twsk_prot = &tcp_timewait_sock_ops,
|
||||||
.rsk_prot = &tcp_request_sock_ops,
|
.rsk_prot = &tcp_request_sock_ops,
|
||||||
@ -2516,6 +2731,9 @@ struct proto tcp_prot = {
|
|||||||
.compat_getsockopt = compat_tcp_getsockopt,
|
.compat_getsockopt = compat_tcp_getsockopt,
|
||||||
#endif
|
#endif
|
||||||
.diag_destroy = tcp_abort,
|
.diag_destroy = tcp_abort,
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
.clear_sk = mptcp_clear_sk,
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
EXPORT_SYMBOL(tcp_prot);
|
EXPORT_SYMBOL(tcp_prot);
|
||||||
|
|
||||||
|
@ -18,11 +18,17 @@
|
|||||||
* Jorge Cwik, <jorge@laser.satlink.net>
|
* Jorge Cwik, <jorge@laser.satlink.net>
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
#include <linux/kconfig.h>
|
||||||
|
#endif
|
||||||
#include <linux/mm.h>
|
#include <linux/mm.h>
|
||||||
#include <linux/module.h>
|
#include <linux/module.h>
|
||||||
#include <linux/slab.h>
|
#include <linux/slab.h>
|
||||||
#include <linux/sysctl.h>
|
#include <linux/sysctl.h>
|
||||||
#include <linux/workqueue.h>
|
#include <linux/workqueue.h>
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
#include <net/mptcp.h>
|
||||||
|
#endif
|
||||||
#include <linux/static_key.h>
|
#include <linux/static_key.h>
|
||||||
#include <net/tcp.h>
|
#include <net/tcp.h>
|
||||||
#include <net/inet_common.h>
|
#include <net/inet_common.h>
|
||||||
@ -94,10 +100,25 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
|
|||||||
struct tcp_options_received tmp_opt;
|
struct tcp_options_received tmp_opt;
|
||||||
struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
|
struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
|
||||||
bool paws_reject = false;
|
bool paws_reject = false;
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
struct mptcp_options_received mopt;
|
||||||
|
#endif
|
||||||
|
|
||||||
tmp_opt.saw_tstamp = 0;
|
tmp_opt.saw_tstamp = 0;
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (th->doff > (sizeof(*th) >> 2) &&
|
||||||
|
(tcptw->tw_ts_recent_stamp || tcptw->mptcp_tw)) {
|
||||||
|
#else
|
||||||
if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
|
if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
|
||||||
|
#endif
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
mptcp_init_mp_opt(&mopt);
|
||||||
|
|
||||||
|
tcp_parse_options(twsk_net(tw), skb, &tmp_opt, &mopt, 0, NULL, NULL);
|
||||||
|
|
||||||
|
#else
|
||||||
tcp_parse_options(twsk_net(tw), skb, &tmp_opt, 0, NULL);
|
tcp_parse_options(twsk_net(tw), skb, &tmp_opt, 0, NULL);
|
||||||
|
#endif
|
||||||
|
|
||||||
if (tmp_opt.saw_tstamp) {
|
if (tmp_opt.saw_tstamp) {
|
||||||
if (tmp_opt.rcv_tsecr)
|
if (tmp_opt.rcv_tsecr)
|
||||||
@ -106,6 +127,12 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
|
|||||||
tmp_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
|
tmp_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
|
||||||
paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
|
paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
|
||||||
}
|
}
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (unlikely(mopt.mp_fclose) && tcptw->mptcp_tw) {
|
||||||
|
if (mopt.mptcp_sender_key == tcptw->mptcp_tw->loc_key)
|
||||||
|
return TCP_TW_RST;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
if (tw->tw_substate == TCP_FIN_WAIT2) {
|
if (tw->tw_substate == TCP_FIN_WAIT2) {
|
||||||
@ -129,6 +156,17 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
|
|||||||
if (!th->ack ||
|
if (!th->ack ||
|
||||||
!after(TCP_SKB_CB(skb)->end_seq, tcptw->tw_rcv_nxt) ||
|
!after(TCP_SKB_CB(skb)->end_seq, tcptw->tw_rcv_nxt) ||
|
||||||
TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) {
|
TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) {
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
/* If mptcp_is_data_fin() returns true, we are sure that
|
||||||
|
* mopt has been initialized - otherwise it would not
|
||||||
|
* be a DATA_FIN.
|
||||||
|
*/
|
||||||
|
if (tcptw->mptcp_tw && tcptw->mptcp_tw->meta_tw &&
|
||||||
|
mptcp_is_data_fin(skb) &&
|
||||||
|
TCP_SKB_CB(skb)->seq == tcptw->tw_rcv_nxt &&
|
||||||
|
mopt.data_seq + 1 == (u32)tcptw->mptcp_tw->rcv_nxt)
|
||||||
|
return TCP_TW_ACK;
|
||||||
|
#endif
|
||||||
inet_twsk_put(tw);
|
inet_twsk_put(tw);
|
||||||
return TCP_TW_SUCCESS;
|
return TCP_TW_SUCCESS;
|
||||||
}
|
}
|
||||||
@ -274,6 +312,16 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
|
|||||||
tcptw->tw_ts_offset = tp->tsoffset;
|
tcptw->tw_ts_offset = tp->tsoffset;
|
||||||
tcptw->tw_last_oow_ack_time = 0;
|
tcptw->tw_last_oow_ack_time = 0;
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (mptcp(tp)) {
|
||||||
|
if (mptcp_init_tw_sock(sk, tcptw)) {
|
||||||
|
inet_twsk_free(tw);
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
tcptw->mptcp_tw = NULL;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#if IS_ENABLED(CONFIG_IPV6)
|
#if IS_ENABLED(CONFIG_IPV6)
|
||||||
if (tw->tw_family == PF_INET6) {
|
if (tw->tw_family == PF_INET6) {
|
||||||
struct ipv6_pinfo *np = inet6_sk(sk);
|
struct ipv6_pinfo *np = inet6_sk(sk);
|
||||||
@ -330,6 +378,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
|
|||||||
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEWAITOVERFLOW);
|
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEWAITOVERFLOW);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
exit:
|
||||||
|
#endif
|
||||||
tcp_update_metrics(sk);
|
tcp_update_metrics(sk);
|
||||||
tcp_done(sk);
|
tcp_done(sk);
|
||||||
}
|
}
|
||||||
@ -337,9 +388,16 @@ EXPORT_SYMBOL(tcp_time_wait);
|
|||||||
|
|
||||||
void tcp_twsk_destructor(struct sock *sk)
|
void tcp_twsk_destructor(struct sock *sk)
|
||||||
{
|
{
|
||||||
#ifdef CONFIG_TCP_MD5SIG
|
#ifdef CONFIG_MPTCP
|
||||||
struct tcp_timewait_sock *twsk = tcp_twsk(sk);
|
struct tcp_timewait_sock *twsk = tcp_twsk(sk);
|
||||||
|
|
||||||
|
if (twsk->mptcp_tw)
|
||||||
|
mptcp_twsk_destructor(twsk);
|
||||||
|
#endif
|
||||||
|
#ifdef CONFIG_TCP_MD5SIG
|
||||||
|
#ifndef CONFIG_MPTCP
|
||||||
|
struct tcp_timewait_sock *twsk = tcp_twsk(sk);
|
||||||
|
#endif
|
||||||
if (twsk->tw_md5_key)
|
if (twsk->tw_md5_key)
|
||||||
kfree_rcu(twsk->tw_md5_key, rcu);
|
kfree_rcu(twsk->tw_md5_key, rcu);
|
||||||
#endif
|
#endif
|
||||||
@ -378,8 +436,14 @@ void tcp_openreq_init_rwin(struct request_sock *req,
|
|||||||
full_space = rcv_wnd * mss;
|
full_space = rcv_wnd * mss;
|
||||||
|
|
||||||
/* tcp_full_space because it is guaranteed to be the first packet */
|
/* tcp_full_space because it is guaranteed to be the first packet */
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
tp->ops->select_initial_window(sk_listener, full_space,
|
||||||
|
mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) -
|
||||||
|
(ireq->saw_mpc ? MPTCP_SUB_LEN_DSM_ALIGN : 0),
|
||||||
|
#else
|
||||||
tcp_select_initial_window(sk_listener, full_space,
|
tcp_select_initial_window(sk_listener, full_space,
|
||||||
mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
|
mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
|
||||||
|
#endif
|
||||||
&req->rsk_rcv_wnd,
|
&req->rsk_rcv_wnd,
|
||||||
&req->rsk_window_clamp,
|
&req->rsk_window_clamp,
|
||||||
ireq->wscale_ok,
|
ireq->wscale_ok,
|
||||||
@ -477,6 +541,10 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
|
|||||||
newtp->snd_sml = newtp->snd_una =
|
newtp->snd_sml = newtp->snd_una =
|
||||||
newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
|
newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
newtp->out_of_order_queue = RB_ROOT;
|
||||||
|
newsk->tcp_rtx_queue = RB_ROOT;
|
||||||
|
#endif
|
||||||
INIT_LIST_HEAD(&newtp->tsq_node);
|
INIT_LIST_HEAD(&newtp->tsq_node);
|
||||||
INIT_LIST_HEAD(&newtp->tsorted_sent_queue);
|
INIT_LIST_HEAD(&newtp->tsorted_sent_queue);
|
||||||
|
|
||||||
@ -547,6 +615,10 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
|
|||||||
newtp->rx_opt.ts_recent_stamp = 0;
|
newtp->rx_opt.ts_recent_stamp = 0;
|
||||||
newtp->tcp_header_len = sizeof(struct tcphdr);
|
newtp->tcp_header_len = sizeof(struct tcphdr);
|
||||||
}
|
}
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (ireq->saw_mpc)
|
||||||
|
newtp->tcp_header_len += MPTCP_SUB_LEN_DSM_ALIGN;
|
||||||
|
#endif
|
||||||
newtp->tsoffset = treq->ts_off;
|
newtp->tsoffset = treq->ts_off;
|
||||||
#ifdef CONFIG_TCP_MD5SIG
|
#ifdef CONFIG_TCP_MD5SIG
|
||||||
newtp->md5sig_info = NULL; /*XXX*/
|
newtp->md5sig_info = NULL; /*XXX*/
|
||||||
@ -589,6 +661,9 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
|
|||||||
bool fastopen, bool *req_stolen)
|
bool fastopen, bool *req_stolen)
|
||||||
{
|
{
|
||||||
struct tcp_options_received tmp_opt;
|
struct tcp_options_received tmp_opt;
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
struct mptcp_options_received mopt;
|
||||||
|
#endif
|
||||||
struct sock *child;
|
struct sock *child;
|
||||||
const struct tcphdr *th = tcp_hdr(skb);
|
const struct tcphdr *th = tcp_hdr(skb);
|
||||||
__be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
|
__be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
|
||||||
@ -596,8 +671,15 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
|
|||||||
bool own_req;
|
bool own_req;
|
||||||
|
|
||||||
tmp_opt.saw_tstamp = 0;
|
tmp_opt.saw_tstamp = 0;
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
mptcp_init_mp_opt(&mopt);
|
||||||
|
#endif
|
||||||
if (th->doff > (sizeof(struct tcphdr)>>2)) {
|
if (th->doff > (sizeof(struct tcphdr)>>2)) {
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
tcp_parse_options(sock_net(sk), skb, &tmp_opt, &mopt, 0, NULL, NULL);
|
||||||
|
#else
|
||||||
tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, NULL);
|
tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, NULL);
|
||||||
|
#endif
|
||||||
|
|
||||||
if (tmp_opt.saw_tstamp) {
|
if (tmp_opt.saw_tstamp) {
|
||||||
tmp_opt.ts_recent = req->ts_recent;
|
tmp_opt.ts_recent = req->ts_recent;
|
||||||
@ -638,7 +720,14 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
|
|||||||
*
|
*
|
||||||
* Reset timer after retransmitting SYNACK, similar to
|
* Reset timer after retransmitting SYNACK, similar to
|
||||||
* the idea of fast retransmit in recovery.
|
* the idea of fast retransmit in recovery.
|
||||||
|
*
|
||||||
|
* Fall back to TCP if MP_CAPABLE is not set.
|
||||||
*/
|
*/
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (inet_rsk(req)->saw_mpc && !mopt.saw_mpc)
|
||||||
|
inet_rsk(req)->saw_mpc = false;
|
||||||
|
#endif
|
||||||
|
|
||||||
if (!tcp_oow_rate_limited(sock_net(sk), skb,
|
if (!tcp_oow_rate_limited(sock_net(sk), skb,
|
||||||
LINUX_MIB_TCPACKSKIPPEDSYNRECV,
|
LINUX_MIB_TCPACKSKIPPEDSYNRECV,
|
||||||
&tcp_rsk(req)->last_oow_ack_time) &&
|
&tcp_rsk(req)->last_oow_ack_time) &&
|
||||||
@ -791,6 +880,19 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
|
|||||||
if (!child)
|
if (!child)
|
||||||
goto listen_overflow;
|
goto listen_overflow;
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (own_req && !is_meta_sk(sk)) {
|
||||||
|
int ret = mptcp_check_req_master(sk, child, req, skb, 1, 0);
|
||||||
|
if (ret < 0)
|
||||||
|
goto listen_overflow;
|
||||||
|
|
||||||
|
/* MPTCP-supported */
|
||||||
|
if (!ret)
|
||||||
|
return tcp_sk(child)->mpcb->master_sk;
|
||||||
|
} else if (own_req) {
|
||||||
|
return mptcp_check_req_child(sk, child, req, skb, &mopt);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
sock_rps_save_rxhash(child, skb);
|
sock_rps_save_rxhash(child, skb);
|
||||||
tcp_synack_rtt_meas(child, req);
|
tcp_synack_rtt_meas(child, req);
|
||||||
*req_stolen = !own_req;
|
*req_stolen = !own_req;
|
||||||
@ -842,12 +944,24 @@ int tcp_child_process(struct sock *parent, struct sock *child,
|
|||||||
{
|
{
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
int state = child->sk_state;
|
int state = child->sk_state;
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
struct sock *meta_sk = mptcp(tcp_sk(child)) ? mptcp_meta_sk(child) : child;
|
||||||
|
#endif
|
||||||
|
|
||||||
/* record NAPI ID of child */
|
/* record NAPI ID of child */
|
||||||
sk_mark_napi_id(child, skb);
|
sk_mark_napi_id(child, skb);
|
||||||
|
|
||||||
tcp_segs_in(tcp_sk(child), skb);
|
tcp_segs_in(tcp_sk(child), skb);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
/* The following will be removed when we allow lockless data-reception
|
||||||
|
* on the subflows.
|
||||||
|
*/
|
||||||
|
if (mptcp(tcp_sk(child)))
|
||||||
|
bh_lock_sock_nested(meta_sk);
|
||||||
|
if (!sock_owned_by_user(meta_sk)) {
|
||||||
|
#else
|
||||||
if (!sock_owned_by_user(child)) {
|
if (!sock_owned_by_user(child)) {
|
||||||
|
#endif
|
||||||
ret = tcp_rcv_state_process(child, skb);
|
ret = tcp_rcv_state_process(child, skb);
|
||||||
/* Wakeup parent, send SIGIO */
|
/* Wakeup parent, send SIGIO */
|
||||||
if (state == TCP_SYN_RECV && child->sk_state != state)
|
if (state == TCP_SYN_RECV && child->sk_state != state)
|
||||||
@ -857,10 +971,20 @@ int tcp_child_process(struct sock *parent, struct sock *child,
|
|||||||
* in main socket hash table and lock on listening
|
* in main socket hash table and lock on listening
|
||||||
* socket does not protect us more.
|
* socket does not protect us more.
|
||||||
*/
|
*/
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (mptcp(tcp_sk(child)))
|
||||||
|
mptcp_prepare_for_backlog(child, skb);
|
||||||
|
__sk_add_backlog(meta_sk, skb);
|
||||||
|
#else
|
||||||
__sk_add_backlog(child, skb);
|
__sk_add_backlog(child, skb);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
bh_unlock_sock(child);
|
bh_unlock_sock(child);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (mptcp(tcp_sk(child)))
|
||||||
|
bh_unlock_sock(meta_sk);
|
||||||
|
#endif
|
||||||
sock_put(child);
|
sock_put(child);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -36,6 +36,14 @@
|
|||||||
|
|
||||||
#define pr_fmt(fmt) "TCP: " fmt
|
#define pr_fmt(fmt) "TCP: " fmt
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
#include <net/mptcp.h>
|
||||||
|
#include <net/mptcp_v4.h>
|
||||||
|
#if IS_ENABLED(CONFIG_IPV6)
|
||||||
|
#include <net/mptcp_v6.h>
|
||||||
|
#endif
|
||||||
|
#include <net/ipv6.h>
|
||||||
|
#endif
|
||||||
#include <net/tcp.h>
|
#include <net/tcp.h>
|
||||||
|
|
||||||
#include <linux/compiler.h>
|
#include <linux/compiler.h>
|
||||||
@ -45,11 +53,16 @@
|
|||||||
|
|
||||||
#include <trace/events/tcp.h>
|
#include <trace/events/tcp.h>
|
||||||
|
|
||||||
|
#ifndef CONFIG_MPTCP
|
||||||
static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
|
static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
|
||||||
int push_one, gfp_t gfp);
|
int push_one, gfp_t gfp);
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Account for new data that has been sent to the network. */
|
/* Account for new data that has been sent to the network. */
|
||||||
static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
|
#ifndef CONFIG_MPTCP
|
||||||
|
static
|
||||||
|
#endif
|
||||||
|
void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
|
||||||
{
|
{
|
||||||
struct inet_connection_sock *icsk = inet_csk(sk);
|
struct inet_connection_sock *icsk = inet_csk(sk);
|
||||||
struct tcp_sock *tp = tcp_sk(sk);
|
struct tcp_sock *tp = tcp_sk(sk);
|
||||||
@ -243,12 +256,24 @@ EXPORT_SYMBOL(tcp_select_initial_window);
|
|||||||
* value can be stuffed directly into th->window for an outgoing
|
* value can be stuffed directly into th->window for an outgoing
|
||||||
* frame.
|
* frame.
|
||||||
*/
|
*/
|
||||||
static u16 tcp_select_window(struct sock *sk)
|
#ifndef CONFIG_MPTCP
|
||||||
|
static
|
||||||
|
#endif
|
||||||
|
u16 tcp_select_window(struct sock *sk)
|
||||||
{
|
{
|
||||||
struct tcp_sock *tp = tcp_sk(sk);
|
struct tcp_sock *tp = tcp_sk(sk);
|
||||||
u32 old_win = tp->rcv_wnd;
|
u32 old_win = tp->rcv_wnd;
|
||||||
|
/* The window must never shrink at the meta-level. At the subflow we
|
||||||
|
* have to allow this. Otherwise we may announce a window too large
|
||||||
|
* for the current meta-level sk_rcvbuf.
|
||||||
|
*/
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
u32 cur_win = tcp_receive_window(mptcp(tp) ? tcp_sk(mptcp_meta_sk(sk)) : tp);
|
||||||
|
u32 new_win = tp->ops->__select_window(sk);
|
||||||
|
#else
|
||||||
u32 cur_win = tcp_receive_window(tp);
|
u32 cur_win = tcp_receive_window(tp);
|
||||||
u32 new_win = __tcp_select_window(sk);
|
u32 new_win = __tcp_select_window(sk);
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Never shrink the offered window */
|
/* Never shrink the offered window */
|
||||||
if (new_win < cur_win) {
|
if (new_win < cur_win) {
|
||||||
@ -264,6 +289,7 @@ static u16 tcp_select_window(struct sock *sk)
|
|||||||
LINUX_MIB_TCPWANTZEROWINDOWADV);
|
LINUX_MIB_TCPWANTZEROWINDOWADV);
|
||||||
new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale);
|
new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale);
|
||||||
}
|
}
|
||||||
|
|
||||||
tp->rcv_wnd = new_win;
|
tp->rcv_wnd = new_win;
|
||||||
tp->rcv_wup = tp->rcv_nxt;
|
tp->rcv_wup = tp->rcv_nxt;
|
||||||
|
|
||||||
@ -376,7 +402,10 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb,
|
|||||||
/* Constructs common control bits of non-data skb. If SYN/FIN is present,
|
/* Constructs common control bits of non-data skb. If SYN/FIN is present,
|
||||||
* auto increment end seqno.
|
* auto increment end seqno.
|
||||||
*/
|
*/
|
||||||
static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
|
#ifndef CONFIG_MPTCP
|
||||||
|
static
|
||||||
|
#endif
|
||||||
|
void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
|
||||||
{
|
{
|
||||||
skb->ip_summed = CHECKSUM_PARTIAL;
|
skb->ip_summed = CHECKSUM_PARTIAL;
|
||||||
|
|
||||||
@ -391,7 +420,10 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
|
|||||||
TCP_SKB_CB(skb)->end_seq = seq;
|
TCP_SKB_CB(skb)->end_seq = seq;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool tcp_urg_mode(const struct tcp_sock *tp)
|
#ifndef CONFIG_MPTCP
|
||||||
|
static inline
|
||||||
|
#endif
|
||||||
|
bool tcp_urg_mode(const struct tcp_sock *tp)
|
||||||
{
|
{
|
||||||
return tp->snd_una != tp->snd_up;
|
return tp->snd_una != tp->snd_up;
|
||||||
}
|
}
|
||||||
@ -402,6 +434,7 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp)
|
|||||||
#define OPTION_WSCALE (1 << 3)
|
#define OPTION_WSCALE (1 << 3)
|
||||||
#define OPTION_FAST_OPEN_COOKIE (1 << 8)
|
#define OPTION_FAST_OPEN_COOKIE (1 << 8)
|
||||||
#define OPTION_SMC (1 << 9)
|
#define OPTION_SMC (1 << 9)
|
||||||
|
/* Before adding here - take a look at OPTION_MPTCP in include/net/mptcp.h */
|
||||||
|
|
||||||
static void smc_options_write(__be32 *ptr, u16 *options)
|
static void smc_options_write(__be32 *ptr, u16 *options)
|
||||||
{
|
{
|
||||||
@ -418,6 +451,7 @@ static void smc_options_write(__be32 *ptr, u16 *options)
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef CONFIG_MPTCP
|
||||||
struct tcp_out_options {
|
struct tcp_out_options {
|
||||||
u16 options; /* bit field of OPTION_* */
|
u16 options; /* bit field of OPTION_* */
|
||||||
u16 mss; /* 0 to disable */
|
u16 mss; /* 0 to disable */
|
||||||
@ -428,6 +462,7 @@ struct tcp_out_options {
|
|||||||
__u32 tsval, tsecr; /* need to include OPTION_TS */
|
__u32 tsval, tsecr; /* need to include OPTION_TS */
|
||||||
struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */
|
struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */
|
||||||
};
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Write previously computed TCP options to the packet.
|
/* Write previously computed TCP options to the packet.
|
||||||
*
|
*
|
||||||
@ -443,7 +478,11 @@ struct tcp_out_options {
|
|||||||
* (but it may well be that other scenarios fail similarly).
|
* (but it may well be that other scenarios fail similarly).
|
||||||
*/
|
*/
|
||||||
static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
|
static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
|
||||||
struct tcp_out_options *opts)
|
struct tcp_out_options *opts
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
, struct sk_buff *skb
|
||||||
|
#endif
|
||||||
|
)
|
||||||
{
|
{
|
||||||
u16 options = opts->options; /* mungable copy */
|
u16 options = opts->options; /* mungable copy */
|
||||||
|
|
||||||
@ -537,6 +576,10 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
|
|||||||
}
|
}
|
||||||
|
|
||||||
smc_options_write(ptr, &options);
|
smc_options_write(ptr, &options);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (unlikely(OPTION_MPTCP & opts->options))
|
||||||
|
mptcp_options_write(ptr, tp, opts, skb);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static void smc_set_option(const struct tcp_sock *tp,
|
static void smc_set_option(const struct tcp_sock *tp,
|
||||||
@ -622,7 +665,10 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
|
|||||||
if (unlikely(!(OPTION_TS & opts->options)))
|
if (unlikely(!(OPTION_TS & opts->options)))
|
||||||
remaining -= TCPOLEN_SACKPERM_ALIGNED;
|
remaining -= TCPOLEN_SACKPERM_ALIGNED;
|
||||||
}
|
}
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (tp->request_mptcp || mptcp(tp))
|
||||||
|
mptcp_syn_options(sk, opts, &remaining);
|
||||||
|
#endif
|
||||||
if (fastopen && fastopen->cookie.len >= 0) {
|
if (fastopen && fastopen->cookie.len >= 0) {
|
||||||
u32 need = fastopen->cookie.len;
|
u32 need = fastopen->cookie.len;
|
||||||
|
|
||||||
@ -704,7 +750,10 @@ static unsigned int tcp_synack_options(const struct sock *sk,
|
|||||||
}
|
}
|
||||||
|
|
||||||
smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
|
smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (ireq->saw_mpc)
|
||||||
|
mptcp_synack_options(req, opts, &remaining);
|
||||||
|
#endif
|
||||||
return MAX_TCP_OPTION_SPACE - remaining;
|
return MAX_TCP_OPTION_SPACE - remaining;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -738,10 +787,22 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
|
|||||||
opts->tsecr = tp->rx_opt.ts_recent;
|
opts->tsecr = tp->rx_opt.ts_recent;
|
||||||
size += TCPOLEN_TSTAMP_ALIGNED;
|
size += TCPOLEN_TSTAMP_ALIGNED;
|
||||||
}
|
}
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (mptcp(tp))
|
||||||
|
mptcp_established_options(sk, skb, opts, &size);
|
||||||
|
#endif
|
||||||
|
|
||||||
eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
|
eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
|
||||||
if (unlikely(eff_sacks)) {
|
if (unlikely(eff_sacks)) {
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
const unsigned remaining = MAX_TCP_OPTION_SPACE - size;
|
||||||
|
|
||||||
|
if (remaining < TCPOLEN_SACK_BASE_ALIGNED)
|
||||||
|
opts->num_sack_blocks = 0;
|
||||||
|
else
|
||||||
|
#else
|
||||||
const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
|
const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
|
||||||
|
#endif
|
||||||
opts->num_sack_blocks =
|
opts->num_sack_blocks =
|
||||||
min_t(unsigned int, eff_sacks,
|
min_t(unsigned int, eff_sacks,
|
||||||
(remaining - TCPOLEN_SACK_BASE_ALIGNED) /
|
(remaining - TCPOLEN_SACK_BASE_ALIGNED) /
|
||||||
@ -786,21 +847,46 @@ static void tcp_tsq_write(struct sock *sk)
|
|||||||
tp->snd_cwnd > tcp_packets_in_flight(tp)) {
|
tp->snd_cwnd > tcp_packets_in_flight(tp)) {
|
||||||
tcp_mstamp_refresh(tp);
|
tcp_mstamp_refresh(tp);
|
||||||
tcp_xmit_retransmit_queue(sk);
|
tcp_xmit_retransmit_queue(sk);
|
||||||
}
|
}
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
tcp_sk(sk)->ops->write_xmit(sk, tcp_current_mss(sk),
|
||||||
|
tcp_sk(sk)->nonagle, 0, GFP_ATOMIC);
|
||||||
|
#else
|
||||||
tcp_write_xmit(sk, tcp_current_mss(sk), tp->nonagle,
|
tcp_write_xmit(sk, tcp_current_mss(sk), tp->nonagle,
|
||||||
0, GFP_ATOMIC);
|
0, GFP_ATOMIC);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void tcp_tsq_handler(struct sock *sk)
|
static void tcp_tsq_handler(struct sock *sk)
|
||||||
{
|
{
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
struct tcp_sock *tp = tcp_sk(sk);
|
||||||
|
struct sock *meta_sk = mptcp(tp) ? mptcp_meta_sk(sk) : sk;
|
||||||
|
|
||||||
|
bh_lock_sock(meta_sk);
|
||||||
|
if (!sock_owned_by_user(meta_sk)) {
|
||||||
|
tcp_tsq_write(sk);
|
||||||
|
|
||||||
|
if (mptcp(tp))
|
||||||
|
tcp_tsq_write(meta_sk);
|
||||||
|
} else {
|
||||||
|
if (!test_and_set_bit(TCP_TSQ_DEFERRED, &meta_sk->sk_tsq_flags))
|
||||||
|
sock_hold(meta_sk);
|
||||||
|
|
||||||
|
if ((mptcp(tp)) && (sk->sk_state != TCP_CLOSE))
|
||||||
|
mptcp_tsq_flags(sk);
|
||||||
|
}
|
||||||
|
|
||||||
|
bh_unlock_sock(meta_sk);
|
||||||
|
#else
|
||||||
bh_lock_sock(sk);
|
bh_lock_sock(sk);
|
||||||
if (!sock_owned_by_user(sk))
|
if (!sock_owned_by_user(sk))
|
||||||
tcp_tsq_write(sk);
|
tcp_tsq_write(sk);
|
||||||
else if (!test_and_set_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
|
else if (!test_and_set_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
|
||||||
sock_hold(sk);
|
sock_hold(sk);
|
||||||
bh_unlock_sock(sk);
|
bh_unlock_sock(sk);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
* One tasklet per cpu tries to send more skbs.
|
* One tasklet per cpu tries to send more skbs.
|
||||||
@ -834,10 +920,19 @@ static void tcp_tasklet_func(unsigned long data)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
#define TCP_DEFERRED_ALL (TCPF_TSQ_DEFERRED | \
|
||||||
|
TCPF_WRITE_TIMER_DEFERRED | \
|
||||||
|
TCPF_DELACK_TIMER_DEFERRED | \
|
||||||
|
TCPF_MTU_REDUCED_DEFERRED | \
|
||||||
|
TCPF_PATH_MANAGER_DEFERRED |\
|
||||||
|
TCPF_SUB_DEFERRED)
|
||||||
|
#else
|
||||||
#define TCP_DEFERRED_ALL (TCPF_TSQ_DEFERRED | \
|
#define TCP_DEFERRED_ALL (TCPF_TSQ_DEFERRED | \
|
||||||
TCPF_WRITE_TIMER_DEFERRED | \
|
TCPF_WRITE_TIMER_DEFERRED | \
|
||||||
TCPF_DELACK_TIMER_DEFERRED | \
|
TCPF_DELACK_TIMER_DEFERRED | \
|
||||||
TCPF_MTU_REDUCED_DEFERRED)
|
TCPF_MTU_REDUCED_DEFERRED)
|
||||||
|
#endif
|
||||||
/**
|
/**
|
||||||
* tcp_release_cb - tcp release_sock() callback
|
* tcp_release_cb - tcp release_sock() callback
|
||||||
* @sk: socket
|
* @sk: socket
|
||||||
@ -860,6 +955,10 @@ void tcp_release_cb(struct sock *sk)
|
|||||||
if (flags & TCPF_TSQ_DEFERRED) {
|
if (flags & TCPF_TSQ_DEFERRED) {
|
||||||
tcp_tsq_write(sk);
|
tcp_tsq_write(sk);
|
||||||
__sock_put(sk);
|
__sock_put(sk);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (mptcp(tcp_sk(sk)))
|
||||||
|
tcp_tsq_write(mptcp_meta_sk(sk));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
/* Here begins the tricky part :
|
/* Here begins the tricky part :
|
||||||
* We are called from release_sock() with :
|
* We are called from release_sock() with :
|
||||||
@ -884,6 +983,15 @@ void tcp_release_cb(struct sock *sk)
|
|||||||
inet_csk(sk)->icsk_af_ops->mtu_reduced(sk);
|
inet_csk(sk)->icsk_af_ops->mtu_reduced(sk);
|
||||||
__sock_put(sk);
|
__sock_put(sk);
|
||||||
}
|
}
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (flags & TCPF_PATH_MANAGER_DEFERRED) {
|
||||||
|
if (tcp_sk(sk)->mpcb->pm_ops->release_sock)
|
||||||
|
tcp_sk(sk)->mpcb->pm_ops->release_sock(sk);
|
||||||
|
__sock_put(sk);
|
||||||
|
}
|
||||||
|
if (flags & TCPF_SUB_DEFERRED)
|
||||||
|
mptcp_tsq_sub_deferred(sk);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(tcp_release_cb);
|
EXPORT_SYMBOL(tcp_release_cb);
|
||||||
|
|
||||||
@ -1004,7 +1112,10 @@ static bool tcp_pacing_check(const struct sock *sk)
|
|||||||
hrtimer_is_queued(&tcp_sk(sk)->pacing_timer);
|
hrtimer_is_queued(&tcp_sk(sk)->pacing_timer);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb)
|
#ifndef CONFIG_MPTCP
|
||||||
|
static
|
||||||
|
#endif
|
||||||
|
void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb)
|
||||||
{
|
{
|
||||||
skb->skb_mstamp = tp->tcp_mstamp;
|
skb->skb_mstamp = tp->tcp_mstamp;
|
||||||
list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
|
list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
|
||||||
@ -1115,11 +1226,18 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
|
|||||||
th->urg = 1;
|
th->urg = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
tcp_options_write((__be32 *)(th + 1), tp, &opts, skb);
|
||||||
|
#else
|
||||||
tcp_options_write((__be32 *)(th + 1), tp, &opts);
|
tcp_options_write((__be32 *)(th + 1), tp, &opts);
|
||||||
|
#endif
|
||||||
skb_shinfo(skb)->gso_type = sk->sk_gso_type;
|
skb_shinfo(skb)->gso_type = sk->sk_gso_type;
|
||||||
if (likely(!(tcb->tcp_flags & TCPHDR_SYN))) {
|
if (likely(!(tcb->tcp_flags & TCPHDR_SYN))) {
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
th->window = htons(tp->ops->select_window(sk));
|
||||||
|
#else
|
||||||
th->window = htons(tcp_select_window(sk));
|
th->window = htons(tcp_select_window(sk));
|
||||||
|
#endif
|
||||||
tcp_ecn_send(sk, skb, th, tcp_header_size);
|
tcp_ecn_send(sk, skb, th, tcp_header_size);
|
||||||
} else {
|
} else {
|
||||||
/* RFC1323: The window in SYN & SYN/ACK segments
|
/* RFC1323: The window in SYN & SYN/ACK segments
|
||||||
@ -1177,7 +1295,10 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
|
|||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
|
#ifndef CONFIG_MPTCP
|
||||||
|
static
|
||||||
|
#endif
|
||||||
|
int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
|
||||||
gfp_t gfp_mask)
|
gfp_t gfp_mask)
|
||||||
{
|
{
|
||||||
return __tcp_transmit_skb(sk, skb, clone_it, gfp_mask,
|
return __tcp_transmit_skb(sk, skb, clone_it, gfp_mask,
|
||||||
@ -1189,7 +1310,10 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
|
|||||||
* NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames,
|
* NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames,
|
||||||
* otherwise socket can stall.
|
* otherwise socket can stall.
|
||||||
*/
|
*/
|
||||||
static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
|
#ifndef CONFIG_MPTCP
|
||||||
|
static
|
||||||
|
#endif
|
||||||
|
void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
|
||||||
{
|
{
|
||||||
struct tcp_sock *tp = tcp_sk(sk);
|
struct tcp_sock *tp = tcp_sk(sk);
|
||||||
|
|
||||||
@ -1202,7 +1326,10 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Initialize TSO segments for a packet. */
|
/* Initialize TSO segments for a packet. */
|
||||||
static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
|
#ifndef CONFIG_MPTCP
|
||||||
|
static
|
||||||
|
#endif
|
||||||
|
void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
|
||||||
{
|
{
|
||||||
if (skb->len <= mss_now) {
|
if (skb->len <= mss_now) {
|
||||||
/* Avoid the costly divide in the normal
|
/* Avoid the costly divide in the normal
|
||||||
@ -1219,7 +1346,10 @@ static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
|
|||||||
/* Pcount in the middle of the write queue got changed, we need to do various
|
/* Pcount in the middle of the write queue got changed, we need to do various
|
||||||
* tweaks to fix counters
|
* tweaks to fix counters
|
||||||
*/
|
*/
|
||||||
static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr)
|
#ifndef CONFIG_MPTCP
|
||||||
|
static
|
||||||
|
#endif
|
||||||
|
void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr)
|
||||||
{
|
{
|
||||||
struct tcp_sock *tp = tcp_sk(sk);
|
struct tcp_sock *tp = tcp_sk(sk);
|
||||||
|
|
||||||
@ -1387,7 +1517,10 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
|
|||||||
/* This is similar to __pskb_pull_tail(). The difference is that pulled
|
/* This is similar to __pskb_pull_tail(). The difference is that pulled
|
||||||
* data is not copied, but immediately discarded.
|
* data is not copied, but immediately discarded.
|
||||||
*/
|
*/
|
||||||
static int __pskb_trim_head(struct sk_buff *skb, int len)
|
#ifndef CONFIG_MPTCP
|
||||||
|
static
|
||||||
|
#endif
|
||||||
|
int __pskb_trim_head(struct sk_buff *skb, int len)
|
||||||
{
|
{
|
||||||
struct skb_shared_info *shinfo;
|
struct skb_shared_info *shinfo;
|
||||||
int i, k, eat;
|
int i, k, eat;
|
||||||
@ -1611,6 +1744,10 @@ unsigned int tcp_current_mss(struct sock *sk)
|
|||||||
return mss_now;
|
return mss_now;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
EXPORT_SYMBOL(tcp_current_mss);
|
||||||
|
#endif
|
||||||
|
|
||||||
/* RFC2861, slow part. Adjust cwnd, after it was not full during one rto.
|
/* RFC2861, slow part. Adjust cwnd, after it was not full during one rto.
|
||||||
* As additional protections, we do not touch cwnd in retransmission phases,
|
* As additional protections, we do not touch cwnd in retransmission phases,
|
||||||
* and if application hit its sndbuf limit recently.
|
* and if application hit its sndbuf limit recently.
|
||||||
@ -1633,7 +1770,10 @@ static void tcp_cwnd_application_limited(struct sock *sk)
|
|||||||
tp->snd_cwnd_stamp = tcp_jiffies32;
|
tp->snd_cwnd_stamp = tcp_jiffies32;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
|
#ifndef CONFIG_MPTCP
|
||||||
|
static
|
||||||
|
#endif
|
||||||
|
void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
|
||||||
{
|
{
|
||||||
const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
|
const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
|
||||||
struct tcp_sock *tp = tcp_sk(sk);
|
struct tcp_sock *tp = tcp_sk(sk);
|
||||||
@ -1697,8 +1837,11 @@ static bool tcp_minshall_check(const struct tcp_sock *tp)
|
|||||||
* But we can avoid doing the divide again given we already have
|
* But we can avoid doing the divide again given we already have
|
||||||
* skb_pcount = skb->len / mss_now
|
* skb_pcount = skb->len / mss_now
|
||||||
*/
|
*/
|
||||||
static void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now,
|
#ifndef CONFIG_MPTCP
|
||||||
const struct sk_buff *skb)
|
static
|
||||||
|
#endif
|
||||||
|
void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now,
|
||||||
|
const struct sk_buff *skb)
|
||||||
{
|
{
|
||||||
if (skb->len < tcp_skb_pcount(skb) * mss_now)
|
if (skb->len < tcp_skb_pcount(skb) * mss_now)
|
||||||
tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
|
tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
|
||||||
@ -1757,11 +1900,14 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Returns the portion of skb which can be sent right away */
|
/* Returns the portion of skb which can be sent right away */
|
||||||
static unsigned int tcp_mss_split_point(const struct sock *sk,
|
#ifndef CONFIG_MPTCP
|
||||||
const struct sk_buff *skb,
|
static
|
||||||
unsigned int mss_now,
|
#endif
|
||||||
unsigned int max_segs,
|
unsigned int tcp_mss_split_point(const struct sock *sk,
|
||||||
int nonagle)
|
const struct sk_buff *skb,
|
||||||
|
unsigned int mss_now,
|
||||||
|
unsigned int max_segs,
|
||||||
|
int nonagle)
|
||||||
{
|
{
|
||||||
const struct tcp_sock *tp = tcp_sk(sk);
|
const struct tcp_sock *tp = tcp_sk(sk);
|
||||||
u32 partial, needed, window, max_len;
|
u32 partial, needed, window, max_len;
|
||||||
@ -1791,13 +1937,20 @@ static unsigned int tcp_mss_split_point(const struct sock *sk,
|
|||||||
/* Can at least one segment of SKB be sent right now, according to the
|
/* Can at least one segment of SKB be sent right now, according to the
|
||||||
* congestion window rules? If so, return how many segments are allowed.
|
* congestion window rules? If so, return how many segments are allowed.
|
||||||
*/
|
*/
|
||||||
static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
|
#ifndef CONFIG_MPTCP
|
||||||
const struct sk_buff *skb)
|
static inline
|
||||||
|
#endif
|
||||||
|
unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
|
||||||
|
const struct sk_buff *skb)
|
||||||
{
|
{
|
||||||
u32 in_flight, cwnd, halfcwnd;
|
u32 in_flight, cwnd, halfcwnd;
|
||||||
|
|
||||||
/* Don't be strict about the congestion window for the final FIN. */
|
/* Don't be strict about the congestion window for the final FIN. */
|
||||||
if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
|
if (
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
skb &&
|
||||||
|
#endif
|
||||||
|
(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
|
||||||
tcp_skb_pcount(skb) == 1)
|
tcp_skb_pcount(skb) == 1)
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
@ -1812,12 +1965,18 @@ static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
|
|||||||
halfcwnd = max(cwnd >> 1, 1U);
|
halfcwnd = max(cwnd >> 1, 1U);
|
||||||
return min(halfcwnd, cwnd - in_flight);
|
return min(halfcwnd, cwnd - in_flight);
|
||||||
}
|
}
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
EXPORT_SYMBOL(tcp_cwnd_test);
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Initialize TSO state of a skb.
|
/* Initialize TSO state of a skb.
|
||||||
* This must be invoked the first time we consider transmitting
|
* This must be invoked the first time we consider transmitting
|
||||||
* SKB onto the wire.
|
* SKB onto the wire.
|
||||||
*/
|
*/
|
||||||
static int tcp_init_tso_segs(struct sk_buff *skb, unsigned int mss_now)
|
#ifndef CONFIG_MPTCP
|
||||||
|
static
|
||||||
|
#endif
|
||||||
|
int tcp_init_tso_segs(struct sk_buff *skb, unsigned int mss_now)
|
||||||
{
|
{
|
||||||
int tso_segs = tcp_skb_pcount(skb);
|
int tso_segs = tcp_skb_pcount(skb);
|
||||||
|
|
||||||
@ -1832,8 +1991,11 @@ static int tcp_init_tso_segs(struct sk_buff *skb, unsigned int mss_now)
|
|||||||
/* Return true if the Nagle test allows this packet to be
|
/* Return true if the Nagle test allows this packet to be
|
||||||
* sent now.
|
* sent now.
|
||||||
*/
|
*/
|
||||||
static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb,
|
#ifndef CONFIG_MPTCP
|
||||||
unsigned int cur_mss, int nonagle)
|
static inline
|
||||||
|
#endif
|
||||||
|
bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb,
|
||||||
|
unsigned int cur_mss, int nonagle)
|
||||||
{
|
{
|
||||||
/* Nagle rule does not apply to frames, which sit in the middle of the
|
/* Nagle rule does not apply to frames, which sit in the middle of the
|
||||||
* write_queue (they have no chances to get new data).
|
* write_queue (they have no chances to get new data).
|
||||||
@ -1845,7 +2007,11 @@ static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buf
|
|||||||
return true;
|
return true;
|
||||||
|
|
||||||
/* Don't use the nagle rule for urgent data (or for the final FIN). */
|
/* Don't use the nagle rule for urgent data (or for the final FIN). */
|
||||||
if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
|
if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
|| mptcp_is_data_fin(skb)
|
||||||
|
#endif
|
||||||
|
)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
if (!tcp_nagle_check(skb->len < cur_mss, tp, nonagle))
|
if (!tcp_nagle_check(skb->len < cur_mss, tp, nonagle))
|
||||||
@ -1855,7 +2021,10 @@ static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buf
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Does at least the first segment of SKB fit into the send window? */
|
/* Does at least the first segment of SKB fit into the send window? */
|
||||||
static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
|
#ifndef CONFIG_MPTCP
|
||||||
|
static
|
||||||
|
#endif
|
||||||
|
bool tcp_snd_wnd_test(const struct tcp_sock *tp,
|
||||||
const struct sk_buff *skb,
|
const struct sk_buff *skb,
|
||||||
unsigned int cur_mss)
|
unsigned int cur_mss)
|
||||||
{
|
{
|
||||||
@ -1866,6 +2035,9 @@ static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
|
|||||||
|
|
||||||
return !after(end_seq, tcp_wnd_end(tp));
|
return !after(end_seq, tcp_wnd_end(tp));
|
||||||
}
|
}
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
EXPORT_SYMBOL(tcp_snd_wnd_test);
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Trim TSO SKB to LEN bytes, put the remaining data into a new packet
|
/* Trim TSO SKB to LEN bytes, put the remaining data into a new packet
|
||||||
* which is put after SKB on the list. It is very much like
|
* which is put after SKB on the list. It is very much like
|
||||||
@ -2017,8 +2189,12 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
/* If this packet won't get more data, do not wait. */
|
/* If this packet won't get more data, do not wait. */
|
||||||
|
if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) || mptcp_is_data_fin(skb))
|
||||||
|
#else
|
||||||
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
|
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
|
||||||
|
#endif
|
||||||
goto send_now;
|
goto send_now;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
@ -2322,8 +2498,11 @@ void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type)
|
|||||||
* Returns true, if no segments are in flight and we have queued segments,
|
* Returns true, if no segments are in flight and we have queued segments,
|
||||||
* but cannot send anything now because of SWS or another problem.
|
* but cannot send anything now because of SWS or another problem.
|
||||||
*/
|
*/
|
||||||
static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
|
#ifndef CONFIG_MPTCP
|
||||||
int push_one, gfp_t gfp)
|
static
|
||||||
|
#endif
|
||||||
|
bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
|
||||||
|
int push_one, gfp_t gfp)
|
||||||
{
|
{
|
||||||
struct tcp_sock *tp = tcp_sk(sk);
|
struct tcp_sock *tp = tcp_sk(sk);
|
||||||
struct sk_buff *skb;
|
struct sk_buff *skb;
|
||||||
@ -2336,7 +2515,16 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
|
|||||||
sent_pkts = 0;
|
sent_pkts = 0;
|
||||||
|
|
||||||
tcp_mstamp_refresh(tp);
|
tcp_mstamp_refresh(tp);
|
||||||
if (!push_one) {
|
|
||||||
|
/* pmtu not yet supported with MPTCP. Should be possible, by early
|
||||||
|
* exiting the loop inside tcp_mtu_probe, making sure that only one
|
||||||
|
* single DSS-mapping gets probed.
|
||||||
|
*/
|
||||||
|
if (!push_one
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
&& !mptcp(tp)
|
||||||
|
#endif
|
||||||
|
) {
|
||||||
/* Do MTU probing. */
|
/* Do MTU probing. */
|
||||||
result = tcp_mtu_probe(sk);
|
result = tcp_mtu_probe(sk);
|
||||||
if (!result) {
|
if (!result) {
|
||||||
@ -2435,7 +2623,12 @@ repair:
|
|||||||
|
|
||||||
is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd);
|
is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd);
|
||||||
if (likely(sent_pkts || is_cwnd_limited))
|
if (likely(sent_pkts || is_cwnd_limited))
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (tp->ops->cwnd_validate)
|
||||||
|
tp->ops->cwnd_validate(sk, is_cwnd_limited);
|
||||||
|
#else
|
||||||
tcp_cwnd_validate(sk, is_cwnd_limited);
|
tcp_cwnd_validate(sk, is_cwnd_limited);
|
||||||
|
#endif
|
||||||
|
|
||||||
if (likely(sent_pkts)) {
|
if (likely(sent_pkts)) {
|
||||||
if (tcp_in_cwnd_reduction(sk))
|
if (tcp_in_cwnd_reduction(sk))
|
||||||
@ -2531,7 +2724,11 @@ void tcp_send_loss_probe(struct sock *sk)
|
|||||||
skb = tcp_send_head(sk);
|
skb = tcp_send_head(sk);
|
||||||
if (skb && tcp_snd_wnd_test(tp, skb, mss)) {
|
if (skb && tcp_snd_wnd_test(tp, skb, mss)) {
|
||||||
pcount = tp->packets_out;
|
pcount = tp->packets_out;
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
tp->ops->write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
|
||||||
|
#else
|
||||||
tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
|
tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
|
||||||
|
#endif
|
||||||
if (tp->packets_out > pcount)
|
if (tp->packets_out > pcount)
|
||||||
goto probe_sent;
|
goto probe_sent;
|
||||||
goto rearm_timer;
|
goto rearm_timer;
|
||||||
@ -2593,8 +2790,13 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
|
|||||||
if (unlikely(sk->sk_state == TCP_CLOSE))
|
if (unlikely(sk->sk_state == TCP_CLOSE))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (tcp_sk(sk)->ops->write_xmit(sk, cur_mss, nonagle, 0,
|
||||||
|
sk_gfp_mask(sk, GFP_ATOMIC)))
|
||||||
|
#else
|
||||||
if (tcp_write_xmit(sk, cur_mss, nonagle, 0,
|
if (tcp_write_xmit(sk, cur_mss, nonagle, 0,
|
||||||
sk_gfp_mask(sk, GFP_ATOMIC)))
|
sk_gfp_mask(sk, GFP_ATOMIC)))
|
||||||
|
#endif
|
||||||
tcp_check_probe_timer(sk);
|
tcp_check_probe_timer(sk);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2607,7 +2809,12 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
|
|||||||
|
|
||||||
BUG_ON(!skb || skb->len < mss_now);
|
BUG_ON(!skb || skb->len < mss_now);
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
tcp_sk(sk)->ops->write_xmit(sk, mss_now, TCP_NAGLE_PUSH, 1,
|
||||||
|
sk->sk_allocation);
|
||||||
|
#else
|
||||||
tcp_write_xmit(sk, mss_now, TCP_NAGLE_PUSH, 1, sk->sk_allocation);
|
tcp_write_xmit(sk, mss_now, TCP_NAGLE_PUSH, 1, sk->sk_allocation);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/* This function returns the amount that we can raise the
|
/* This function returns the amount that we can raise the
|
||||||
@ -2829,6 +3036,11 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
|
|||||||
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
|
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
/* Currently not supported for MPTCP - but it should be possible */
|
||||||
|
if (mptcp(tp))
|
||||||
|
return;
|
||||||
|
#endif
|
||||||
skb_rbtree_walk_from_safe(skb, tmp) {
|
skb_rbtree_walk_from_safe(skb, tmp) {
|
||||||
if (!tcp_can_collapse(sk, skb))
|
if (!tcp_can_collapse(sk, skb))
|
||||||
break;
|
break;
|
||||||
@ -3308,7 +3520,11 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
|
|||||||
|
|
||||||
/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
|
/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
|
||||||
th->window = htons(min(req->rsk_rcv_wnd, 65535U));
|
th->window = htons(min(req->rsk_rcv_wnd, 65535U));
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
tcp_options_write((__be32 *)(th + 1), NULL, &opts, skb);
|
||||||
|
#else
|
||||||
tcp_options_write((__be32 *)(th + 1), NULL, &opts);
|
tcp_options_write((__be32 *)(th + 1), NULL, &opts);
|
||||||
|
#endif
|
||||||
th->doff = (tcp_header_size >> 2);
|
th->doff = (tcp_header_size >> 2);
|
||||||
__TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
|
__TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
|
||||||
|
|
||||||
@ -3389,6 +3605,15 @@ static void tcp_connect_init(struct sock *sk)
|
|||||||
if (rcv_wnd == 0)
|
if (rcv_wnd == 0)
|
||||||
rcv_wnd = dst_metric(dst, RTAX_INITRWND);
|
rcv_wnd = dst_metric(dst, RTAX_INITRWND);
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
tp->ops->select_initial_window(sk, tcp_full_space(sk),
|
||||||
|
tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
|
||||||
|
&tp->rcv_wnd,
|
||||||
|
&tp->window_clamp,
|
||||||
|
sock_net(sk)->ipv4.sysctl_tcp_window_scaling,
|
||||||
|
&rcv_wscale,
|
||||||
|
rcv_wnd);
|
||||||
|
#else
|
||||||
tcp_select_initial_window(sk, tcp_full_space(sk),
|
tcp_select_initial_window(sk, tcp_full_space(sk),
|
||||||
tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
|
tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
|
||||||
&tp->rcv_wnd,
|
&tp->rcv_wnd,
|
||||||
@ -3396,6 +3621,7 @@ static void tcp_connect_init(struct sock *sk)
|
|||||||
sock_net(sk)->ipv4.sysctl_tcp_window_scaling,
|
sock_net(sk)->ipv4.sysctl_tcp_window_scaling,
|
||||||
&rcv_wscale,
|
&rcv_wscale,
|
||||||
rcv_wnd);
|
rcv_wnd);
|
||||||
|
#endif
|
||||||
|
|
||||||
tp->rx_opt.rcv_wscale = rcv_wscale;
|
tp->rx_opt.rcv_wscale = rcv_wscale;
|
||||||
tp->rcv_ssthresh = tp->rcv_wnd;
|
tp->rcv_ssthresh = tp->rcv_wnd;
|
||||||
@ -3420,6 +3646,36 @@ static void tcp_connect_init(struct sock *sk)
|
|||||||
inet_csk(sk)->icsk_rto = tcp_timeout_init(sk);
|
inet_csk(sk)->icsk_rto = tcp_timeout_init(sk);
|
||||||
inet_csk(sk)->icsk_retransmits = 0;
|
inet_csk(sk)->icsk_retransmits = 0;
|
||||||
tcp_clear_retrans(tp);
|
tcp_clear_retrans(tp);
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (sock_flag(sk, SOCK_MPTCP) && mptcp_doit(sk)) {
|
||||||
|
if (is_master_tp(tp)) {
|
||||||
|
tp->request_mptcp = 1;
|
||||||
|
mptcp_connect_init(sk);
|
||||||
|
} else if (tp->mptcp) {
|
||||||
|
struct inet_sock *inet = inet_sk(sk);
|
||||||
|
|
||||||
|
tp->mptcp->snt_isn = tp->write_seq;
|
||||||
|
tp->mptcp->init_rcv_wnd = tp->rcv_wnd;
|
||||||
|
|
||||||
|
/* Set nonce for new subflows */
|
||||||
|
if (sk->sk_family == AF_INET)
|
||||||
|
tp->mptcp->mptcp_loc_nonce = mptcp_v4_get_nonce(
|
||||||
|
inet->inet_saddr,
|
||||||
|
inet->inet_daddr,
|
||||||
|
inet->inet_sport,
|
||||||
|
inet->inet_dport);
|
||||||
|
#if IS_ENABLED(CONFIG_IPV6)
|
||||||
|
else
|
||||||
|
tp->mptcp->mptcp_loc_nonce = mptcp_v6_get_nonce(
|
||||||
|
inet6_sk(sk)->saddr.s6_addr32,
|
||||||
|
sk->sk_v6_daddr.s6_addr32,
|
||||||
|
inet->inet_sport,
|
||||||
|
inet->inet_dport);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
|
static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
|
||||||
@ -3685,6 +3941,9 @@ void tcp_send_ack(struct sock *sk)
|
|||||||
{
|
{
|
||||||
__tcp_send_ack(sk, tcp_sk(sk)->rcv_nxt);
|
__tcp_send_ack(sk, tcp_sk(sk)->rcv_nxt);
|
||||||
}
|
}
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
EXPORT_SYMBOL_GPL(tcp_send_ack);
|
||||||
|
#endif
|
||||||
|
|
||||||
/* This routine sends a packet with an out of date sequence
|
/* This routine sends a packet with an out of date sequence
|
||||||
* number. It assumes the other end will try to ack it.
|
* number. It assumes the other end will try to ack it.
|
||||||
@ -3697,7 +3956,10 @@ void tcp_send_ack(struct sock *sk)
|
|||||||
* one is with SEG.SEQ=SND.UNA to deliver urgent pointer, another is
|
* one is with SEG.SEQ=SND.UNA to deliver urgent pointer, another is
|
||||||
* out-of-date with SND.UNA-1 to probe window.
|
* out-of-date with SND.UNA-1 to probe window.
|
||||||
*/
|
*/
|
||||||
static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib)
|
#ifndef CONFIG_MPTCP
|
||||||
|
static
|
||||||
|
#endif
|
||||||
|
int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib)
|
||||||
{
|
{
|
||||||
struct tcp_sock *tp = tcp_sk(sk);
|
struct tcp_sock *tp = tcp_sk(sk);
|
||||||
struct sk_buff *skb;
|
struct sk_buff *skb;
|
||||||
@ -3784,7 +4046,11 @@ void tcp_send_probe0(struct sock *sk)
|
|||||||
unsigned long probe_max;
|
unsigned long probe_max;
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
err = tp->ops->write_wakeup(sk, LINUX_MIB_TCPWINPROBE);
|
||||||
|
#else
|
||||||
err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE);
|
err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE);
|
||||||
|
#endif
|
||||||
|
|
||||||
if (tp->packets_out || tcp_write_queue_empty(sk)) {
|
if (tp->packets_out || tcp_write_queue_empty(sk)) {
|
||||||
/* Cancel probe timer, if it is not required. */
|
/* Cancel probe timer, if it is not required. */
|
||||||
|
@ -20,6 +20,9 @@
|
|||||||
|
|
||||||
#include <linux/module.h>
|
#include <linux/module.h>
|
||||||
#include <linux/gfp.h>
|
#include <linux/gfp.h>
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
#include <net/mptcp.h>
|
||||||
|
#endif
|
||||||
#include <net/tcp.h>
|
#include <net/tcp.h>
|
||||||
|
|
||||||
static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
|
static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
|
||||||
@ -78,7 +81,10 @@ int tcp_use_userconfig_sysctl_handler(struct ctl_table *table, int write,
|
|||||||
*
|
*
|
||||||
* Returns: Nothing (void)
|
* Returns: Nothing (void)
|
||||||
*/
|
*/
|
||||||
static void tcp_write_err(struct sock *sk)
|
#ifndef CONFIG_MPTCP
|
||||||
|
static
|
||||||
|
#endif
|
||||||
|
void tcp_write_err(struct sock *sk)
|
||||||
{
|
{
|
||||||
sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
|
sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
|
||||||
sk->sk_error_report(sk);
|
sk->sk_error_report(sk);
|
||||||
@ -134,7 +140,11 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
|
|||||||
(!tp->snd_wnd && !tp->packets_out))
|
(!tp->snd_wnd && !tp->packets_out))
|
||||||
do_reset = true;
|
do_reset = true;
|
||||||
if (do_reset)
|
if (do_reset)
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
tp->ops->send_active_reset(sk, GFP_ATOMIC);
|
||||||
|
#else
|
||||||
tcp_send_active_reset(sk, GFP_ATOMIC);
|
tcp_send_active_reset(sk, GFP_ATOMIC);
|
||||||
|
#endif
|
||||||
tcp_done(sk);
|
tcp_done(sk);
|
||||||
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY);
|
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY);
|
||||||
return 1;
|
return 1;
|
||||||
@ -219,7 +229,10 @@ static unsigned int tcp_model_timeout(struct sock *sk,
|
|||||||
* after "boundary" unsuccessful, exponentially backed-off
|
* after "boundary" unsuccessful, exponentially backed-off
|
||||||
* retransmissions with an initial RTO of TCP_RTO_MIN.
|
* retransmissions with an initial RTO of TCP_RTO_MIN.
|
||||||
*/
|
*/
|
||||||
static bool retransmits_timed_out(struct sock *sk,
|
#ifndef CONFIG_MPTCP
|
||||||
|
static
|
||||||
|
#endif
|
||||||
|
bool retransmits_timed_out(struct sock *sk,
|
||||||
unsigned int boundary,
|
unsigned int boundary,
|
||||||
unsigned int timeout)
|
unsigned int timeout)
|
||||||
{
|
{
|
||||||
@ -241,7 +254,10 @@ static bool retransmits_timed_out(struct sock *sk,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* A write timeout has occurred. Process the after effects. */
|
/* A write timeout has occurred. Process the after effects. */
|
||||||
static int tcp_write_timeout(struct sock *sk)
|
#ifndef CONFIG_MPTCP
|
||||||
|
static
|
||||||
|
#endif
|
||||||
|
int tcp_write_timeout(struct sock *sk)
|
||||||
{
|
{
|
||||||
struct inet_connection_sock *icsk = inet_csk(sk);
|
struct inet_connection_sock *icsk = inet_csk(sk);
|
||||||
struct tcp_sock *tp = tcp_sk(sk);
|
struct tcp_sock *tp = tcp_sk(sk);
|
||||||
@ -256,6 +272,17 @@ static int tcp_write_timeout(struct sock *sk)
|
|||||||
sk_rethink_txhash(sk);
|
sk_rethink_txhash(sk);
|
||||||
}
|
}
|
||||||
retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
|
retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
/* Stop retransmitting MP_CAPABLE options in SYN if timed out. */
|
||||||
|
if (tcp_sk(sk)->request_mptcp &&
|
||||||
|
icsk->icsk_retransmits >= sysctl_mptcp_syn_retries) {
|
||||||
|
tcp_sk(sk)->request_mptcp = 0;
|
||||||
|
|
||||||
|
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLERETRANSFALLBACK);
|
||||||
|
}
|
||||||
|
#endif /* CONFIG_MPTCP */
|
||||||
|
|
||||||
expired = icsk->icsk_retransmits >= retry_until;
|
expired = icsk->icsk_retransmits >= retry_until;
|
||||||
} else {
|
} else {
|
||||||
if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1), 0)) {
|
if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1), 0)) {
|
||||||
@ -351,18 +378,36 @@ static void tcp_delack_timer(struct timer_list *t)
|
|||||||
struct inet_connection_sock *icsk =
|
struct inet_connection_sock *icsk =
|
||||||
from_timer(icsk, t, icsk_delack_timer);
|
from_timer(icsk, t, icsk_delack_timer);
|
||||||
struct sock *sk = &icsk->icsk_inet.sk;
|
struct sock *sk = &icsk->icsk_inet.sk;
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
struct tcp_sock *tp = tcp_sk(sk);
|
||||||
|
struct sock *meta_sk = mptcp(tp) ? mptcp_meta_sk(sk) : sk;
|
||||||
|
|
||||||
|
bh_lock_sock(meta_sk);
|
||||||
|
if (!sock_owned_by_user(meta_sk)) {
|
||||||
|
#else
|
||||||
bh_lock_sock(sk);
|
bh_lock_sock(sk);
|
||||||
if (!sock_owned_by_user(sk)) {
|
if (!sock_owned_by_user(sk)) {
|
||||||
|
#endif
|
||||||
tcp_delack_timer_handler(sk);
|
tcp_delack_timer_handler(sk);
|
||||||
} else {
|
} else {
|
||||||
icsk->icsk_ack.blocked = 1;
|
icsk->icsk_ack.blocked = 1;
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
__NET_INC_STATS(sock_net(meta_sk), LINUX_MIB_DELAYEDACKLOCKED);
|
||||||
|
#else
|
||||||
__NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
|
__NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
|
||||||
|
#endif
|
||||||
/* deleguate our work to tcp_release_cb() */
|
/* deleguate our work to tcp_release_cb() */
|
||||||
if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &sk->sk_tsq_flags))
|
if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &sk->sk_tsq_flags))
|
||||||
sock_hold(sk);
|
sock_hold(sk);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (mptcp(tp))
|
||||||
|
mptcp_tsq_flags(sk);
|
||||||
|
}
|
||||||
|
bh_unlock_sock(meta_sk);
|
||||||
|
#else
|
||||||
}
|
}
|
||||||
bh_unlock_sock(sk);
|
bh_unlock_sock(sk);
|
||||||
|
#endif
|
||||||
sock_put(sk);
|
sock_put(sk);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -406,7 +451,16 @@ static void tcp_probe_timer(struct sock *sk)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (icsk->icsk_probes_out >= max_probes) {
|
if (icsk->icsk_probes_out >= max_probes) {
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
abort:
|
||||||
|
tcp_write_err(sk);
|
||||||
|
if (is_meta_sk(sk) &&
|
||||||
|
mptcp_in_infinite_mapping_weak(tp->mpcb)) {
|
||||||
|
mptcp_sub_force_close_all(tp->mpcb, NULL);
|
||||||
|
}
|
||||||
|
#else
|
||||||
abort: tcp_write_err(sk);
|
abort: tcp_write_err(sk);
|
||||||
|
#endif
|
||||||
} else {
|
} else {
|
||||||
/* Only send another probe if we didn't close things up. */
|
/* Only send another probe if we didn't close things up. */
|
||||||
tcp_send_probe0(sk);
|
tcp_send_probe0(sk);
|
||||||
@ -620,7 +674,11 @@ void tcp_write_timer_handler(struct sock *sk)
|
|||||||
break;
|
break;
|
||||||
case ICSK_TIME_RETRANS:
|
case ICSK_TIME_RETRANS:
|
||||||
icsk->icsk_pending = 0;
|
icsk->icsk_pending = 0;
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
tcp_sk(sk)->ops->retransmit_timer(sk);
|
||||||
|
#else
|
||||||
tcp_retransmit_timer(sk);
|
tcp_retransmit_timer(sk);
|
||||||
|
#endif
|
||||||
break;
|
break;
|
||||||
case ICSK_TIME_PROBE0:
|
case ICSK_TIME_PROBE0:
|
||||||
icsk->icsk_pending = 0;
|
icsk->icsk_pending = 0;
|
||||||
@ -637,16 +695,29 @@ static void tcp_write_timer(struct timer_list *t)
|
|||||||
struct inet_connection_sock *icsk =
|
struct inet_connection_sock *icsk =
|
||||||
from_timer(icsk, t, icsk_retransmit_timer);
|
from_timer(icsk, t, icsk_retransmit_timer);
|
||||||
struct sock *sk = &icsk->icsk_inet.sk;
|
struct sock *sk = &icsk->icsk_inet.sk;
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
struct sock *meta_sk = mptcp(tcp_sk(sk)) ? mptcp_meta_sk(sk) : sk;
|
||||||
|
|
||||||
|
bh_lock_sock(meta_sk);
|
||||||
|
if (!sock_owned_by_user(meta_sk)) {
|
||||||
|
#else
|
||||||
bh_lock_sock(sk);
|
bh_lock_sock(sk);
|
||||||
if (!sock_owned_by_user(sk)) {
|
if (!sock_owned_by_user(sk)) {
|
||||||
|
#endif
|
||||||
tcp_write_timer_handler(sk);
|
tcp_write_timer_handler(sk);
|
||||||
} else {
|
} else {
|
||||||
/* delegate our work to tcp_release_cb() */
|
/* delegate our work to tcp_release_cb() */
|
||||||
if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &sk->sk_tsq_flags))
|
if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &sk->sk_tsq_flags))
|
||||||
sock_hold(sk);
|
sock_hold(sk);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (mptcp(tcp_sk(sk)))
|
||||||
|
mptcp_tsq_flags(sk);
|
||||||
|
}
|
||||||
|
bh_unlock_sock(meta_sk);
|
||||||
|
#else
|
||||||
}
|
}
|
||||||
bh_unlock_sock(sk);
|
bh_unlock_sock(sk);
|
||||||
|
#endif
|
||||||
sock_put(sk);
|
sock_put(sk);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -676,11 +747,19 @@ static void tcp_keepalive_timer (struct timer_list *t)
|
|||||||
struct sock *sk = from_timer(sk, t, sk_timer);
|
struct sock *sk = from_timer(sk, t, sk_timer);
|
||||||
struct inet_connection_sock *icsk = inet_csk(sk);
|
struct inet_connection_sock *icsk = inet_csk(sk);
|
||||||
struct tcp_sock *tp = tcp_sk(sk);
|
struct tcp_sock *tp = tcp_sk(sk);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
struct sock *meta_sk = mptcp(tp) ? mptcp_meta_sk(sk) : sk;
|
||||||
|
#endif
|
||||||
u32 elapsed;
|
u32 elapsed;
|
||||||
|
|
||||||
/* Only process if socket is not in use. */
|
/* Only process if socket is not in use. */
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
bh_lock_sock(meta_sk);
|
||||||
|
if (sock_owned_by_user(meta_sk)) {
|
||||||
|
#else
|
||||||
bh_lock_sock(sk);
|
bh_lock_sock(sk);
|
||||||
if (sock_owned_by_user(sk)) {
|
if (sock_owned_by_user(sk)) {
|
||||||
|
#endif
|
||||||
/* Try again later. */
|
/* Try again later. */
|
||||||
inet_csk_reset_keepalive_timer (sk, HZ/20);
|
inet_csk_reset_keepalive_timer (sk, HZ/20);
|
||||||
goto out;
|
goto out;
|
||||||
@ -692,16 +771,39 @@ static void tcp_keepalive_timer (struct timer_list *t)
|
|||||||
}
|
}
|
||||||
|
|
||||||
tcp_mstamp_refresh(tp);
|
tcp_mstamp_refresh(tp);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (tp->send_mp_fclose) {
|
||||||
|
if (icsk->icsk_retransmits >= MPTCP_FASTCLOSE_RETRIES) {
|
||||||
|
tcp_write_err(sk);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
tcp_send_ack(sk);
|
||||||
|
icsk->icsk_retransmits++;
|
||||||
|
|
||||||
|
icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
|
||||||
|
elapsed = icsk->icsk_rto;
|
||||||
|
goto resched;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
|
if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
|
||||||
if (tp->linger2 >= 0) {
|
if (tp->linger2 >= 0) {
|
||||||
const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN;
|
const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN;
|
||||||
|
|
||||||
if (tmo > 0) {
|
if (tmo > 0) {
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
tp->ops->time_wait(sk, TCP_FIN_WAIT2, tmo);
|
||||||
|
#else
|
||||||
tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
|
tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
|
||||||
|
#endif
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
tp->ops->send_active_reset(sk, GFP_ATOMIC);
|
||||||
|
#else
|
||||||
tcp_send_active_reset(sk, GFP_ATOMIC);
|
tcp_send_active_reset(sk, GFP_ATOMIC);
|
||||||
|
#endif
|
||||||
goto death;
|
goto death;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -726,11 +828,20 @@ static void tcp_keepalive_timer (struct timer_list *t)
|
|||||||
icsk->icsk_probes_out > 0) ||
|
icsk->icsk_probes_out > 0) ||
|
||||||
(icsk->icsk_user_timeout == 0 &&
|
(icsk->icsk_user_timeout == 0 &&
|
||||||
icsk->icsk_probes_out >= keepalive_probes(tp))) {
|
icsk->icsk_probes_out >= keepalive_probes(tp))) {
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
tp->ops->send_active_reset(sk, GFP_ATOMIC);
|
||||||
|
#else
|
||||||
tcp_send_active_reset(sk, GFP_ATOMIC);
|
tcp_send_active_reset(sk, GFP_ATOMIC);
|
||||||
|
#endif
|
||||||
tcp_write_err(sk);
|
tcp_write_err(sk);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (tp->ops->write_wakeup(sk, LINUX_MIB_TCPKEEPALIVE) <= 0) {
|
||||||
|
#else
|
||||||
if (tcp_write_wakeup(sk, LINUX_MIB_TCPKEEPALIVE) <= 0) {
|
if (tcp_write_wakeup(sk, LINUX_MIB_TCPKEEPALIVE) <= 0) {
|
||||||
|
#endif
|
||||||
icsk->icsk_probes_out++;
|
icsk->icsk_probes_out++;
|
||||||
elapsed = keepalive_intvl_when(tp);
|
elapsed = keepalive_intvl_when(tp);
|
||||||
} else {
|
} else {
|
||||||
@ -754,7 +865,11 @@ death:
|
|||||||
tcp_done(sk);
|
tcp_done(sk);
|
||||||
|
|
||||||
out:
|
out:
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
bh_unlock_sock(meta_sk);
|
||||||
|
#else
|
||||||
bh_unlock_sock(sk);
|
bh_unlock_sock(sk);
|
||||||
|
#endif
|
||||||
sock_put(sk);
|
sock_put(sk);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -931,6 +931,10 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
|
|||||||
kfree_rcu(ifp, rcu);
|
kfree_rcu(ifp, rcu);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
EXPORT_SYMBOL(inet6_ifa_finish_destroy);
|
||||||
|
#endif
|
||||||
|
|
||||||
static void
|
static void
|
||||||
ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp)
|
ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp)
|
||||||
{
|
{
|
||||||
|
@ -121,8 +121,11 @@ static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
|
|||||||
return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
|
return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int inet6_create(struct net *net, struct socket *sock, int protocol,
|
#ifndef CONFIG_MPTCP
|
||||||
int kern)
|
static
|
||||||
|
#endif
|
||||||
|
int inet6_create(struct net *net, struct socket *sock, int protocol,
|
||||||
|
int kern)
|
||||||
{
|
{
|
||||||
struct inet_sock *inet;
|
struct inet_sock *inet;
|
||||||
struct ipv6_pinfo *np;
|
struct ipv6_pinfo *np;
|
||||||
|
@ -48,6 +48,10 @@
|
|||||||
#include <net/addrconf.h>
|
#include <net/addrconf.h>
|
||||||
#include <net/inet_common.h>
|
#include <net/inet_common.h>
|
||||||
#include <net/tcp.h>
|
#include <net/tcp.h>
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
#include <net/mptcp.h>
|
||||||
|
#include <net/mptcp_v4.h>
|
||||||
|
#endif
|
||||||
#include <net/udp.h>
|
#include <net/udp.h>
|
||||||
#include <net/udplite.h>
|
#include <net/udplite.h>
|
||||||
#include <net/xfrm.h>
|
#include <net/xfrm.h>
|
||||||
@ -68,6 +72,10 @@ int ip6_ra_control(struct sock *sk, int sel)
|
|||||||
return -ENOPROTOOPT;
|
return -ENOPROTOOPT;
|
||||||
|
|
||||||
new_ra = (sel >= 0) ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
|
new_ra = (sel >= 0) ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (sel >= 0 && !new_ra)
|
||||||
|
return -ENOMEM;
|
||||||
|
#endif
|
||||||
|
|
||||||
write_lock_bh(&ip6_ra_lock);
|
write_lock_bh(&ip6_ra_lock);
|
||||||
for (rap = &ip6_ra_chain; (ra = *rap) != NULL; rap = &ra->next) {
|
for (rap = &ip6_ra_chain; (ra = *rap) != NULL; rap = &ra->next) {
|
||||||
@ -223,7 +231,12 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
|
|||||||
sock_prot_inuse_add(net, &tcp_prot, 1);
|
sock_prot_inuse_add(net, &tcp_prot, 1);
|
||||||
local_bh_enable();
|
local_bh_enable();
|
||||||
sk->sk_prot = &tcp_prot;
|
sk->sk_prot = &tcp_prot;
|
||||||
icsk->icsk_af_ops = &ipv4_specific;
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (sock_flag(sk, SOCK_MPTCP))
|
||||||
|
icsk->icsk_af_ops = &mptcp_v4_specific;
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
icsk->icsk_af_ops = &ipv4_specific;
|
||||||
sk->sk_socket->ops = &inet_stream_ops;
|
sk->sk_socket->ops = &inet_stream_ops;
|
||||||
sk->sk_family = PF_INET;
|
sk->sk_family = PF_INET;
|
||||||
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
|
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
|
||||||
|
@ -20,6 +20,10 @@
|
|||||||
#include <linux/kernel.h>
|
#include <linux/kernel.h>
|
||||||
#include <net/secure_seq.h>
|
#include <net/secure_seq.h>
|
||||||
#include <net/ipv6.h>
|
#include <net/ipv6.h>
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
#include <net/mptcp.h>
|
||||||
|
#include <net/mptcp_v6.h>
|
||||||
|
#endif
|
||||||
#include <net/tcp.h>
|
#include <net/tcp.h>
|
||||||
|
|
||||||
#define COOKIEBITS 24 /* Upper bits store count */
|
#define COOKIEBITS 24 /* Upper bits store count */
|
||||||
@ -111,7 +115,12 @@ u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph,
|
|||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(__cookie_v6_init_sequence);
|
EXPORT_SYMBOL_GPL(__cookie_v6_init_sequence);
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
__u32 cookie_v6_init_sequence(struct request_sock *req, const struct sock *sk,
|
||||||
|
const struct sk_buff *skb, __u16 *mssp)
|
||||||
|
#else
|
||||||
__u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mssp)
|
__u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mssp)
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
const struct ipv6hdr *iph = ipv6_hdr(skb);
|
const struct ipv6hdr *iph = ipv6_hdr(skb);
|
||||||
const struct tcphdr *th = tcp_hdr(skb);
|
const struct tcphdr *th = tcp_hdr(skb);
|
||||||
@ -133,6 +142,9 @@ EXPORT_SYMBOL_GPL(__cookie_v6_check);
|
|||||||
struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
|
struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
|
||||||
{
|
{
|
||||||
struct tcp_options_received tcp_opt;
|
struct tcp_options_received tcp_opt;
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
struct mptcp_options_received mopt;
|
||||||
|
#endif
|
||||||
struct inet_request_sock *ireq;
|
struct inet_request_sock *ireq;
|
||||||
struct tcp_request_sock *treq;
|
struct tcp_request_sock *treq;
|
||||||
struct ipv6_pinfo *np = inet6_sk(sk);
|
struct ipv6_pinfo *np = inet6_sk(sk);
|
||||||
@ -162,7 +174,12 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
|
|||||||
|
|
||||||
/* check for timestamp cookie support */
|
/* check for timestamp cookie support */
|
||||||
memset(&tcp_opt, 0, sizeof(tcp_opt));
|
memset(&tcp_opt, 0, sizeof(tcp_opt));
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
mptcp_init_mp_opt(&mopt);
|
||||||
|
tcp_parse_options(sock_net(sk), skb, &tcp_opt, &mopt, 0, NULL, NULL);
|
||||||
|
#else
|
||||||
tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL);
|
tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL);
|
||||||
|
#endif
|
||||||
|
|
||||||
if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
|
if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
|
||||||
tsoff = secure_tcpv6_ts_off(sock_net(sk),
|
tsoff = secure_tcpv6_ts_off(sock_net(sk),
|
||||||
@ -175,15 +192,32 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
|
|||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
ret = NULL;
|
ret = NULL;
|
||||||
req = inet_reqsk_alloc(&tcp6_request_sock_ops, sk, false);
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (mopt.saw_mpc)
|
||||||
|
req = inet_reqsk_alloc(&mptcp6_request_sock_ops, sk, false);
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
req = inet_reqsk_alloc(&tcp6_request_sock_ops, sk, false);
|
||||||
if (!req)
|
if (!req)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
ireq = inet_rsk(req);
|
ireq = inet_rsk(req);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
ireq->mptcp_rqsk = 0;
|
||||||
|
ireq->saw_mpc = 0;
|
||||||
|
#endif
|
||||||
treq = tcp_rsk(req);
|
treq = tcp_rsk(req);
|
||||||
treq->af_specific = &tcp_request_sock_ipv6_ops;
|
treq->af_specific = &tcp_request_sock_ipv6_ops;
|
||||||
treq->tfo_listener = false;
|
treq->tfo_listener = false;
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
/* Must be done before anything else, as it initializes
|
||||||
|
* hash_entry of the MPTCP request-sock.
|
||||||
|
*/
|
||||||
|
if (mopt.saw_mpc)
|
||||||
|
mptcp_cookies_reqsk_init(req, &mopt, skb);
|
||||||
|
#endif
|
||||||
|
|
||||||
if (security_inet_conn_request(sk, skb, req))
|
if (security_inet_conn_request(sk, skb, req))
|
||||||
goto out_free;
|
goto out_free;
|
||||||
|
|
||||||
@ -253,10 +287,17 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
|
|||||||
(req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0))
|
(req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0))
|
||||||
req->rsk_window_clamp = full_space;
|
req->rsk_window_clamp = full_space;
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
tp->ops->select_initial_window(sk, full_space, req->mss,
|
||||||
|
&req->rsk_rcv_wnd, &req->rsk_window_clamp,
|
||||||
|
ireq->wscale_ok, &rcv_wscale,
|
||||||
|
dst_metric(dst, RTAX_INITRWND));
|
||||||
|
#else
|
||||||
tcp_select_initial_window(sk, full_space, req->mss,
|
tcp_select_initial_window(sk, full_space, req->mss,
|
||||||
&req->rsk_rcv_wnd, &req->rsk_window_clamp,
|
&req->rsk_rcv_wnd, &req->rsk_window_clamp,
|
||||||
ireq->wscale_ok, &rcv_wscale,
|
ireq->wscale_ok, &rcv_wscale,
|
||||||
dst_metric(dst, RTAX_INITRWND));
|
dst_metric(dst, RTAX_INITRWND));
|
||||||
|
#endif
|
||||||
|
|
||||||
ireq->rcv_wscale = rcv_wscale;
|
ireq->rcv_wscale = rcv_wscale;
|
||||||
ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), dst);
|
ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), dst);
|
||||||
|
@ -61,6 +61,10 @@
|
|||||||
#include <net/timewait_sock.h>
|
#include <net/timewait_sock.h>
|
||||||
#include <net/inet_common.h>
|
#include <net/inet_common.h>
|
||||||
#include <net/secure_seq.h>
|
#include <net/secure_seq.h>
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
#include <net/mptcp.h>
|
||||||
|
#include <net/mptcp_v6.h>
|
||||||
|
#endif
|
||||||
#include <net/busy_poll.h>
|
#include <net/busy_poll.h>
|
||||||
|
|
||||||
#include <linux/proc_fs.h>
|
#include <linux/proc_fs.h>
|
||||||
@ -71,6 +75,7 @@
|
|||||||
|
|
||||||
#include <trace/events/tcp.h>
|
#include <trace/events/tcp.h>
|
||||||
|
|
||||||
|
#ifndef CONFIG_MPTCP
|
||||||
static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
|
static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
|
||||||
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
|
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
|
||||||
struct request_sock *req);
|
struct request_sock *req);
|
||||||
@ -79,6 +84,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
|
|||||||
|
|
||||||
static const struct inet_connection_sock_af_ops ipv6_mapped;
|
static const struct inet_connection_sock_af_ops ipv6_mapped;
|
||||||
static const struct inet_connection_sock_af_ops ipv6_specific;
|
static const struct inet_connection_sock_af_ops ipv6_specific;
|
||||||
|
#endif
|
||||||
#ifdef CONFIG_TCP_MD5SIG
|
#ifdef CONFIG_TCP_MD5SIG
|
||||||
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
|
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
|
||||||
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
|
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
|
||||||
@ -90,7 +96,10 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
|
#ifndef CONFIG_MPTCP
|
||||||
|
static
|
||||||
|
#endif
|
||||||
|
void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
|
||||||
{
|
{
|
||||||
struct dst_entry *dst = skb_dst(skb);
|
struct dst_entry *dst = skb_dst(skb);
|
||||||
|
|
||||||
@ -132,7 +141,10 @@ static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
|
|||||||
return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
|
return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
|
#ifndef CONFIG_MPTCP
|
||||||
|
static
|
||||||
|
#endif
|
||||||
|
int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
|
||||||
int addr_len)
|
int addr_len)
|
||||||
{
|
{
|
||||||
struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
|
struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
|
||||||
@ -229,7 +241,12 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
|
|||||||
sin.sin_port = usin->sin6_port;
|
sin.sin_port = usin->sin6_port;
|
||||||
sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
|
sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
|
||||||
|
|
||||||
icsk->icsk_af_ops = &ipv6_mapped;
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (sock_flag(sk, SOCK_MPTCP))
|
||||||
|
icsk->icsk_af_ops = &mptcp_v6_mapped;
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
icsk->icsk_af_ops = &ipv6_mapped;
|
||||||
sk->sk_backlog_rcv = tcp_v4_do_rcv;
|
sk->sk_backlog_rcv = tcp_v4_do_rcv;
|
||||||
#ifdef CONFIG_TCP_MD5SIG
|
#ifdef CONFIG_TCP_MD5SIG
|
||||||
tp->af_specific = &tcp_sock_ipv6_mapped_specific;
|
tp->af_specific = &tcp_sock_ipv6_mapped_specific;
|
||||||
@ -239,7 +256,12 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
|
|||||||
|
|
||||||
if (err) {
|
if (err) {
|
||||||
icsk->icsk_ext_hdr_len = exthdrlen;
|
icsk->icsk_ext_hdr_len = exthdrlen;
|
||||||
icsk->icsk_af_ops = &ipv6_specific;
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (sock_flag(sk, SOCK_MPTCP))
|
||||||
|
icsk->icsk_af_ops = &mptcp_v6_specific;
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
icsk->icsk_af_ops = &ipv6_specific;
|
||||||
sk->sk_backlog_rcv = tcp_v6_do_rcv;
|
sk->sk_backlog_rcv = tcp_v6_do_rcv;
|
||||||
#ifdef CONFIG_TCP_MD5SIG
|
#ifdef CONFIG_TCP_MD5SIG
|
||||||
tp->af_specific = &tcp_sock_ipv6_specific;
|
tp->af_specific = &tcp_sock_ipv6_specific;
|
||||||
@ -333,7 +355,10 @@ failure:
|
|||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void tcp_v6_mtu_reduced(struct sock *sk)
|
#ifndef CONFIG_MPTCP
|
||||||
|
static
|
||||||
|
#endif
|
||||||
|
void tcp_v6_mtu_reduced(struct sock *sk)
|
||||||
{
|
{
|
||||||
struct dst_entry *dst;
|
struct dst_entry *dst;
|
||||||
u32 mtu;
|
u32 mtu;
|
||||||
@ -370,6 +395,9 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
|
|||||||
struct tcp_sock *tp;
|
struct tcp_sock *tp;
|
||||||
__u32 seq, snd_una;
|
__u32 seq, snd_una;
|
||||||
struct sock *sk;
|
struct sock *sk;
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
struct sock *meta_sk;
|
||||||
|
#endif
|
||||||
bool fatal;
|
bool fatal;
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
@ -393,8 +421,19 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
|
|||||||
if (sk->sk_state == TCP_NEW_SYN_RECV)
|
if (sk->sk_state == TCP_NEW_SYN_RECV)
|
||||||
return tcp_req_err(sk, seq, fatal);
|
return tcp_req_err(sk, seq, fatal);
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
tp = tcp_sk(sk);
|
||||||
|
if (mptcp(tp))
|
||||||
|
meta_sk = mptcp_meta_sk(sk);
|
||||||
|
else
|
||||||
|
meta_sk = sk;
|
||||||
|
|
||||||
|
bh_lock_sock(meta_sk);
|
||||||
|
if (sock_owned_by_user(meta_sk) && type != ICMPV6_PKT_TOOBIG)
|
||||||
|
#else
|
||||||
bh_lock_sock(sk);
|
bh_lock_sock(sk);
|
||||||
if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
|
if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
|
||||||
|
#endif
|
||||||
__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
|
__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
|
||||||
|
|
||||||
if (sk->sk_state == TCP_CLOSE)
|
if (sk->sk_state == TCP_CLOSE)
|
||||||
@ -405,7 +444,9 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
|
|||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef CONFIG_MPTCP
|
||||||
tp = tcp_sk(sk);
|
tp = tcp_sk(sk);
|
||||||
|
#endif
|
||||||
/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
|
/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
|
||||||
fastopen = tp->fastopen_rsk;
|
fastopen = tp->fastopen_rsk;
|
||||||
snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
|
snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
|
||||||
@ -445,11 +486,27 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
|
|||||||
|
|
||||||
WRITE_ONCE(tp->mtu_info, mtu);
|
WRITE_ONCE(tp->mtu_info, mtu);
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (!sock_owned_by_user(meta_sk)) {
|
||||||
|
#else
|
||||||
if (!sock_owned_by_user(sk))
|
if (!sock_owned_by_user(sk))
|
||||||
|
#endif
|
||||||
tcp_v6_mtu_reduced(sk);
|
tcp_v6_mtu_reduced(sk);
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
} else {
|
||||||
|
if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
|
||||||
|
&sk->sk_tsq_flags))
|
||||||
|
|
||||||
|
sock_hold(sk);
|
||||||
|
if (mptcp(tp))
|
||||||
|
mptcp_tsq_flags(sk);
|
||||||
|
}
|
||||||
|
#else
|
||||||
else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
|
else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
|
||||||
&sk->sk_tsq_flags))
|
&sk->sk_tsq_flags))
|
||||||
sock_hold(sk);
|
sock_hold(sk);
|
||||||
|
#endif
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -463,8 +520,11 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
|
|||||||
*/
|
*/
|
||||||
if (fastopen && !fastopen->sk)
|
if (fastopen && !fastopen->sk)
|
||||||
break;
|
break;
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (!sock_owned_by_user(meta_sk)) {
|
||||||
|
#else
|
||||||
if (!sock_owned_by_user(sk)) {
|
if (!sock_owned_by_user(sk)) {
|
||||||
|
#endif
|
||||||
sk->sk_err = err;
|
sk->sk_err = err;
|
||||||
sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
|
sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
|
||||||
|
|
||||||
@ -474,14 +534,22 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
|
|||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (!sock_owned_by_user(meta_sk) && np->recverr) {
|
||||||
|
#else
|
||||||
if (!sock_owned_by_user(sk) && np->recverr) {
|
if (!sock_owned_by_user(sk) && np->recverr) {
|
||||||
|
#endif
|
||||||
sk->sk_err = err;
|
sk->sk_err = err;
|
||||||
sk->sk_error_report(sk);
|
sk->sk_error_report(sk);
|
||||||
} else
|
} else
|
||||||
sk->sk_err_soft = err;
|
sk->sk_err_soft = err;
|
||||||
|
|
||||||
out:
|
out:
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
bh_unlock_sock(meta_sk);
|
||||||
|
#else
|
||||||
bh_unlock_sock(sk);
|
bh_unlock_sock(sk);
|
||||||
|
#endif
|
||||||
sock_put(sk);
|
sock_put(sk);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -529,7 +597,10 @@ done:
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static void tcp_v6_reqsk_destructor(struct request_sock *req)
|
#ifndef CONFIG_MPTCP
|
||||||
|
static
|
||||||
|
#endif
|
||||||
|
void tcp_v6_reqsk_destructor(struct request_sock *req)
|
||||||
{
|
{
|
||||||
kfree(inet_rsk(req)->ipv6_opt);
|
kfree(inet_rsk(req)->ipv6_opt);
|
||||||
kfree_skb(inet_rsk(req)->pktopts);
|
kfree_skb(inet_rsk(req)->pktopts);
|
||||||
@ -747,9 +818,14 @@ static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
static int tcp_v6_init_req(struct request_sock *req, const struct sock *sk_listener,
|
||||||
|
struct sk_buff *skb, bool want_cookie)
|
||||||
|
#else
|
||||||
static void tcp_v6_init_req(struct request_sock *req,
|
static void tcp_v6_init_req(struct request_sock *req,
|
||||||
const struct sock *sk_listener,
|
const struct sock *sk_listener,
|
||||||
struct sk_buff *skb)
|
struct sk_buff *skb)
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
struct inet_request_sock *ireq = inet_rsk(req);
|
struct inet_request_sock *ireq = inet_rsk(req);
|
||||||
const struct ipv6_pinfo *np = inet6_sk(sk_listener);
|
const struct ipv6_pinfo *np = inet6_sk(sk_listener);
|
||||||
@ -770,6 +846,9 @@ static void tcp_v6_init_req(struct request_sock *req,
|
|||||||
refcount_inc(&skb->users);
|
refcount_inc(&skb->users);
|
||||||
ireq->pktopts = skb;
|
ireq->pktopts = skb;
|
||||||
}
|
}
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
return 0;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
|
static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
|
||||||
@ -789,6 +868,9 @@ struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
|
|||||||
.syn_ack_timeout = tcp_syn_ack_timeout,
|
.syn_ack_timeout = tcp_syn_ack_timeout,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#ifndef CONFIG_MPTCP
|
||||||
|
static
|
||||||
|
#endif
|
||||||
const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
|
const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
|
||||||
.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
|
.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
|
||||||
sizeof(struct ipv6hdr),
|
sizeof(struct ipv6hdr),
|
||||||
@ -806,10 +888,17 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
|
|||||||
.send_synack = tcp_v6_send_synack,
|
.send_synack = tcp_v6_send_synack,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
|
||||||
|
u32 ack, u32 data_ack, u32 win, u32 tsval, u32 tsecr,
|
||||||
|
int oif, struct tcp_md5sig_key *key, int rst,
|
||||||
|
u8 tclass, __be32 label, int mptcp)
|
||||||
|
#else
|
||||||
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
|
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
|
||||||
u32 ack, u32 win, u32 tsval, u32 tsecr,
|
u32 ack, u32 win, u32 tsval, u32 tsecr,
|
||||||
int oif, struct tcp_md5sig_key *key, int rst,
|
int oif, struct tcp_md5sig_key *key, int rst,
|
||||||
u8 tclass, __be32 label)
|
u8 tclass, __be32 label)
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
const struct tcphdr *th = tcp_hdr(skb);
|
const struct tcphdr *th = tcp_hdr(skb);
|
||||||
struct tcphdr *t1;
|
struct tcphdr *t1;
|
||||||
@ -828,7 +917,10 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
|
|||||||
if (key)
|
if (key)
|
||||||
tot_len += TCPOLEN_MD5SIG_ALIGNED;
|
tot_len += TCPOLEN_MD5SIG_ALIGNED;
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (mptcp)
|
||||||
|
tot_len += MPTCP_SUB_LEN_DSS + MPTCP_SUB_LEN_ACK;
|
||||||
|
#endif
|
||||||
buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
|
buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
|
||||||
GFP_ATOMIC);
|
GFP_ATOMIC);
|
||||||
if (!buff)
|
if (!buff)
|
||||||
@ -866,6 +958,19 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
|
|||||||
tcp_v6_md5_hash_hdr((__u8 *)topt, key,
|
tcp_v6_md5_hash_hdr((__u8 *)topt, key,
|
||||||
&ipv6_hdr(skb)->saddr,
|
&ipv6_hdr(skb)->saddr,
|
||||||
&ipv6_hdr(skb)->daddr, t1);
|
&ipv6_hdr(skb)->daddr, t1);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
topt += 4;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (mptcp) {
|
||||||
|
/* Construction of 32-bit data_ack */
|
||||||
|
*topt++ = htonl((TCPOPT_MPTCP << 24) |
|
||||||
|
((MPTCP_SUB_LEN_DSS + MPTCP_SUB_LEN_ACK) << 16) |
|
||||||
|
(0x20 << 8) |
|
||||||
|
(0x01));
|
||||||
|
*topt++ = htonl(data_ack);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -915,7 +1020,10 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
|
|||||||
kfree_skb(buff);
|
kfree_skb(buff);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
|
#ifndef CONFIG_MPTCP
|
||||||
|
static
|
||||||
|
#endif
|
||||||
|
void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
|
||||||
{
|
{
|
||||||
const struct tcphdr *th = tcp_hdr(skb);
|
const struct tcphdr *th = tcp_hdr(skb);
|
||||||
u32 seq = 0, ack_seq = 0;
|
u32 seq = 0, ack_seq = 0;
|
||||||
@ -983,7 +1091,11 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
|
|||||||
trace_tcp_send_reset(sk, skb);
|
trace_tcp_send_reset(sk, skb);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, 0, oif, key, 1, 0, 0, 0);
|
||||||
|
#else
|
||||||
tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
|
tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef CONFIG_TCP_MD5SIG
|
#ifdef CONFIG_TCP_MD5SIG
|
||||||
out:
|
out:
|
||||||
@ -991,6 +1103,16 @@ out:
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
|
||||||
|
u32 ack, u32 data_ack, u32 win, u32 tsval, u32 tsecr, int oif,
|
||||||
|
struct tcp_md5sig_key *key, u8 tclass,
|
||||||
|
__be32 label, int mptcp)
|
||||||
|
{
|
||||||
|
tcp_v6_send_response(sk, skb, seq, ack, data_ack, win, tsval, tsecr, oif,
|
||||||
|
key, 0, tclass, label, mptcp);
|
||||||
|
}
|
||||||
|
#else
|
||||||
static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
|
static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
|
||||||
u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
|
u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
|
||||||
struct tcp_md5sig_key *key, u8 tclass,
|
struct tcp_md5sig_key *key, u8 tclass,
|
||||||
@ -999,23 +1121,43 @@ static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
|
|||||||
tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
|
tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
|
||||||
tclass, label);
|
tclass, label);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
|
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
|
||||||
{
|
{
|
||||||
struct inet_timewait_sock *tw = inet_twsk(sk);
|
struct inet_timewait_sock *tw = inet_twsk(sk);
|
||||||
struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
|
struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
u32 data_ack = 0;
|
||||||
|
int mptcp = 0;
|
||||||
|
|
||||||
|
if (tcptw->mptcp_tw) {
|
||||||
|
data_ack = (u32)tcptw->mptcp_tw->rcv_nxt;
|
||||||
|
mptcp = 1;
|
||||||
|
}
|
||||||
|
tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
|
||||||
|
data_ack,
|
||||||
|
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
|
||||||
|
tcp_time_stamp_raw() + tcptw->tw_ts_offset,
|
||||||
|
tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
|
||||||
|
tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), mptcp);
|
||||||
|
#else
|
||||||
tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
|
tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
|
||||||
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
|
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
|
||||||
tcp_time_stamp_raw() + tcptw->tw_ts_offset,
|
tcp_time_stamp_raw() + tcptw->tw_ts_offset,
|
||||||
tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
|
tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
|
||||||
tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel));
|
tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel));
|
||||||
|
#endif
|
||||||
|
|
||||||
inet_twsk_put(tw);
|
inet_twsk_put(tw);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
|
#ifndef CONFIG_MPTCP
|
||||||
struct request_sock *req)
|
static
|
||||||
|
#endif
|
||||||
|
void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
|
||||||
|
struct request_sock *req)
|
||||||
{
|
{
|
||||||
/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
|
/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
|
||||||
* sk->sk_state == TCP_SYN_RECV -> for Fast Open.
|
* sk->sk_state == TCP_SYN_RECV -> for Fast Open.
|
||||||
@ -1025,6 +1167,17 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
|
|||||||
* exception of <SYN> segments, MUST be right-shifted by
|
* exception of <SYN> segments, MUST be right-shifted by
|
||||||
* Rcv.Wind.Shift bits:
|
* Rcv.Wind.Shift bits:
|
||||||
*/
|
*/
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN || is_meta_sk(sk)) ?
|
||||||
|
tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
|
||||||
|
tcp_rsk(req)->rcv_nxt, 0,
|
||||||
|
req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
|
||||||
|
tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
|
||||||
|
req->ts_recent, sk->sk_bound_dev_if,
|
||||||
|
tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
|
||||||
|
0, 0, 0);
|
||||||
|
|
||||||
|
#else
|
||||||
tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
|
tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
|
||||||
tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
|
tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
|
||||||
tcp_rsk(req)->rcv_nxt,
|
tcp_rsk(req)->rcv_nxt,
|
||||||
@ -1033,10 +1186,14 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
|
|||||||
req->ts_recent, sk->sk_bound_dev_if,
|
req->ts_recent, sk->sk_bound_dev_if,
|
||||||
tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
|
tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
|
||||||
0, 0);
|
0, 0);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
|
#ifndef CONFIG_MPTCP
|
||||||
|
static
|
||||||
|
#endif
|
||||||
|
struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
|
||||||
{
|
{
|
||||||
#ifdef CONFIG_SYN_COOKIES
|
#ifdef CONFIG_SYN_COOKIES
|
||||||
const struct tcphdr *th = tcp_hdr(skb);
|
const struct tcphdr *th = tcp_hdr(skb);
|
||||||
@ -1047,7 +1204,10 @@ static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
|
|||||||
return sk;
|
return sk;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
|
#ifndef CONFIG_MPTCP
|
||||||
|
static
|
||||||
|
#endif
|
||||||
|
int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
|
||||||
{
|
{
|
||||||
if (skb->protocol == htons(ETH_P_IP))
|
if (skb->protocol == htons(ETH_P_IP))
|
||||||
return tcp_v4_conn_request(sk, skb);
|
return tcp_v4_conn_request(sk, skb);
|
||||||
@ -1078,11 +1238,14 @@ static void tcp_v6_restore_cb(struct sk_buff *skb)
|
|||||||
sizeof(struct inet6_skb_parm));
|
sizeof(struct inet6_skb_parm));
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
|
#ifndef CONFIG_MPTCP
|
||||||
struct request_sock *req,
|
static
|
||||||
struct dst_entry *dst,
|
#endif
|
||||||
struct request_sock *req_unhash,
|
struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
|
||||||
bool *own_req)
|
struct request_sock *req,
|
||||||
|
struct dst_entry *dst,
|
||||||
|
struct request_sock *req_unhash,
|
||||||
|
bool *own_req)
|
||||||
{
|
{
|
||||||
struct inet_request_sock *ireq;
|
struct inet_request_sock *ireq;
|
||||||
struct ipv6_pinfo *newnp;
|
struct ipv6_pinfo *newnp;
|
||||||
@ -1120,7 +1283,15 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
|
|||||||
|
|
||||||
newnp->saddr = newsk->sk_v6_rcv_saddr;
|
newnp->saddr = newsk->sk_v6_rcv_saddr;
|
||||||
|
|
||||||
inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
|
#ifdef CONFIG_MPTCP
|
||||||
|
/* We must check on the request-socket because the listener
|
||||||
|
* socket's flag may have been changed halfway through.
|
||||||
|
*/
|
||||||
|
if (!inet_rsk(req)->saw_mpc)
|
||||||
|
inet_csk(newsk)->icsk_af_ops = &mptcp_v6_mapped;
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
|
||||||
newsk->sk_backlog_rcv = tcp_v4_do_rcv;
|
newsk->sk_backlog_rcv = tcp_v4_do_rcv;
|
||||||
#ifdef CONFIG_TCP_MD5SIG
|
#ifdef CONFIG_TCP_MD5SIG
|
||||||
newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
|
newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
|
||||||
@ -1167,6 +1338,14 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
|
|||||||
if (!newsk)
|
if (!newsk)
|
||||||
goto out_nonewsk;
|
goto out_nonewsk;
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
/* If the meta_sk is v6-mapped we can end up here with the wrong af_ops.
|
||||||
|
* Just make sure that this subflow is v6.
|
||||||
|
*/
|
||||||
|
if (is_meta_sk(sk))
|
||||||
|
inet_csk(newsk)->icsk_af_ops = &mptcp_v6_specific;
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* No need to charge this sock to the relevant IPv6 refcnt debug socks
|
* No need to charge this sock to the relevant IPv6 refcnt debug socks
|
||||||
* count here, tcp_create_openreq_child now does this for us, see the
|
* count here, tcp_create_openreq_child now does this for us, see the
|
||||||
@ -1305,7 +1484,10 @@ out:
|
|||||||
* This is because we cannot sleep with the original spinlock
|
* This is because we cannot sleep with the original spinlock
|
||||||
* held.
|
* held.
|
||||||
*/
|
*/
|
||||||
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
|
#ifndef CONFIG_MPTCP
|
||||||
|
static
|
||||||
|
#endif
|
||||||
|
int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
|
||||||
{
|
{
|
||||||
struct ipv6_pinfo *np = inet6_sk(sk);
|
struct ipv6_pinfo *np = inet6_sk(sk);
|
||||||
struct tcp_sock *tp;
|
struct tcp_sock *tp;
|
||||||
@ -1321,6 +1503,11 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
|
|||||||
|
|
||||||
if (skb->protocol == htons(ETH_P_IP))
|
if (skb->protocol == htons(ETH_P_IP))
|
||||||
return tcp_v4_do_rcv(sk, skb);
|
return tcp_v4_do_rcv(sk, skb);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (is_meta_sk(sk))
|
||||||
|
return mptcp_v6_do_rcv(sk, skb);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* socket locking is here for SMP purposes as backlog rcv
|
* socket locking is here for SMP purposes as backlog rcv
|
||||||
@ -1452,6 +1639,10 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
|
|||||||
TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
|
TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
|
||||||
skb->len - th->doff*4);
|
skb->len - th->doff*4);
|
||||||
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
|
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
TCP_SKB_CB(skb)->mptcp_flags = 0;
|
||||||
|
TCP_SKB_CB(skb)->dss_off = 0;
|
||||||
|
#endif
|
||||||
TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
|
TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
|
||||||
TCP_SKB_CB(skb)->tcp_tw_isn = 0;
|
TCP_SKB_CB(skb)->tcp_tw_isn = 0;
|
||||||
TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
|
TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
|
||||||
@ -1467,6 +1658,9 @@ static int tcp_v6_rcv(struct sk_buff *skb)
|
|||||||
const struct ipv6hdr *hdr;
|
const struct ipv6hdr *hdr;
|
||||||
bool refcounted;
|
bool refcounted;
|
||||||
struct sock *sk;
|
struct sock *sk;
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
struct sock *meta_sk = NULL;
|
||||||
|
#endif
|
||||||
int ret;
|
int ret;
|
||||||
struct net *net = dev_net(skb->dev);
|
struct net *net = dev_net(skb->dev);
|
||||||
|
|
||||||
@ -1520,10 +1714,20 @@ process:
|
|||||||
reqsk_put(req);
|
reqsk_put(req);
|
||||||
goto csum_error;
|
goto csum_error;
|
||||||
}
|
}
|
||||||
if (unlikely(sk->sk_state != TCP_LISTEN)) {
|
if (unlikely(sk->sk_state != TCP_LISTEN
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
&& !is_meta_sk(sk)
|
||||||
|
#endif
|
||||||
|
)) {
|
||||||
inet_csk_reqsk_queue_drop_and_put(sk, req);
|
inet_csk_reqsk_queue_drop_and_put(sk, req);
|
||||||
goto lookup;
|
goto lookup;
|
||||||
}
|
}
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (unlikely(is_meta_sk(sk) && !mptcp_can_new_subflow(sk))) {
|
||||||
|
inet_csk_reqsk_queue_drop_and_put(sk, req);
|
||||||
|
goto lookup;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
sock_hold(sk);
|
sock_hold(sk);
|
||||||
refcounted = true;
|
refcounted = true;
|
||||||
nsk = NULL;
|
nsk = NULL;
|
||||||
@ -1583,16 +1787,42 @@ process:
|
|||||||
}
|
}
|
||||||
|
|
||||||
sk_incoming_cpu_update(sk);
|
sk_incoming_cpu_update(sk);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (mptcp(tcp_sk(sk))) {
|
||||||
|
meta_sk = mptcp_meta_sk(sk);
|
||||||
|
|
||||||
|
bh_lock_sock_nested(meta_sk);
|
||||||
|
if (sock_owned_by_user(meta_sk))
|
||||||
|
mptcp_prepare_for_backlog(sk, skb);
|
||||||
|
} else {
|
||||||
|
meta_sk = sk;
|
||||||
|
#endif
|
||||||
|
bh_lock_sock_nested(sk);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
bh_lock_sock_nested(sk);
|
|
||||||
tcp_segs_in(tcp_sk(sk), skb);
|
tcp_segs_in(tcp_sk(sk), skb);
|
||||||
ret = 0;
|
ret = 0;
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (!sock_owned_by_user(meta_sk)) {
|
||||||
|
#else
|
||||||
if (!sock_owned_by_user(sk)) {
|
if (!sock_owned_by_user(sk)) {
|
||||||
|
#endif
|
||||||
ret = tcp_v6_do_rcv(sk, skb);
|
ret = tcp_v6_do_rcv(sk, skb);
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
} else if (tcp_add_backlog(meta_sk, skb)) {
|
||||||
|
#else
|
||||||
} else if (tcp_add_backlog(sk, skb)) {
|
} else if (tcp_add_backlog(sk, skb)) {
|
||||||
|
#endif
|
||||||
goto discard_and_relse;
|
goto discard_and_relse;
|
||||||
}
|
}
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
bh_unlock_sock(meta_sk);
|
||||||
|
#else
|
||||||
bh_unlock_sock(sk);
|
bh_unlock_sock(sk);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
put_and_return:
|
put_and_return:
|
||||||
if (refcounted)
|
if (refcounted)
|
||||||
@ -1605,6 +1835,19 @@ no_tcp_socket:
|
|||||||
|
|
||||||
tcp_v6_fill_cb(skb, hdr, th);
|
tcp_v6_fill_cb(skb, hdr, th);
|
||||||
|
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (!sk && th->syn && !th->ack) {
|
||||||
|
int ret = mptcp_lookup_join(skb, NULL);
|
||||||
|
|
||||||
|
if (ret < 0) {
|
||||||
|
tcp_v6_send_reset(NULL, skb);
|
||||||
|
goto discard_it;
|
||||||
|
} else if (ret > 0) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
if (tcp_checksum_complete(skb)) {
|
if (tcp_checksum_complete(skb)) {
|
||||||
csum_error:
|
csum_error:
|
||||||
__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
|
__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
|
||||||
@ -1657,6 +1900,18 @@ do_time_wait:
|
|||||||
refcounted = false;
|
refcounted = false;
|
||||||
goto process;
|
goto process;
|
||||||
}
|
}
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (th->syn && !th->ack) {
|
||||||
|
int ret = mptcp_lookup_join(skb, inet_twsk(sk));
|
||||||
|
|
||||||
|
if (ret < 0) {
|
||||||
|
tcp_v6_send_reset(NULL, skb);
|
||||||
|
goto discard_it;
|
||||||
|
} else if (ret > 0) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
/* to ACK */
|
/* to ACK */
|
||||||
/* fall through */
|
/* fall through */
|
||||||
@ -1711,13 +1966,19 @@ static void tcp_v6_early_demux(struct sk_buff *skb)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct timewait_sock_ops tcp6_timewait_sock_ops = {
|
#ifndef CONFIG_MPTCP
|
||||||
|
static
|
||||||
|
#endif
|
||||||
|
struct timewait_sock_ops tcp6_timewait_sock_ops = {
|
||||||
.twsk_obj_size = sizeof(struct tcp6_timewait_sock),
|
.twsk_obj_size = sizeof(struct tcp6_timewait_sock),
|
||||||
.twsk_unique = tcp_twsk_unique,
|
.twsk_unique = tcp_twsk_unique,
|
||||||
.twsk_destructor = tcp_twsk_destructor,
|
.twsk_destructor = tcp_twsk_destructor,
|
||||||
};
|
};
|
||||||
|
|
||||||
static const struct inet_connection_sock_af_ops ipv6_specific = {
|
#ifndef CONFIG_MPTCP
|
||||||
|
static
|
||||||
|
#endif
|
||||||
|
const struct inet_connection_sock_af_ops ipv6_specific = {
|
||||||
.queue_xmit = inet6_csk_xmit,
|
.queue_xmit = inet6_csk_xmit,
|
||||||
.send_check = tcp_v6_send_check,
|
.send_check = tcp_v6_send_check,
|
||||||
.rebuild_header = inet6_sk_rebuild_header,
|
.rebuild_header = inet6_sk_rebuild_header,
|
||||||
@ -1748,7 +2009,10 @@ static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
|
|||||||
/*
|
/*
|
||||||
* TCP over IPv4 via INET6 API
|
* TCP over IPv4 via INET6 API
|
||||||
*/
|
*/
|
||||||
static const struct inet_connection_sock_af_ops ipv6_mapped = {
|
#ifndef CONFIG_MPTCP
|
||||||
|
static
|
||||||
|
#endif
|
||||||
|
const struct inet_connection_sock_af_ops ipv6_mapped = {
|
||||||
.queue_xmit = ip_queue_xmit,
|
.queue_xmit = ip_queue_xmit,
|
||||||
.send_check = tcp_v4_send_check,
|
.send_check = tcp_v4_send_check,
|
||||||
.rebuild_header = inet_sk_rebuild_header,
|
.rebuild_header = inet_sk_rebuild_header,
|
||||||
@ -1784,7 +2048,12 @@ static int tcp_v6_init_sock(struct sock *sk)
|
|||||||
|
|
||||||
tcp_init_sock(sk);
|
tcp_init_sock(sk);
|
||||||
|
|
||||||
icsk->icsk_af_ops = &ipv6_specific;
|
#ifdef CONFIG_MPTCP
|
||||||
|
if (sock_flag(sk, SOCK_MPTCP))
|
||||||
|
icsk->icsk_af_ops = &mptcp_v6_specific;
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
icsk->icsk_af_ops = &ipv6_specific;
|
||||||
|
|
||||||
#ifdef CONFIG_TCP_MD5SIG
|
#ifdef CONFIG_TCP_MD5SIG
|
||||||
tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
|
tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
|
||||||
@ -1793,7 +2062,10 @@ static int tcp_v6_init_sock(struct sock *sk)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void tcp_v6_destroy_sock(struct sock *sk)
|
#ifndef CONFIG_MPTCP
|
||||||
|
static
|
||||||
|
#endif
|
||||||
|
void tcp_v6_destroy_sock(struct sock *sk)
|
||||||
{
|
{
|
||||||
tcp_v4_destroy_sock(sk);
|
tcp_v4_destroy_sock(sk);
|
||||||
inet6_destroy_sock(sk);
|
inet6_destroy_sock(sk);
|
||||||
@ -2020,6 +2292,11 @@ struct proto tcpv6_prot = {
|
|||||||
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
|
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
|
||||||
.max_header = MAX_TCP_HEADER,
|
.max_header = MAX_TCP_HEADER,
|
||||||
.obj_size = sizeof(struct tcp6_sock),
|
.obj_size = sizeof(struct tcp6_sock),
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
.useroffset = offsetof(struct tcp_sock, mptcp_sched_name),
|
||||||
|
.usersize = sizeof_field(struct tcp_sock, mptcp_sched_name) +
|
||||||
|
sizeof_field(struct tcp_sock, mptcp_pm_name),
|
||||||
|
#endif
|
||||||
.slab_flags = SLAB_TYPESAFE_BY_RCU,
|
.slab_flags = SLAB_TYPESAFE_BY_RCU,
|
||||||
.twsk_prot = &tcp6_timewait_sock_ops,
|
.twsk_prot = &tcp6_timewait_sock_ops,
|
||||||
.rsk_prot = &tcp6_request_sock_ops,
|
.rsk_prot = &tcp6_request_sock_ops,
|
||||||
@ -2030,6 +2307,9 @@ struct proto tcpv6_prot = {
|
|||||||
.compat_getsockopt = compat_tcp_getsockopt,
|
.compat_getsockopt = compat_tcp_getsockopt,
|
||||||
#endif
|
#endif
|
||||||
.diag_destroy = tcp_abort,
|
.diag_destroy = tcp_abort,
|
||||||
|
#ifdef CONFIG_MPTCP
|
||||||
|
.clear_sk = mptcp_clear_sk,
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
/* thinking of making this const? Don't.
|
/* thinking of making this const? Don't.
|
||||||
|
146
net/mptcp/Kconfig
Executable file
146
net/mptcp/Kconfig
Executable file
@ -0,0 +1,146 @@
|
|||||||
|
#
|
||||||
|
# MPTCP configuration
|
||||||
|
#
|
||||||
|
config MPTCP
|
||||||
|
bool "MPTCP protocol"
|
||||||
|
depends on (IPV6=y || IPV6=n)
|
||||||
|
---help---
|
||||||
|
This replaces the normal TCP stack with a Multipath TCP stack,
|
||||||
|
able to use several paths at once.
|
||||||
|
|
||||||
|
menuconfig MPTCP_PM_ADVANCED
|
||||||
|
bool "MPTCP: advanced path-manager control"
|
||||||
|
depends on MPTCP=y
|
||||||
|
---help---
|
||||||
|
Support for selection of different path-managers. You should choose 'Y' here,
|
||||||
|
because otherwise you will not actively create new MPTCP-subflows.
|
||||||
|
|
||||||
|
if MPTCP_PM_ADVANCED
|
||||||
|
|
||||||
|
config MPTCP_FULLMESH
|
||||||
|
tristate "MPTCP Full-Mesh Path-Manager"
|
||||||
|
depends on MPTCP=y
|
||||||
|
---help---
|
||||||
|
This path-management module will create a full-mesh among all IP-addresses.
|
||||||
|
|
||||||
|
config MPTCP_NDIFFPORTS
|
||||||
|
tristate "MPTCP ndiff-ports"
|
||||||
|
depends on MPTCP=y
|
||||||
|
---help---
|
||||||
|
This path-management module will create multiple subflows between the same
|
||||||
|
pair of IP-addresses, modifying the source-port. You can set the number
|
||||||
|
of subflows via the mptcp_ndiffports-sysctl.
|
||||||
|
|
||||||
|
config MPTCP_BINDER
|
||||||
|
tristate "MPTCP Binder"
|
||||||
|
depends on (MPTCP=y)
|
||||||
|
---help---
|
||||||
|
This path-management module works like ndiffports, and adds the sysctl
|
||||||
|
option to set the gateway (and/or path to) per each additional subflow
|
||||||
|
via Loose Source Routing (IPv4 only).
|
||||||
|
|
||||||
|
config MPTCP_NETLINK
|
||||||
|
tristate "MPTCP Netlink Path-Manager"
|
||||||
|
depends on MPTCP=y
|
||||||
|
---help---
|
||||||
|
This path-management module is controlled over a Netlink interface. A userspace
|
||||||
|
module can therefore control the establishment of new subflows and the policy
|
||||||
|
to apply over those new subflows for every connection.
|
||||||
|
|
||||||
|
choice
|
||||||
|
prompt "Default MPTCP Path-Manager"
|
||||||
|
default DEFAULT_FULLMESH
|
||||||
|
help
|
||||||
|
Select the Path-Manager of your choice
|
||||||
|
|
||||||
|
config DEFAULT_FULLMESH
|
||||||
|
bool "Full mesh" if MPTCP_FULLMESH=y
|
||||||
|
|
||||||
|
config DEFAULT_NDIFFPORTS
|
||||||
|
bool "ndiff-ports" if MPTCP_NDIFFPORTS=y
|
||||||
|
|
||||||
|
config DEFAULT_BINDER
|
||||||
|
bool "binder" if MPTCP_BINDER=y
|
||||||
|
|
||||||
|
config DEFAULT_NETLINK
|
||||||
|
bool "Netlink" if MPTCP_NETLINK=y
|
||||||
|
|
||||||
|
config DEFAULT_DUMMY
|
||||||
|
bool "Default"
|
||||||
|
|
||||||
|
endchoice
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
|
config DEFAULT_MPTCP_PM
|
||||||
|
string
|
||||||
|
default "default" if DEFAULT_DUMMY
|
||||||
|
default "fullmesh" if DEFAULT_FULLMESH
|
||||||
|
default "ndiffports" if DEFAULT_NDIFFPORTS
|
||||||
|
default "binder" if DEFAULT_BINDER
|
||||||
|
default "default"
|
||||||
|
|
||||||
|
menuconfig MPTCP_SCHED_ADVANCED
|
||||||
|
bool "MPTCP: advanced scheduler control"
|
||||||
|
depends on MPTCP=y
|
||||||
|
---help---
|
||||||
|
Support for selection of different schedulers. You should choose 'Y' here,
|
||||||
|
if you want to choose a different scheduler than the default one.
|
||||||
|
|
||||||
|
if MPTCP_SCHED_ADVANCED
|
||||||
|
|
||||||
|
config MPTCP_BLEST
|
||||||
|
tristate "MPTCP BLEST"
|
||||||
|
depends on MPTCP=y
|
||||||
|
---help---
|
||||||
|
This is an experimental BLocking ESTimation-based (BLEST) scheduler.
|
||||||
|
|
||||||
|
config MPTCP_ROUNDROBIN
|
||||||
|
tristate "MPTCP Round-Robin"
|
||||||
|
depends on (MPTCP=y)
|
||||||
|
---help---
|
||||||
|
This is a very simple round-robin scheduler. Probably has bad performance
|
||||||
|
but might be interesting for researchers.
|
||||||
|
|
||||||
|
config MPTCP_REDUNDANT
|
||||||
|
tristate "MPTCP Redundant"
|
||||||
|
depends on (MPTCP=y)
|
||||||
|
---help---
|
||||||
|
This scheduler sends all packets redundantly over all subflows to decreases
|
||||||
|
latency and jitter on the cost of lower throughput.
|
||||||
|
|
||||||
|
choice
|
||||||
|
prompt "Default MPTCP Scheduler"
|
||||||
|
default DEFAULT_SCHEDULER
|
||||||
|
help
|
||||||
|
Select the Scheduler of your choice
|
||||||
|
|
||||||
|
config DEFAULT_SCHEDULER
|
||||||
|
bool "Default"
|
||||||
|
---help---
|
||||||
|
This is the default scheduler, sending first on the subflow
|
||||||
|
with the lowest RTT.
|
||||||
|
|
||||||
|
config DEFAULT_ROUNDROBIN
|
||||||
|
bool "Round-Robin" if MPTCP_ROUNDROBIN=y
|
||||||
|
---help---
|
||||||
|
This is the round-rob scheduler, sending in a round-robin
|
||||||
|
fashion..
|
||||||
|
|
||||||
|
config DEFAULT_REDUNDANT
|
||||||
|
bool "Redundant" if MPTCP_REDUNDANT=y
|
||||||
|
---help---
|
||||||
|
This is the redundant scheduler, sending packets redundantly over
|
||||||
|
all the subflows.
|
||||||
|
|
||||||
|
endchoice
|
||||||
|
endif
|
||||||
|
|
||||||
|
config DEFAULT_MPTCP_SCHED
|
||||||
|
string
|
||||||
|
depends on (MPTCP=y)
|
||||||
|
default "default" if DEFAULT_SCHEDULER
|
||||||
|
default "roundrobin" if DEFAULT_ROUNDROBIN
|
||||||
|
default "redundant" if DEFAULT_REDUNDANT
|
||||||
|
default "default"
|
||||||
|
|
24
net/mptcp/Makefile
Executable file
24
net/mptcp/Makefile
Executable file
@ -0,0 +1,24 @@
|
|||||||
|
#
|
||||||
|
## Makefile for MultiPath TCP support code.
|
||||||
|
#
|
||||||
|
#
|
||||||
|
|
||||||
|
obj-$(CONFIG_MPTCP) += mptcp.o
|
||||||
|
|
||||||
|
mptcp-y := mptcp_ctrl.o mptcp_ipv4.o mptcp_pm.o \
|
||||||
|
mptcp_output.o mptcp_input.o mptcp_sched.o
|
||||||
|
|
||||||
|
obj-$(CONFIG_TCP_CONG_LIA) += mptcp_coupled.o
|
||||||
|
obj-$(CONFIG_TCP_CONG_OLIA) += mptcp_olia.o
|
||||||
|
obj-$(CONFIG_TCP_CONG_WVEGAS) += mptcp_wvegas.o
|
||||||
|
obj-$(CONFIG_TCP_CONG_BALIA) += mptcp_balia.o
|
||||||
|
obj-$(CONFIG_TCP_CONG_MCTCPDESYNC) += mctcp_desync.o
|
||||||
|
obj-$(CONFIG_MPTCP_FULLMESH) += mptcp_fullmesh.o
|
||||||
|
obj-$(CONFIG_MPTCP_NDIFFPORTS) += mptcp_ndiffports.o
|
||||||
|
obj-$(CONFIG_MPTCP_BINDER) += mptcp_binder.o
|
||||||
|
obj-$(CONFIG_MPTCP_NETLINK) += mptcp_netlink.o
|
||||||
|
obj-$(CONFIG_MPTCP_ROUNDROBIN) += mptcp_rr.o
|
||||||
|
obj-$(CONFIG_MPTCP_REDUNDANT) += mptcp_redundant.o
|
||||||
|
obj-$(CONFIG_MPTCP_BLEST) += mptcp_blest.o
|
||||||
|
|
||||||
|
mptcp-$(subst m,y,$(CONFIG_IPV6)) += mptcp_ipv6.o
|
193
net/mptcp/mctcp_desync.c
Executable file
193
net/mptcp/mctcp_desync.c
Executable file
@ -0,0 +1,193 @@
|
|||||||
|
/*
|
||||||
|
* Desynchronized Multi-Channel TCP Congestion Control Algorithm
|
||||||
|
*
|
||||||
|
* Implementation based on publications of "DMCTCP:Desynchronized Multi-Channel
|
||||||
|
* TCP for high speed access networks with tiny buffers" in 23rd international
|
||||||
|
* conference of Computer Communication and Networks (ICCCN), 2014, and
|
||||||
|
* "Exploring parallelism and desynchronization of TCP over high speed networks
|
||||||
|
* with tiny buffers" in Journal of Computer Communications Elsevier, 2015.
|
||||||
|
*
|
||||||
|
* http://ieeexplore.ieee.org/abstract/document/6911722/
|
||||||
|
* https://doi.org/10.1016/j.comcom.2015.07.010
|
||||||
|
*
|
||||||
|
* This prototype is for research purpose and is currently experimental code
|
||||||
|
* that only support a single path. Future support of multi-channel over
|
||||||
|
* multi-path requires channels grouping.
|
||||||
|
*
|
||||||
|
* Initial Design and Implementation:
|
||||||
|
* Cheng Cui <Cheng.Cui@netapp.com>
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License as published by the Free
|
||||||
|
* Software Foundation; either version 2 of the License, or (at your option)
|
||||||
|
* any later version.
|
||||||
|
*/
|
||||||
|
#include <net/tcp.h>
|
||||||
|
#include <net/mptcp.h>
|
||||||
|
#include <linux/module.h>
|
||||||
|
|
||||||
|
enum {
|
||||||
|
MASTER_CHANNEL = 1,
|
||||||
|
INI_MIN_CWND = 2,
|
||||||
|
};
|
||||||
|
|
||||||
|
/* private congestion control structure:
|
||||||
|
* off_tstamp: the last backoff timestamp for loss synchronization event
|
||||||
|
* off_subfid: the subflow which was backoff on off_tstamp
|
||||||
|
*/
|
||||||
|
struct mctcp_desync {
|
||||||
|
u64 off_tstamp;
|
||||||
|
u8 off_subfid;
|
||||||
|
};
|
||||||
|
|
||||||
|
static inline int mctcp_cc_sk_can_send(const struct sock *sk)
|
||||||
|
{
|
||||||
|
return mptcp_sk_can_send(sk) && tcp_sk(sk)->srtt_us;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void mctcp_desync_init(struct sock *sk)
|
||||||
|
{
|
||||||
|
if (mptcp(tcp_sk(sk))) {
|
||||||
|
struct mctcp_desync *ca = inet_csk_ca(mptcp_meta_sk(sk));
|
||||||
|
ca->off_tstamp = 0;
|
||||||
|
ca->off_subfid = 0;
|
||||||
|
}
|
||||||
|
/* If we do not mptcp, behave like reno: return */
|
||||||
|
}
|
||||||
|
|
||||||
|
static void mctcp_desync_cong_avoid(struct sock *sk, u32 ack, u32 acked)
|
||||||
|
{
|
||||||
|
struct tcp_sock *tp = tcp_sk(sk);
|
||||||
|
|
||||||
|
if (!mptcp(tp)) {
|
||||||
|
tcp_reno_cong_avoid(sk, ack, acked);
|
||||||
|
return;
|
||||||
|
} else if (!tcp_is_cwnd_limited(sk)) {
|
||||||
|
return;
|
||||||
|
} else {
|
||||||
|
const struct mctcp_desync *ca = inet_csk_ca(mptcp_meta_sk(sk));
|
||||||
|
const u8 subfid = tp->mptcp->path_index;
|
||||||
|
|
||||||
|
/* current aggregated cwnd */
|
||||||
|
u32 agg_cwnd = 0;
|
||||||
|
u32 min_cwnd = 0xffffffff;
|
||||||
|
u8 min_cwnd_subfid = 0;
|
||||||
|
|
||||||
|
/* In "safe" area, increase */
|
||||||
|
if (tcp_in_slow_start(tp)) {
|
||||||
|
if (ca->off_subfid) {
|
||||||
|
/* passed initial phase, allow slow start */
|
||||||
|
tcp_slow_start(tp, acked);
|
||||||
|
} else if (MASTER_CHANNEL == tp->mptcp->path_index) {
|
||||||
|
/* master channel is normal slow start in
|
||||||
|
* initial phase */
|
||||||
|
tcp_slow_start(tp, acked);
|
||||||
|
} else {
|
||||||
|
/* secondary channels increase slowly until
|
||||||
|
* the initial phase passed
|
||||||
|
*/
|
||||||
|
tp->snd_ssthresh = tp->snd_cwnd = INI_MIN_CWND;
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
} else {
|
||||||
|
/* In dangerous area, increase slowly and linearly. */
|
||||||
|
const struct mptcp_tcp_sock *mptcp;
|
||||||
|
|
||||||
|
/* get total cwnd and the subflow that has min cwnd */
|
||||||
|
mptcp_for_each_sub(tp->mpcb, mptcp) {
|
||||||
|
const struct sock *sub_sk = mptcp_to_sock(mptcp);
|
||||||
|
|
||||||
|
if (mctcp_cc_sk_can_send(sub_sk)) {
|
||||||
|
const struct tcp_sock *sub_tp =
|
||||||
|
tcp_sk(sub_sk);
|
||||||
|
agg_cwnd += sub_tp->snd_cwnd;
|
||||||
|
if(min_cwnd > sub_tp->snd_cwnd) {
|
||||||
|
min_cwnd = sub_tp->snd_cwnd;
|
||||||
|
min_cwnd_subfid =
|
||||||
|
sub_tp->mptcp->path_index;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* the smallest subflow grows faster than others */
|
||||||
|
if (subfid == min_cwnd_subfid) {
|
||||||
|
tcp_cong_avoid_ai(tp, min_cwnd, acked);
|
||||||
|
} else {
|
||||||
|
tcp_cong_avoid_ai(tp, agg_cwnd - min_cwnd,
|
||||||
|
acked);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static u32 mctcp_desync_ssthresh(struct sock *sk)
|
||||||
|
{
|
||||||
|
struct tcp_sock *tp = tcp_sk(sk);
|
||||||
|
|
||||||
|
if (!mptcp(tp)) {
|
||||||
|
return max(tp->snd_cwnd >> 1U, 2U);
|
||||||
|
} else {
|
||||||
|
struct mctcp_desync *ca = inet_csk_ca(mptcp_meta_sk(sk));
|
||||||
|
const u8 subfid = tp->mptcp->path_index;
|
||||||
|
const struct mptcp_tcp_sock *mptcp;
|
||||||
|
u32 max_cwnd = 0;
|
||||||
|
u8 max_cwnd_subfid = 0;
|
||||||
|
|
||||||
|
/* Find the subflow that has the max cwnd. */
|
||||||
|
mptcp_for_each_sub(tp->mpcb, mptcp) {
|
||||||
|
const struct sock *sub_sk = mptcp_to_sock(mptcp);
|
||||||
|
|
||||||
|
if (mctcp_cc_sk_can_send(sub_sk)) {
|
||||||
|
const struct tcp_sock *sub_tp = tcp_sk(sub_sk);
|
||||||
|
if (max_cwnd < sub_tp->snd_cwnd) {
|
||||||
|
max_cwnd = sub_tp->snd_cwnd;
|
||||||
|
max_cwnd_subfid =
|
||||||
|
sub_tp->mptcp->path_index;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* Use high resolution clock. */
|
||||||
|
if (subfid == max_cwnd_subfid) {
|
||||||
|
u64 now = tcp_clock_us();
|
||||||
|
u32 delta = tcp_stamp_us_delta(now, ca->off_tstamp);
|
||||||
|
|
||||||
|
if (delta < (tp->srtt_us >> 3)) {
|
||||||
|
/* desynchronize */
|
||||||
|
return tp->snd_cwnd;
|
||||||
|
} else {
|
||||||
|
ca->off_tstamp = now;
|
||||||
|
ca->off_subfid = subfid;
|
||||||
|
return max(max_cwnd >> 1U, 2U);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return tp->snd_cwnd;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct tcp_congestion_ops mctcp_desync = {
|
||||||
|
.init = mctcp_desync_init,
|
||||||
|
.ssthresh = mctcp_desync_ssthresh,
|
||||||
|
.undo_cwnd = tcp_reno_undo_cwnd,
|
||||||
|
.cong_avoid = mctcp_desync_cong_avoid,
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
.name = "mctcpdesync",
|
||||||
|
};
|
||||||
|
|
||||||
|
static int __init mctcp_desync_register(void)
|
||||||
|
{
|
||||||
|
BUILD_BUG_ON(sizeof(struct mctcp_desync) > ICSK_CA_PRIV_SIZE);
|
||||||
|
return tcp_register_congestion_control(&mctcp_desync);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void __exit mctcp_desync_unregister(void)
|
||||||
|
{
|
||||||
|
tcp_unregister_congestion_control(&mctcp_desync);
|
||||||
|
}
|
||||||
|
|
||||||
|
module_init(mctcp_desync_register);
|
||||||
|
module_exit(mctcp_desync_unregister);
|
||||||
|
|
||||||
|
MODULE_AUTHOR("Cheng Cui");
|
||||||
|
MODULE_LICENSE("GPL");
|
||||||
|
MODULE_DESCRIPTION("MCTCP: DESYNCHRONIZED MULTICHANNEL TCP CONGESTION CONTROL");
|
||||||
|
MODULE_VERSION("1.0");
|
261
net/mptcp/mptcp_balia.c
Executable file
261
net/mptcp/mptcp_balia.c
Executable file
@ -0,0 +1,261 @@
|
|||||||
|
/*
|
||||||
|
* MPTCP implementation - Balia Congestion Control
|
||||||
|
* (Balanced Linked Adaptation Algorithm)
|
||||||
|
*
|
||||||
|
* Analysis, Design and Implementation:
|
||||||
|
* Qiuyu Peng <qpeng@caltech.edu>
|
||||||
|
* Anwar Walid <anwar@research.bell-labs.com>
|
||||||
|
* Jaehyun Hwang <jhyun.hwang@samsung.com>
|
||||||
|
* Steven H. Low <slow@caltech.edu>
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public License
|
||||||
|
* as published by the Free Software Foundation; either version
|
||||||
|
* 2 of the License, or (at your option) any later version.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <net/tcp.h>
|
||||||
|
#include <net/mptcp.h>
|
||||||
|
|
||||||
|
#include <linux/module.h>
|
||||||
|
|
||||||
|
/* The variable 'rate' (i.e., x_r) will be scaled
|
||||||
|
* e.g., from B/s to KB/s, MB/s, or GB/s
|
||||||
|
* if max_rate > 2^rate_scale_limit
|
||||||
|
*/
|
||||||
|
|
||||||
|
static int rate_scale_limit = 25;
|
||||||
|
static int alpha_scale = 10;
|
||||||
|
static int scale_num = 5;
|
||||||
|
|
||||||
|
struct mptcp_balia {
|
||||||
|
u64 ai;
|
||||||
|
u64 md;
|
||||||
|
bool forced_update;
|
||||||
|
};
|
||||||
|
|
||||||
|
static inline int mptcp_balia_sk_can_send(const struct sock *sk)
|
||||||
|
{
|
||||||
|
return mptcp_sk_can_send(sk) && tcp_sk(sk)->srtt_us;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline u64 mptcp_get_ai(const struct sock *meta_sk)
|
||||||
|
{
|
||||||
|
return ((struct mptcp_balia *)inet_csk_ca(meta_sk))->ai;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void mptcp_set_ai(const struct sock *meta_sk, u64 ai)
|
||||||
|
{
|
||||||
|
((struct mptcp_balia *)inet_csk_ca(meta_sk))->ai = ai;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline u64 mptcp_get_md(const struct sock *meta_sk)
|
||||||
|
{
|
||||||
|
return ((struct mptcp_balia *)inet_csk_ca(meta_sk))->md;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void mptcp_set_md(const struct sock *meta_sk, u64 md)
|
||||||
|
{
|
||||||
|
((struct mptcp_balia *)inet_csk_ca(meta_sk))->md = md;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline u64 mptcp_balia_scale(u64 val, int scale)
|
||||||
|
{
|
||||||
|
return (u64) val << scale;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline bool mptcp_get_forced(const struct sock *meta_sk)
|
||||||
|
{
|
||||||
|
return ((struct mptcp_balia *)inet_csk_ca(meta_sk))->forced_update;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void mptcp_set_forced(const struct sock *meta_sk, bool force)
|
||||||
|
{
|
||||||
|
((struct mptcp_balia *)inet_csk_ca(meta_sk))->forced_update = force;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void mptcp_balia_recalc_ai(const struct sock *sk)
|
||||||
|
{
|
||||||
|
const struct tcp_sock *tp = tcp_sk(sk);
|
||||||
|
const struct mptcp_cb *mpcb = tp->mpcb;
|
||||||
|
struct mptcp_tcp_sock *mptcp;
|
||||||
|
u64 max_rate = 0, rate = 0, sum_rate = 0;
|
||||||
|
u64 alpha, ai = tp->snd_cwnd, md = (tp->snd_cwnd >> 1);
|
||||||
|
int num_scale_down = 0;
|
||||||
|
|
||||||
|
if (!mpcb)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/* Find max_rate first */
|
||||||
|
mptcp_for_each_sub(mpcb, mptcp) {
|
||||||
|
const struct sock *sub_sk = mptcp_to_sock(mptcp);
|
||||||
|
struct tcp_sock *sub_tp = tcp_sk(sub_sk);
|
||||||
|
u64 tmp;
|
||||||
|
|
||||||
|
if (!mptcp_balia_sk_can_send(sub_sk))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
tmp = div_u64((u64)tp->mss_cache * sub_tp->snd_cwnd
|
||||||
|
* (USEC_PER_SEC << 3), sub_tp->srtt_us);
|
||||||
|
sum_rate += tmp;
|
||||||
|
|
||||||
|
if (tp == sub_tp)
|
||||||
|
rate = tmp;
|
||||||
|
|
||||||
|
if (tmp >= max_rate)
|
||||||
|
max_rate = tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* At least, the current subflow should be able to send */
|
||||||
|
if (unlikely(!rate))
|
||||||
|
goto exit;
|
||||||
|
|
||||||
|
alpha = div64_u64(max_rate, rate);
|
||||||
|
|
||||||
|
/* Scale down max_rate if it is too high (e.g., >2^25) */
|
||||||
|
while (max_rate > mptcp_balia_scale(1, rate_scale_limit)) {
|
||||||
|
max_rate >>= scale_num;
|
||||||
|
num_scale_down++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (num_scale_down) {
|
||||||
|
sum_rate = 0;
|
||||||
|
mptcp_for_each_sub(mpcb, mptcp) {
|
||||||
|
const struct sock *sub_sk = mptcp_to_sock(mptcp);
|
||||||
|
struct tcp_sock *sub_tp = tcp_sk(sub_sk);
|
||||||
|
u64 tmp;
|
||||||
|
|
||||||
|
if (!mptcp_balia_sk_can_send(sub_sk))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
tmp = div_u64((u64)tp->mss_cache * sub_tp->snd_cwnd
|
||||||
|
* (USEC_PER_SEC << 3), sub_tp->srtt_us);
|
||||||
|
tmp >>= (scale_num * num_scale_down);
|
||||||
|
|
||||||
|
sum_rate += tmp;
|
||||||
|
}
|
||||||
|
rate >>= (scale_num * num_scale_down);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* (sum_rate)^2 * 10 * w_r
|
||||||
|
* ai = ------------------------------------
|
||||||
|
* (x_r + max_rate) * (4x_r + max_rate)
|
||||||
|
*/
|
||||||
|
sum_rate *= sum_rate;
|
||||||
|
|
||||||
|
ai = div64_u64(sum_rate * 10, rate + max_rate);
|
||||||
|
ai = div64_u64(ai * tp->snd_cwnd, (rate << 2) + max_rate);
|
||||||
|
|
||||||
|
if (unlikely(!ai))
|
||||||
|
ai = tp->snd_cwnd;
|
||||||
|
|
||||||
|
md = ((tp->snd_cwnd >> 1) * min(mptcp_balia_scale(alpha, alpha_scale),
|
||||||
|
mptcp_balia_scale(3, alpha_scale) >> 1))
|
||||||
|
>> alpha_scale;
|
||||||
|
|
||||||
|
exit:
|
||||||
|
mptcp_set_ai(sk, ai);
|
||||||
|
mptcp_set_md(sk, md);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void mptcp_balia_init(struct sock *sk)
|
||||||
|
{
|
||||||
|
if (mptcp(tcp_sk(sk))) {
|
||||||
|
mptcp_set_forced(sk, 0);
|
||||||
|
mptcp_set_ai(sk, 0);
|
||||||
|
mptcp_set_md(sk, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void mptcp_balia_cwnd_event(struct sock *sk, enum tcp_ca_event event)
|
||||||
|
{
|
||||||
|
if (event == CA_EVENT_COMPLETE_CWR || event == CA_EVENT_LOSS)
|
||||||
|
mptcp_balia_recalc_ai(sk);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void mptcp_balia_set_state(struct sock *sk, u8 ca_state)
|
||||||
|
{
|
||||||
|
if (!mptcp(tcp_sk(sk)))
|
||||||
|
return;
|
||||||
|
|
||||||
|
mptcp_set_forced(sk, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void mptcp_balia_cong_avoid(struct sock *sk, u32 ack, u32 acked)
|
||||||
|
{
|
||||||
|
struct tcp_sock *tp = tcp_sk(sk);
|
||||||
|
int snd_cwnd;
|
||||||
|
|
||||||
|
if (!mptcp(tp)) {
|
||||||
|
tcp_reno_cong_avoid(sk, ack, acked);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!tcp_is_cwnd_limited(sk))
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (tcp_in_slow_start(tp)) {
|
||||||
|
/* In "safe" area, increase. */
|
||||||
|
tcp_slow_start(tp, acked);
|
||||||
|
mptcp_balia_recalc_ai(sk);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (mptcp_get_forced(mptcp_meta_sk(sk))) {
|
||||||
|
mptcp_balia_recalc_ai(sk);
|
||||||
|
mptcp_set_forced(sk, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
snd_cwnd = (int)mptcp_get_ai(sk);
|
||||||
|
|
||||||
|
if (tp->snd_cwnd_cnt >= snd_cwnd) {
|
||||||
|
if (tp->snd_cwnd < tp->snd_cwnd_clamp) {
|
||||||
|
tp->snd_cwnd++;
|
||||||
|
mptcp_balia_recalc_ai(sk);
|
||||||
|
}
|
||||||
|
|
||||||
|
tp->snd_cwnd_cnt = 0;
|
||||||
|
} else {
|
||||||
|
tp->snd_cwnd_cnt++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static u32 mptcp_balia_ssthresh(struct sock *sk)
|
||||||
|
{
|
||||||
|
const struct tcp_sock *tp = tcp_sk(sk);
|
||||||
|
|
||||||
|
if (unlikely(!mptcp(tp)))
|
||||||
|
return tcp_reno_ssthresh(sk);
|
||||||
|
else
|
||||||
|
return max((u32)(tp->snd_cwnd - mptcp_get_md(sk)), 1U);
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct tcp_congestion_ops mptcp_balia = {
|
||||||
|
.init = mptcp_balia_init,
|
||||||
|
.ssthresh = mptcp_balia_ssthresh,
|
||||||
|
.cong_avoid = mptcp_balia_cong_avoid,
|
||||||
|
.undo_cwnd = tcp_reno_undo_cwnd,
|
||||||
|
.cwnd_event = mptcp_balia_cwnd_event,
|
||||||
|
.set_state = mptcp_balia_set_state,
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
.name = "balia",
|
||||||
|
};
|
||||||
|
|
||||||
|
static int __init mptcp_balia_register(void)
|
||||||
|
{
|
||||||
|
BUILD_BUG_ON(sizeof(struct mptcp_balia) > ICSK_CA_PRIV_SIZE);
|
||||||
|
return tcp_register_congestion_control(&mptcp_balia);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void __exit mptcp_balia_unregister(void)
|
||||||
|
{
|
||||||
|
tcp_unregister_congestion_control(&mptcp_balia);
|
||||||
|
}
|
||||||
|
|
||||||
|
module_init(mptcp_balia_register);
|
||||||
|
module_exit(mptcp_balia_unregister);
|
||||||
|
|
||||||
|
MODULE_AUTHOR("Jaehyun Hwang, Anwar Walid, Qiuyu Peng, Steven H. Low");
|
||||||
|
MODULE_LICENSE("GPL");
|
||||||
|
MODULE_DESCRIPTION("MPTCP BALIA CONGESTION CONTROL ALGORITHM");
|
||||||
|
MODULE_VERSION("0.1");
|
494
net/mptcp/mptcp_binder.c
Executable file
494
net/mptcp/mptcp_binder.c
Executable file
@ -0,0 +1,494 @@
|
|||||||
|
#include <linux/module.h>
|
||||||
|
|
||||||
|
#include <net/mptcp.h>
|
||||||
|
#include <net/mptcp_v4.h>
|
||||||
|
|
||||||
|
#include <linux/route.h>
|
||||||
|
#include <linux/inet.h>
|
||||||
|
#include <linux/mroute.h>
|
||||||
|
#include <linux/spinlock_types.h>
|
||||||
|
#include <net/inet_ecn.h>
|
||||||
|
#include <net/route.h>
|
||||||
|
#include <net/xfrm.h>
|
||||||
|
#include <net/compat.h>
|
||||||
|
#include <linux/slab.h>
|
||||||
|
|
||||||
|
#define MPTCP_GW_MAX_LISTS 10
|
||||||
|
#define MPTCP_GW_LIST_MAX_LEN 6
|
||||||
|
#define MPTCP_GW_SYSCTL_MAX_LEN (15 * MPTCP_GW_LIST_MAX_LEN * \
|
||||||
|
MPTCP_GW_MAX_LISTS)
|
||||||
|
|
||||||
|
struct mptcp_gw_list {
|
||||||
|
struct in_addr list[MPTCP_GW_MAX_LISTS][MPTCP_GW_LIST_MAX_LEN];
|
||||||
|
u8 len[MPTCP_GW_MAX_LISTS];
|
||||||
|
};
|
||||||
|
|
||||||
|
struct binder_priv {
|
||||||
|
/* Worker struct for subflow establishment */
|
||||||
|
struct work_struct subflow_work;
|
||||||
|
|
||||||
|
struct mptcp_cb *mpcb;
|
||||||
|
|
||||||
|
/* Prevent multiple sub-sockets concurrently iterating over sockets */
|
||||||
|
spinlock_t *flow_lock;
|
||||||
|
};
|
||||||
|
|
||||||
|
static struct mptcp_gw_list *mptcp_gws;
|
||||||
|
static rwlock_t mptcp_gws_lock;
|
||||||
|
|
||||||
|
static int mptcp_binder_ndiffports __read_mostly = 1;
|
||||||
|
|
||||||
|
static char sysctl_mptcp_binder_gateways[MPTCP_GW_SYSCTL_MAX_LEN] __read_mostly;
|
||||||
|
|
||||||
|
static int mptcp_get_avail_list_ipv4(struct sock *sk)
|
||||||
|
{
|
||||||
|
int i, j, list_taken, opt_ret, opt_len;
|
||||||
|
unsigned char *opt_ptr, *opt_end_ptr, opt[MAX_IPOPTLEN];
|
||||||
|
|
||||||
|
for (i = 0; i < MPTCP_GW_MAX_LISTS; ++i) {
|
||||||
|
struct mptcp_tcp_sock *mptcp;
|
||||||
|
|
||||||
|
if (mptcp_gws->len[i] == 0)
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
mptcp_debug("mptcp_get_avail_list_ipv4: List %i\n", i);
|
||||||
|
list_taken = 0;
|
||||||
|
|
||||||
|
/* Loop through all sub-sockets in this connection */
|
||||||
|
mptcp_for_each_sub(tcp_sk(sk)->mpcb, mptcp) {
|
||||||
|
sk = mptcp_to_sock(mptcp);
|
||||||
|
|
||||||
|
mptcp_debug("mptcp_get_avail_list_ipv4: Next sock\n");
|
||||||
|
|
||||||
|
/* Reset length and options buffer, then retrieve
|
||||||
|
* from socket
|
||||||
|
*/
|
||||||
|
opt_len = MAX_IPOPTLEN;
|
||||||
|
memset(opt, 0, MAX_IPOPTLEN);
|
||||||
|
opt_ret = ip_getsockopt(sk, IPPROTO_IP,
|
||||||
|
IP_OPTIONS, (char __user *)opt, (int __user *)&opt_len);
|
||||||
|
if (opt_ret < 0) {
|
||||||
|
mptcp_debug("%s: MPTCP subsocket getsockopt() IP_OPTIONS failed, error %d\n",
|
||||||
|
__func__, opt_ret);
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If socket has no options, it has no stake in this list */
|
||||||
|
if (opt_len <= 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/* Iterate options buffer */
|
||||||
|
for (opt_ptr = &opt[0]; opt_ptr < &opt[opt_len]; opt_ptr++) {
|
||||||
|
if (*opt_ptr == IPOPT_LSRR) {
|
||||||
|
mptcp_debug("mptcp_get_avail_list_ipv4: LSRR options found\n");
|
||||||
|
goto sock_lsrr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
|
||||||
|
sock_lsrr:
|
||||||
|
/* Pointer to the 2nd to last address */
|
||||||
|
opt_end_ptr = opt_ptr+(*(opt_ptr+1))-4;
|
||||||
|
|
||||||
|
/* Addresses start 3 bytes after type offset */
|
||||||
|
opt_ptr += 3;
|
||||||
|
j = 0;
|
||||||
|
|
||||||
|
/* Different length lists cannot be the same */
|
||||||
|
if ((opt_end_ptr-opt_ptr)/4 != mptcp_gws->len[i])
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/* Iterate if we are still inside options list
|
||||||
|
* and sysctl list
|
||||||
|
*/
|
||||||
|
while (opt_ptr < opt_end_ptr && j < mptcp_gws->len[i]) {
|
||||||
|
/* If there is a different address, this list must
|
||||||
|
* not be set on this socket
|
||||||
|
*/
|
||||||
|
if (memcmp(&mptcp_gws->list[i][j], opt_ptr, 4))
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Jump 4 bytes to next address */
|
||||||
|
opt_ptr += 4;
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Reached the end without a differing address, lists
|
||||||
|
* are therefore identical.
|
||||||
|
*/
|
||||||
|
if (j == mptcp_gws->len[i]) {
|
||||||
|
mptcp_debug("mptcp_get_avail_list_ipv4: List already used\n");
|
||||||
|
list_taken = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Free list found if not taken by a socket */
|
||||||
|
if (!list_taken) {
|
||||||
|
mptcp_debug("mptcp_get_avail_list_ipv4: List free\n");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i >= MPTCP_GW_MAX_LISTS)
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
return i;
|
||||||
|
error:
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* The list of addresses is parsed each time a new connection is opened,
|
||||||
|
* to make sure it's up to date. In case of error, all the lists are
|
||||||
|
* marked as unavailable and the subflow's fingerprint is set to 0.
|
||||||
|
*/
|
||||||
|
static void mptcp_v4_add_lsrr(struct sock *sk, struct in_addr addr)
|
||||||
|
{
|
||||||
|
int i, j, ret;
|
||||||
|
unsigned char opt[MAX_IPOPTLEN] = {0};
|
||||||
|
struct tcp_sock *tp = tcp_sk(sk);
|
||||||
|
struct binder_priv *fmp = (struct binder_priv *)&tp->mpcb->mptcp_pm[0];
|
||||||
|
|
||||||
|
/* Read lock: multiple sockets can read LSRR addresses at the same
|
||||||
|
* time, but writes are done in mutual exclusion.
|
||||||
|
* Spin lock: must search for free list for one socket at a time, or
|
||||||
|
* multiple sockets could take the same list.
|
||||||
|
*/
|
||||||
|
read_lock(&mptcp_gws_lock);
|
||||||
|
spin_lock(fmp->flow_lock);
|
||||||
|
|
||||||
|
i = mptcp_get_avail_list_ipv4(sk);
|
||||||
|
|
||||||
|
/* Execution enters here only if a free path is found.
|
||||||
|
*/
|
||||||
|
if (i >= 0) {
|
||||||
|
opt[0] = IPOPT_NOP;
|
||||||
|
opt[1] = IPOPT_LSRR;
|
||||||
|
opt[2] = sizeof(mptcp_gws->list[i][0].s_addr) *
|
||||||
|
(mptcp_gws->len[i] + 1) + 3;
|
||||||
|
opt[3] = IPOPT_MINOFF;
|
||||||
|
for (j = 0; j < mptcp_gws->len[i]; ++j)
|
||||||
|
memcpy(opt + 4 +
|
||||||
|
(j * sizeof(mptcp_gws->list[i][0].s_addr)),
|
||||||
|
&mptcp_gws->list[i][j].s_addr,
|
||||||
|
sizeof(mptcp_gws->list[i][0].s_addr));
|
||||||
|
/* Final destination must be part of IP_OPTIONS parameter. */
|
||||||
|
memcpy(opt + 4 + (j * sizeof(addr.s_addr)), &addr.s_addr,
|
||||||
|
sizeof(addr.s_addr));
|
||||||
|
|
||||||
|
/* setsockopt must be inside the lock, otherwise another
|
||||||
|
* subflow could fail to see that we have taken a list.
|
||||||
|
*/
|
||||||
|
ret = ip_setsockopt(sk, IPPROTO_IP, IP_OPTIONS, (char __user *)opt,
|
||||||
|
4 + sizeof(mptcp_gws->list[i][0].s_addr) * (mptcp_gws->len[i] + 1));
|
||||||
|
|
||||||
|
if (ret < 0) {
|
||||||
|
mptcp_debug("%s: MPTCP subsock setsockopt() IP_OPTIONS failed, error %d\n",
|
||||||
|
__func__, ret);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
spin_unlock(fmp->flow_lock);
|
||||||
|
read_unlock(&mptcp_gws_lock);
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Parses the gateways sysctl string into a list of paths to different
 * gateways, and stores them for use with the Loose Source Routing (LSRR)
 * socket option. Each list must have "," separated addresses, and the lists
 * themselves must be separated by "-". Returns -1 in case one or more of the
 * addresses is not a valid ipv4/6 address.
 *
 * Called with the parsed result of the sysctl write (see
 * proc_mptcp_gateways()); takes mptcp_gws_lock for writing, so readers
 * (mptcp_get_avail_list_ipv4 et al.) never see a half-built table.
 */
static int mptcp_parse_gateway_ipv4(char *gateways)
{
	int i, j, k, ret;
	char *tmp_string = NULL;
	struct in_addr tmp_addr;

	/* 16 = longest dotted-quad "xxx.xxx.xxx.xxx" plus NUL. */
	tmp_string = kzalloc(16, GFP_KERNEL);
	if (tmp_string == NULL)
		return -ENOMEM;

	write_lock(&mptcp_gws_lock);

	/* Start from an empty gateway table; on any parse error below we
	 * also wipe it again (see the error label).
	 */
	memset(mptcp_gws, 0, sizeof(struct mptcp_gw_list));

	/* A TMP string is used since inet_pton needs a null terminated string
	 * but we do not want to modify the sysctl for obvious reasons.
	 * i will iterate over the SYSCTL string, j will iterate over the
	 * temporary string where each IP is copied into, k will iterate over
	 * the IPs in each list.
	 */
	for (i = j = k = 0;
			i < MPTCP_GW_SYSCTL_MAX_LEN && k < MPTCP_GW_MAX_LISTS;
			++i) {
		if (gateways[i] == '-' || gateways[i] == ',' || gateways[i] == '\0') {
			/* If the temp IP is empty and the current list is
			 * empty, we are done.
			 */
			if (j == 0 && mptcp_gws->len[k] == 0)
				break;

			/* Terminate the temp IP string, then if it is
			 * non-empty parse the IP and copy it.
			 */
			tmp_string[j] = '\0';
			if (j > 0) {
				mptcp_debug("mptcp_parse_gateway_list tmp: %s i: %d\n", tmp_string, i);

				/* in4_pton() returns non-zero on success. */
				ret = in4_pton(tmp_string, strlen(tmp_string),
						(u8 *)&tmp_addr.s_addr, '\0',
						NULL);

				if (ret) {
					mptcp_debug("mptcp_parse_gateway_list ret: %d s_addr: %pI4\n",
							ret,
							&tmp_addr.s_addr);
					memcpy(&mptcp_gws->list[k][mptcp_gws->len[k]].s_addr,
							&tmp_addr.s_addr,
							sizeof(tmp_addr.s_addr));
					mptcp_gws->len[k]++;
					j = 0;
					tmp_string[j] = '\0';
					/* Since we can't impose a limit to
					 * what the user can input, make sure
					 * there are not too many IPs in the
					 * SYSCTL string.
					 */
					if (mptcp_gws->len[k] > MPTCP_GW_LIST_MAX_LEN) {
						mptcp_debug("mptcp_parse_gateway_list too many members in list %i: max %i\n",
								k,
								MPTCP_GW_LIST_MAX_LEN);
						goto error;
					}
				} else {
					goto error;
				}
			}

			/* '-' (or end of string) closes the current list. */
			if (gateways[i] == '-' || gateways[i] == '\0')
				++k;
		} else {
			tmp_string[j] = gateways[i];
			++j;
		}
	}

	/* Number of flows is number of gateway lists plus master flow */
	mptcp_binder_ndiffports = k+1;

	write_unlock(&mptcp_gws_lock);
	kfree(tmp_string);

	return 0;

error:
	/* Invalid input: clear both the parsed table and the caller's
	 * sysctl buffer so no stale/partial configuration survives.
	 */
	memset(mptcp_gws, 0, sizeof(struct mptcp_gw_list));
	memset(gateways, 0, sizeof(char) * MPTCP_GW_SYSCTL_MAX_LEN);
	write_unlock(&mptcp_gws_lock);
	kfree(tmp_string);
	return -1;
}
|
||||||
|
|
||||||
|
/**
 * Create all new subflows, by doing calls to mptcp_initX_subsockets
 *
 * This function uses a goto next_subflow, to allow releasing the lock between
 * new subflows and giving other processes a chance to do some work on the
 * socket and potentially finishing the communication.
 *
 * Runs on mptcp_wq; queued from binder_create_subflows(), which took one
 * reference on the meta-sk and one on the mpcb that are dropped at the end
 * of this function.
 **/
static void create_subflow_worker(struct work_struct *work)
{
	const struct binder_priv *pm_priv = container_of(work,
						     struct binder_priv,
						     subflow_work);
	struct mptcp_cb *mpcb = pm_priv->mpcb;
	struct sock *meta_sk = mpcb->meta_sk;
	int iter = 0;

next_subflow:
	/* On every pass but the first, drop the locks (sock lock before
	 * mpcb_mutex, mirroring the acquisition order below) and yield so
	 * other contexts can progress between subflow creations.
	 */
	if (iter) {
		release_sock(meta_sk);
		mutex_unlock(&mpcb->mpcb_mutex);

		cond_resched();
	}
	mutex_lock(&mpcb->mpcb_mutex);
	lock_sock_nested(meta_sk, SINGLE_DEPTH_NESTING);

	/* Connection may have stopped being MPTCP while we were unlocked. */
	if (!mptcp(tcp_sk(meta_sk)))
		goto exit;

	iter++;

	if (sock_flag(meta_sk, SOCK_DEAD))
		goto exit;

	/* Don't add subflows before the master subflow is fully
	 * established (4th ack received).
	 */
	if (mpcb->master_sk &&
	    !tcp_sk(mpcb->master_sk)->mptcp->fully_established)
		goto exit;

	/* Add one subflow per iteration until mptcp_binder_ndiffports
	 * (gateway lists + master flow) subflows exist.
	 */
	if (mptcp_binder_ndiffports > iter &&
	    mptcp_binder_ndiffports > mptcp_subflow_count(mpcb)) {
		struct mptcp_loc4 loc;
		struct mptcp_rem4 rem;

		loc.addr.s_addr = inet_sk(meta_sk)->inet_saddr;
		loc.loc4_id = 0;
		loc.low_prio = 0;

		rem.addr.s_addr = inet_sk(meta_sk)->inet_daddr;
		rem.port = inet_sk(meta_sk)->inet_dport;
		rem.rem4_id = 0; /* Default 0 */

		mptcp_init4_subsockets(meta_sk, &loc, &rem);

		goto next_subflow;
	}

exit:
	release_sock(meta_sk);
	mutex_unlock(&mpcb->mpcb_mutex);
	/* Drop the references taken when this work was queued. */
	mptcp_mpcb_put(mpcb);
	sock_put(meta_sk);
}
|
||||||
|
|
||||||
|
/* Path-manager "new_session" hook: set up the per-connection binder
 * private area stored inside mpcb->mptcp_pm[].
 */
static void binder_new_session(const struct sock *meta_sk)
{
	struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
	struct binder_priv *fmp = (struct binder_priv *)&mpcb->mptcp_pm[0];
	/* NOTE: function-static, so this single spinlock is shared by ALL
	 * binder sessions — it serializes gateway-list allocation globally,
	 * not per connection.
	 */
	static DEFINE_SPINLOCK(flow_lock);

#if IS_ENABLED(CONFIG_IPV6)
	/* Binder only handles IPv4 (incl. v4-mapped); fall back to the
	 * default path manager for native IPv6 sessions.
	 */
	if (meta_sk->sk_family == AF_INET6 &&
	    !mptcp_v6_is_v4_mapped(meta_sk)) {
		mptcp_fallback_default(mpcb);
		return;
	}
#endif

	/* Initialize workqueue-struct */
	INIT_WORK(&fmp->subflow_work, create_subflow_worker);
	fmp->mpcb = mpcb;

	fmp->flow_lock = &flow_lock;
}
|
||||||
|
|
||||||
|
/* Path-manager "fully_established" hook: kick off subflow creation by
 * queuing create_subflow_worker() on mptcp_wq.
 */
static void binder_create_subflows(struct sock *meta_sk)
{
	struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
	struct binder_priv *pm_priv = (struct binder_priv *)&mpcb->mptcp_pm[0];

	/* No extra subflows in fallback mode, on the server side, or for a
	 * dying socket.
	 */
	if (mptcp_in_infinite_mapping_weak(mpcb) ||
	    mpcb->server_side || sock_flag(meta_sk, SOCK_DEAD))
		return;

	if (!work_pending(&pm_priv->subflow_work)) {
		/* Pin the meta-sk and the mpcb for the worker; both
		 * references are released in create_subflow_worker().
		 */
		sock_hold(meta_sk);
		refcount_inc(&mpcb->mpcb_refcnt);
		queue_work(mptcp_wq, &pm_priv->subflow_work);
	}
}
|
||||||
|
|
||||||
|
/* Path-manager "get_local_id" hook. Binder does not use address IDs;
 * always report id 0.
 */
static int binder_get_local_id(const struct sock *meta_sk, sa_family_t family,
			       union inet_addr *addr, bool *low_prio)
{
	return 0;
}
|
||||||
|
|
||||||
|
/* Callback function, executed when the sysctl mptcp.mptcp_binder_gateways
 * is read or written. Inspired from proc_tcp_congestion_control().
 *
 * On write: parse the new string into mptcp_gws via a scratch buffer (so a
 * failed parse never leaves a half-written sysctl string), then copy the
 * result back into ctl->data. On read: plain proc_dostring().
 */
static int proc_mptcp_gateways(struct ctl_table *ctl, int write,
			       void __user *buffer, size_t *lenp,
			       loff_t *ppos)
{
	int ret;
	struct ctl_table tbl = {
		.maxlen = MPTCP_GW_SYSCTL_MAX_LEN,
	};

	if (write) {
		tbl.data = kzalloc(MPTCP_GW_SYSCTL_MAX_LEN, GFP_KERNEL);
		if (tbl.data == NULL)
			return -ENOMEM;
		ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
		if (ret == 0) {
			ret = mptcp_parse_gateway_ipv4(tbl.data);
			/* On parse failure tbl.data was zeroed by
			 * mptcp_parse_gateway_ipv4(), so this copy clears
			 * the visible sysctl value rather than keeping a
			 * bad string.
			 */
			memcpy(ctl->data, tbl.data, MPTCP_GW_SYSCTL_MAX_LEN);
		}
		kfree(tbl.data);
	} else {
		ret = proc_dostring(ctl, write, buffer, lenp, ppos);
	}


	return ret;
}
|
||||||
|
|
||||||
|
/* Binder path-manager operations. Subflows are created once the master
 * subflow is fully established; each v4 subsocket gets an LSRR IP option
 * via mptcp_v4_add_lsrr().
 */
static struct mptcp_pm_ops binder __read_mostly = {
	.new_session = binder_new_session,
	.fully_established = binder_create_subflows,
	.get_local_id = binder_get_local_id,
	.init_subsocket_v4 = mptcp_v4_add_lsrr,
	.name = "binder",
	.owner = THIS_MODULE,
};
|
||||||
|
|
||||||
|
/* net/mptcp/mptcp_binder_gateways sysctl: "-"-separated gateway lists of
 * ","-separated IPv4 addresses, validated by proc_mptcp_gateways().
 */
static struct ctl_table binder_table[] = {
	{
		.procname = "mptcp_binder_gateways",
		.data = &sysctl_mptcp_binder_gateways,
		.maxlen	= sizeof(char) * MPTCP_GW_SYSCTL_MAX_LEN,
		.mode = 0644,
		.proc_handler = &proc_mptcp_gateways
	},
	{ }
};
|
||||||
|
|
||||||
|
/* Handle for unregistering the binder sysctl table on module exit. */
static struct ctl_table_header *mptcp_sysctl_binder;
|
||||||
|
/* General initialization of MPTCP_PM */
|
||||||
|
static int __init binder_register(void)
|
||||||
|
{
|
||||||
|
mptcp_gws = kzalloc(sizeof(*mptcp_gws), GFP_KERNEL);
|
||||||
|
if (!mptcp_gws)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
rwlock_init(&mptcp_gws_lock);
|
||||||
|
|
||||||
|
BUILD_BUG_ON(sizeof(struct binder_priv) > MPTCP_PM_SIZE);
|
||||||
|
|
||||||
|
mptcp_sysctl_binder = register_net_sysctl(&init_net, "net/mptcp",
|
||||||
|
binder_table);
|
||||||
|
if (!mptcp_sysctl_binder)
|
||||||
|
goto sysctl_fail;
|
||||||
|
|
||||||
|
if (mptcp_register_path_manager(&binder))
|
||||||
|
goto pm_failed;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
pm_failed:
|
||||||
|
unregister_net_sysctl_table(mptcp_sysctl_binder);
|
||||||
|
sysctl_fail:
|
||||||
|
kfree(mptcp_gws);
|
||||||
|
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Module teardown: unregister the path manager first so no new sessions
 * can reference the sysctl or the gateway table being torn down below.
 */
static void binder_unregister(void)
{
	mptcp_unregister_path_manager(&binder);
	unregister_net_sysctl_table(mptcp_sysctl_binder);
	kfree(mptcp_gws);
}
|
||||||
|
|
||||||
|
module_init(binder_register);
|
||||||
|
module_exit(binder_unregister);
|
||||||
|
|
||||||
|
MODULE_AUTHOR("Luca Boccassi, Duncan Eastoe, Christoph Paasch (ndiffports)");
|
||||||
|
MODULE_LICENSE("GPL");
|
||||||
|
MODULE_DESCRIPTION("BINDER MPTCP");
|
||||||
|
MODULE_VERSION("0.1");
|
481
net/mptcp/mptcp_blest.c
Executable file
481
net/mptcp/mptcp_blest.c
Executable file
@ -0,0 +1,481 @@
|
|||||||
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
|
/* MPTCP Scheduler to reduce HoL-blocking and spurious retransmissions.
|
||||||
|
*
|
||||||
|
* Algorithm Design:
|
||||||
|
* Simone Ferlin <ferlin@simula.no>
|
||||||
|
* Ozgu Alay <ozgu@simula.no>
|
||||||
|
* Olivier Mehani <olivier.mehani@nicta.com.au>
|
||||||
|
* Roksana Boreli <roksana.boreli@nicta.com.au>
|
||||||
|
*
|
||||||
|
* Initial Implementation:
|
||||||
|
* Simone Ferlin <ferlin@simula.no>
|
||||||
|
*
|
||||||
|
* Additional Authors:
|
||||||
|
* Daniel Weber <weberd@cs.uni-bonn.de>
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public License
|
||||||
|
* as published by the Free Software Foundation; either version
|
||||||
|
* 2 of the License, or (at your option) any later version.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <linux/module.h>
|
||||||
|
#include <net/mptcp.h>
|
||||||
|
#include <trace/events/tcp.h>
|
||||||
|
|
||||||
|
/* BLEST tuning knobs (all writable at runtime via module params).
 * lambda scales the fast flow's estimated send rate; it adapts between
 * min_lambda and max_lambda based on observed retransmissions
 * (see blestsched_update_lambda()).
 */
static unsigned char lambda __read_mostly = 12;
module_param(lambda, byte, 0644);
MODULE_PARM_DESC(lambda, "Divided by 10 for scaling factor of fast flow rate estimation");

static unsigned char max_lambda __read_mostly = 13;
module_param(max_lambda, byte, 0644);
MODULE_PARM_DESC(max_lambda, "Divided by 10 for maximum scaling factor of fast flow rate estimation");

static unsigned char min_lambda __read_mostly = 10;
module_param(min_lambda, byte, 0644);
MODULE_PARM_DESC(min_lambda, "Divided by 10 for minimum scaling factor of fast flow rate estimation");

static unsigned char dyn_lambda_good = 10; /* 1% */
module_param(dyn_lambda_good, byte, 0644);
MODULE_PARM_DESC(dyn_lambda_good, "Decrease of lambda in positive case.");

static unsigned char dyn_lambda_bad = 40; /* 4% */
module_param(dyn_lambda_bad, byte, 0644);
MODULE_PARM_DESC(dyn_lambda_bad, "Increase of lambda in negative case.");
|
||||||
|
|
||||||
|
/* Per-subflow BLEST state, stored in tp->mptcp->mptcp_sched[]. */
struct blestsched_priv {
	u32 last_rbuf_opti;	/* jiffies of last rcv-buf penalization */
	u32 min_srtt_us;	/* min smoothed RTT seen on this subflow */
	u32 max_srtt_us;	/* max smoothed RTT seen on this subflow */
};
|
||||||
|
|
||||||
|
/* Per-connection BLEST state, stored in mpcb->mptcp_sched[]. */
struct blestsched_cb {
	bool retrans_flag;	/* retransmission seen since last lambda update */
	s16 lambda_1000; /* values range from min_lambda * 100 to max_lambda * 100 */
	u32 last_lambda_update;	/* jiffies of last lambda adjustment */
};
|
||||||
|
|
||||||
|
/* Per-subflow scheduler private data lives in tp->mptcp->mptcp_sched[]. */
static struct blestsched_priv *blestsched_get_priv(const struct tcp_sock *tp)
{
	return (struct blestsched_priv *)&tp->mptcp->mptcp_sched[0];
}
|
||||||
|
|
||||||
|
/* Per-connection scheduler private data lives in mpcb->mptcp_sched[]. */
static struct blestsched_cb *blestsched_get_cb(const struct tcp_sock *tp)
{
	return (struct blestsched_cb *)&tp->mpcb->mptcp_sched[0];
}
|
||||||
|
|
||||||
|
/* Adapt lambda_1000 (x1000 scaling factor of the fast-flow rate estimate)
 * at most once per slow-flow RTT: raise it when retransmissions were seen
 * (be more conservative about using the slow flow), lower it otherwise.
 * @sk is the slow subflow under consideration, @meta_sk the meta socket.
 */
static void blestsched_update_lambda(struct sock *meta_sk, struct sock *sk)
{
	struct blestsched_cb *blest_cb = blestsched_get_cb(tcp_sk(meta_sk));
	struct blestsched_priv *blest_p = blestsched_get_priv(tcp_sk(sk));

	/* Rate-limit: srtt is stored <<3, hence the >>3 to get usecs. */
	if (tcp_jiffies32 - blest_cb->last_lambda_update < usecs_to_jiffies(blest_p->min_srtt_us >> 3))
		return;

	/* if there have been retransmissions of packets of the slow flow
	 * during the slow flows last RTT => increase lambda
	 * otherwise decrease
	 */
	if (blest_cb->retrans_flag) {
		/* need to slow down on the slow flow */
		blest_cb->lambda_1000 += dyn_lambda_bad;
	} else {
		/* use the slow flow more */
		blest_cb->lambda_1000 -= dyn_lambda_good;
	}
	blest_cb->retrans_flag = false;

	/* cap lambda_1000 to its value range */
	blest_cb->lambda_1000 = min_t(s16, blest_cb->lambda_1000, max_lambda * 100);
	blest_cb->lambda_1000 = max_t(s16, blest_cb->lambda_1000, min_lambda * 100);

	blest_cb->last_lambda_update = tcp_jiffies32;
}
|
||||||
|
|
||||||
|
/* how many bytes will sk send during the rtt of another, slower flow?
 * @time_8: the slower flow's linger time, in srtt units (usecs << 3).
 * The estimate is scaled up by lambda_1000/1000 to be deliberately
 * pessimistic about the space the fast flow will need.
 */
static u32 blestsched_estimate_bytes(struct sock *sk, u32 time_8)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct blestsched_priv *blest_p = blestsched_get_priv(tp);
	struct blestsched_cb *blest_cb = blestsched_get_cb(mptcp_meta_tp(tp));
	u32 avg_rtt, num_rtts, ca_cwnd, packets;

	avg_rtt = (blest_p->min_srtt_us + blest_p->max_srtt_us) / 2;
	if (avg_rtt == 0)
		num_rtts = 1; /* sanity */
	else
		num_rtts = (time_8 / avg_rtt) + 1; /* round up */

	/* during num_rtts, how many bytes will be sent on the flow?
	 * assumes for simplification that Reno is applied as congestion-control
	 */
	if (tp->snd_ssthresh == TCP_INFINITE_SSTHRESH) {
		/* we are in initial slow start */
		if (num_rtts > 16)
			num_rtts = 16; /* cap for sanity */
		/* cwnd doubles per RTT: geometric sum cwnd*(2^n - 1). */
		packets = tp->snd_cwnd * ((1 << num_rtts) - 1); /* cwnd + 2*cwnd + 4*cwnd */
	} else {
		ca_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh + 1); /* assume we jump to CA already */
		/* linear growth of +1 per RTT: arithmetic-series estimate */
		packets = (ca_cwnd + (num_rtts - 1) / 2) * num_rtts;
	}

	return div_u64(((u64)packets) * tp->mss_cache * blest_cb->lambda_1000, 1000);
}
|
||||||
|
|
||||||
|
/* Estimate how long (in srtt units) a segment sent now on @sk would take
 * to be acked: interpolate between the subflow's min and max srtt based on
 * how full its cwnd is, and never report less than the current srtt.
 */
static u32 blestsched_estimate_linger_time(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct blestsched_priv *blest_p = blestsched_get_priv(tp);
	u32 estimate, slope, inflight, cwnd;

	inflight = tcp_packets_in_flight(tp) + 1; /* take into account the new one */
	cwnd = tp->snd_cwnd;

	if (inflight >= cwnd) {
		estimate = blest_p->max_srtt_us;
	} else {
		slope = blest_p->max_srtt_us - blest_p->min_srtt_us;
		if (cwnd == 0)
			cwnd = 1; /* sanity */
		estimate = blest_p->min_srtt_us + (slope * inflight) / cwnd;
	}

	return (tp->srtt_us > estimate) ? tp->srtt_us : estimate;
}
|
||||||
|
|
||||||
|
/* This is the BLEST scheduler. This function decides on which flow to send
 * a given MSS. If all subflows are found to be busy or the currently best
 * subflow is estimated to possibly cause HoL-blocking, NULL is returned.
 */
struct sock *blest_get_available_subflow(struct sock *meta_sk, struct sk_buff *skb,
					 bool zero_wnd_test)
{
	struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
	struct sock *bestsk, *minsk = NULL;
	struct tcp_sock *meta_tp, *besttp;
	struct mptcp_tcp_sock *mptcp;
	struct blestsched_priv *blest_p;
	u32 min_srtt = U32_MAX;

	/* Answer data_fin on same subflow!!! */
	if (meta_sk->sk_shutdown & RCV_SHUTDOWN &&
	    skb && mptcp_is_data_fin(skb)) {
		mptcp_for_each_sub(mpcb, mptcp) {
			bestsk = mptcp_to_sock(mptcp);

			if (tcp_sk(bestsk)->mptcp->path_index == mpcb->dfin_path_index &&
			    mptcp_is_available(bestsk, skb, zero_wnd_test))
				return bestsk;
		}
	}

	/* First, find the subflow with the overall lowest srtt ("fast flow");
	 * also refresh each subflow's min/max srtt bookkeeping on the way.
	 */
	mptcp_for_each_sub(mpcb, mptcp) {
		bestsk = mptcp_to_sock(mptcp);
		besttp = tcp_sk(bestsk);
		blest_p = blestsched_get_priv(besttp);

		/* Set of states for which we are allowed to send data */
		if (!mptcp_sk_can_send(bestsk))
			continue;

		/* We do not send data on this subflow unless it is
		 * fully established, i.e. the 4th ack has been received.
		 */
		if (besttp->mptcp->pre_established)
			continue;

		blest_p->min_srtt_us = min(blest_p->min_srtt_us, besttp->srtt_us);
		blest_p->max_srtt_us = max(blest_p->max_srtt_us, besttp->srtt_us);

		/* record minimal rtt */
		if (besttp->srtt_us < min_srtt) {
			min_srtt = besttp->srtt_us;
			minsk = bestsk;
		}
	}

	/* find the current best subflow according to the default scheduler */
	bestsk = get_available_subflow(meta_sk, skb, zero_wnd_test);

	/* if we decided to use a slower flow, we have the option of not using it at all */
	if (bestsk && minsk && bestsk != minsk) {
		u32 slow_linger_time, fast_bytes, slow_inflight_bytes, slow_bytes, avail_space;
		u32 buffered_bytes = 0;

		meta_tp = tcp_sk(meta_sk);
		besttp = tcp_sk(bestsk);

		blestsched_update_lambda(meta_sk, bestsk);

		/* if we send this SKB now, it will be acked in besttp->srtt seconds
		 * during this time: how many bytes will we send on the fast flow?
		 */
		slow_linger_time = blestsched_estimate_linger_time(bestsk);
		fast_bytes = blestsched_estimate_bytes(minsk, slow_linger_time);

		if (skb)
			buffered_bytes = skb->len;

		/* is the required space available in the mptcp meta send window?
		 * we assume that all bytes inflight on the slow path will be acked in besttp->srtt seconds
		 * (just like the SKB if it was sent now) -> that means that those inflight bytes will
		 * keep occupying space in the meta window until then
		 */
		slow_inflight_bytes = besttp->write_seq - besttp->snd_una;
		slow_bytes = buffered_bytes + slow_inflight_bytes; // bytes of this SKB plus those in flight already

		avail_space = (slow_bytes < meta_tp->snd_wnd) ? (meta_tp->snd_wnd - slow_bytes) : 0;

		if (fast_bytes > avail_space) {
			/* sending this SKB on the slow flow means
			 * we wouldn't be able to send all the data we'd like to send on the fast flow
			 * so don't do that
			 */
			return NULL;
		}
	}

	return bestsk;
}
|
||||||
|
|
||||||
|
/* copy from mptcp_sched.c: mptcp_rcv_buf_optimization
 *
 * Consider retransmitting the meta-level rtx head on subflow @sk to avoid
 * idle time when another (slower) subflow is holding up the receive buffer;
 * additionally records the retransmission for BLEST's lambda adaptation and
 * penalizes (halves cwnd of) slower sibling subflows.
 * Returns the skb to (re)send on @sk, or NULL.
 */
static struct sk_buff *mptcp_blest_rcv_buf_optimization(struct sock *sk, int penal)
{
	struct sock *meta_sk;
	const struct tcp_sock *tp = tcp_sk(sk);
	struct mptcp_tcp_sock *mptcp;
	struct sk_buff *skb_head;
	struct blestsched_priv *blest_p = blestsched_get_priv(tp);
	struct blestsched_cb *blest_cb;

	meta_sk = mptcp_meta_sk(sk);
	skb_head = tcp_rtx_queue_head(meta_sk);

	if (!skb_head)
		return NULL;

	/* If penalization is optional (coming from mptcp_next_segment() and
	 * We are not send-buffer-limited we do not penalize. The retransmission
	 * is just an optimization to fix the idle-time due to the delay before
	 * we wake up the application.
	 */
	if (!penal && sk_stream_memory_free(meta_sk))
		goto retrans;

	/* Record the occurrence of a retransmission to update the lambda value */
	blest_cb = blestsched_get_cb(tcp_sk(meta_sk));
	blest_cb->retrans_flag = true;

	/* Only penalize again after an RTT has elapsed */
	if (tcp_jiffies32 - blest_p->last_rbuf_opti < usecs_to_jiffies(tp->srtt_us >> 3))
		goto retrans;

	/* Half the cwnd of the slow flows */
	mptcp_for_each_sub(tp->mpcb, mptcp) {
		struct tcp_sock *tp_it = mptcp->tp;

		/* Only siblings that already carry this segment qualify. */
		if (tp_it != tp &&
		    TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp_it->mptcp->path_index)) {
			if (tp->srtt_us < tp_it->srtt_us && inet_csk((struct sock *)tp_it)->icsk_ca_state == TCP_CA_Open) {
				u32 prior_cwnd = tp_it->snd_cwnd;

				tp_it->snd_cwnd = max(tp_it->snd_cwnd >> 1U, 1U);

				/* If in slow start, do not reduce the ssthresh */
				if (prior_cwnd >= tp_it->snd_ssthresh)
					tp_it->snd_ssthresh = max(tp_it->snd_ssthresh >> 1U, 2U);

				blest_p->last_rbuf_opti = tcp_jiffies32;
			}
		}
	}

retrans:

	/* Segment not yet injected into this path? Take it!!! */
	if (!(TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp->mptcp->path_index))) {
		bool do_retrans = false;
		mptcp_for_each_sub(tp->mpcb, mptcp) {
			struct tcp_sock *tp_it = mptcp->tp;

			if (tp_it != tp &&
			    TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp_it->mptcp->path_index)) {
				/* Sibling nearly stalled: retransmit here. */
				if (tp_it->snd_cwnd <= 4) {
					do_retrans = true;
					break;
				}

				/* Only retransmit if this subflow is at
				 * least 4x faster than the one holding
				 * the segment.
				 */
				if (4 * tp->srtt_us >= tp_it->srtt_us) {
					do_retrans = false;
					break;
				} else {
					do_retrans = true;
				}
			}
		}

		if (do_retrans && mptcp_is_available(sk, skb_head, false)) {
			trace_mptcp_retransmit(sk, skb_head);
			return skb_head;
		}
	}
	return NULL;
}
|
||||||
|
|
||||||
|
/* copy from mptcp_sched.c: __mptcp_next_segment */
/* Returns the next segment to be sent from the mptcp meta-queue.
 * (chooses the reinject queue if any segment is waiting in it, otherwise,
 * chooses the normal write queue).
 * Sets *@reinject to 1 if the returned segment comes from the
 * reinject queue. Sets it to 0 if it is the regular send-head of the meta-sk,
 * and sets it to -1 if it is a meta-level retransmission to optimize the
 * receive-buffer.
 */
static struct sk_buff *__mptcp_blest_next_segment(struct sock *meta_sk, int *reinject)
{
	const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
	struct sk_buff *skb = NULL;

	*reinject = 0;

	/* If we are in fallback-mode, just take from the meta-send-queue */
	if (mpcb->infinite_mapping_snd || mpcb->send_infinite_mapping)
		return tcp_send_head(meta_sk);

	skb = skb_peek(&mpcb->reinject_queue);

	if (skb) {
		*reinject = 1;
	} else {
		skb = tcp_send_head(meta_sk);

		/* Nothing queued but the application is blocked on send
		 * space: try a receive-buffer-optimizing retransmission.
		 */
		if (!skb && meta_sk->sk_socket &&
		    test_bit(SOCK_NOSPACE, &meta_sk->sk_socket->flags) &&
		    sk_stream_wspace(meta_sk) < sk_stream_min_wspace(meta_sk)) {
			struct sock *subsk = blest_get_available_subflow(meta_sk, NULL,
									 false);
			if (!subsk)
				return NULL;

			skb = mptcp_blest_rcv_buf_optimization(subsk, 0);
			if (skb)
				*reinject = -1;
		}
	}
	return skb;
}
|
||||||
|
|
||||||
|
/* copy from mptcp_sched.c: mptcp_next_segment
 *
 * Scheduler "next_segment" entry point: pick the next skb and the subflow
 * (*@subsk) to send it on, and cap the sendable length via *@limit
 * according to the subflow's cwnd/gso and send window.
 */
static struct sk_buff *mptcp_blest_next_segment(struct sock *meta_sk,
						int *reinject,
						struct sock **subsk,
						unsigned int *limit)
{
	struct sk_buff *skb = __mptcp_blest_next_segment(meta_sk, reinject);
	unsigned int mss_now;
	struct tcp_sock *subtp;
	u16 gso_max_segs;
	u32 max_len, max_segs, window, needed;

	/* As we set it, we have to reset it as well. */
	*limit = 0;

	if (!skb)
		return NULL;

	*subsk = blest_get_available_subflow(meta_sk, skb, false);
	if (!*subsk)
		return NULL;

	subtp = tcp_sk(*subsk);
	mss_now = tcp_current_mss(*subsk);

	/* Meta send window closed for this skb: fall back to a
	 * receive-buffer-optimizing retransmission (forced penalization).
	 */
	if (!*reinject && unlikely(!tcp_snd_wnd_test(tcp_sk(meta_sk), skb, mss_now))) {
		skb = mptcp_blest_rcv_buf_optimization(*subsk, 1);
		if (skb)
			*reinject = -1;
		else
			return NULL;
	}

	/* No splitting required, as we will only send one single segment */
	if (skb->len <= mss_now)
		return skb;

	/* The following is similar to tcp_mss_split_point, but
	 * we do not care about nagle, because we will anyways
	 * use TCP_NAGLE_PUSH, which overrides this.
	 *
	 * So, we first limit according to the cwnd/gso-size and then according
	 * to the subflow's window.
	 */

	gso_max_segs = (*subsk)->sk_gso_max_segs;
	if (!gso_max_segs) /* No gso supported on the subflow's NIC */
		gso_max_segs = 1;
	max_segs = min_t(unsigned int, tcp_cwnd_test(subtp, skb), gso_max_segs);
	if (!max_segs)
		return NULL;

	max_len = mss_now * max_segs;
	window = tcp_wnd_end(subtp) - subtp->write_seq;

	needed = min(skb->len, window);
	if (max_len <= skb->len)
		/* Take max_win, which is actually the cwnd/gso-size */
		*limit = max_len;
	else
		/* Or, take the window */
		*limit = needed;

	return skb;
}
|
||||||
|
|
||||||
|
/* Scheduler "init" hook, called per subflow: reset per-subflow srtt
 * tracking and, on the first subflow of a connection, seed lambda_1000
 * from the module parameter.
 */
static void blestsched_init(struct sock *sk)
{
	struct blestsched_priv *blest_p = blestsched_get_priv(tcp_sk(sk));
	struct blestsched_cb *blest_cb = blestsched_get_cb(tcp_sk(mptcp_meta_sk(sk)));

	blest_p->last_rbuf_opti = tcp_jiffies32;
	blest_p->min_srtt_us = U32_MAX;
	blest_p->max_srtt_us = 0;

	/* Per-connection state is zeroed at mpcb creation; a zero
	 * lambda_1000 thus marks a not-yet-initialized connection.
	 */
	if (!blest_cb->lambda_1000) {
		blest_cb->lambda_1000 = lambda * 100;
		blest_cb->last_lambda_update = tcp_jiffies32;
	}
}
|
||||||
|
|
||||||
|
/* BLEST scheduler operations registered with the MPTCP core. */
static struct mptcp_sched_ops mptcp_sched_blest = {
	.get_subflow = blest_get_available_subflow,
	.next_segment = mptcp_blest_next_segment,
	.init = blestsched_init,
	.name = "blest",
	.owner = THIS_MODULE,
};
|
||||||
|
|
||||||
|
/* Module init: verify the private areas fit their slots, then register
 * the scheduler. Returns 0 on success, -1 on registration failure.
 */
static int __init blest_register(void)
{
	BUILD_BUG_ON(sizeof(struct blestsched_priv) > MPTCP_SCHED_SIZE);
	BUILD_BUG_ON(sizeof(struct blestsched_cb) > MPTCP_SCHED_DATA_SIZE);

	if (mptcp_register_scheduler(&mptcp_sched_blest))
		return -1;

	return 0;
}
|
||||||
|
|
||||||
|
/* Module exit: remove the BLEST scheduler from the MPTCP core. */
static void blest_unregister(void)
{
	mptcp_unregister_scheduler(&mptcp_sched_blest);
}
|
||||||
|
|
||||||
|
module_init(blest_register);
|
||||||
|
module_exit(blest_unregister);
|
||||||
|
|
||||||
|
MODULE_AUTHOR("Simone Ferlin, Daniel Weber");
|
||||||
|
MODULE_LICENSE("GPL");
|
||||||
|
MODULE_DESCRIPTION("BLEST scheduler for MPTCP, based on default minimum RTT scheduler");
|
||||||
|
MODULE_VERSION("0.95");
|
262
net/mptcp/mptcp_coupled.c
Executable file
262
net/mptcp/mptcp_coupled.c
Executable file
@ -0,0 +1,262 @@
|
|||||||
|
/*
|
||||||
|
* MPTCP implementation - Linked Increase congestion control Algorithm (LIA)
|
||||||
|
*
|
||||||
|
* Initial Design & Implementation:
|
||||||
|
* Sébastien Barré <sebastien.barre@uclouvain.be>
|
||||||
|
*
|
||||||
|
* Current Maintainer & Author:
|
||||||
|
* Christoph Paasch <christoph.paasch@uclouvain.be>
|
||||||
|
*
|
||||||
|
* Additional authors:
|
||||||
|
* Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
|
||||||
|
* Gregory Detal <gregory.detal@uclouvain.be>
|
||||||
|
* Fabien Duchêne <fabien.duchene@uclouvain.be>
|
||||||
|
* Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
|
||||||
|
* Lavkesh Lahngir <lavkesh51@gmail.com>
|
||||||
|
* Andreas Ripke <ripke@neclab.eu>
|
||||||
|
* Vlad Dogaru <vlad.dogaru@intel.com>
|
||||||
|
* Octavian Purdila <octavian.purdila@intel.com>
|
||||||
|
* John Ronan <jronan@tssg.org>
|
||||||
|
* Catalin Nicutar <catalin.nicutar@gmail.com>
|
||||||
|
* Brandon Heller <brandonh@stanford.edu>
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public License
|
||||||
|
* as published by the Free Software Foundation; either version
|
||||||
|
* 2 of the License, or (at your option) any later version.
|
||||||
|
*/
|
||||||
|
#include <net/tcp.h>
|
||||||
|
#include <net/mptcp.h>
|
||||||
|
|
||||||
|
#include <linux/module.h>
|
||||||
|
|
||||||
|
/* Scaling is done in the numerator with alpha_scale_num and in the denominator
 * with alpha_scale_den.
 *
 * To downscale, we just need to use alpha_scale.
 *
 * We have: alpha_scale = alpha_scale_num / (alpha_scale_den ^ 2)
 */
static int alpha_scale_den = 10;
static int alpha_scale_num = 32;
static int alpha_scale = 12;
||||||
|
/* Per-connection LIA state, stored in the meta-sk's inet_csk_ca() area. */
struct mptcp_ccc {
	u64 alpha;		/* coupled aggressiveness factor (scaled) */
	bool forced_update;	/* force alpha recalculation on next cong_avoid */
};
|
||||||
|
|
||||||
|
/* A subflow counts for alpha only once it can send and has an RTT sample. */
static inline int mptcp_ccc_sk_can_send(const struct sock *sk)
{
	return mptcp_sk_can_send(sk) && tcp_sk(sk)->srtt_us;
}
|
||||||
|
|
||||||
|
/* Read the connection-wide alpha from the meta-sk's CA private area. */
static inline u64 mptcp_get_alpha(const struct sock *meta_sk)
{
	return ((struct mptcp_ccc *)inet_csk_ca(meta_sk))->alpha;
}
|
||||||
|
|
||||||
|
/* Store the connection-wide alpha on the meta-socket. */
static inline void mptcp_set_alpha(const struct sock *meta_sk, u64 alpha)
{
	struct mptcp_ccc *ccc = inet_csk_ca(meta_sk);

	ccc->alpha = alpha;
}
|
||||||
|
|
||||||
|
/* Upscale val into fixed-point representation: val * 2^scale in u64. */
static inline u64 mptcp_ccc_scale(u32 val, int scale)
{
	u64 wide = val;

	return wide << scale;
}
|
||||||
|
|
||||||
|
static inline bool mptcp_get_forced(const struct sock *meta_sk)
|
||||||
|
{
|
||||||
|
return ((struct mptcp_ccc *)inet_csk_ca(meta_sk))->forced_update;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Set/clear the "recalc alpha on next cong_avoid" flag on the meta-socket. */
static inline void mptcp_set_forced(const struct sock *meta_sk, bool force)
{
	struct mptcp_ccc *ccc = inet_csk_ca(meta_sk);

	ccc->forced_update = force;
}
|
||||||
|
|
||||||
|
static void mptcp_ccc_recalc_alpha(const struct sock *sk)
|
||||||
|
{
|
||||||
|
const struct mptcp_cb *mpcb = tcp_sk(sk)->mpcb;
|
||||||
|
const struct mptcp_tcp_sock *mptcp;
|
||||||
|
int best_cwnd = 0, best_rtt = 0, can_send = 0;
|
||||||
|
u64 max_numerator = 0, sum_denominator = 0, alpha = 1;
|
||||||
|
|
||||||
|
if (!mpcb)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/* Do regular alpha-calculation for multiple subflows */
|
||||||
|
|
||||||
|
/* Find the max numerator of the alpha-calculation */
|
||||||
|
mptcp_for_each_sub(mpcb, mptcp) {
|
||||||
|
const struct sock *sub_sk = mptcp_to_sock(mptcp);
|
||||||
|
struct tcp_sock *sub_tp = tcp_sk(sub_sk);
|
||||||
|
u64 tmp;
|
||||||
|
|
||||||
|
if (!mptcp_ccc_sk_can_send(sub_sk))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
can_send++;
|
||||||
|
|
||||||
|
/* We need to look for the path, that provides the max-value.
|
||||||
|
* Integer-overflow is not possible here, because
|
||||||
|
* tmp will be in u64.
|
||||||
|
*/
|
||||||
|
tmp = div64_u64(mptcp_ccc_scale(sub_tp->snd_cwnd,
|
||||||
|
alpha_scale_num), (u64)sub_tp->srtt_us * sub_tp->srtt_us);
|
||||||
|
|
||||||
|
if (tmp >= max_numerator) {
|
||||||
|
max_numerator = tmp;
|
||||||
|
best_cwnd = sub_tp->snd_cwnd;
|
||||||
|
best_rtt = sub_tp->srtt_us;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* No subflow is able to send - we don't care anymore */
|
||||||
|
if (unlikely(!can_send))
|
||||||
|
goto exit;
|
||||||
|
|
||||||
|
/* Calculate the denominator */
|
||||||
|
mptcp_for_each_sub(mpcb, mptcp) {
|
||||||
|
const struct sock *sub_sk = mptcp_to_sock(mptcp);
|
||||||
|
struct tcp_sock *sub_tp = tcp_sk(sub_sk);
|
||||||
|
|
||||||
|
if (!mptcp_ccc_sk_can_send(sub_sk))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
sum_denominator += div_u64(
|
||||||
|
mptcp_ccc_scale(sub_tp->snd_cwnd,
|
||||||
|
alpha_scale_den) * best_rtt,
|
||||||
|
sub_tp->srtt_us);
|
||||||
|
}
|
||||||
|
sum_denominator *= sum_denominator;
|
||||||
|
if (unlikely(!sum_denominator)) {
|
||||||
|
pr_err("%s: sum_denominator == 0\n", __func__);
|
||||||
|
mptcp_for_each_sub(mpcb, mptcp) {
|
||||||
|
const struct sock *sub_sk = mptcp_to_sock(mptcp);
|
||||||
|
struct tcp_sock *sub_tp = tcp_sk(sub_sk);
|
||||||
|
pr_err("%s: pi:%d, state:%d\n, rtt:%u, cwnd: %u",
|
||||||
|
__func__, sub_tp->mptcp->path_index,
|
||||||
|
sub_sk->sk_state, sub_tp->srtt_us,
|
||||||
|
sub_tp->snd_cwnd);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
alpha = div64_u64(mptcp_ccc_scale(best_cwnd, alpha_scale_num), sum_denominator);
|
||||||
|
|
||||||
|
if (unlikely(!alpha))
|
||||||
|
alpha = 1;
|
||||||
|
|
||||||
|
exit:
|
||||||
|
mptcp_set_alpha(mptcp_meta_sk(sk), alpha);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Congestion-ops init hook: reset the per-connection LIA state on the
 * meta-socket. Plain (non-MPTCP) sockets keep pure reno behaviour, so
 * there is nothing to initialize for them.
 */
static void mptcp_ccc_init(struct sock *sk)
{
	if (!mptcp(tcp_sk(sk)))
		return;

	mptcp_set_forced(mptcp_meta_sk(sk), 0);
	mptcp_set_alpha(mptcp_meta_sk(sk), 1);
}
|
||||||
|
|
||||||
|
static void mptcp_ccc_cwnd_event(struct sock *sk, enum tcp_ca_event event)
|
||||||
|
{
|
||||||
|
if (event == CA_EVENT_LOSS)
|
||||||
|
mptcp_ccc_recalc_alpha(sk);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* CA-state transition hook: mark the meta-socket so that the next
 * cong_avoid pass recomputes alpha (the transition may have changed
 * which subflows can send). The new state itself is not inspected.
 */
static void mptcp_ccc_set_state(struct sock *sk, u8 ca_state)
{
	if (mptcp(tcp_sk(sk)))
		mptcp_set_forced(mptcp_meta_sk(sk), 1);
}
|
||||||
|
|
||||||
|
/* Main congestion-avoidance hook for LIA.
 *
 * Non-MPTCP sockets fall back to plain reno. In slow-start we grow like
 * reno and just keep alpha up to date. In congestion avoidance the cwnd
 * is increased by one every max(2^alpha_scale / alpha, cwnd) acked-window
 * rounds, which couples the increase across subflows per RFC 6356.
 */
static void mptcp_ccc_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int snd_cwnd;
	u64 alpha;

	if (!mptcp(tp)) {
		tcp_reno_cong_avoid(sk, ack, acked);
		return;
	}

	if (!tcp_is_cwnd_limited(sk))
		return;

	if (tcp_in_slow_start(tp)) {
		/* In "safe" area, increase. */
		tcp_slow_start(tp, acked);
		mptcp_ccc_recalc_alpha(sk);
		return;
	}

	/* A state change requested a recalc (see mptcp_ccc_set_state). */
	if (mptcp_get_forced(mptcp_meta_sk(sk))) {
		mptcp_ccc_recalc_alpha(sk);
		mptcp_set_forced(mptcp_meta_sk(sk), 0);
	}

	alpha = mptcp_get_alpha(mptcp_meta_sk(sk));

	/* This may happen, if at the initialization, the mpcb
	 * was not yet attached to the sock, and thus
	 * initializing alpha failed.
	 */
	if (unlikely(!alpha))
		alpha = 1;

	/* Downscale: increase period = 2^alpha_scale / alpha rounds. */
	snd_cwnd = (int)div_u64((u64)mptcp_ccc_scale(1, alpha_scale), alpha);

	/* snd_cwnd_cnt >= max (scale * tot_cwnd / alpha, cwnd)
	 * Thus, we select here the max value.
	 */
	if (snd_cwnd < tp->snd_cwnd)
		snd_cwnd = tp->snd_cwnd;

	if (tp->snd_cwnd_cnt >= snd_cwnd) {
		if (tp->snd_cwnd < tp->snd_cwnd_clamp) {
			tp->snd_cwnd++;
			mptcp_ccc_recalc_alpha(sk);
		}

		tp->snd_cwnd_cnt = 0;
	} else {
		tp->snd_cwnd_cnt++;
	}
}
|
||||||
|
|
||||||
|
/* LIA ("Linked Increase Algorithm") congestion-control operations,
 * registered under the name "lia". ssthresh/undo fall back to reno.
 */
static struct tcp_congestion_ops mptcp_ccc = {
	.init		= mptcp_ccc_init,
	.ssthresh	= tcp_reno_ssthresh,
	.cong_avoid	= mptcp_ccc_cong_avoid,
	.undo_cwnd	= tcp_reno_undo_cwnd,
	.cwnd_event	= mptcp_ccc_cwnd_event,
	.set_state	= mptcp_ccc_set_state,
	.owner		= THIS_MODULE,
	.name		= "lia",
};
|
||||||
|
|
||||||
|
/* Module entry: register "lia" with the TCP congestion-control framework.
 * The BUILD_BUG_ON ensures our private state fits into icsk_ca_priv.
 */
static int __init mptcp_ccc_register(void)
{
	BUILD_BUG_ON(sizeof(struct mptcp_ccc) > ICSK_CA_PRIV_SIZE);
	return tcp_register_congestion_control(&mptcp_ccc);
}
|
||||||
|
|
||||||
|
/* Module exit: unregister the "lia" congestion-control algorithm. */
static void __exit mptcp_ccc_unregister(void)
{
	tcp_unregister_congestion_control(&mptcp_ccc);
}
|
||||||
|
|
||||||
|
/* Standard module plumbing and metadata. */
module_init(mptcp_ccc_register);
module_exit(mptcp_ccc_unregister);

MODULE_AUTHOR("Christoph Paasch, Sébastien Barré");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("MPTCP LINKED INCREASE CONGESTION CONTROL ALGORITHM");
MODULE_VERSION("0.1");
|
3135
net/mptcp/mptcp_ctrl.c
Executable file
3135
net/mptcp/mptcp_ctrl.c
Executable file
File diff suppressed because it is too large
Load Diff
1963
net/mptcp/mptcp_fullmesh.c
Executable file
1963
net/mptcp/mptcp_fullmesh.c
Executable file
File diff suppressed because it is too large
Load Diff
2431
net/mptcp/mptcp_input.c
Executable file
2431
net/mptcp/mptcp_input.c
Executable file
File diff suppressed because it is too large
Load Diff
427
net/mptcp/mptcp_ipv4.c
Executable file
427
net/mptcp/mptcp_ipv4.c
Executable file
@ -0,0 +1,427 @@
|
|||||||
|
/*
|
||||||
|
* MPTCP implementation - IPv4-specific functions
|
||||||
|
*
|
||||||
|
* Initial Design & Implementation:
|
||||||
|
* Sébastien Barré <sebastien.barre@uclouvain.be>
|
||||||
|
*
|
||||||
|
* Current Maintainer:
|
||||||
|
* Christoph Paasch <christoph.paasch@uclouvain.be>
|
||||||
|
*
|
||||||
|
* Additional authors:
|
||||||
|
* Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
|
||||||
|
* Gregory Detal <gregory.detal@uclouvain.be>
|
||||||
|
* Fabien Duchêne <fabien.duchene@uclouvain.be>
|
||||||
|
* Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
|
||||||
|
* Lavkesh Lahngir <lavkesh51@gmail.com>
|
||||||
|
* Andreas Ripke <ripke@neclab.eu>
|
||||||
|
* Vlad Dogaru <vlad.dogaru@intel.com>
|
||||||
|
* Octavian Purdila <octavian.purdila@intel.com>
|
||||||
|
* John Ronan <jronan@tssg.org>
|
||||||
|
* Catalin Nicutar <catalin.nicutar@gmail.com>
|
||||||
|
* Brandon Heller <brandonh@stanford.edu>
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public License
|
||||||
|
* as published by the Free Software Foundation; either version
|
||||||
|
* 2 of the License, or (at your option) any later version.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <linux/export.h>
|
||||||
|
#include <linux/ip.h>
|
||||||
|
#include <linux/list.h>
|
||||||
|
#include <linux/skbuff.h>
|
||||||
|
#include <linux/spinlock.h>
|
||||||
|
#include <linux/tcp.h>
|
||||||
|
|
||||||
|
#include <net/inet_common.h>
|
||||||
|
#include <net/inet_connection_sock.h>
|
||||||
|
#include <net/mptcp.h>
|
||||||
|
#include <net/mptcp_v4.h>
|
||||||
|
#include <net/request_sock.h>
|
||||||
|
#include <net/tcp.h>
|
||||||
|
|
||||||
|
/* Generate a per-subflow nonce for MP_JOIN from the IPv4 4-tuple.
 * mptcp_seed is bumped on every call so repeated joins on the same
 * 4-tuple still get distinct nonces; keyed with the global mptcp_secret.
 */
u32 mptcp_v4_get_nonce(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport)
{
	return siphash_4u32((__force u32)saddr, (__force u32)daddr,
			    (__force u32)sport << 16 | (__force u32)dport,
			    mptcp_seed++, &mptcp_secret);
}
|
||||||
|
|
||||||
|
/* Derive the local 64-bit MPTCP key from the IPv4 4-tuple and a caller
 * supplied seed, keyed with the global mptcp_secret. Deterministic for a
 * given (4-tuple, seed) - required for SYN-cookie style recomputation.
 */
u64 mptcp_v4_get_key(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
		     u32 seed)
{
	return siphash_2u64((__force u64)saddr << 32 | (__force u64)daddr,
			    (__force u64)seed << 32 | (__force u64)sport << 16 | (__force u64)dport,
			    &mptcp_secret);
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Request-sock destructor: tear down the MPTCP-specific request state
 * first, then delegate to the regular TCP/IPv4 destructor. Order matters:
 * the MPTCP part may still reference fields the TCP destructor frees.
 */
static void mptcp_v4_reqsk_destructor(struct request_sock *req)
{
	mptcp_reqsk_destructor(req);

	tcp_v4_reqsk_destructor(req);
}
|
||||||
|
|
||||||
|
/* init_req hook for MP_CAPABLE connections over IPv4: run the regular
 * TCP/IPv4 init first, then mark the request as an MPTCP one.
 * Returns 0 (the underlying ops report no failure here).
 */
static int mptcp_v4_init_req(struct request_sock *req, const struct sock *sk,
			     struct sk_buff *skb, bool want_cookie)
{
	tcp_request_sock_ipv4_ops.init_req(req, sk, skb, want_cookie);

	/* Not hashed yet; not a sub-flow request; flag as MPTCP. */
	mptcp_rsk(req)->hash_entry.pprev = NULL;
	mptcp_rsk(req)->is_sub = 0;
	inet_rsk(req)->mptcp_rqsk = 1;

	/* In case of SYN-cookies, we wait for the isn to be generated - it is
	 * input to the key-generation.
	 */
	if (!want_cookie)
		mptcp_reqsk_init(req, sk, skb, false);

	return 0;
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_SYN_COOKIES
|
||||||
|
static u32 mptcp_v4_cookie_init_seq(struct request_sock *req, const struct sock *sk,
|
||||||
|
const struct sk_buff *skb, __u16 *mssp)
|
||||||
|
{
|
||||||
|
__u32 isn = cookie_v4_init_sequence(req, sk, skb, mssp);
|
||||||
|
|
||||||
|
tcp_rsk(req)->snt_isn = isn;
|
||||||
|
|
||||||
|
mptcp_reqsk_init(req, sk, skb, true);
|
||||||
|
|
||||||
|
return isn;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* May be called without holding the meta-level lock */
|
||||||
|
/* init_req hook for MP_JOIN subflow requests over IPv4.
 * May be called without holding the meta-level lock.
 * Returns 0 on success, -1 if the path-manager refuses a local id.
 */
static int mptcp_v4_join_init_req(struct request_sock *req, const struct sock *meta_sk,
				  struct sk_buff *skb, bool want_cookie)
{
	struct mptcp_request_sock *mtreq = mptcp_rsk(req);
	const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
	union inet_addr addr;
	int loc_id;
	bool low_prio = false;

	/* We need to do this as early as possible. Because, if we fail later
	 * (e.g., get_local_id), then reqsk_free tries to remove the
	 * request-socket from the htb in mptcp_hash_request_remove as pprev
	 * may be different from NULL.
	 */
	mtreq->hash_entry.pprev = NULL;

	tcp_request_sock_ipv4_ops.init_req(req, meta_sk, skb, want_cookie);

	/* Nonce for the HMAC in the MP_JOIN handshake. */
	mtreq->mptcp_loc_nonce = mptcp_v4_get_nonce(ip_hdr(skb)->saddr,
						    ip_hdr(skb)->daddr,
						    tcp_hdr(skb)->source,
						    tcp_hdr(skb)->dest);
	/* Ask the path-manager for the local address id of the address the
	 * SYN arrived on; -1 means this address must not take joins.
	 */
	addr.ip = inet_rsk(req)->ir_loc_addr;
	loc_id = mpcb->pm_ops->get_local_id(meta_sk, AF_INET, &addr, &low_prio);
	if (loc_id == -1)
		return -1;
	mtreq->loc_id = loc_id;
	mtreq->low_prio = low_prio;

	mptcp_join_reqsk_init(mpcb, req, skb);

	return 0;
}
|
||||||
|
|
||||||
|
/* Similar to tcp_request_sock_ops */
|
||||||
|
/* Similar to tcp_request_sock_ops, but with the MPTCP-aware destructor
 * and the larger mptcp_request_sock object. slab/slab_name are filled in
 * by mptcp_pm_v4_init().
 */
struct request_sock_ops mptcp_request_sock_ops __read_mostly = {
	.family		= PF_INET,
	.obj_size	= sizeof(struct mptcp_request_sock),
	.rtx_syn_ack	= tcp_rtx_synack,
	.send_ack	= tcp_v4_reqsk_send_ack,
	.destructor	= mptcp_v4_reqsk_destructor,
	.send_reset	= tcp_v4_send_reset,
	.syn_ack_timeout = tcp_syn_ack_timeout,
};
|
||||||
|
|
||||||
|
/* Similar to: tcp_v4_conn_request
|
||||||
|
* May be called without holding the meta-level lock
|
||||||
|
*/
|
||||||
|
/* Similar to: tcp_v4_conn_request.
 * Handle an MP_JOIN SYN on the meta-socket by creating a join request-sock.
 * May be called without holding the meta-level lock.
 */
static int mptcp_v4_join_request(struct sock *meta_sk, struct sk_buff *skb)
{
	return tcp_conn_request(&mptcp_request_sock_ops,
				&mptcp_join_request_sock_ipv4_ops,
				meta_sk, skb);
}
|
||||||
|
|
||||||
|
/* Similar to: tcp_v4_do_rcv
|
||||||
|
* We only process join requests here. (either the SYN or the final ACK)
|
||||||
|
*/
|
||||||
|
/* Receive path for packets redirected to the meta-socket: only MP_JOIN
 * traffic (the SYN or the final ACK of a join handshake) is processed.
 * Consumes skb on all paths except the tcp_v4_do_rcv() hand-off.
 * Returns 0, or tcp_v4_do_rcv()'s result for established subflows.
 */
int mptcp_v4_do_rcv(struct sock *meta_sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	const struct iphdr *iph = ip_hdr(skb);
	struct sock *child, *rsk = NULL, *sk;
	int ret;

	/* Look for an already-established subflow for this 4-tuple;
	 * takes a reference on success.
	 */
	sk = inet_lookup_established(sock_net(meta_sk), &tcp_hashinfo,
				     iph->saddr, th->source, iph->daddr,
				     th->dest, inet_iif(skb));

	if (!sk)
		goto new_subflow;

	if (is_meta_sk(sk)) {
		WARN("%s Did not find a sub-sk - did found the meta!\n", __func__);
		sock_put(sk);
		goto discard;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		goto discard;
	}

	/* Final ACK of a join handshake: promote the request-sock. */
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);
		bool req_stolen;

		if (!mptcp_can_new_subflow(meta_sk))
			goto reset_and_discard;

		local_bh_disable();
		child = tcp_check_req(meta_sk, skb, req, false, &req_stolen);
		if (!child) {
			reqsk_put(req);
			local_bh_enable();
			goto discard;
		}

		if (child != meta_sk) {
			ret = mptcp_finish_handshake(child, skb);
			if (ret) {
				rsk = child;
				local_bh_enable();
				goto reset_and_discard;
			}

			local_bh_enable();
			return 0;
		}

		/* tcp_check_req failed */
		reqsk_put(req);

		local_bh_enable();
		goto discard;
	}

	/* Established subflow: normal TCP receive processing. */
	ret = tcp_v4_do_rcv(sk, skb);
	sock_put(sk);

	return ret;

new_subflow:
	if (!mptcp_can_new_subflow(meta_sk))
		goto reset_and_discard;

	/* SYN-cookie ACK may directly yield a child socket here. */
	child = tcp_v4_cookie_check(meta_sk, skb);
	if (!child)
		goto discard;

	if (child != meta_sk) {
		ret = mptcp_finish_handshake(child, skb);
		if (ret) {
			rsk = child;
			goto reset_and_discard;
		}
	}

	if (tcp_hdr(skb)->syn) {
		local_bh_disable();
		mptcp_v4_join_request(meta_sk, skb);
		local_bh_enable();
	}

discard:
	kfree_skb(skb);
	return 0;

reset_and_discard:
	tcp_v4_send_reset(rsk, skb);
	goto discard;
}
|
||||||
|
|
||||||
|
/* Create a new IPv4 subflow.
|
||||||
|
*
|
||||||
|
* We are in user-context and meta-sock-lock is hold.
|
||||||
|
*/
|
||||||
|
/* Create a new IPv4 subflow for the connection owned by meta_sk, bind it
 * to loc/sport, connect it (non-blocking) to rem, and attach it to the
 * meta-socket. We are in user-context and the meta-sock lock is held.
 *
 * On success *subsk (if non-NULL) receives the new subflow socket and 0
 * is returned; on failure the partially set-up socket is torn down and a
 * negative errno is returned.
 */
int __mptcp_init4_subsockets(struct sock *meta_sk, const struct mptcp_loc4 *loc,
			     __be16 sport, struct mptcp_rem4 *rem,
			     struct sock **subsk)
{
	struct tcp_sock *tp;
	struct sock *sk;
	struct sockaddr_in loc_in, rem_in;
	/* On-stack socket wrapper, seeded from the meta's socket so
	 * inet_create() sees consistent state.
	 */
	struct socket_alloc sock_full;
	struct socket *sock = (struct socket *)&sock_full;
	int ret;

	/** First, create and prepare the new socket */
	memcpy(&sock_full, meta_sk->sk_socket, sizeof(sock_full));
	sock->state = SS_UNCONNECTED;
	sock->ops = NULL;

	ret = inet_create(sock_net(meta_sk), sock, IPPROTO_TCP, 1);
	if (unlikely(ret < 0)) {
		net_err_ratelimited("%s inet_create failed ret: %d\n",
				    __func__, ret);
		return ret;
	}

	sk = sock->sk;
	tp = tcp_sk(sk);

	/* All subsockets need the MPTCP-lock-class */
	lockdep_set_class_and_name(&(sk)->sk_lock.slock, &meta_slock_key, meta_slock_key_name);
	lockdep_init_map(&(sk)->sk_lock.dep_map, meta_key_name, &meta_key, 0);

	ret = mptcp_add_sock(meta_sk, sk, loc->loc4_id, rem->rem4_id, GFP_KERNEL);
	if (ret) {
		net_err_ratelimited("%s mptcp_add_sock failed ret: %d\n",
				    __func__, ret);
		goto error;
	}

	tp->mptcp->slave_sk = 1;
	tp->mptcp->low_prio = loc->low_prio;

	/* Initializing the timer for an MPTCP subflow */
	timer_setup(&tp->mptcp->mptcp_ack_timer, mptcp_ack_handler, 0);

	/** Then, connect the socket to the peer */
	loc_in.sin_family = AF_INET;
	rem_in.sin_family = AF_INET;
	loc_in.sin_port = sport;
	/* Fall back to the meta-connection's destination port. */
	if (rem->port)
		rem_in.sin_port = rem->port;
	else
		rem_in.sin_port = inet_sk(meta_sk)->inet_dport;
	loc_in.sin_addr = loc->addr;
	rem_in.sin_addr = rem->addr;

	if (loc->if_idx)
		sk->sk_bound_dev_if = loc->if_idx;

	ret = kernel_bind(sock, (struct sockaddr *)&loc_in,
			  sizeof(struct sockaddr_in));
	if (ret < 0) {
		net_err_ratelimited("%s: token %#x bind() to %pI4 index %d failed, error %d\n",
				    __func__, tcp_sk(meta_sk)->mpcb->mptcp_loc_token,
				    &loc_in.sin_addr, loc->if_idx, ret);
		goto error;
	}

	mptcp_debug("%s: token %#x pi %d src_addr:%pI4:%d dst_addr:%pI4:%d ifidx: %d\n",
		    __func__, tcp_sk(meta_sk)->mpcb->mptcp_loc_token,
		    tp->mptcp->path_index, &loc_in.sin_addr,
		    ntohs(loc_in.sin_port), &rem_in.sin_addr,
		    ntohs(rem_in.sin_port), loc->if_idx);

	/* Give the path-manager a chance to tweak the new subsocket. */
	if (tcp_sk(meta_sk)->mpcb->pm_ops->init_subsocket_v4)
		tcp_sk(meta_sk)->mpcb->pm_ops->init_subsocket_v4(sk, rem->addr);

	ret = kernel_connect(sock, (struct sockaddr *)&rem_in,
			     sizeof(struct sockaddr_in), O_NONBLOCK);
	/* -EINPROGRESS is the expected outcome of a non-blocking connect. */
	if (ret < 0 && ret != -EINPROGRESS) {
		net_err_ratelimited("%s: MPTCP subsocket connect() failed, error %d\n",
				    __func__, ret);
		goto error;
	}

	MPTCP_INC_STATS(sock_net(meta_sk), MPTCP_MIB_JOINSYNTX);

	sk_set_socket(sk, meta_sk->sk_socket);
	sk->sk_wq = meta_sk->sk_wq;

	if (subsk)
		*subsk = sk;

	return 0;

error:
	/* May happen if mptcp_add_sock fails first */
	if (!mptcp(tp)) {
		tcp_close(sk, 0);
	} else {
		local_bh_disable();
		mptcp_sub_force_close(sk);
		local_bh_enable();
	}
	return ret;
}
EXPORT_SYMBOL(__mptcp_init4_subsockets);
|
||||||
|
|
||||||
|
/* AF-specific connection ops for MPTCP over IPv4: identical to the
 * regular ipv4_specific ops except for conn_request, which is routed
 * through mptcp_conn_request.
 */
const struct inet_connection_sock_af_ops mptcp_v4_specific = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = mptcp_conn_request,
	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ip_setsockopt,
	.getsockopt	   = ip_getsockopt,
	.addr2sockaddr	   = inet_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in),
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ip_setsockopt,
	.compat_getsockopt = compat_ip_getsockopt,
#endif
	.mtu_reduced	   = tcp_v4_mtu_reduced,
};
|
||||||
|
|
||||||
|
/* Filled in at runtime by mptcp_pm_v4_init() from tcp_request_sock_ipv4_ops,
 * with the init_req (and SYN-cookie) hooks replaced by MPTCP variants.
 */
struct tcp_request_sock_ops mptcp_request_sock_ipv4_ops;
struct tcp_request_sock_ops mptcp_join_request_sock_ipv4_ops;
|
||||||
|
|
||||||
|
/* General initialization of IPv4 for MPTCP */
|
||||||
|
/* General initialization of IPv4 for MPTCP: clone the stock request-sock
 * ops with MPTCP hooks installed and create the request-sock slab cache.
 * Returns 0 on success or -ENOMEM (goto-based cleanup on partial failure).
 */
int mptcp_pm_v4_init(void)
{
	int ret = 0;
	struct request_sock_ops *ops = &mptcp_request_sock_ops;

	/* MP_CAPABLE ops: stock IPv4 ops with our init_req/cookie hooks. */
	mptcp_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops;
	mptcp_request_sock_ipv4_ops.init_req = mptcp_v4_init_req;
#ifdef CONFIG_SYN_COOKIES
	mptcp_request_sock_ipv4_ops.cookie_init_seq = mptcp_v4_cookie_init_seq;
#endif
	/* MP_JOIN ops only differ in init_req. */
	mptcp_join_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops;
	mptcp_join_request_sock_ipv4_ops.init_req = mptcp_v4_join_init_req;

	ops->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", "MPTCP");
	if (ops->slab_name == NULL) {
		ret = -ENOMEM;
		goto out;
	}

	ops->slab = kmem_cache_create(ops->slab_name, ops->obj_size, 0,
				      SLAB_TYPESAFE_BY_RCU|SLAB_HWCACHE_ALIGN,
				      NULL);

	if (ops->slab == NULL) {
		ret = -ENOMEM;
		goto err_reqsk_create;
	}

out:
	return ret;

err_reqsk_create:
	kfree(ops->slab_name);
	ops->slab_name = NULL;
	goto out;
}
|
||||||
|
|
||||||
|
/* Undo mptcp_pm_v4_init(): destroy the request-sock slab cache and free
 * its name (kmem_cache_destroy/kfree both tolerate NULL).
 */
void mptcp_pm_v4_undo(void)
{
	kmem_cache_destroy(mptcp_request_sock_ops.slab);
	kfree(mptcp_request_sock_ops.slab_name);
}
|
475
net/mptcp/mptcp_ipv6.c
Executable file
475
net/mptcp/mptcp_ipv6.c
Executable file
@ -0,0 +1,475 @@
|
|||||||
|
/*
|
||||||
|
* MPTCP implementation - IPv6-specific functions
|
||||||
|
*
|
||||||
|
* Initial Design & Implementation:
|
||||||
|
* Sébastien Barré <sebastien.barre@uclouvain.be>
|
||||||
|
*
|
||||||
|
* Current Maintainer:
|
||||||
|
* Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
|
||||||
|
*
|
||||||
|
* Additional authors:
|
||||||
|
* Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
|
||||||
|
* Gregory Detal <gregory.detal@uclouvain.be>
|
||||||
|
* Fabien Duchêne <fabien.duchene@uclouvain.be>
|
||||||
|
* Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
|
||||||
|
* Lavkesh Lahngir <lavkesh51@gmail.com>
|
||||||
|
* Andreas Ripke <ripke@neclab.eu>
|
||||||
|
* Vlad Dogaru <vlad.dogaru@intel.com>
|
||||||
|
* Octavian Purdila <octavian.purdila@intel.com>
|
||||||
|
* John Ronan <jronan@tssg.org>
|
||||||
|
* Catalin Nicutar <catalin.nicutar@gmail.com>
|
||||||
|
* Brandon Heller <brandonh@stanford.edu>
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public License
|
||||||
|
* as published by the Free Software Foundation; either version
|
||||||
|
* 2 of the License, or (at your option) any later version.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <linux/export.h>
|
||||||
|
#include <linux/in6.h>
|
||||||
|
#include <linux/kernel.h>
|
||||||
|
|
||||||
|
#include <net/addrconf.h>
|
||||||
|
#include <net/flow.h>
|
||||||
|
#include <net/inet6_connection_sock.h>
|
||||||
|
#include <net/inet6_hashtables.h>
|
||||||
|
#include <net/inet_common.h>
|
||||||
|
#include <net/ipv6.h>
|
||||||
|
#include <net/ip6_checksum.h>
|
||||||
|
#include <net/ip6_route.h>
|
||||||
|
#include <net/mptcp.h>
|
||||||
|
#include <net/mptcp_v6.h>
|
||||||
|
#include <net/tcp.h>
|
||||||
|
#include <net/transp_v6.h>
|
||||||
|
|
||||||
|
/* Generate a per-subflow MP_JOIN nonce from the IPv6 4-tuple; mptcp_seed
 * is bumped each call so repeated joins get distinct nonces. Keyed with
 * the global mptcp_secret via siphash over a packed, aligned struct.
 */
__u32 mptcp_v6_get_nonce(const __be32 *saddr, const __be32 *daddr,
			 __be16 sport, __be16 dport)
{
	const struct {
		struct in6_addr saddr;
		struct in6_addr daddr;
		u32 seed;
		__be16 sport;
		__be16 dport;
	} __aligned(SIPHASH_ALIGNMENT) combined = {
		.saddr = *(struct in6_addr *)saddr,
		.daddr = *(struct in6_addr *)daddr,
		.seed = mptcp_seed++,
		.sport = sport,
		.dport = dport
	};

	return siphash(&combined, offsetofend(typeof(combined), dport),
		       &mptcp_secret);
}
|
||||||
|
|
||||||
|
/* Derive the local 64-bit MPTCP key from the IPv6 4-tuple and a caller
 * supplied seed; deterministic for a given (4-tuple, seed), as required
 * for SYN-cookie recomputation.
 */
u64 mptcp_v6_get_key(const __be32 *saddr, const __be32 *daddr,
		     __be16 sport, __be16 dport, u32 seed)
{
	const struct {
		struct in6_addr saddr;
		struct in6_addr daddr;
		u32 seed;
		__be16 sport;
		__be16 dport;
	} __aligned(SIPHASH_ALIGNMENT) combined = {
		.saddr = *(struct in6_addr *)saddr,
		.daddr = *(struct in6_addr *)daddr,
		.seed = seed,
		.sport = sport,
		.dport = dport
	};

	return siphash(&combined, offsetofend(typeof(combined), dport),
		       &mptcp_secret);
}
|
||||||
|
|
||||||
|
/* Request-sock destructor: MPTCP-specific teardown first, then the
 * regular TCP/IPv6 destructor (mirrors mptcp_v4_reqsk_destructor).
 */
static void mptcp_v6_reqsk_destructor(struct request_sock *req)
{
	mptcp_reqsk_destructor(req);

	tcp_v6_reqsk_destructor(req);
}
|
||||||
|
|
||||||
|
/* init_req hook for MP_CAPABLE connections over IPv6: regular TCP/IPv6
 * init first, then mark the request as MPTCP. Returns 0.
 */
static int mptcp_v6_init_req(struct request_sock *req, const struct sock *sk,
			     struct sk_buff *skb, bool want_cookie)
{
	tcp_request_sock_ipv6_ops.init_req(req, sk, skb, want_cookie);

	/* Not hashed yet; not a sub-flow request; flag as MPTCP. */
	mptcp_rsk(req)->hash_entry.pprev = NULL;
	mptcp_rsk(req)->is_sub = 0;
	inet_rsk(req)->mptcp_rqsk = 1;

	/* In case of SYN-cookies, we wait for the isn to be generated - it is
	 * input to the key-generation.
	 */
	if (!want_cookie)
		mptcp_reqsk_init(req, sk, skb, false);

	return 0;
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_SYN_COOKIES
|
||||||
|
static u32 mptcp_v6_cookie_init_seq(struct request_sock *req, const struct sock *sk,
|
||||||
|
const struct sk_buff *skb, __u16 *mssp)
|
||||||
|
{
|
||||||
|
__u32 isn = cookie_v6_init_sequence(req, sk, skb, mssp);
|
||||||
|
|
||||||
|
tcp_rsk(req)->snt_isn = isn;
|
||||||
|
|
||||||
|
mptcp_reqsk_init(req, sk, skb, true);
|
||||||
|
|
||||||
|
return isn;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* May be called without holding the meta-level lock */
|
||||||
|
/* init_req hook for MP_JOIN subflow requests over IPv6 (mirrors the v4
 * variant). May be called without holding the meta-level lock.
 * Returns 0 on success, -1 if the path-manager refuses a local id.
 */
static int mptcp_v6_join_init_req(struct request_sock *req, const struct sock *meta_sk,
				  struct sk_buff *skb, bool want_cookie)
{
	struct mptcp_request_sock *mtreq = mptcp_rsk(req);
	const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
	union inet_addr addr;
	int loc_id;
	bool low_prio = false;

	/* We need to do this as early as possible. Because, if we fail later
	 * (e.g., get_local_id), then reqsk_free tries to remove the
	 * request-socket from the htb in mptcp_hash_request_remove as pprev
	 * may be different from NULL.
	 */
	mtreq->hash_entry.pprev = NULL;

	tcp_request_sock_ipv6_ops.init_req(req, meta_sk, skb, want_cookie);

	/* Nonce for the HMAC in the MP_JOIN handshake. */
	mtreq->mptcp_loc_nonce = mptcp_v6_get_nonce(ipv6_hdr(skb)->saddr.s6_addr32,
						    ipv6_hdr(skb)->daddr.s6_addr32,
						    tcp_hdr(skb)->source,
						    tcp_hdr(skb)->dest);
	addr.in6 = inet_rsk(req)->ir_v6_loc_addr;
	loc_id = mpcb->pm_ops->get_local_id(meta_sk, AF_INET6, &addr, &low_prio);
	if (loc_id == -1)
		return -1;
	mtreq->loc_id = loc_id;
	mtreq->low_prio = low_prio;

	mptcp_join_reqsk_init(mpcb, req, skb);

	return 0;
}
|
||||||
|
|
||||||
|
/* Similar to tcp6_request_sock_ops */
|
||||||
|
/* Similar to tcp6_request_sock_ops, with the MPTCP-aware destructor and
 * the larger mptcp_request_sock object. Note: shares the slab created by
 * mptcp_pm_v4_init() for mptcp_request_sock_ops.
 */
struct request_sock_ops mptcp6_request_sock_ops __read_mostly = {
	.family		= AF_INET6,
	.obj_size	= sizeof(struct mptcp_request_sock),
	.rtx_syn_ack	= tcp_rtx_synack,
	.send_ack	= tcp_v6_reqsk_send_ack,
	.destructor	= mptcp_v6_reqsk_destructor,
	.send_reset	= tcp_v6_send_reset,
	.syn_ack_timeout = tcp_syn_ack_timeout,
};
|
||||||
|
|
||||||
|
/* Similar to: tcp_v6_conn_request
|
||||||
|
* May be called without holding the meta-level lock
|
||||||
|
*/
|
||||||
|
/* Similar to: tcp_v6_conn_request.
 * Handle an MP_JOIN SYN on the meta-socket by creating a join request-sock.
 * May be called without holding the meta-level lock.
 */
static int mptcp_v6_join_request(struct sock *meta_sk, struct sk_buff *skb)
{
	return tcp_conn_request(&mptcp6_request_sock_ops,
				&mptcp_join_request_sock_ipv6_ops,
				meta_sk, skb);
}
|
||||||
|
|
||||||
|
/* IPv6 receive path for packets redirected to the meta-socket: processes
 * only MP_JOIN traffic (SYN or final ACK), mirroring mptcp_v4_do_rcv.
 * Consumes skb on all paths except the tcp_v6_do_rcv() hand-off.
 */
int mptcp_v6_do_rcv(struct sock *meta_sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
	struct sock *child, *rsk = NULL, *sk;
	int ret;

	/* Look for an established subflow; takes a reference on success. */
	sk = __inet6_lookup_established(sock_net(meta_sk),
					&tcp_hashinfo,
					&ip6h->saddr, th->source,
					&ip6h->daddr, ntohs(th->dest),
					tcp_v6_iif(skb), tcp_v6_sdif(skb));

	if (!sk)
		goto new_subflow;

	if (is_meta_sk(sk)) {
		WARN("%s Did not find a sub-sk - did found the meta!\n", __func__);
		sock_put(sk);
		goto discard;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		goto discard;
	}

	/* Final ACK of a join handshake: promote the request-sock. */
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);
		bool req_stolen;

		if (!mptcp_can_new_subflow(meta_sk))
			goto reset_and_discard;

		local_bh_disable();
		child = tcp_check_req(meta_sk, skb, req, false, &req_stolen);
		if (!child) {
			reqsk_put(req);
			local_bh_enable();
			goto discard;
		}

		if (child != meta_sk) {
			ret = mptcp_finish_handshake(child, skb);
			if (ret) {
				rsk = child;
				local_bh_enable();
				goto reset_and_discard;
			}

			local_bh_enable();
			return 0;
		}

		/* tcp_check_req failed */
		reqsk_put(req);

		local_bh_enable();
		goto discard;
	}

	/* Established subflow: normal TCP receive processing. */
	ret = tcp_v6_do_rcv(sk, skb);
	sock_put(sk);

	return ret;

new_subflow:
	if (!mptcp_can_new_subflow(meta_sk))
		goto reset_and_discard;

	/* SYN-cookie ACK may directly yield a child socket here. */
	child = tcp_v6_cookie_check(meta_sk, skb);
	if (!child)
		goto discard;

	if (child != meta_sk) {
		ret = mptcp_finish_handshake(child, skb);
		if (ret) {
			rsk = child;
			goto reset_and_discard;
		}
	}

	if (tcp_hdr(skb)->syn) {
		local_bh_disable();
		mptcp_v6_join_request(meta_sk, skb);
		local_bh_enable();
	}

discard:
	kfree_skb(skb);
	return 0;

reset_and_discard:
	tcp_v6_send_reset(rsk, skb);
	goto discard;
}
|
||||||
|
|
||||||
|
/* Create a new IPv6 subflow.
|
||||||
|
*
|
||||||
|
* We are in user-context and meta-sock-lock is hold.
|
||||||
|
*/
|
||||||
|
int __mptcp_init6_subsockets(struct sock *meta_sk, const struct mptcp_loc6 *loc,
|
||||||
|
__be16 sport, struct mptcp_rem6 *rem,
|
||||||
|
struct sock **subsk)
|
||||||
|
{
|
||||||
|
struct tcp_sock *tp;
|
||||||
|
struct sock *sk;
|
||||||
|
struct sockaddr_in6 loc_in, rem_in;
|
||||||
|
struct socket_alloc sock_full;
|
||||||
|
struct socket *sock = (struct socket *)&sock_full;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
/** First, create and prepare the new socket */
|
||||||
|
memcpy(&sock_full, meta_sk->sk_socket, sizeof(sock_full));
|
||||||
|
sock->state = SS_UNCONNECTED;
|
||||||
|
sock->ops = NULL;
|
||||||
|
|
||||||
|
ret = inet6_create(sock_net(meta_sk), sock, IPPROTO_TCP, 1);
|
||||||
|
if (unlikely(ret < 0)) {
|
||||||
|
net_err_ratelimited("%s inet6_create failed ret: %d\n",
|
||||||
|
__func__, ret);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
sk = sock->sk;
|
||||||
|
tp = tcp_sk(sk);
|
||||||
|
|
||||||
|
/* All subsockets need the MPTCP-lock-class */
|
||||||
|
lockdep_set_class_and_name(&(sk)->sk_lock.slock, &meta_slock_key, meta_slock_key_name);
|
||||||
|
lockdep_init_map(&(sk)->sk_lock.dep_map, meta_key_name, &meta_key, 0);
|
||||||
|
|
||||||
|
ret = mptcp_add_sock(meta_sk, sk, loc->loc6_id, rem->rem6_id, GFP_KERNEL);
|
||||||
|
if (ret) {
|
||||||
|
net_err_ratelimited("%s mptcp_add_sock failed ret: %d\n",
|
||||||
|
__func__, ret);
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
|
tp->mptcp->slave_sk = 1;
|
||||||
|
tp->mptcp->low_prio = loc->low_prio;
|
||||||
|
|
||||||
|
/* Initializing the timer for an MPTCP subflow */
|
||||||
|
timer_setup(&tp->mptcp->mptcp_ack_timer, mptcp_ack_handler, 0);
|
||||||
|
|
||||||
|
/** Then, connect the socket to the peer */
|
||||||
|
loc_in.sin6_family = AF_INET6;
|
||||||
|
rem_in.sin6_family = AF_INET6;
|
||||||
|
loc_in.sin6_port = sport;
|
||||||
|
if (rem->port)
|
||||||
|
rem_in.sin6_port = rem->port;
|
||||||
|
else
|
||||||
|
rem_in.sin6_port = inet_sk(meta_sk)->inet_dport;
|
||||||
|
loc_in.sin6_addr = loc->addr;
|
||||||
|
rem_in.sin6_addr = rem->addr;
|
||||||
|
|
||||||
|
if (loc->if_idx)
|
||||||
|
sk->sk_bound_dev_if = loc->if_idx;
|
||||||
|
|
||||||
|
ret = kernel_bind(sock, (struct sockaddr *)&loc_in,
|
||||||
|
sizeof(struct sockaddr_in6));
|
||||||
|
if (ret < 0) {
|
||||||
|
net_err_ratelimited("%s: token %#x bind() to %pI6 index %d failed, error %d\n",
|
||||||
|
__func__, tcp_sk(meta_sk)->mpcb->mptcp_loc_token,
|
||||||
|
&loc_in.sin6_addr, loc->if_idx, ret);
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
|
mptcp_debug("%s: token %#x pi %d src_addr:%pI6:%d dst_addr:%pI6:%d ifidx: %u\n",
|
||||||
|
__func__, tcp_sk(meta_sk)->mpcb->mptcp_loc_token,
|
||||||
|
tp->mptcp->path_index, &loc_in.sin6_addr,
|
||||||
|
ntohs(loc_in.sin6_port), &rem_in.sin6_addr,
|
||||||
|
ntohs(rem_in.sin6_port), loc->if_idx);
|
||||||
|
|
||||||
|
if (tcp_sk(meta_sk)->mpcb->pm_ops->init_subsocket_v6)
|
||||||
|
tcp_sk(meta_sk)->mpcb->pm_ops->init_subsocket_v6(sk, rem->addr);
|
||||||
|
|
||||||
|
ret = kernel_connect(sock, (struct sockaddr *)&rem_in,
|
||||||
|
sizeof(struct sockaddr_in6), O_NONBLOCK);
|
||||||
|
if (ret < 0 && ret != -EINPROGRESS) {
|
||||||
|
net_err_ratelimited("%s: MPTCP subsocket connect() failed, error %d\n",
|
||||||
|
__func__, ret);
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
|
MPTCP_INC_STATS(sock_net(meta_sk), MPTCP_MIB_JOINSYNTX);
|
||||||
|
|
||||||
|
sk_set_socket(sk, meta_sk->sk_socket);
|
||||||
|
sk->sk_wq = meta_sk->sk_wq;
|
||||||
|
|
||||||
|
if (subsk)
|
||||||
|
*subsk = sk;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
error:
|
||||||
|
/* May happen if mptcp_add_sock fails first */
|
||||||
|
if (!mptcp(tp)) {
|
||||||
|
tcp_close(sk, 0);
|
||||||
|
} else {
|
||||||
|
local_bh_disable();
|
||||||
|
mptcp_sub_force_close(sk);
|
||||||
|
local_bh_enable();
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(__mptcp_init6_subsockets);
|
||||||
|
|
||||||
|
const struct inet_connection_sock_af_ops mptcp_v6_specific = {
|
||||||
|
.queue_xmit = inet6_csk_xmit,
|
||||||
|
.send_check = tcp_v6_send_check,
|
||||||
|
.rebuild_header = inet6_sk_rebuild_header,
|
||||||
|
.sk_rx_dst_set = inet6_sk_rx_dst_set,
|
||||||
|
.conn_request = mptcp_conn_request,
|
||||||
|
.syn_recv_sock = tcp_v6_syn_recv_sock,
|
||||||
|
.net_header_len = sizeof(struct ipv6hdr),
|
||||||
|
.net_frag_header_len = sizeof(struct frag_hdr),
|
||||||
|
.setsockopt = ipv6_setsockopt,
|
||||||
|
.getsockopt = ipv6_getsockopt,
|
||||||
|
.addr2sockaddr = inet6_csk_addr2sockaddr,
|
||||||
|
.sockaddr_len = sizeof(struct sockaddr_in6),
|
||||||
|
#ifdef CONFIG_COMPAT
|
||||||
|
.compat_setsockopt = compat_ipv6_setsockopt,
|
||||||
|
.compat_getsockopt = compat_ipv6_getsockopt,
|
||||||
|
#endif
|
||||||
|
.mtu_reduced = tcp_v6_mtu_reduced,
|
||||||
|
};
|
||||||
|
|
||||||
|
const struct inet_connection_sock_af_ops mptcp_v6_mapped = {
|
||||||
|
.queue_xmit = ip_queue_xmit,
|
||||||
|
.send_check = tcp_v4_send_check,
|
||||||
|
.rebuild_header = inet_sk_rebuild_header,
|
||||||
|
.sk_rx_dst_set = inet_sk_rx_dst_set,
|
||||||
|
.conn_request = mptcp_conn_request,
|
||||||
|
.syn_recv_sock = tcp_v6_syn_recv_sock,
|
||||||
|
.net_header_len = sizeof(struct iphdr),
|
||||||
|
.setsockopt = ipv6_setsockopt,
|
||||||
|
.getsockopt = ipv6_getsockopt,
|
||||||
|
.addr2sockaddr = inet6_csk_addr2sockaddr,
|
||||||
|
.sockaddr_len = sizeof(struct sockaddr_in6),
|
||||||
|
#ifdef CONFIG_COMPAT
|
||||||
|
.compat_setsockopt = compat_ipv6_setsockopt,
|
||||||
|
.compat_getsockopt = compat_ipv6_getsockopt,
|
||||||
|
#endif
|
||||||
|
.mtu_reduced = tcp_v4_mtu_reduced,
|
||||||
|
};
|
||||||
|
|
||||||
|
struct tcp_request_sock_ops mptcp_request_sock_ipv6_ops;
|
||||||
|
struct tcp_request_sock_ops mptcp_join_request_sock_ipv6_ops;
|
||||||
|
|
||||||
|
int mptcp_pm_v6_init(void)
|
||||||
|
{
|
||||||
|
int ret = 0;
|
||||||
|
struct request_sock_ops *ops = &mptcp6_request_sock_ops;
|
||||||
|
|
||||||
|
mptcp_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops;
|
||||||
|
mptcp_request_sock_ipv6_ops.init_req = mptcp_v6_init_req;
|
||||||
|
#ifdef CONFIG_SYN_COOKIES
|
||||||
|
mptcp_request_sock_ipv6_ops.cookie_init_seq = mptcp_v6_cookie_init_seq;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
mptcp_join_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops;
|
||||||
|
mptcp_join_request_sock_ipv6_ops.init_req = mptcp_v6_join_init_req;
|
||||||
|
|
||||||
|
ops->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", "MPTCP6");
|
||||||
|
if (ops->slab_name == NULL) {
|
||||||
|
ret = -ENOMEM;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
ops->slab = kmem_cache_create(ops->slab_name, ops->obj_size, 0,
|
||||||
|
SLAB_TYPESAFE_BY_RCU|SLAB_HWCACHE_ALIGN,
|
||||||
|
NULL);
|
||||||
|
|
||||||
|
if (ops->slab == NULL) {
|
||||||
|
ret = -ENOMEM;
|
||||||
|
goto err_reqsk_create;
|
||||||
|
}
|
||||||
|
|
||||||
|
out:
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
err_reqsk_create:
|
||||||
|
kfree(ops->slab_name);
|
||||||
|
ops->slab_name = NULL;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
void mptcp_pm_v6_undo(void)
|
||||||
|
{
|
||||||
|
kmem_cache_destroy(mptcp6_request_sock_ops.slab);
|
||||||
|
kfree(mptcp6_request_sock_ops.slab_name);
|
||||||
|
}
|
174
net/mptcp/mptcp_ndiffports.c
Executable file
174
net/mptcp/mptcp_ndiffports.c
Executable file
@ -0,0 +1,174 @@
|
|||||||
|
#include <linux/module.h>
|
||||||
|
|
||||||
|
#include <net/mptcp.h>
|
||||||
|
#include <net/mptcp_v4.h>
|
||||||
|
|
||||||
|
#if IS_ENABLED(CONFIG_IPV6)
|
||||||
|
#include <net/mptcp_v6.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
struct ndiffports_priv {
|
||||||
|
/* Worker struct for subflow establishment */
|
||||||
|
struct work_struct subflow_work;
|
||||||
|
|
||||||
|
struct mptcp_cb *mpcb;
|
||||||
|
};
|
||||||
|
|
||||||
|
static int num_subflows __read_mostly = 2;
|
||||||
|
module_param(num_subflows, int, 0644);
|
||||||
|
MODULE_PARM_DESC(num_subflows, "choose the number of subflows per MPTCP connection");
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create all new subflows, by doing calls to mptcp_initX_subsockets
|
||||||
|
*
|
||||||
|
* This function uses a goto next_subflow, to allow releasing the lock between
|
||||||
|
* new subflows and giving other processes a chance to do some work on the
|
||||||
|
* socket and potentially finishing the communication.
|
||||||
|
**/
|
||||||
|
static void create_subflow_worker(struct work_struct *work)
|
||||||
|
{
|
||||||
|
const struct ndiffports_priv *pm_priv = container_of(work,
|
||||||
|
struct ndiffports_priv,
|
||||||
|
subflow_work);
|
||||||
|
struct mptcp_cb *mpcb = pm_priv->mpcb;
|
||||||
|
struct sock *meta_sk = mpcb->meta_sk;
|
||||||
|
int iter = 0;
|
||||||
|
|
||||||
|
next_subflow:
|
||||||
|
if (iter) {
|
||||||
|
release_sock(meta_sk);
|
||||||
|
mutex_unlock(&mpcb->mpcb_mutex);
|
||||||
|
|
||||||
|
cond_resched();
|
||||||
|
}
|
||||||
|
mutex_lock(&mpcb->mpcb_mutex);
|
||||||
|
lock_sock_nested(meta_sk, SINGLE_DEPTH_NESTING);
|
||||||
|
|
||||||
|
if (!mptcp(tcp_sk(meta_sk)))
|
||||||
|
goto exit;
|
||||||
|
|
||||||
|
iter++;
|
||||||
|
|
||||||
|
if (sock_flag(meta_sk, SOCK_DEAD))
|
||||||
|
goto exit;
|
||||||
|
|
||||||
|
if (mpcb->master_sk &&
|
||||||
|
!tcp_sk(mpcb->master_sk)->mptcp->fully_established)
|
||||||
|
goto exit;
|
||||||
|
|
||||||
|
if (num_subflows > iter && num_subflows > mptcp_subflow_count(mpcb)) {
|
||||||
|
if (meta_sk->sk_family == AF_INET ||
|
||||||
|
mptcp_v6_is_v4_mapped(meta_sk)) {
|
||||||
|
struct mptcp_loc4 loc;
|
||||||
|
struct mptcp_rem4 rem;
|
||||||
|
|
||||||
|
loc.addr.s_addr = inet_sk(meta_sk)->inet_saddr;
|
||||||
|
loc.loc4_id = 0;
|
||||||
|
loc.low_prio = 0;
|
||||||
|
if (mpcb->master_sk)
|
||||||
|
loc.if_idx = mpcb->master_sk->sk_bound_dev_if;
|
||||||
|
else
|
||||||
|
loc.if_idx = 0;
|
||||||
|
|
||||||
|
rem.addr.s_addr = inet_sk(meta_sk)->inet_daddr;
|
||||||
|
rem.port = inet_sk(meta_sk)->inet_dport;
|
||||||
|
rem.rem4_id = 0; /* Default 0 */
|
||||||
|
|
||||||
|
mptcp_init4_subsockets(meta_sk, &loc, &rem);
|
||||||
|
} else {
|
||||||
|
#if IS_ENABLED(CONFIG_IPV6)
|
||||||
|
struct mptcp_loc6 loc;
|
||||||
|
struct mptcp_rem6 rem;
|
||||||
|
|
||||||
|
loc.addr = inet6_sk(meta_sk)->saddr;
|
||||||
|
loc.loc6_id = 0;
|
||||||
|
loc.low_prio = 0;
|
||||||
|
if (mpcb->master_sk)
|
||||||
|
loc.if_idx = mpcb->master_sk->sk_bound_dev_if;
|
||||||
|
else
|
||||||
|
loc.if_idx = 0;
|
||||||
|
|
||||||
|
rem.addr = meta_sk->sk_v6_daddr;
|
||||||
|
rem.port = inet_sk(meta_sk)->inet_dport;
|
||||||
|
rem.rem6_id = 0; /* Default 0 */
|
||||||
|
|
||||||
|
mptcp_init6_subsockets(meta_sk, &loc, &rem);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
goto next_subflow;
|
||||||
|
}
|
||||||
|
|
||||||
|
exit:
|
||||||
|
release_sock(meta_sk);
|
||||||
|
mutex_unlock(&mpcb->mpcb_mutex);
|
||||||
|
mptcp_mpcb_put(mpcb);
|
||||||
|
sock_put(meta_sk);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ndiffports_new_session(const struct sock *meta_sk)
|
||||||
|
{
|
||||||
|
struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
|
||||||
|
struct ndiffports_priv *fmp = (struct ndiffports_priv *)&mpcb->mptcp_pm[0];
|
||||||
|
|
||||||
|
/* Initialize workqueue-struct */
|
||||||
|
INIT_WORK(&fmp->subflow_work, create_subflow_worker);
|
||||||
|
fmp->mpcb = mpcb;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ndiffports_create_subflows(struct sock *meta_sk)
|
||||||
|
{
|
||||||
|
struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
|
||||||
|
struct ndiffports_priv *pm_priv = (struct ndiffports_priv *)&mpcb->mptcp_pm[0];
|
||||||
|
|
||||||
|
if (mptcp_in_infinite_mapping_weak(mpcb) ||
|
||||||
|
mpcb->server_side || sock_flag(meta_sk, SOCK_DEAD))
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (!work_pending(&pm_priv->subflow_work)) {
|
||||||
|
sock_hold(meta_sk);
|
||||||
|
refcount_inc(&mpcb->mpcb_refcnt);
|
||||||
|
queue_work(mptcp_wq, &pm_priv->subflow_work);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static int ndiffports_get_local_id(const struct sock *meta_sk,
|
||||||
|
sa_family_t family, union inet_addr *addr,
|
||||||
|
bool *low_prio)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct mptcp_pm_ops ndiffports __read_mostly = {
|
||||||
|
.new_session = ndiffports_new_session,
|
||||||
|
.fully_established = ndiffports_create_subflows,
|
||||||
|
.get_local_id = ndiffports_get_local_id,
|
||||||
|
.name = "ndiffports",
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
};
|
||||||
|
|
||||||
|
/* General initialization of MPTCP_PM */
|
||||||
|
static int __init ndiffports_register(void)
|
||||||
|
{
|
||||||
|
BUILD_BUG_ON(sizeof(struct ndiffports_priv) > MPTCP_PM_SIZE);
|
||||||
|
|
||||||
|
if (mptcp_register_path_manager(&ndiffports))
|
||||||
|
goto exit;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
exit:
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ndiffports_unregister(void)
|
||||||
|
{
|
||||||
|
mptcp_unregister_path_manager(&ndiffports);
|
||||||
|
}
|
||||||
|
|
||||||
|
module_init(ndiffports_register);
|
||||||
|
module_exit(ndiffports_unregister);
|
||||||
|
|
||||||
|
MODULE_AUTHOR("Christoph Paasch");
|
||||||
|
MODULE_LICENSE("GPL");
|
||||||
|
MODULE_DESCRIPTION("NDIFF-PORTS MPTCP");
|
||||||
|
MODULE_VERSION("0.88");
|
1277
net/mptcp/mptcp_netlink.c
Executable file
1277
net/mptcp/mptcp_netlink.c
Executable file
File diff suppressed because it is too large
Load Diff
318
net/mptcp/mptcp_olia.c
Executable file
318
net/mptcp/mptcp_olia.c
Executable file
@ -0,0 +1,318 @@
|
|||||||
|
/*
|
||||||
|
* MPTCP implementation - OPPORTUNISTIC LINKED INCREASES CONGESTION CONTROL:
|
||||||
|
*
|
||||||
|
* Algorithm design:
|
||||||
|
* Ramin Khalili <ramin.khalili@epfl.ch>
|
||||||
|
* Nicolas Gast <nicolas.gast@epfl.ch>
|
||||||
|
* Jean-Yves Le Boudec <jean-yves.leboudec@epfl.ch>
|
||||||
|
*
|
||||||
|
* Implementation:
|
||||||
|
* Ramin Khalili <ramin.khalili@epfl.ch>
|
||||||
|
*
|
||||||
|
* Ported to the official MPTCP-kernel:
|
||||||
|
* Christoph Paasch <christoph.paasch@uclouvain.be>
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public License
|
||||||
|
* as published by the Free Software Foundation; either version
|
||||||
|
* 2 of the License, or (at your option) any later version.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#include <net/tcp.h>
|
||||||
|
#include <net/mptcp.h>
|
||||||
|
|
||||||
|
#include <linux/module.h>
|
||||||
|
|
||||||
|
static int scale = 10;
|
||||||
|
|
||||||
|
struct mptcp_olia {
|
||||||
|
u32 mptcp_loss1;
|
||||||
|
u32 mptcp_loss2;
|
||||||
|
u32 mptcp_loss3;
|
||||||
|
int epsilon_num;
|
||||||
|
u32 epsilon_den;
|
||||||
|
int mptcp_snd_cwnd_cnt;
|
||||||
|
};
|
||||||
|
|
||||||
|
static inline int mptcp_olia_sk_can_send(const struct sock *sk)
|
||||||
|
{
|
||||||
|
return mptcp_sk_can_send(sk) && tcp_sk(sk)->srtt_us;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline u64 mptcp_olia_scale(u64 val, int scale)
|
||||||
|
{
|
||||||
|
return (u64) val << scale;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* take care of artificially inflate (see RFC5681)
|
||||||
|
* of cwnd during fast-retransmit phase
|
||||||
|
*/
|
||||||
|
static u32 mptcp_get_crt_cwnd(struct sock *sk)
|
||||||
|
{
|
||||||
|
const struct inet_connection_sock *icsk = inet_csk(sk);
|
||||||
|
|
||||||
|
if (icsk->icsk_ca_state == TCP_CA_Recovery)
|
||||||
|
return tcp_sk(sk)->snd_ssthresh;
|
||||||
|
else
|
||||||
|
return tcp_sk(sk)->snd_cwnd;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* return the dominator of the first term of the increasing term */
|
||||||
|
static u64 mptcp_get_rate(const struct mptcp_cb *mpcb , u32 path_rtt)
|
||||||
|
{
|
||||||
|
struct mptcp_tcp_sock *mptcp;
|
||||||
|
u64 rate = 1; /* We have to avoid a zero-rate because it is used as a divisor */
|
||||||
|
|
||||||
|
mptcp_for_each_sub(mpcb, mptcp) {
|
||||||
|
struct sock *sk = mptcp_to_sock(mptcp);
|
||||||
|
struct tcp_sock *tp = tcp_sk(sk);
|
||||||
|
u64 scaled_num;
|
||||||
|
u32 tmp_cwnd;
|
||||||
|
|
||||||
|
if (!mptcp_olia_sk_can_send(sk))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
tmp_cwnd = mptcp_get_crt_cwnd(sk);
|
||||||
|
scaled_num = mptcp_olia_scale(tmp_cwnd, scale) * path_rtt;
|
||||||
|
rate += div_u64(scaled_num , tp->srtt_us);
|
||||||
|
}
|
||||||
|
rate *= rate;
|
||||||
|
return rate;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* find the maximum cwnd, used to find set M */
|
||||||
|
static u32 mptcp_get_max_cwnd(const struct mptcp_cb *mpcb)
|
||||||
|
{
|
||||||
|
struct mptcp_tcp_sock *mptcp;
|
||||||
|
u32 best_cwnd = 0;
|
||||||
|
|
||||||
|
mptcp_for_each_sub(mpcb, mptcp) {
|
||||||
|
struct sock *sk = mptcp_to_sock(mptcp);
|
||||||
|
u32 tmp_cwnd;
|
||||||
|
|
||||||
|
if (!mptcp_olia_sk_can_send(sk))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
tmp_cwnd = mptcp_get_crt_cwnd(sk);
|
||||||
|
if (tmp_cwnd > best_cwnd)
|
||||||
|
best_cwnd = tmp_cwnd;
|
||||||
|
}
|
||||||
|
return best_cwnd;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void mptcp_get_epsilon(const struct mptcp_cb *mpcb)
|
||||||
|
{
|
||||||
|
struct mptcp_tcp_sock *mptcp;
|
||||||
|
struct mptcp_olia *ca;
|
||||||
|
struct tcp_sock *tp;
|
||||||
|
struct sock *sk;
|
||||||
|
u64 tmp_int, tmp_rtt, best_int = 0, best_rtt = 1;
|
||||||
|
u32 max_cwnd, tmp_cwnd, established_cnt = 0;
|
||||||
|
u8 M = 0, B_not_M = 0;
|
||||||
|
|
||||||
|
/* TODO - integrate this in the following loop - we just want to iterate once */
|
||||||
|
|
||||||
|
max_cwnd = mptcp_get_max_cwnd(mpcb);
|
||||||
|
|
||||||
|
/* find the best path */
|
||||||
|
mptcp_for_each_sub(mpcb, mptcp) {
|
||||||
|
sk = mptcp_to_sock(mptcp);
|
||||||
|
tp = tcp_sk(sk);
|
||||||
|
ca = inet_csk_ca(sk);
|
||||||
|
|
||||||
|
if (!mptcp_olia_sk_can_send(sk))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
established_cnt++;
|
||||||
|
|
||||||
|
tmp_rtt = (u64)tp->srtt_us * tp->srtt_us;
|
||||||
|
/* TODO - check here and rename variables */
|
||||||
|
tmp_int = max(ca->mptcp_loss3 - ca->mptcp_loss2,
|
||||||
|
ca->mptcp_loss2 - ca->mptcp_loss1);
|
||||||
|
|
||||||
|
if ((u64)tmp_int * best_rtt >= (u64)best_int * tmp_rtt) {
|
||||||
|
best_rtt = tmp_rtt;
|
||||||
|
best_int = tmp_int;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* TODO - integrate this here in mptcp_get_max_cwnd and in the previous loop */
|
||||||
|
/* find the size of M and B_not_M */
|
||||||
|
mptcp_for_each_sub(mpcb, mptcp) {
|
||||||
|
sk = mptcp_to_sock(mptcp);
|
||||||
|
tp = tcp_sk(sk);
|
||||||
|
ca = inet_csk_ca(sk);
|
||||||
|
|
||||||
|
if (!mptcp_olia_sk_can_send(sk))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
tmp_cwnd = mptcp_get_crt_cwnd(sk);
|
||||||
|
if (tmp_cwnd == max_cwnd) {
|
||||||
|
M++;
|
||||||
|
} else {
|
||||||
|
tmp_rtt = (u64)tp->srtt_us * tp->srtt_us;
|
||||||
|
tmp_int = max(ca->mptcp_loss3 - ca->mptcp_loss2,
|
||||||
|
ca->mptcp_loss2 - ca->mptcp_loss1);
|
||||||
|
|
||||||
|
if ((u64)tmp_int * best_rtt == (u64)best_int * tmp_rtt)
|
||||||
|
B_not_M++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* check if the path is in M or B_not_M and set the value of epsilon accordingly */
|
||||||
|
mptcp_for_each_sub(mpcb, mptcp) {
|
||||||
|
sk = mptcp_to_sock(mptcp);
|
||||||
|
tp = tcp_sk(sk);
|
||||||
|
ca = inet_csk_ca(sk);
|
||||||
|
|
||||||
|
if (!mptcp_olia_sk_can_send(sk))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (B_not_M == 0) {
|
||||||
|
ca->epsilon_num = 0;
|
||||||
|
ca->epsilon_den = 1;
|
||||||
|
} else {
|
||||||
|
tmp_rtt = (u64)tp->srtt_us * tp->srtt_us;
|
||||||
|
tmp_int = max(ca->mptcp_loss3 - ca->mptcp_loss2,
|
||||||
|
ca->mptcp_loss2 - ca->mptcp_loss1);
|
||||||
|
tmp_cwnd = mptcp_get_crt_cwnd(sk);
|
||||||
|
|
||||||
|
if (tmp_cwnd < max_cwnd &&
|
||||||
|
(u64)tmp_int * best_rtt == (u64)best_int * tmp_rtt) {
|
||||||
|
ca->epsilon_num = 1;
|
||||||
|
ca->epsilon_den = established_cnt * B_not_M;
|
||||||
|
} else if (tmp_cwnd == max_cwnd) {
|
||||||
|
ca->epsilon_num = -1;
|
||||||
|
ca->epsilon_den = established_cnt * M;
|
||||||
|
} else {
|
||||||
|
ca->epsilon_num = 0;
|
||||||
|
ca->epsilon_den = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* setting the initial values */
|
||||||
|
static void mptcp_olia_init(struct sock *sk)
|
||||||
|
{
|
||||||
|
const struct tcp_sock *tp = tcp_sk(sk);
|
||||||
|
struct mptcp_olia *ca = inet_csk_ca(sk);
|
||||||
|
|
||||||
|
if (mptcp(tp)) {
|
||||||
|
ca->mptcp_loss1 = tp->snd_una;
|
||||||
|
ca->mptcp_loss2 = tp->snd_una;
|
||||||
|
ca->mptcp_loss3 = tp->snd_una;
|
||||||
|
ca->mptcp_snd_cwnd_cnt = 0;
|
||||||
|
ca->epsilon_num = 0;
|
||||||
|
ca->epsilon_den = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* updating inter-loss distance and ssthresh */
|
||||||
|
static void mptcp_olia_set_state(struct sock *sk, u8 new_state)
|
||||||
|
{
|
||||||
|
if (!mptcp(tcp_sk(sk)))
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (new_state == TCP_CA_Loss ||
|
||||||
|
new_state == TCP_CA_Recovery || new_state == TCP_CA_CWR) {
|
||||||
|
struct mptcp_olia *ca = inet_csk_ca(sk);
|
||||||
|
|
||||||
|
if (ca->mptcp_loss3 != ca->mptcp_loss2 &&
|
||||||
|
!inet_csk(sk)->icsk_retransmits) {
|
||||||
|
ca->mptcp_loss1 = ca->mptcp_loss2;
|
||||||
|
ca->mptcp_loss2 = ca->mptcp_loss3;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* main algorithm */
|
||||||
|
static void mptcp_olia_cong_avoid(struct sock *sk, u32 ack, u32 acked)
|
||||||
|
{
|
||||||
|
struct tcp_sock *tp = tcp_sk(sk);
|
||||||
|
struct mptcp_olia *ca = inet_csk_ca(sk);
|
||||||
|
const struct mptcp_cb *mpcb = tp->mpcb;
|
||||||
|
|
||||||
|
u64 inc_num, inc_den, rate, cwnd_scaled;
|
||||||
|
|
||||||
|
if (!mptcp(tp)) {
|
||||||
|
tcp_reno_cong_avoid(sk, ack, acked);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
ca->mptcp_loss3 = tp->snd_una;
|
||||||
|
|
||||||
|
if (!tcp_is_cwnd_limited(sk))
|
||||||
|
return;
|
||||||
|
|
||||||
|
/* slow start if it is in the safe area */
|
||||||
|
if (tcp_in_slow_start(tp)) {
|
||||||
|
tcp_slow_start(tp, acked);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
mptcp_get_epsilon(mpcb);
|
||||||
|
rate = mptcp_get_rate(mpcb, tp->srtt_us);
|
||||||
|
cwnd_scaled = mptcp_olia_scale(tp->snd_cwnd, scale);
|
||||||
|
inc_den = ca->epsilon_den * tp->snd_cwnd * rate ? : 1;
|
||||||
|
|
||||||
|
/* calculate the increasing term, scaling is used to reduce the rounding effect */
|
||||||
|
if (ca->epsilon_num == -1) {
|
||||||
|
if (ca->epsilon_den * cwnd_scaled * cwnd_scaled < rate) {
|
||||||
|
inc_num = rate - ca->epsilon_den *
|
||||||
|
cwnd_scaled * cwnd_scaled;
|
||||||
|
ca->mptcp_snd_cwnd_cnt -= div64_u64(
|
||||||
|
mptcp_olia_scale(inc_num , scale) , inc_den);
|
||||||
|
} else {
|
||||||
|
inc_num = ca->epsilon_den *
|
||||||
|
cwnd_scaled * cwnd_scaled - rate;
|
||||||
|
ca->mptcp_snd_cwnd_cnt += div64_u64(
|
||||||
|
mptcp_olia_scale(inc_num , scale) , inc_den);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
inc_num = ca->epsilon_num * rate +
|
||||||
|
ca->epsilon_den * cwnd_scaled * cwnd_scaled;
|
||||||
|
ca->mptcp_snd_cwnd_cnt += div64_u64(
|
||||||
|
mptcp_olia_scale(inc_num , scale) , inc_den);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if (ca->mptcp_snd_cwnd_cnt >= (1 << scale) - 1) {
|
||||||
|
if (tp->snd_cwnd < tp->snd_cwnd_clamp)
|
||||||
|
tp->snd_cwnd++;
|
||||||
|
ca->mptcp_snd_cwnd_cnt = 0;
|
||||||
|
} else if (ca->mptcp_snd_cwnd_cnt <= 0 - (1 << scale) + 1) {
|
||||||
|
tp->snd_cwnd = max((int) 1 , (int) tp->snd_cwnd - 1);
|
||||||
|
ca->mptcp_snd_cwnd_cnt = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct tcp_congestion_ops mptcp_olia = {
|
||||||
|
.init = mptcp_olia_init,
|
||||||
|
.ssthresh = tcp_reno_ssthresh,
|
||||||
|
.cong_avoid = mptcp_olia_cong_avoid,
|
||||||
|
.undo_cwnd = tcp_reno_undo_cwnd,
|
||||||
|
.set_state = mptcp_olia_set_state,
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
.name = "olia",
|
||||||
|
};
|
||||||
|
|
||||||
|
static int __init mptcp_olia_register(void)
|
||||||
|
{
|
||||||
|
BUILD_BUG_ON(sizeof(struct mptcp_olia) > ICSK_CA_PRIV_SIZE);
|
||||||
|
return tcp_register_congestion_control(&mptcp_olia);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void __exit mptcp_olia_unregister(void)
|
||||||
|
{
|
||||||
|
tcp_unregister_congestion_control(&mptcp_olia);
|
||||||
|
}
|
||||||
|
|
||||||
|
module_init(mptcp_olia_register);
|
||||||
|
module_exit(mptcp_olia_unregister);
|
||||||
|
|
||||||
|
MODULE_AUTHOR("Ramin Khalili, Nicolas Gast, Jean-Yves Le Boudec");
|
||||||
|
MODULE_LICENSE("GPL");
|
||||||
|
MODULE_DESCRIPTION("MPTCP COUPLED CONGESTION CONTROL");
|
||||||
|
MODULE_VERSION("0.1");
|
1929
net/mptcp/mptcp_output.c
Executable file
1929
net/mptcp/mptcp_output.c
Executable file
File diff suppressed because it is too large
Load Diff
226
net/mptcp/mptcp_pm.c
Executable file
226
net/mptcp/mptcp_pm.c
Executable file
@ -0,0 +1,226 @@
|
|||||||
|
/*
|
||||||
|
* MPTCP implementation - MPTCP-subflow-management
|
||||||
|
*
|
||||||
|
* Initial Design & Implementation:
|
||||||
|
* Sébastien Barré <sebastien.barre@uclouvain.be>
|
||||||
|
*
|
||||||
|
* Current Maintainer & Author:
|
||||||
|
* Christoph Paasch <christoph.paasch@uclouvain.be>
|
||||||
|
*
|
||||||
|
* Additional authors:
|
||||||
|
* Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
|
||||||
|
* Gregory Detal <gregory.detal@uclouvain.be>
|
||||||
|
* Fabien Duchêne <fabien.duchene@uclouvain.be>
|
||||||
|
* Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
|
||||||
|
* Lavkesh Lahngir <lavkesh51@gmail.com>
|
||||||
|
* Andreas Ripke <ripke@neclab.eu>
|
||||||
|
* Vlad Dogaru <vlad.dogaru@intel.com>
|
||||||
|
* Octavian Purdila <octavian.purdila@intel.com>
|
||||||
|
* John Ronan <jronan@tssg.org>
|
||||||
|
* Catalin Nicutar <catalin.nicutar@gmail.com>
|
||||||
|
* Brandon Heller <brandonh@stanford.edu>
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public License
|
||||||
|
* as published by the Free Software Foundation; either version
|
||||||
|
* 2 of the License, or (at your option) any later version.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#include <linux/module.h>
|
||||||
|
#include <net/mptcp.h>
|
||||||
|
|
||||||
|
static DEFINE_SPINLOCK(mptcp_pm_list_lock);
|
||||||
|
static LIST_HEAD(mptcp_pm_list);
|
||||||
|
|
||||||
|
static int mptcp_default_id(const struct sock *meta_sk, sa_family_t family,
|
||||||
|
union inet_addr *addr, bool *low_prio)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct mptcp_pm_ops mptcp_pm_default = {
|
||||||
|
.get_local_id = mptcp_default_id, /* We do not care */
|
||||||
|
.name = "default",
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
};
|
||||||
|
|
||||||
|
static struct mptcp_pm_ops *mptcp_pm_find(const char *name)
|
||||||
|
{
|
||||||
|
struct mptcp_pm_ops *e;
|
||||||
|
|
||||||
|
list_for_each_entry_rcu(e, &mptcp_pm_list, list) {
|
||||||
|
if (strcmp(e->name, name) == 0)
|
||||||
|
return e;
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
int mptcp_register_path_manager(struct mptcp_pm_ops *pm)
|
||||||
|
{
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
if (!pm->get_local_id)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
spin_lock(&mptcp_pm_list_lock);
|
||||||
|
if (mptcp_pm_find(pm->name)) {
|
||||||
|
pr_notice("%s already registered\n", pm->name);
|
||||||
|
ret = -EEXIST;
|
||||||
|
} else {
|
||||||
|
list_add_tail_rcu(&pm->list, &mptcp_pm_list);
|
||||||
|
pr_info("%s registered\n", pm->name);
|
||||||
|
}
|
||||||
|
spin_unlock(&mptcp_pm_list_lock);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(mptcp_register_path_manager);
|
||||||
|
|
||||||
|
void mptcp_unregister_path_manager(struct mptcp_pm_ops *pm)
|
||||||
|
{
|
||||||
|
spin_lock(&mptcp_pm_list_lock);
|
||||||
|
list_del_rcu(&pm->list);
|
||||||
|
spin_unlock(&mptcp_pm_list_lock);
|
||||||
|
|
||||||
|
/* Wait for outstanding readers to complete before the
|
||||||
|
* module gets removed entirely.
|
||||||
|
*
|
||||||
|
* A try_module_get() should fail by now as our module is
|
||||||
|
* in "going" state since no refs are held anymore and
|
||||||
|
* module_exit() handler being called.
|
||||||
|
*/
|
||||||
|
synchronize_rcu();
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(mptcp_unregister_path_manager);
|
||||||
|
|
||||||
|
void mptcp_get_default_path_manager(char *name)
|
||||||
|
{
|
||||||
|
struct mptcp_pm_ops *pm;
|
||||||
|
|
||||||
|
BUG_ON(list_empty(&mptcp_pm_list));
|
||||||
|
|
||||||
|
rcu_read_lock();
|
||||||
|
pm = list_entry(mptcp_pm_list.next, struct mptcp_pm_ops, list);
|
||||||
|
strncpy(name, pm->name, MPTCP_PM_NAME_MAX);
|
||||||
|
rcu_read_unlock();
|
||||||
|
}
|
||||||
|
|
||||||
|
int mptcp_set_default_path_manager(const char *name)
|
||||||
|
{
|
||||||
|
struct mptcp_pm_ops *pm;
|
||||||
|
int ret = -ENOENT;
|
||||||
|
|
||||||
|
spin_lock(&mptcp_pm_list_lock);
|
||||||
|
pm = mptcp_pm_find(name);
|
||||||
|
#ifdef CONFIG_MODULES
|
||||||
|
if (!pm && capable(CAP_NET_ADMIN)) {
|
||||||
|
spin_unlock(&mptcp_pm_list_lock);
|
||||||
|
|
||||||
|
request_module("mptcp_%s", name);
|
||||||
|
spin_lock(&mptcp_pm_list_lock);
|
||||||
|
pm = mptcp_pm_find(name);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (pm) {
|
||||||
|
list_move(&pm->list, &mptcp_pm_list);
|
||||||
|
ret = 0;
|
||||||
|
} else {
|
||||||
|
pr_info("%s is not available\n", name);
|
||||||
|
}
|
||||||
|
spin_unlock(&mptcp_pm_list_lock);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct mptcp_pm_ops *__mptcp_pm_find_autoload(const char *name)
|
||||||
|
{
|
||||||
|
struct mptcp_pm_ops *pm = mptcp_pm_find(name);
|
||||||
|
#ifdef CONFIG_MODULES
|
||||||
|
if (!pm && capable(CAP_NET_ADMIN)) {
|
||||||
|
rcu_read_unlock();
|
||||||
|
request_module("mptcp_%s", name);
|
||||||
|
rcu_read_lock();
|
||||||
|
pm = mptcp_pm_find(name);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return pm;
|
||||||
|
}
|
||||||
|
|
||||||
|
void mptcp_init_path_manager(struct mptcp_cb *mpcb)
|
||||||
|
{
|
||||||
|
struct mptcp_pm_ops *pm;
|
||||||
|
struct sock *meta_sk = mpcb->meta_sk;
|
||||||
|
struct tcp_sock *meta_tp = tcp_sk(meta_sk);
|
||||||
|
|
||||||
|
rcu_read_lock();
|
||||||
|
/* if path manager was set using socket option */
|
||||||
|
if (meta_tp->mptcp_pm_setsockopt) {
|
||||||
|
pm = __mptcp_pm_find_autoload(meta_tp->mptcp_pm_name);
|
||||||
|
if (pm && try_module_get(pm->owner)) {
|
||||||
|
mpcb->pm_ops = pm;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
list_for_each_entry_rcu(pm, &mptcp_pm_list, list) {
|
||||||
|
if (try_module_get(pm->owner)) {
|
||||||
|
mpcb->pm_ops = pm;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
out:
|
||||||
|
rcu_read_unlock();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Change path manager for socket */
|
||||||
|
int mptcp_set_path_manager(struct sock *sk, const char *name)
|
||||||
|
{
|
||||||
|
struct mptcp_pm_ops *pm;
|
||||||
|
int err = 0;
|
||||||
|
|
||||||
|
rcu_read_lock();
|
||||||
|
pm = __mptcp_pm_find_autoload(name);
|
||||||
|
|
||||||
|
if (!pm) {
|
||||||
|
err = -ENOENT;
|
||||||
|
} else if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
|
||||||
|
err = -EPERM;
|
||||||
|
} else {
|
||||||
|
strcpy(tcp_sk(sk)->mptcp_pm_name, name);
|
||||||
|
tcp_sk(sk)->mptcp_pm_setsockopt = 1;
|
||||||
|
}
|
||||||
|
rcu_read_unlock();
|
||||||
|
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Manage refcounts on socket close. */
|
||||||
|
void mptcp_cleanup_path_manager(struct mptcp_cb *mpcb)
|
||||||
|
{
|
||||||
|
module_put(mpcb->pm_ops->owner);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Fallback to the default path-manager: release the current manager and
 * rebind the connection to the built-in "default" manager.
 */
void mptcp_fallback_default(struct mptcp_cb *mpcb)
{
	struct mptcp_pm_ops *pm;

	mptcp_cleanup_path_manager(mpcb);
	pm = mptcp_pm_find("default");

	/* Cannot fail - it's the default module */
	try_module_get(pm->owner);
	mpcb->pm_ops = pm;
}
EXPORT_SYMBOL_GPL(mptcp_fallback_default);
|
||||||
|
|
||||||
|
/* Set default value from kernel configuration at bootup.
 *
 * NOTE(review): the default is hard-coded to "fullmesh" here rather than
 * taken from a Kconfig option — confirm this is the intended default for
 * this port.
 */
static int __init mptcp_path_manager_default(void)
{
	return mptcp_set_default_path_manager("fullmesh");
}
late_initcall(mptcp_path_manager_default);
|
389
net/mptcp/mptcp_redundant.c
Executable file
389
net/mptcp/mptcp_redundant.c
Executable file
@ -0,0 +1,389 @@
|
|||||||
|
/*
|
||||||
|
* MPTCP Scheduler to reduce latency and jitter.
|
||||||
|
*
|
||||||
|
* This scheduler sends all packets redundantly on all available subflows.
|
||||||
|
*
|
||||||
|
* Initial Design & Implementation:
|
||||||
|
* Tobias Erbshaeusser <erbshauesser@dvs.tu-darmstadt.de>
|
||||||
|
* Alexander Froemmgen <froemmge@dvs.tu-darmstadt.de>
|
||||||
|
*
|
||||||
|
* Initial corrections & modifications:
|
||||||
|
* Christian Pinedo <christian.pinedo@ehu.eus>
|
||||||
|
* Igor Lopez <igor.lopez@ehu.eus>
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public License
|
||||||
|
* as published by the Free Software Foundation; either version
|
||||||
|
* 2 of the License, or (at your option) any later version.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <linux/module.h>
|
||||||
|
#include <net/mptcp.h>
|
||||||
|
|
||||||
|
/* Struct to store the data of a single subflow */
struct redsched_priv {
	/* The skb or NULL */
	struct sk_buff *skb;
	/* End sequence number of the skb. This number should be checked
	 * to be valid before the skb field is used
	 */
	u32 skb_end_seq;
};

/* Struct to store the data of the control block */
struct redsched_cb {
	/* The next subflow where a skb should be sent or NULL */
	struct tcp_sock *next_subflow;
};

/* Returns the per-subflow scheduler state, stored in the scratch area the
 * MPTCP core reserves inside each subflow (mptcp_sched[]).
 */
static struct redsched_priv *redsched_get_priv(struct tcp_sock *tp)
{
	return (struct redsched_priv *)&tp->mptcp->mptcp_sched[0];
}

/* Returns the per-connection scheduler state, stored in the scratch area
 * of the control block (mpcb->mptcp_sched[]).
 */
static struct redsched_cb *redsched_get_cb(struct tcp_sock *tp)
{
	return (struct redsched_cb *)&tp->mpcb->mptcp_sched[0];
}
|
||||||
|
|
||||||
|
static bool redsched_get_active_valid_sks(struct sock *meta_sk)
|
||||||
|
{
|
||||||
|
struct tcp_sock *meta_tp = tcp_sk(meta_sk);
|
||||||
|
struct mptcp_cb *mpcb = meta_tp->mpcb;
|
||||||
|
struct mptcp_tcp_sock *mptcp;
|
||||||
|
int active_valid_sks = 0;
|
||||||
|
|
||||||
|
mptcp_for_each_sub(mpcb, mptcp) {
|
||||||
|
struct sock *sk = mptcp_to_sock(mptcp);
|
||||||
|
|
||||||
|
if (subflow_is_active((struct tcp_sock *)sk) &&
|
||||||
|
!mptcp_is_def_unavailable(sk))
|
||||||
|
active_valid_sks++;
|
||||||
|
}
|
||||||
|
|
||||||
|
return active_valid_sks;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool redsched_use_subflow(struct sock *meta_sk,
|
||||||
|
int active_valid_sks,
|
||||||
|
struct tcp_sock *tp,
|
||||||
|
struct sk_buff *skb)
|
||||||
|
{
|
||||||
|
if (!skb || !mptcp_is_available((struct sock *)tp, skb, false))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (TCP_SKB_CB(skb)->path_mask != 0)
|
||||||
|
return subflow_is_active(tp);
|
||||||
|
|
||||||
|
if (TCP_SKB_CB(skb)->path_mask == 0) {
|
||||||
|
if (active_valid_sks == -1)
|
||||||
|
active_valid_sks = redsched_get_active_valid_sks(meta_sk);
|
||||||
|
|
||||||
|
if (subflow_is_backup(tp) && active_valid_sks > 0)
|
||||||
|
return false;
|
||||||
|
else
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Successor of __mptcp in the RCU-protected subflow list, or NULL at the
 * end of the list.
 */
#define mptcp_entry_next_rcu(__mptcp) \
	hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu( \
		&(__mptcp)->node)), struct mptcp_tcp_sock, node)

/* Advance the round-robin cursor: remember the subflow after 'tp' as the
 * starting point for the next scheduling decision (NULL wraps to the list
 * head on next use).
 */
static void redsched_update_next_subflow(struct tcp_sock *tp,
					 struct redsched_cb *red_cb)
{
	struct mptcp_tcp_sock *mptcp = mptcp_entry_next_rcu(tp->mptcp);

	if (mptcp)
		red_cb->next_subflow = mptcp->tp;
	else
		red_cb->next_subflow = NULL;
}
|
||||||
|
|
||||||
|
/* get_subflow callback of the redundant scheduler: pick a subflow for
 * 'skb', starting the search at the saved round-robin cursor.  A DATA_FIN
 * must be answered on the subflow it arrived on.  Returns NULL when no
 * subflow can currently take the packet.
 */
static struct sock *red_get_available_subflow(struct sock *meta_sk,
					      struct sk_buff *skb,
					      bool zero_wnd_test)
{
	struct tcp_sock *meta_tp = tcp_sk(meta_sk);
	struct mptcp_cb *mpcb = meta_tp->mpcb;
	struct redsched_cb *red_cb = redsched_get_cb(meta_tp);
	struct tcp_sock *first_tp = red_cb->next_subflow, *tp;
	struct mptcp_tcp_sock *mptcp;
	int found = 0;

	/* Answer data_fin on same subflow */
	if (meta_sk->sk_shutdown & RCV_SHUTDOWN &&
	    skb && mptcp_is_data_fin(skb)) {
		mptcp_for_each_sub(mpcb, mptcp) {
			struct sock *sk = mptcp_to_sock(mptcp);

			if (tcp_sk(sk)->mptcp->path_index ==
				mpcb->dfin_path_index &&
			    mptcp_is_available(sk, skb, zero_wnd_test))
				return sk;
		}
	}

	/* No cursor yet: start at the first subflow in the connection list */
	if (!first_tp && !hlist_empty(&mpcb->conn_list)) {
		first_tp = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(&mpcb->conn_list)),
					    struct mptcp_tcp_sock, node)->tp;
	}
	tp = first_tp;

	/* still NULL (no subflow in conn_list?) */
	if (!first_tp)
		return NULL;

	/* Search for a subflow to send it.
	 *
	 * We want to pick a subflow that is after 'first_tp' in the list of subflows.
	 * Thus, the first mptcp_for_each_sub()-loop tries to walk the list up
	 * to the subflow 'tp' and then checks whether any one of the remaining
	 * ones is eligible to send.
	 * The second mptcp_for_each-sub()-loop is then iterating from the
	 * beginning of the list up to 'first_tp'.
	 */
	mptcp_for_each_sub(mpcb, mptcp) {
		/* We go up to the subflow 'tp' and start from there */
		if (tp == mptcp->tp)
			found = 1;

		if (!found)
			continue;
		tp = mptcp->tp;

		if (mptcp_is_available((struct sock *)tp, skb,
				       zero_wnd_test)) {
			redsched_update_next_subflow(tp, red_cb);
			return (struct sock *)tp;
		}
	}

	/* Wrap around: scan from the list head up to (excluding) first_tp */
	mptcp_for_each_sub(mpcb, mptcp) {
		tp = mptcp->tp;

		if (tp == first_tp)
			break;

		if (mptcp_is_available((struct sock *)tp, skb,
				       zero_wnd_test)) {
			redsched_update_next_subflow(tp, red_cb);
			return (struct sock *)tp;
		}
	}

	/* No space */
	return NULL;
}
|
||||||
|
|
||||||
|
/* Corrects the stored skb pointers if they are invalid: once the meta-level
 * snd_una has advanced past the remembered skb's end sequence, the skb may
 * have been freed, so the cached pointer is dropped.
 */
static void redsched_correct_skb_pointers(struct sock *meta_sk,
					  struct redsched_priv *red_p)
{
	struct tcp_sock *meta_tp = tcp_sk(meta_sk);

	if (red_p->skb && !after(red_p->skb_end_seq, meta_tp->snd_una))
		red_p->skb = NULL;
}
|
||||||
|
|
||||||
|
/* Returns the next skb from the queue.
 *
 * 'previous' is the last skb this subflow scheduled (possibly not yet
 * sent); NULL means start from the head of the queue.
 */
static struct sk_buff *redsched_next_skb_from_queue(struct sk_buff_head *queue,
						    struct sk_buff *previous,
						    struct sock *meta_sk)
{
	struct sk_buff *skb;

	if (!previous)
		return skb_peek(queue);

	/* sk_data->skb stores the last scheduled packet for this subflow.
	 * If sk_data->skb was scheduled but not sent (e.g., due to nagle),
	 * we have to schedule it again.
	 *
	 * For the redundant scheduler, there are two cases:
	 * 1. sk_data->skb was not sent on another subflow:
	 *    we have to schedule it again to ensure that we do not
	 *    skip this packet.
	 * 2. sk_data->skb was already sent on another subflow:
	 *    with regard to the redundant semantic, we have to
	 *    schedule it again. However, we keep it simple and ignore it,
	 *    as it was already sent by another subflow.
	 *    This might be changed in the future.
	 *
	 * For case 1, send_head is equal previous, as only a single
	 * packet can be skipped.
	 */
	if (tcp_send_head(meta_sk) == previous)
		return tcp_send_head(meta_sk);

	/* Otherwise continue with the skb after 'previous' in the rbtree */
	skb = skb_rb_next(previous);
	if (skb)
		return skb;

	return tcp_send_head(meta_sk);
}
|
||||||
|
|
||||||
|
/* next_segment callback of the redundant scheduler.
 *
 * Picks the next skb to transmit and the subflow to send it on, preferring
 * the reinject queue; otherwise walks the subflows round-robin starting at
 * the saved cursor and redundantly re-offers already-sent data.  Sets
 * *reinject to 1 for reinjections, -1 for redundant re-sends of data with a
 * non-zero path_mask, and 0 otherwise.  *limit is reset to 0 (no split).
 */
static struct sk_buff *mptcp_red_next_segment(struct sock *meta_sk,
					      int *reinject,
					      struct sock **subsk,
					      unsigned int *limit)
{
	struct tcp_sock *meta_tp = tcp_sk(meta_sk);
	struct mptcp_cb *mpcb = meta_tp->mpcb;
	struct redsched_cb *red_cb = redsched_get_cb(meta_tp);
	struct tcp_sock *first_tp = red_cb->next_subflow, *tp;
	struct mptcp_tcp_sock *mptcp;
	int active_valid_sks = -1;
	struct sk_buff *skb;
	int found = 0;

	/* As we set it, we have to reset it as well. */
	*limit = 0;

	if (skb_queue_empty(&mpcb->reinject_queue) &&
	    skb_queue_empty(&meta_sk->sk_write_queue))
		/* Nothing to send */
		return NULL;

	/* First try reinjections */
	skb = skb_peek(&mpcb->reinject_queue);
	if (skb) {
		/* NOTE(review): this calls the generic get_available_subflow()
		 * rather than red_get_available_subflow() — confirm the intended
		 * helper resolves at link time.
		 */
		*subsk = get_available_subflow(meta_sk, skb, false);
		if (!*subsk)
			return NULL;
		*reinject = 1;
		return skb;
	}

	/* Then try indistinctly redundant and normal skbs */

	if (!first_tp && !hlist_empty(&mpcb->conn_list)) {
		first_tp = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(&mpcb->conn_list)),
					    struct mptcp_tcp_sock, node)->tp;
	}

	/* still NULL (no subflow in conn_list?) */
	if (!first_tp)
		return NULL;

	tp = first_tp;

	*reinject = 0;
	active_valid_sks = redsched_get_active_valid_sks(meta_sk);

	/* We want to pick a subflow that is after 'first_tp' in the list of subflows.
	 * Thus, the first mptcp_for_each_sub()-loop tries to walk the list up
	 * to the subflow 'tp' and then checks whether any one of the remaining
	 * ones can send a segment.
	 * The second mptcp_for_each-sub()-loop is then iterating from the
	 * beginning of the list up to 'first_tp'.
	 */
	mptcp_for_each_sub(mpcb, mptcp) {
		struct redsched_priv *red_p;

		if (tp == mptcp->tp)
			found = 1;

		if (!found)
			continue;

		tp = mptcp->tp;

		/* Correct the skb pointers of the current subflow */
		red_p = redsched_get_priv(tp);
		redsched_correct_skb_pointers(meta_sk, red_p);

		skb = redsched_next_skb_from_queue(&meta_sk->sk_write_queue,
						   red_p->skb, meta_sk);
		if (skb && redsched_use_subflow(meta_sk, active_valid_sks, tp,
						skb)) {
			red_p->skb = skb;
			red_p->skb_end_seq = TCP_SKB_CB(skb)->end_seq;
			redsched_update_next_subflow(tp, red_cb);
			*subsk = (struct sock *)tp;

			if (TCP_SKB_CB(skb)->path_mask)
				*reinject = -1;
			return skb;
		}
	}

	/* Wrap-around pass: list head up to (excluding) first_tp */
	mptcp_for_each_sub(mpcb, mptcp) {
		struct redsched_priv *red_p;

		tp = mptcp->tp;

		if (tp == first_tp)
			break;

		/* Correct the skb pointers of the current subflow */
		red_p = redsched_get_priv(tp);
		redsched_correct_skb_pointers(meta_sk, red_p);

		skb = redsched_next_skb_from_queue(&meta_sk->sk_write_queue,
						   red_p->skb, meta_sk);
		if (skb && redsched_use_subflow(meta_sk, active_valid_sks, tp,
						skb)) {
			red_p->skb = skb;
			red_p->skb_end_seq = TCP_SKB_CB(skb)->end_seq;
			redsched_update_next_subflow(tp, red_cb);
			*subsk = (struct sock *)tp;

			if (TCP_SKB_CB(skb)->path_mask)
				*reinject = -1;
			return skb;
		}
	}

	/* Nothing to send */
	return NULL;
}
|
||||||
|
|
||||||
|
/* release callback: a subflow is going away; make sure the scheduler's
 * round-robin cursor does not keep pointing at it.
 */
static void redsched_release(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct redsched_cb *red_cb = redsched_get_cb(tp);

	/* Check if the next subflow would be the released one. If yes correct
	 * the pointer
	 */
	if (red_cb->next_subflow == tp)
		redsched_update_next_subflow(tp, red_cb);
}
|
||||||
|
|
||||||
|
/* Scheduler operations exposed to the MPTCP core. */
static struct mptcp_sched_ops mptcp_sched_red = {
	.get_subflow = red_get_available_subflow,
	.next_segment = mptcp_red_next_segment,
	.release = redsched_release,
	.name = "redundant",
	.owner = THIS_MODULE,
};

static int __init red_register(void)
{
	/* Private state must fit the scratch areas the core reserves in the
	 * subflow and the control block.
	 */
	BUILD_BUG_ON(sizeof(struct redsched_priv) > MPTCP_SCHED_SIZE);
	BUILD_BUG_ON(sizeof(struct redsched_cb) > MPTCP_SCHED_DATA_SIZE);

	if (mptcp_register_scheduler(&mptcp_sched_red))
		return -1;

	return 0;
}

static void red_unregister(void)
{
	mptcp_unregister_scheduler(&mptcp_sched_red);
}

module_init(red_register);
module_exit(red_unregister);

MODULE_AUTHOR("Tobias Erbshaeusser, Alexander Froemmgen");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("REDUNDANT MPTCP");
MODULE_VERSION("0.90");
|
309
net/mptcp/mptcp_rr.c
Executable file
309
net/mptcp/mptcp_rr.c
Executable file
@ -0,0 +1,309 @@
|
|||||||
|
/* MPTCP Scheduler module selector. Highly inspired by tcp_cong.c */
|
||||||
|
|
||||||
|
#include <linux/module.h>
|
||||||
|
#include <net/mptcp.h>
|
||||||
|
|
||||||
|
static unsigned char num_segments __read_mostly = 1;
|
||||||
|
module_param(num_segments, byte, 0644);
|
||||||
|
MODULE_PARM_DESC(num_segments, "The number of consecutive segments that are part of a burst");
|
||||||
|
|
||||||
|
static bool cwnd_limited __read_mostly = 1;
|
||||||
|
module_param(cwnd_limited, bool, 0644);
|
||||||
|
MODULE_PARM_DESC(cwnd_limited, "if set to 1, the scheduler tries to fill the congestion-window on all subflows");
|
||||||
|
|
||||||
|
struct rrsched_priv {
|
||||||
|
unsigned char quota;
|
||||||
|
};
|
||||||
|
|
||||||
|
static struct rrsched_priv *rrsched_get_priv(const struct tcp_sock *tp)
|
||||||
|
{
|
||||||
|
return (struct rrsched_priv *)&tp->mptcp->mptcp_sched[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Is the sub-socket sk available to send the skb?
 *
 * Checks connection state, loss-state restrictions, in-order requirements
 * before full establishment, optionally congestion-window headroom
 * (cwnd_test) and optionally the receive window (zero_wnd_test).
 */
static bool mptcp_rr_is_available(const struct sock *sk, const struct sk_buff *skb,
				  bool zero_wnd_test, bool cwnd_test)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	unsigned int space, in_flight;

	/* Set of states for which we are allowed to send data */
	if (!mptcp_sk_can_send(sk))
		return false;

	/* We do not send data on this subflow unless it is
	 * fully established, i.e. the 4th ack has been received.
	 */
	if (tp->mptcp->pre_established)
		return false;

	if (tp->pf)
		return false;

	if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) {
		/* If SACK is disabled, and we got a loss, TCP does not exit
		 * the loss-state until something above high_seq has been acked.
		 * (see tcp_try_undo_recovery)
		 *
		 * high_seq is the snd_nxt at the moment of the RTO. As soon
		 * as we have an RTO, we won't push data on the subflow.
		 * Thus, snd_una can never go beyond high_seq.
		 */
		if (!tcp_is_reno(tp))
			return false;
		else if (tp->snd_una != tp->high_seq)
			return false;
	}

	if (!tp->mptcp->fully_established) {
		/* Make sure that we send in-order data */
		if (skb && tp->mptcp->second_packet &&
		    tp->mptcp->last_end_data_seq != TCP_SKB_CB(skb)->seq)
			return false;
	}

	if (!cwnd_test)
		goto zero_wnd_test;

	in_flight = tcp_packets_in_flight(tp);
	/* Not even a single spot in the cwnd */
	if (in_flight >= tp->snd_cwnd)
		return false;

	/* Now, check if what is queued in the subflow's send-queue
	 * already fills the cwnd.
	 */
	space = (tp->snd_cwnd - in_flight) * tp->mss_cache;

	if (tp->write_seq - tp->snd_nxt > space)
		return false;

zero_wnd_test:
	if (zero_wnd_test && !before(tp->write_seq, tcp_wnd_end(tp)))
		return false;

	return true;
}
|
||||||
|
|
||||||
|
/* Are we not allowed to reinject this skb on tp?
 * True when the skb's path_mask shows it was already enqueued on this
 * subflow.
 */
static int mptcp_rr_dont_reinject_skb(const struct tcp_sock *tp, const struct sk_buff *skb)
{
	/* If the skb has already been enqueued in this sk, try to find
	 * another one.
	 */
	return skb &&
		/* Has the skb already been enqueued into this subsocket? */
		mptcp_pi_to_flag(tp->mptcp->path_index) & TCP_SKB_CB(skb)->path_mask;
}
|
||||||
|
|
||||||
|
/* We just look for any subflow that is available.
 *
 * Prefers a subflow that has not yet carried this skb; if all available
 * subflows carried it already, its path_mask is reset and one of them is
 * reused.  DATA_FIN is answered on the subflow it belongs to.
 */
static struct sock *rr_get_available_subflow(struct sock *meta_sk,
					     struct sk_buff *skb,
					     bool zero_wnd_test)
{
	const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
	struct sock *sk = NULL, *bestsk = NULL, *backupsk = NULL;
	struct mptcp_tcp_sock *mptcp;

	/* Answer data_fin on same subflow!!! */
	if (meta_sk->sk_shutdown & RCV_SHUTDOWN &&
	    skb && mptcp_is_data_fin(skb)) {
		mptcp_for_each_sub(mpcb, mptcp) {
			sk = mptcp_to_sock(mptcp);
			if (tcp_sk(sk)->mptcp->path_index == mpcb->dfin_path_index &&
			    mptcp_rr_is_available(sk, skb, zero_wnd_test, true))
				return sk;
		}
	}

	/* First, find the best subflow */
	mptcp_for_each_sub(mpcb, mptcp) {
		struct tcp_sock *tp;

		sk = mptcp_to_sock(mptcp);
		tp = tcp_sk(sk);

		if (!mptcp_rr_is_available(sk, skb, zero_wnd_test, true))
			continue;

		if (mptcp_rr_dont_reinject_skb(tp, skb)) {
			backupsk = sk;
			continue;
		}

		bestsk = sk;
	}

	/* NOTE(review): if neither bestsk nor backupsk is set, 'sk' still
	 * holds the last subflow examined even though it failed the
	 * availability test — confirm callers tolerate this (or whether
	 * NULL was intended here).
	 */
	if (bestsk) {
		sk = bestsk;
	} else if (backupsk) {
		/* It has been sent on all subflows once - let's give it a
		 * chance again by restarting its pathmask.
		 */
		if (skb)
			TCP_SKB_CB(skb)->path_mask = 0;
		sk = backupsk;
	}

	return sk;
}
|
||||||
|
|
||||||
|
/* Returns the next segment to be sent from the mptcp meta-queue.
|
||||||
|
* (chooses the reinject queue if any segment is waiting in it, otherwise,
|
||||||
|
* chooses the normal write queue).
|
||||||
|
* Sets *@reinject to 1 if the returned segment comes from the
|
||||||
|
* reinject queue. Sets it to 0 if it is the regular send-head of the meta-sk,
|
||||||
|
* and sets it to -1 if it is a meta-level retransmission to optimize the
|
||||||
|
* receive-buffer.
|
||||||
|
*/
|
||||||
|
static struct sk_buff *__mptcp_rr_next_segment(const struct sock *meta_sk, int *reinject)
|
||||||
|
{
|
||||||
|
const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
|
||||||
|
struct sk_buff *skb = NULL;
|
||||||
|
|
||||||
|
*reinject = 0;
|
||||||
|
|
||||||
|
/* If we are in fallback-mode, just take from the meta-send-queue */
|
||||||
|
if (mpcb->infinite_mapping_snd || mpcb->send_infinite_mapping)
|
||||||
|
return tcp_send_head(meta_sk);
|
||||||
|
|
||||||
|
skb = skb_peek(&mpcb->reinject_queue);
|
||||||
|
|
||||||
|
if (skb)
|
||||||
|
*reinject = 1;
|
||||||
|
else
|
||||||
|
skb = tcp_send_head(meta_sk);
|
||||||
|
return skb;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* next_segment callback of the round-robin scheduler.
 *
 * Distributes bursts of up to num_segments segments across the available
 * subflows using a per-subflow quota; when every available subflow has a
 * full quota, all quotas are reset and the round restarts.  *limit is the
 * maximum number of bytes (split * mss) to send on the chosen subflow.
 */
static struct sk_buff *mptcp_rr_next_segment(struct sock *meta_sk,
					     int *reinject,
					     struct sock **subsk,
					     unsigned int *limit)
{
	const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
	struct sock *choose_sk = NULL;
	struct mptcp_tcp_sock *mptcp;
	struct sk_buff *skb = __mptcp_rr_next_segment(meta_sk, reinject);
	unsigned char split = num_segments;
	unsigned char iter = 0, full_subs = 0;

	/* As we set it, we have to reset it as well. */
	*limit = 0;

	if (!skb)
		return NULL;

	if (*reinject) {
		*subsk = rr_get_available_subflow(meta_sk, skb, false);
		if (!*subsk)
			return NULL;

		return skb;
	}

retry:

	/* First, we look for a subflow who is currently being used */
	mptcp_for_each_sub(mpcb, mptcp) {
		struct sock *sk_it = mptcp_to_sock(mptcp);
		struct tcp_sock *tp_it = tcp_sk(sk_it);
		struct rrsched_priv *rr_p = rrsched_get_priv(tp_it);

		if (!mptcp_rr_is_available(sk_it, skb, false, cwnd_limited))
			continue;

		iter++;

		/* Is this subflow currently being used? */
		if (rr_p->quota > 0 && rr_p->quota < num_segments) {
			split = num_segments - rr_p->quota;
			choose_sk = sk_it;
			goto found;
		}

		/* Or, it's totally unused */
		if (!rr_p->quota) {
			split = num_segments;
			choose_sk = sk_it;
		}

		/* Or, it must then be fully used */
		if (rr_p->quota >= num_segments)
			full_subs++;
	}

	/* All considered subflows have a full quota, and we considered at
	 * least one.
	 */
	if (iter && iter == full_subs) {
		/* So, we restart this round by setting quota to 0 and retry
		 * to find a subflow.
		 */
		mptcp_for_each_sub(mpcb, mptcp) {
			struct sock *sk_it = mptcp_to_sock(mptcp);
			struct tcp_sock *tp_it = tcp_sk(sk_it);
			struct rrsched_priv *rr_p = rrsched_get_priv(tp_it);

			if (!mptcp_rr_is_available(sk_it, skb, false, cwnd_limited))
				continue;

			rr_p->quota = 0;
		}

		goto retry;
	}

found:
	if (choose_sk) {
		unsigned int mss_now;
		struct tcp_sock *choose_tp = tcp_sk(choose_sk);
		struct rrsched_priv *rr_p = rrsched_get_priv(choose_tp);

		/* Re-check with a strict cwnd test before committing */
		if (!mptcp_rr_is_available(choose_sk, skb, false, true))
			return NULL;

		*subsk = choose_sk;
		mss_now = tcp_current_mss(*subsk);
		*limit = split * mss_now;

		/* Account whole segments against the quota */
		if (skb->len > mss_now)
			rr_p->quota += DIV_ROUND_UP(skb->len, mss_now);
		else
			rr_p->quota++;

		return skb;
	}

	return NULL;
}
|
||||||
|
|
||||||
|
/* Scheduler operations exposed to the MPTCP core. */
static struct mptcp_sched_ops mptcp_sched_rr = {
	.get_subflow = rr_get_available_subflow,
	.next_segment = mptcp_rr_next_segment,
	.name = "roundrobin",
	.owner = THIS_MODULE,
};

static int __init rr_register(void)
{
	/* Private state must fit the per-subflow scratch area */
	BUILD_BUG_ON(sizeof(struct rrsched_priv) > MPTCP_SCHED_SIZE);

	if (mptcp_register_scheduler(&mptcp_sched_rr))
		return -1;

	return 0;
}

static void rr_unregister(void)
{
	mptcp_unregister_scheduler(&mptcp_sched_rr);
}

module_init(rr_register);
module_exit(rr_unregister);

MODULE_AUTHOR("Christoph Paasch");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("ROUNDROBIN MPTCP");
MODULE_VERSION("0.89");
|
634
net/mptcp/mptcp_sched.c
Executable file
634
net/mptcp/mptcp_sched.c
Executable file
@ -0,0 +1,634 @@
|
|||||||
|
/* MPTCP Scheduler module selector. Highly inspired by tcp_cong.c */
|
||||||
|
|
||||||
|
#include <linux/module.h>
|
||||||
|
#include <net/mptcp.h>
|
||||||
|
#include <trace/events/tcp.h>
|
||||||
|
|
||||||
|
/* Protects mptcp_sched_list against concurrent (un)registration */
static DEFINE_SPINLOCK(mptcp_sched_list_lock);
/* All registered MPTCP schedulers */
static LIST_HEAD(mptcp_sched_list);

/* Per-subflow state of the default scheduler */
struct defsched_priv {
	/* Timestamp of the last receive-buffer optimization attempt */
	u32 last_rbuf_opti;
};

/* Per-subflow state lives in the scratch area reserved by the MPTCP core */
static struct defsched_priv *defsched_get_priv(const struct tcp_sock *tp)
{
	return (struct defsched_priv *)&tp->mptcp->mptcp_sched[0];
}
|
||||||
|
|
||||||
|
/* Is the subflow definitively unable to carry data (wrong state, not yet
 * fully established, or potentially failed)?  Transient conditions such as
 * a full cwnd are checked separately by mptcp_is_temp_unavailable().
 */
bool mptcp_is_def_unavailable(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	/* Set of states for which we are allowed to send data */
	if (!mptcp_sk_can_send(sk))
		return true;

	/* We do not send data on this subflow unless it is
	 * fully established, i.e. the 4th ack has been received.
	 */
	if (tp->mptcp->pre_established)
		return true;

	/* pf: the subflow is considered potentially failed */
	if (tp->pf)
		return true;

	return false;
}
EXPORT_SYMBOL_GPL(mptcp_is_def_unavailable);
|
||||||
|
|
||||||
|
/* Is the subflow temporarily unable to take 'skb' (loss recovery, cwnd or
 * send-window limits)?  zero_wnd_test additionally enforces the receive
 * window at the subflow level.
 */
static bool mptcp_is_temp_unavailable(struct sock *sk,
				      const struct sk_buff *skb,
				      bool zero_wnd_test)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	unsigned int mss_now, space, in_flight;

	if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) {
		/* If SACK is disabled, and we got a loss, TCP does not exit
		 * the loss-state until something above high_seq has been
		 * acked. (see tcp_try_undo_recovery)
		 *
		 * high_seq is the snd_nxt at the moment of the RTO. As soon
		 * as we have an RTO, we won't push data on the subflow.
		 * Thus, snd_una can never go beyond high_seq.
		 */
		if (!tcp_is_reno(tp))
			return true;
		else if (tp->snd_una != tp->high_seq)
			return true;
	}

	if (!tp->mptcp->fully_established) {
		/* Make sure that we send in-order data */
		if (skb && tp->mptcp->second_packet &&
		    tp->mptcp->last_end_data_seq != TCP_SKB_CB(skb)->seq)
			return true;
	}

	in_flight = tcp_packets_in_flight(tp);
	/* Not even a single spot in the cwnd */
	if (in_flight >= tp->snd_cwnd)
		return true;

	/* Now, check if what is queued in the subflow's send-queue
	 * already fills the cwnd.
	 */
	space = (tp->snd_cwnd - in_flight) * tp->mss_cache;

	if (tp->write_seq - tp->snd_nxt > space)
		return true;

	if (zero_wnd_test && !before(tp->write_seq, tcp_wnd_end(tp)))
		return true;

	mss_now = tcp_current_mss(sk);

	/* Don't send on this subflow if we bypass the allowed send-window at
	 * the per-subflow level. Similar to tcp_snd_wnd_test, but manually
	 * calculated end_seq (because here at this point end_seq is still at
	 * the meta-level).
	 */
	if (skb && zero_wnd_test &&
	    after(tp->write_seq + min(skb->len, mss_now), tcp_wnd_end(tp)))
		return true;

	return false;
}
|
||||||
|
|
||||||
|
/* Is the sub-socket sk available to send the skb? */
|
||||||
|
bool mptcp_is_available(struct sock *sk, const struct sk_buff *skb,
|
||||||
|
bool zero_wnd_test)
|
||||||
|
{
|
||||||
|
return !mptcp_is_def_unavailable(sk) &&
|
||||||
|
!mptcp_is_temp_unavailable(sk, skb, zero_wnd_test);
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(mptcp_is_available);
|
||||||
|
|
||||||
|
/* Are we not allowed to reinject this skb on tp?
 * True when the skb's path_mask shows it was already enqueued on this
 * subflow.
 */
static int mptcp_dont_reinject_skb(const struct tcp_sock *tp, const struct sk_buff *skb)
{
	/* If the skb has already been enqueued in this sk, try to find
	 * another one.
	 */
	return skb &&
		/* Has the skb already been enqueued into this subsocket? */
		mptcp_pi_to_flag(tp->mptcp->path_index) & TCP_SKB_CB(skb)->path_mask;
}
|
||||||
|
|
||||||
|
bool subflow_is_backup(const struct tcp_sock *tp)
|
||||||
|
{
|
||||||
|
return tp->mptcp->rcv_low_prio || tp->mptcp->low_prio;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(subflow_is_backup);
|
||||||
|
|
||||||
|
bool subflow_is_active(const struct tcp_sock *tp)
|
||||||
|
{
|
||||||
|
return !tp->mptcp->rcv_low_prio && !tp->mptcp->low_prio;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(subflow_is_active);
|
||||||
|
|
||||||
|
/* Generic function to iterate over used and unused subflows and to select the
 * best one.
 *
 * Among subflows accepted by 'selector', picks the available one with the
 * lowest smoothed RTT, preferring subflows that have not yet carried 'skb'.
 * *force reports, on success, whether the winner is unused; on failure,
 * whether an unused subflow was only temporarily unavailable.
 */
static struct sock
*get_subflow_from_selectors(struct mptcp_cb *mpcb, struct sk_buff *skb,
			    bool (*selector)(const struct tcp_sock *),
			    bool zero_wnd_test, bool *force)
{
	struct sock *bestsk = NULL;
	u32 min_srtt = 0xffffffff;
	bool found_unused = false;
	bool found_unused_una = false;
	struct mptcp_tcp_sock *mptcp;

	mptcp_for_each_sub(mpcb, mptcp) {
		struct sock *sk = mptcp_to_sock(mptcp);
		struct tcp_sock *tp = tcp_sk(sk);
		bool unused = false;

		/* First, we choose only the wanted sks */
		if (!(*selector)(tp))
			continue;

		if (!mptcp_dont_reinject_skb(tp, skb))
			unused = true;
		else if (found_unused)
			/* If a unused sk was found previously, we continue -
			 * no need to check used sks anymore.
			 */
			continue;

		if (mptcp_is_def_unavailable(sk))
			continue;

		if (mptcp_is_temp_unavailable(sk, skb, zero_wnd_test)) {
			if (unused)
				found_unused_una = true;
			continue;
		}

		if (unused) {
			if (!found_unused) {
				/* It's the first time we encounter an unused
				 * sk - thus we reset the bestsk (which might
				 * have been set to a used sk).
				 */
				min_srtt = 0xffffffff;
				bestsk = NULL;
			}
			found_unused = true;
		}

		/* Lowest smoothed RTT wins within the current preference tier */
		if (tp->srtt_us < min_srtt) {
			min_srtt = tp->srtt_us;
			bestsk = sk;
		}
	}

	if (bestsk) {
		/* The force variable is used to mark the returned sk as
		 * previously used or not-used.
		 */
		if (found_unused)
			*force = true;
		else
			*force = false;
	} else {
		/* The force variable is used to mark if there are temporally
		 * unavailable not-used sks.
		 */
		if (found_unused_una)
			*force = true;
		else
			*force = false;
	}

	return bestsk;
}
|
||||||
|
|
||||||
|
/* This is the scheduler. This function decides on which flow to send
|
||||||
|
* a given MSS. If all subflows are found to be busy, NULL is returned
|
||||||
|
* The flow is selected based on the shortest RTT.
|
||||||
|
* If all paths have full cong windows, we simply return NULL.
|
||||||
|
*
|
||||||
|
* Additionally, this function is aware of the backup-subflows.
|
||||||
|
*/
|
||||||
|
struct sock *get_available_subflow(struct sock *meta_sk, struct sk_buff *skb,
|
||||||
|
bool zero_wnd_test)
|
||||||
|
{
|
||||||
|
struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
|
||||||
|
struct sock *sk;
|
||||||
|
bool looping = false, force;
|
||||||
|
|
||||||
|
/* Answer data_fin on same subflow!!! */
|
||||||
|
if (meta_sk->sk_shutdown & RCV_SHUTDOWN &&
|
||||||
|
skb && mptcp_is_data_fin(skb)) {
|
||||||
|
struct mptcp_tcp_sock *mptcp;
|
||||||
|
|
||||||
|
mptcp_for_each_sub(mpcb, mptcp) {
|
||||||
|
sk = mptcp_to_sock(mptcp);
|
||||||
|
|
||||||
|
if (tcp_sk(sk)->mptcp->path_index == mpcb->dfin_path_index &&
|
||||||
|
mptcp_is_available(sk, skb, zero_wnd_test))
|
||||||
|
return sk;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Find the best subflow */
|
||||||
|
restart:
|
||||||
|
sk = get_subflow_from_selectors(mpcb, skb, &subflow_is_active,
|
||||||
|
zero_wnd_test, &force);
|
||||||
|
if (force)
|
||||||
|
/* one unused active sk or one NULL sk when there is at least
|
||||||
|
* one temporally unavailable unused active sk
|
||||||
|
*/
|
||||||
|
return sk;
|
||||||
|
|
||||||
|
sk = get_subflow_from_selectors(mpcb, skb, &subflow_is_backup,
|
||||||
|
zero_wnd_test, &force);
|
||||||
|
if (!force && skb) {
|
||||||
|
/* one used backup sk or one NULL sk where there is no one
|
||||||
|
* temporally unavailable unused backup sk
|
||||||
|
*
|
||||||
|
* the skb passed through all the available active and backups
|
||||||
|
* sks, so clean the path mask
|
||||||
|
*/
|
||||||
|
TCP_SKB_CB(skb)->path_mask = 0;
|
||||||
|
|
||||||
|
if (!looping) {
|
||||||
|
looping = true;
|
||||||
|
goto restart;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return sk;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(get_available_subflow);
|
||||||
|
|
||||||
|
static struct sk_buff *mptcp_rcv_buf_optimization(struct sock *sk, int penal)
|
||||||
|
{
|
||||||
|
struct sock *meta_sk;
|
||||||
|
const struct tcp_sock *tp = tcp_sk(sk);
|
||||||
|
struct mptcp_tcp_sock *mptcp;
|
||||||
|
struct sk_buff *skb_head;
|
||||||
|
struct defsched_priv *def_p = defsched_get_priv(tp);
|
||||||
|
|
||||||
|
meta_sk = mptcp_meta_sk(sk);
|
||||||
|
skb_head = tcp_rtx_queue_head(meta_sk);
|
||||||
|
|
||||||
|
if (!skb_head)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
/* If penalization is optional (coming from mptcp_next_segment() and
|
||||||
|
* We are not send-buffer-limited we do not penalize. The retransmission
|
||||||
|
* is just an optimization to fix the idle-time due to the delay before
|
||||||
|
* we wake up the application.
|
||||||
|
*/
|
||||||
|
if (!penal && sk_stream_memory_free(meta_sk))
|
||||||
|
goto retrans;
|
||||||
|
|
||||||
|
/* Only penalize again after an RTT has elapsed */
|
||||||
|
if (tcp_jiffies32 - def_p->last_rbuf_opti < usecs_to_jiffies(tp->srtt_us >> 3))
|
||||||
|
goto retrans;
|
||||||
|
|
||||||
|
/* Half the cwnd of the slow flows */
|
||||||
|
mptcp_for_each_sub(tp->mpcb, mptcp) {
|
||||||
|
struct tcp_sock *tp_it = mptcp->tp;
|
||||||
|
|
||||||
|
if (tp_it != tp &&
|
||||||
|
TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp_it->mptcp->path_index)) {
|
||||||
|
if (tp->srtt_us < tp_it->srtt_us && inet_csk((struct sock *)tp_it)->icsk_ca_state == TCP_CA_Open) {
|
||||||
|
u32 prior_cwnd = tp_it->snd_cwnd;
|
||||||
|
|
||||||
|
tp_it->snd_cwnd = max(tp_it->snd_cwnd >> 1U, 1U);
|
||||||
|
|
||||||
|
/* If in slow start, do not reduce the ssthresh */
|
||||||
|
if (prior_cwnd >= tp_it->snd_ssthresh)
|
||||||
|
tp_it->snd_ssthresh = max(tp_it->snd_ssthresh >> 1U, 2U);
|
||||||
|
|
||||||
|
def_p->last_rbuf_opti = tcp_jiffies32;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
retrans:
|
||||||
|
|
||||||
|
/* Segment not yet injected into this path? Take it!!! */
|
||||||
|
if (!(TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp->mptcp->path_index))) {
|
||||||
|
bool do_retrans = false;
|
||||||
|
mptcp_for_each_sub(tp->mpcb, mptcp) {
|
||||||
|
struct tcp_sock *tp_it = mptcp->tp;
|
||||||
|
|
||||||
|
if (tp_it != tp &&
|
||||||
|
TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp_it->mptcp->path_index)) {
|
||||||
|
if (tp_it->snd_cwnd <= 4) {
|
||||||
|
do_retrans = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (4 * tp->srtt_us >= tp_it->srtt_us) {
|
||||||
|
do_retrans = false;
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
do_retrans = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (do_retrans && mptcp_is_available(sk, skb_head, false)) {
|
||||||
|
trace_mptcp_retransmit(sk, skb_head);
|
||||||
|
return skb_head;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Returns the next segment to be sent from the mptcp meta-queue.
|
||||||
|
* (chooses the reinject queue if any segment is waiting in it, otherwise,
|
||||||
|
* chooses the normal write queue).
|
||||||
|
* Sets *@reinject to 1 if the returned segment comes from the
|
||||||
|
* reinject queue. Sets it to 0 if it is the regular send-head of the meta-sk,
|
||||||
|
* and sets it to -1 if it is a meta-level retransmission to optimize the
|
||||||
|
* receive-buffer.
|
||||||
|
*/
|
||||||
|
static struct sk_buff *__mptcp_next_segment(struct sock *meta_sk, int *reinject)
|
||||||
|
{
|
||||||
|
const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
|
||||||
|
struct sk_buff *skb = NULL;
|
||||||
|
|
||||||
|
*reinject = 0;
|
||||||
|
|
||||||
|
/* If we are in fallback-mode, just take from the meta-send-queue */
|
||||||
|
if (mpcb->infinite_mapping_snd || mpcb->send_infinite_mapping)
|
||||||
|
return tcp_send_head(meta_sk);
|
||||||
|
|
||||||
|
skb = skb_peek(&mpcb->reinject_queue);
|
||||||
|
|
||||||
|
if (skb) {
|
||||||
|
*reinject = 1;
|
||||||
|
} else {
|
||||||
|
skb = tcp_send_head(meta_sk);
|
||||||
|
|
||||||
|
if (!skb && meta_sk->sk_socket &&
|
||||||
|
test_bit(SOCK_NOSPACE, &meta_sk->sk_socket->flags) &&
|
||||||
|
sk_stream_wspace(meta_sk) < sk_stream_min_wspace(meta_sk)) {
|
||||||
|
struct sock *subsk = get_available_subflow(meta_sk, NULL,
|
||||||
|
false);
|
||||||
|
if (!subsk)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
skb = mptcp_rcv_buf_optimization(subsk, 0);
|
||||||
|
if (skb)
|
||||||
|
*reinject = -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return skb;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct sk_buff *mptcp_next_segment(struct sock *meta_sk,
|
||||||
|
int *reinject,
|
||||||
|
struct sock **subsk,
|
||||||
|
unsigned int *limit)
|
||||||
|
{
|
||||||
|
struct sk_buff *skb = __mptcp_next_segment(meta_sk, reinject);
|
||||||
|
unsigned int mss_now;
|
||||||
|
struct tcp_sock *subtp;
|
||||||
|
u16 gso_max_segs;
|
||||||
|
u32 max_len, max_segs, window, needed;
|
||||||
|
|
||||||
|
/* As we set it, we have to reset it as well. */
|
||||||
|
*limit = 0;
|
||||||
|
|
||||||
|
if (!skb)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
*subsk = get_available_subflow(meta_sk, skb, false);
|
||||||
|
if (!*subsk)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
subtp = tcp_sk(*subsk);
|
||||||
|
mss_now = tcp_current_mss(*subsk);
|
||||||
|
|
||||||
|
if (!*reinject && unlikely(!tcp_snd_wnd_test(tcp_sk(meta_sk), skb, mss_now))) {
|
||||||
|
skb = mptcp_rcv_buf_optimization(*subsk, 1);
|
||||||
|
if (skb)
|
||||||
|
*reinject = -1;
|
||||||
|
else
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* No splitting required, as we will only send one single segment */
|
||||||
|
if (skb->len <= mss_now)
|
||||||
|
return skb;
|
||||||
|
|
||||||
|
/* The following is similar to tcp_mss_split_point, but
|
||||||
|
* we do not care about nagle, because we will anyways
|
||||||
|
* use TCP_NAGLE_PUSH, which overrides this.
|
||||||
|
*
|
||||||
|
* So, we first limit according to the cwnd/gso-size and then according
|
||||||
|
* to the subflow's window.
|
||||||
|
*/
|
||||||
|
|
||||||
|
gso_max_segs = (*subsk)->sk_gso_max_segs;
|
||||||
|
if (!gso_max_segs) /* No gso supported on the subflow's NIC */
|
||||||
|
gso_max_segs = 1;
|
||||||
|
max_segs = min_t(unsigned int, tcp_cwnd_test(subtp, skb), gso_max_segs);
|
||||||
|
if (!max_segs)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
max_len = mss_now * max_segs;
|
||||||
|
window = tcp_wnd_end(subtp) - subtp->write_seq;
|
||||||
|
|
||||||
|
needed = min(skb->len, window);
|
||||||
|
if (max_len <= skb->len)
|
||||||
|
/* Take max_win, which is actually the cwnd/gso-size */
|
||||||
|
*limit = max_len;
|
||||||
|
else
|
||||||
|
/* Or, take the window */
|
||||||
|
*limit = needed;
|
||||||
|
|
||||||
|
return skb;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void defsched_init(struct sock *sk)
|
||||||
|
{
|
||||||
|
struct defsched_priv *def_p = defsched_get_priv(tcp_sk(sk));
|
||||||
|
|
||||||
|
def_p->last_rbuf_opti = tcp_jiffies32;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct mptcp_sched_ops mptcp_sched_default = {
|
||||||
|
.get_subflow = get_available_subflow,
|
||||||
|
.next_segment = mptcp_next_segment,
|
||||||
|
.init = defsched_init,
|
||||||
|
.name = "default",
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
};
|
||||||
|
|
||||||
|
static struct mptcp_sched_ops *mptcp_sched_find(const char *name)
|
||||||
|
{
|
||||||
|
struct mptcp_sched_ops *e;
|
||||||
|
|
||||||
|
list_for_each_entry_rcu(e, &mptcp_sched_list, list) {
|
||||||
|
if (strcmp(e->name, name) == 0)
|
||||||
|
return e;
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
int mptcp_register_scheduler(struct mptcp_sched_ops *sched)
|
||||||
|
{
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
if (!sched->get_subflow || !sched->next_segment)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
spin_lock(&mptcp_sched_list_lock);
|
||||||
|
if (mptcp_sched_find(sched->name)) {
|
||||||
|
pr_notice("%s already registered\n", sched->name);
|
||||||
|
ret = -EEXIST;
|
||||||
|
} else {
|
||||||
|
list_add_tail_rcu(&sched->list, &mptcp_sched_list);
|
||||||
|
pr_info("%s registered\n", sched->name);
|
||||||
|
}
|
||||||
|
spin_unlock(&mptcp_sched_list_lock);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(mptcp_register_scheduler);
|
||||||
|
|
||||||
|
void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched)
|
||||||
|
{
|
||||||
|
spin_lock(&mptcp_sched_list_lock);
|
||||||
|
list_del_rcu(&sched->list);
|
||||||
|
spin_unlock(&mptcp_sched_list_lock);
|
||||||
|
|
||||||
|
/* Wait for outstanding readers to complete before the
|
||||||
|
* module gets removed entirely.
|
||||||
|
*
|
||||||
|
* A try_module_get() should fail by now as our module is
|
||||||
|
* in "going" state since no refs are held anymore and
|
||||||
|
* module_exit() handler being called.
|
||||||
|
*/
|
||||||
|
synchronize_rcu();
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(mptcp_unregister_scheduler);
|
||||||
|
|
||||||
|
void mptcp_get_default_scheduler(char *name)
|
||||||
|
{
|
||||||
|
struct mptcp_sched_ops *sched;
|
||||||
|
|
||||||
|
BUG_ON(list_empty(&mptcp_sched_list));
|
||||||
|
|
||||||
|
rcu_read_lock();
|
||||||
|
sched = list_entry(mptcp_sched_list.next, struct mptcp_sched_ops, list);
|
||||||
|
strncpy(name, sched->name, MPTCP_SCHED_NAME_MAX);
|
||||||
|
rcu_read_unlock();
|
||||||
|
}
|
||||||
|
|
||||||
|
int mptcp_set_default_scheduler(const char *name)
|
||||||
|
{
|
||||||
|
struct mptcp_sched_ops *sched;
|
||||||
|
int ret = -ENOENT;
|
||||||
|
|
||||||
|
spin_lock(&mptcp_sched_list_lock);
|
||||||
|
sched = mptcp_sched_find(name);
|
||||||
|
#ifdef CONFIG_MODULES
|
||||||
|
if (!sched && capable(CAP_NET_ADMIN)) {
|
||||||
|
spin_unlock(&mptcp_sched_list_lock);
|
||||||
|
|
||||||
|
request_module("mptcp_%s", name);
|
||||||
|
spin_lock(&mptcp_sched_list_lock);
|
||||||
|
sched = mptcp_sched_find(name);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (sched) {
|
||||||
|
list_move(&sched->list, &mptcp_sched_list);
|
||||||
|
ret = 0;
|
||||||
|
} else {
|
||||||
|
pr_info("%s is not available\n", name);
|
||||||
|
}
|
||||||
|
spin_unlock(&mptcp_sched_list_lock);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Must be called with rcu lock held.
 *
 * Like mptcp_sched_find(), but may drop the RCU read lock to autoload
 * the scheduler's module on a miss (privileged callers only), then
 * retries the lookup.
 */
static struct mptcp_sched_ops *__mptcp_sched_find_autoload(const char *name)
{
        struct mptcp_sched_ops *sched = mptcp_sched_find(name);
#ifdef CONFIG_MODULES
        if (!sched && capable(CAP_NET_ADMIN)) {
                rcu_read_unlock();
                request_module("mptcp_%s", name);
                rcu_read_lock();
                sched = mptcp_sched_find(name);
        }
#endif
        return sched;
}
|
||||||
|
|
||||||
|
void mptcp_init_scheduler(struct mptcp_cb *mpcb)
|
||||||
|
{
|
||||||
|
struct mptcp_sched_ops *sched;
|
||||||
|
struct sock *meta_sk = mpcb->meta_sk;
|
||||||
|
struct tcp_sock *meta_tp = tcp_sk(meta_sk);
|
||||||
|
|
||||||
|
rcu_read_lock();
|
||||||
|
/* if scheduler was set using socket option */
|
||||||
|
if (meta_tp->mptcp_sched_setsockopt) {
|
||||||
|
sched = __mptcp_sched_find_autoload(meta_tp->mptcp_sched_name);
|
||||||
|
if (sched && try_module_get(sched->owner)) {
|
||||||
|
mpcb->sched_ops = sched;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
list_for_each_entry_rcu(sched, &mptcp_sched_list, list) {
|
||||||
|
if (try_module_get(sched->owner)) {
|
||||||
|
mpcb->sched_ops = sched;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
out:
|
||||||
|
rcu_read_unlock();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Change scheduler for socket */
|
||||||
|
int mptcp_set_scheduler(struct sock *sk, const char *name)
|
||||||
|
{
|
||||||
|
struct mptcp_sched_ops *sched;
|
||||||
|
int err = 0;
|
||||||
|
|
||||||
|
rcu_read_lock();
|
||||||
|
sched = __mptcp_sched_find_autoload(name);
|
||||||
|
|
||||||
|
if (!sched) {
|
||||||
|
err = -ENOENT;
|
||||||
|
} else if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
|
||||||
|
err = -EPERM;
|
||||||
|
} else {
|
||||||
|
strcpy(tcp_sk(sk)->mptcp_sched_name, name);
|
||||||
|
tcp_sk(sk)->mptcp_sched_setsockopt = 1;
|
||||||
|
}
|
||||||
|
rcu_read_unlock();
|
||||||
|
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Manage refcounts on socket close. */
|
||||||
|
void mptcp_cleanup_scheduler(struct mptcp_cb *mpcb)
|
||||||
|
{
|
||||||
|
module_put(mpcb->sched_ops->owner);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Set default value from kernel configuration at bootup */
|
||||||
|
static int __init mptcp_scheduler_default(void)
|
||||||
|
{
|
||||||
|
BUILD_BUG_ON(sizeof(struct defsched_priv) > MPTCP_SCHED_SIZE);
|
||||||
|
|
||||||
|
return mptcp_set_default_scheduler(CONFIG_DEFAULT_MPTCP_SCHED);
|
||||||
|
}
|
||||||
|
late_initcall(mptcp_scheduler_default);
|
271
net/mptcp/mptcp_wvegas.c
Executable file
271
net/mptcp/mptcp_wvegas.c
Executable file
@ -0,0 +1,271 @@
|
|||||||
|
/*
|
||||||
|
* MPTCP implementation - WEIGHTED VEGAS
|
||||||
|
*
|
||||||
|
* Algorithm design:
|
||||||
|
* Yu Cao <cyAnalyst@126.com>
|
||||||
|
* Mingwei Xu <xmw@csnet1.cs.tsinghua.edu.cn>
|
||||||
|
* Xiaoming Fu <fu@cs.uni-goettinggen.de>
|
||||||
|
*
|
||||||
|
* Implementation:
|
||||||
|
* Yu Cao <cyAnalyst@126.com>
|
||||||
|
* Enhuan Dong <deh13@mails.tsinghua.edu.cn>
|
||||||
|
*
|
||||||
|
* Ported to the official MPTCP-kernel:
|
||||||
|
* Christoph Paasch <christoph.paasch@uclouvain.be>
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public License
|
||||||
|
* as published by the Free Software Foundation; either version
|
||||||
|
* 2 of the License, or (at your option) any later version.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <linux/skbuff.h>
|
||||||
|
#include <net/tcp.h>
|
||||||
|
#include <net/mptcp.h>
|
||||||
|
#include <linux/module.h>
|
||||||
|
#include <linux/tcp.h>
|
||||||
|
|
||||||
|
static int initial_alpha = 2;
|
||||||
|
static int total_alpha = 10;
|
||||||
|
static int gamma = 1;
|
||||||
|
|
||||||
|
module_param(initial_alpha, int, 0644);
|
||||||
|
MODULE_PARM_DESC(initial_alpha, "initial alpha for all subflows");
|
||||||
|
module_param(total_alpha, int, 0644);
|
||||||
|
MODULE_PARM_DESC(total_alpha, "total alpha for all subflows");
|
||||||
|
module_param(gamma, int, 0644);
|
||||||
|
MODULE_PARM_DESC(gamma, "limit on increase (scale by 2)");
|
||||||
|
|
||||||
|
#define MPTCP_WVEGAS_SCALE 16
|
||||||
|
|
||||||
|
/* wVegas variables */
|
||||||
|
struct wvegas {
|
||||||
|
u32 beg_snd_nxt; /* right edge during last RTT */
|
||||||
|
u8 doing_wvegas_now;/* if true, do wvegas for this RTT */
|
||||||
|
|
||||||
|
u16 cnt_rtt; /* # of RTTs measured within last RTT */
|
||||||
|
u32 sampled_rtt; /* cumulative RTTs measured within last RTT (in usec) */
|
||||||
|
u32 base_rtt; /* the min of all wVegas RTT measurements seen (in usec) */
|
||||||
|
|
||||||
|
u64 instant_rate; /* cwnd / srtt_us, unit: pkts/us * 2^16 */
|
||||||
|
u64 weight; /* the ratio of subflow's rate to the total rate, * 2^16 */
|
||||||
|
int alpha; /* alpha for each subflows */
|
||||||
|
|
||||||
|
u32 queue_delay; /* queue delay*/
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/* Convert @val to Q-format fixed point with @scale fractional bits. */
static inline u64 mptcp_wvegas_scale(u32 val, int scale)
{
        return (u64)val << scale;
}
|
||||||
|
|
||||||
|
static void wvegas_enable(const struct sock *sk)
|
||||||
|
{
|
||||||
|
const struct tcp_sock *tp = tcp_sk(sk);
|
||||||
|
struct wvegas *wvegas = inet_csk_ca(sk);
|
||||||
|
|
||||||
|
wvegas->doing_wvegas_now = 1;
|
||||||
|
|
||||||
|
wvegas->beg_snd_nxt = tp->snd_nxt;
|
||||||
|
|
||||||
|
wvegas->cnt_rtt = 0;
|
||||||
|
wvegas->sampled_rtt = 0;
|
||||||
|
|
||||||
|
wvegas->instant_rate = 0;
|
||||||
|
wvegas->alpha = initial_alpha;
|
||||||
|
wvegas->weight = mptcp_wvegas_scale(1, MPTCP_WVEGAS_SCALE);
|
||||||
|
|
||||||
|
wvegas->queue_delay = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void wvegas_disable(const struct sock *sk)
|
||||||
|
{
|
||||||
|
struct wvegas *wvegas = inet_csk_ca(sk);
|
||||||
|
|
||||||
|
wvegas->doing_wvegas_now = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void mptcp_wvegas_init(struct sock *sk)
|
||||||
|
{
|
||||||
|
struct wvegas *wvegas = inet_csk_ca(sk);
|
||||||
|
|
||||||
|
wvegas->base_rtt = 0x7fffffff;
|
||||||
|
wvegas_enable(sk);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Instantaneous rate in pkts/us, Q16 fixed point: (cwnd << 16) / rtt. */
static inline u64 mptcp_wvegas_rate(u32 cwnd, u32 rtt_us)
{
        return div_u64(mptcp_wvegas_scale(cwnd, MPTCP_WVEGAS_SCALE), rtt_us);
}
|
||||||
|
|
||||||
|
static void mptcp_wvegas_pkts_acked(struct sock *sk,
|
||||||
|
const struct ack_sample *sample)
|
||||||
|
{
|
||||||
|
struct wvegas *wvegas = inet_csk_ca(sk);
|
||||||
|
u32 vrtt;
|
||||||
|
|
||||||
|
if (sample->rtt_us < 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
vrtt = sample->rtt_us + 1;
|
||||||
|
|
||||||
|
if (vrtt < wvegas->base_rtt)
|
||||||
|
wvegas->base_rtt = vrtt;
|
||||||
|
|
||||||
|
wvegas->sampled_rtt += vrtt;
|
||||||
|
wvegas->cnt_rtt++;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* CA-state hook: wVegas only operates in the Open state; any recovery
 * state suspends it.
 */
static void mptcp_wvegas_state(struct sock *sk, u8 ca_state)
{
        if (ca_state == TCP_CA_Open)
                wvegas_enable(sk);
        else
                wvegas_disable(sk);
}
|
||||||
|
|
||||||
|
static void mptcp_wvegas_cwnd_event(struct sock *sk, enum tcp_ca_event event)
|
||||||
|
{
|
||||||
|
if (event == CA_EVENT_CWND_RESTART) {
|
||||||
|
mptcp_wvegas_init(sk);
|
||||||
|
} else if (event == CA_EVENT_LOSS) {
|
||||||
|
struct wvegas *wvegas = inet_csk_ca(sk);
|
||||||
|
wvegas->instant_rate = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline u32 mptcp_wvegas_ssthresh(const struct tcp_sock *tp)
|
||||||
|
{
|
||||||
|
return min(tp->snd_ssthresh, tp->snd_cwnd);
|
||||||
|
}
|
||||||
|
|
||||||
|
static u64 mptcp_wvegas_weight(const struct mptcp_cb *mpcb, const struct sock *sk)
|
||||||
|
{
|
||||||
|
u64 total_rate = 0;
|
||||||
|
const struct wvegas *wvegas = inet_csk_ca(sk);
|
||||||
|
struct mptcp_tcp_sock *mptcp;
|
||||||
|
|
||||||
|
if (!mpcb)
|
||||||
|
return wvegas->weight;
|
||||||
|
|
||||||
|
|
||||||
|
mptcp_for_each_sub(mpcb, mptcp) {
|
||||||
|
struct sock *sub_sk = mptcp_to_sock(mptcp);
|
||||||
|
struct wvegas *sub_wvegas = inet_csk_ca(sub_sk);
|
||||||
|
|
||||||
|
/* sampled_rtt is initialized by 0 */
|
||||||
|
if (mptcp_sk_can_send(sub_sk) && (sub_wvegas->sampled_rtt > 0))
|
||||||
|
total_rate += sub_wvegas->instant_rate;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (total_rate && wvegas->instant_rate)
|
||||||
|
return div64_u64(mptcp_wvegas_scale(wvegas->instant_rate, MPTCP_WVEGAS_SCALE), total_rate);
|
||||||
|
else
|
||||||
|
return wvegas->weight;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* wVegas congestion avoidance.
 *
 * Once per RTT (when the ACK passes the recorded right edge) the mean
 * sampled RTT is compared against base_rtt. The resulting backlog
 * estimate `diff` drives cwnd: grow while below alpha, shrink above it,
 * with alpha itself re-weighted by this subflow's share of the total
 * rate. An extra backoff drains the link queue when queueing delay
 * doubles. Outside wVegas mode, plain Reno is used.
 */
static void mptcp_wvegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
        struct tcp_sock *tp = tcp_sk(sk);
        struct wvegas *wvegas = inet_csk_ca(sk);

        if (!wvegas->doing_wvegas_now) {
                tcp_reno_cong_avoid(sk, ack, acked);
                return;
        }

        if (after(ack, wvegas->beg_snd_nxt)) {
                /* One RTT elapsed: open the next measurement window. */
                wvegas->beg_snd_nxt = tp->snd_nxt;

                if (wvegas->cnt_rtt <= 2) {
                        /* Too few samples for a meaningful estimate. */
                        tcp_reno_cong_avoid(sk, ack, acked);
                } else {
                        u32 rtt, diff, q_delay;
                        u64 target_cwnd;

                        rtt = wvegas->sampled_rtt / wvegas->cnt_rtt;
                        target_cwnd = div_u64(((u64)tp->snd_cwnd * wvegas->base_rtt), rtt);

                        /* Estimated number of packets queued in the network. */
                        diff = div_u64((u64)tp->snd_cwnd * (rtt - wvegas->base_rtt), rtt);

                        if (diff > gamma && tcp_in_slow_start(tp)) {
                                /* Queue is building during slow start: cap cwnd
                                 * near the target and leave slow start.
                                 */
                                tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd + 1);
                                tp->snd_ssthresh = mptcp_wvegas_ssthresh(tp);

                        } else if (tcp_in_slow_start(tp)) {
                                tcp_slow_start(tp, acked);
                        } else {
                                if (diff >= wvegas->alpha) {
                                        /* Re-derive alpha from this subflow's
                                         * share of the aggregate rate.
                                         */
                                        wvegas->instant_rate = mptcp_wvegas_rate(tp->snd_cwnd, rtt);
                                        wvegas->weight = mptcp_wvegas_weight(tp->mpcb, sk);
                                        wvegas->alpha = max(2U, (u32)((wvegas->weight * total_alpha) >> MPTCP_WVEGAS_SCALE));
                                }
                                if (diff > wvegas->alpha) {
                                        tp->snd_cwnd--;
                                        tp->snd_ssthresh = mptcp_wvegas_ssthresh(tp);
                                } else if (diff < wvegas->alpha) {
                                        tp->snd_cwnd++;
                                }

                                /* Try to drain link queue if needed */
                                q_delay = rtt - wvegas->base_rtt;
                                if ((wvegas->queue_delay == 0) || (wvegas->queue_delay > q_delay))
                                        wvegas->queue_delay = q_delay;

                                if (q_delay >= 2 * wvegas->queue_delay) {
                                        u32 backoff_factor = div_u64(mptcp_wvegas_scale(wvegas->base_rtt, MPTCP_WVEGAS_SCALE), 2 * rtt);

                                        tp->snd_cwnd = ((u64)tp->snd_cwnd * backoff_factor) >> MPTCP_WVEGAS_SCALE;
                                        wvegas->queue_delay = 0;
                                }
                        }

                        /* Clamp cwnd into [2, snd_cwnd_clamp]. */
                        if (tp->snd_cwnd < 2)
                                tp->snd_cwnd = 2;
                        else if (tp->snd_cwnd > tp->snd_cwnd_clamp)
                                tp->snd_cwnd = tp->snd_cwnd_clamp;

                        tp->snd_ssthresh = tcp_current_ssthresh(sk);
                }

                wvegas->cnt_rtt = 0;
                wvegas->sampled_rtt = 0;
        }
        /* Use normal slow start */
        else if (tcp_in_slow_start(tp))
                tcp_slow_start(tp, acked);
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Congestion-control operations table for wVegas. */
static struct tcp_congestion_ops mptcp_wvegas __read_mostly = {
        .init           = mptcp_wvegas_init,
        .ssthresh       = tcp_reno_ssthresh,
        .cong_avoid     = mptcp_wvegas_cong_avoid,
        .undo_cwnd      = tcp_reno_undo_cwnd,
        .pkts_acked     = mptcp_wvegas_pkts_acked,
        .set_state      = mptcp_wvegas_state,
        .cwnd_event     = mptcp_wvegas_cwnd_event,

        .owner          = THIS_MODULE,
        .name           = "wvegas",
};
|
||||||
|
|
||||||
|
/* Module init: register the wVegas congestion control.
 *
 * Propagate the result of tcp_register_congestion_control() instead of
 * unconditionally returning 0, so a failed registration (e.g. duplicate
 * name) makes the module load fail rather than silently doing nothing.
 */
static int __init mptcp_wvegas_register(void)
{
        /* Private state must fit the space TCP reserves for CA modules. */
        BUILD_BUG_ON(sizeof(struct wvegas) > ICSK_CA_PRIV_SIZE);
        return tcp_register_congestion_control(&mptcp_wvegas);
}
|
||||||
|
|
||||||
|
/* Module exit: unregister the wVegas congestion control. */
static void __exit mptcp_wvegas_unregister(void)
{
        tcp_unregister_congestion_control(&mptcp_wvegas);
}
|
||||||
|
|
||||||
|
module_init(mptcp_wvegas_register);
|
||||||
|
module_exit(mptcp_wvegas_unregister);
|
||||||
|
|
||||||
|
MODULE_AUTHOR("Yu Cao, Enhuan Dong");
|
||||||
|
MODULE_LICENSE("GPL");
|
||||||
|
MODULE_DESCRIPTION("MPTCP wVegas");
|
||||||
|
MODULE_VERSION("0.1");
|
Loading…
Reference in New Issue
Block a user