net: Port Samsung MPTCP modifications from SM-N986B

- MultiPath TCP (MPTCP) is an effort to enable the simultaneous use of
  several IP addresses/interfaces by modifying TCP so that it presents a regular
  TCP interface to applications while in fact spreading data across several
  subflows. Benefits include better resource utilization, higher throughput
  and smoother reaction to failures.

Change-Id: I50e8cbda93ed133fb6cb937b49b5d23879f92270
Signed-off-by: UtsavBalar1231 <utsavbalar1231@gmail.com>
Authored by UtsavBalar1231 on 2022-02-20 09:38:27 +05:30, committed by spakkkk
Parent: ea8506a15b
Commit: 98a694aafb
65 changed files with 20585 additions and 137 deletions
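
The MPTCP description above maps to a small userspace surface in this port: a socket opts in to MPTCP and may pick a packet scheduler through new TCP-level socket options (see the include/uapi/linux/tcp.h hunk further down). The following is a minimal sketch, not part of the commit itself; it assumes the out-of-tree MPTCP convention that MPTCP_ENABLED (42) and MPTCP_SCHEDULER (43) are handled at the IPPROTO_TCP level, and it re-defines the constants in case the installed headers lack them.

    /* Minimal sketch, not part of this commit: opting a socket into MPTCP.
     * MPTCP_ENABLED (42) and MPTCP_SCHEDULER (43) come from the
     * include/uapi/linux/tcp.h hunk below; treating them as IPPROTO_TCP
     * options is an assumption based on the out-of-tree MPTCP API.
     */
    #include <netinet/in.h>
    #include <netinet/tcp.h>
    #include <stdio.h>
    #include <sys/socket.h>

    #ifndef MPTCP_ENABLED
    #define MPTCP_ENABLED   42
    #endif
    #ifndef MPTCP_SCHEDULER
    #define MPTCP_SCHEDULER 43
    #endif

    int open_mptcp_socket(void)
    {
        int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
        int one = 1;
        const char sched[] = "default"; /* must fit MPTCP_SCHED_NAME_MAX (16) */

        if (fd < 0)
            return -1;

        /* Ask the kernel to negotiate MP_CAPABLE on this connection. */
        if (setsockopt(fd, IPPROTO_TCP, MPTCP_ENABLED, &one, sizeof(one)) < 0)
            perror("MPTCP_ENABLED (kernel built without CONFIG_MPTCP?)");

        /* Optionally select a packet scheduler by name. */
        if (setsockopt(fd, IPPROTO_TCP, MPTCP_SCHEDULER, sched, sizeof(sched)) < 0)
            perror("MPTCP_SCHEDULER");

        return fd;
    }

The scheduler name corresponds to mptcp_sched_name[] added to struct tcp_sock below; MPTCP_PATH_MANAGER (44) presumably takes a path-manager name the same way.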


@ -378,6 +378,7 @@ gen_headers_out_arm = [
"linux/mmtimer.h",
"linux/module.h",
"linux/mpls.h",
"linux/mptcp.h",
"linux/mpls_iptunnel.h",
"linux/mqueue.h",
"linux/mroute.h",


@ -373,6 +373,7 @@ gen_headers_out_arm64 = [
"linux/module.h",
"linux/mpls.h",
"linux/mpls_iptunnel.h",
"linux/mptcp.h",
"linux/mqueue.h",
"linux/mroute.h",
"linux/mroute6.h",


@ -696,7 +696,11 @@ struct sk_buff {
* want to keep them across layers you have to do a skb_clone()
* first. This is owned by whoever has the skb queued ATM.
*/
#ifdef CONFIG_MPTCP
char cb[80] __aligned(8);
#else
char cb[48] __aligned(8);
#endif
union {
struct {


@ -58,7 +58,11 @@ static inline unsigned int tcp_optlen(const struct sk_buff *skb)
/* TCP Fast Open */
#define TCP_FASTOPEN_COOKIE_MIN 4 /* Min Fast Open Cookie size in bytes */
#define TCP_FASTOPEN_COOKIE_MAX 16 /* Max Fast Open Cookie size in bytes */
#ifdef CONFIG_MPTCP
#define TCP_FASTOPEN_COOKIE_SIZE 4 /* the size employed by this impl for MPTCP. */
#else
#define TCP_FASTOPEN_COOKIE_SIZE 8 /* the size employed by this impl. */
#endif
/* TCP Fast Open Cookie as stored in memory */
struct tcp_fastopen_cookie {
@ -83,6 +87,56 @@ struct tcp_sack_block {
u32 end_seq;
};
#ifdef CONFIG_MPTCP
struct tcp_out_options {
u16 options; /* bit field of OPTION_* */
u16 mss; /* 0 to disable */
u8 ws; /* window scale, 0 to disable */
u8 num_sack_blocks;/* number of SACK blocks to include */
u8 hash_size; /* bytes in hash_location */
__u8 *hash_location; /* temporary pointer, overloaded */
__u32 tsval, tsecr; /* need to include OPTION_TS */
struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */
u16 mptcp_options; /* bit field of MPTCP related OPTION_* */
u8 dss_csum:1, /* dss-checksum required? */
add_addr_v4:1,
add_addr_v6:1,
mptcp_ver:4;
union {
struct {
__u64 sender_key; /* sender's key for mptcp */
__u64 receiver_key; /* receiver's key for mptcp */
} mp_capable;
struct {
__u64 sender_truncated_mac;
__u32 sender_nonce;
/* random number of the sender */
__u32 token; /* token for mptcp */
u8 low_prio:1;
} mp_join_syns;
};
struct {
__u64 trunc_mac;
struct in_addr addr;
u16 port;
u8 addr_id;
} add_addr4;
struct {
__u64 trunc_mac;
struct in6_addr addr;
u16 port;
u8 addr_id;
} add_addr6;
u16 remove_addrs; /* list of address id */
u8 addr_id; /* address id (mp_join or add_address) */
};
#endif
/*These are used to set the sack_ok field in struct tcp_options_received */
#define TCP_SACK_SEEN (1 << 0) /*1 = peer is SACK capable, */
#define TCP_DSACK_SEEN (1 << 2) /*1 = DSACK was received from peer*/
@ -106,6 +160,11 @@ struct tcp_options_received {
u16 mss_clamp; /* Maximal mss, negotiated at connection setup */
};
#ifdef CONFIG_MPTCP
struct mptcp_cb;
struct mptcp_tcp_sock;
#endif
static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
{
rx_opt->tstamp_ok = rx_opt->sack_ok = 0;
@ -144,6 +203,10 @@ static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
return (struct tcp_request_sock *)req;
}
#ifdef CONFIG_MPTCP
struct tcp_md5sig_key;
#endif
struct tcp_sock {
/* inet_connection_sock has to be the first member of tcp_sock */
struct inet_connection_sock inet_conn;
@ -401,6 +464,43 @@ struct tcp_sock {
*/
struct request_sock *fastopen_rsk;
u32 *saved_syn;
#ifdef CONFIG_MPTCP
/* MPTCP/TCP-specific callbacks */
const struct tcp_sock_ops *ops;
struct mptcp_cb *mpcb;
struct sock *meta_sk;
/* We keep these flags even if CONFIG_MPTCP is not checked, because
* it allows checking MPTCP capability just by checking the mpc flag,
* rather than adding ifdefs everywhere.
*/
u32 mpc:1, /* Other end is multipath capable */
inside_tk_table:1, /* Is the tcp_sock inside the token-table? */
send_mp_fclose:1,
request_mptcp:1, /* Did we send out an MP_CAPABLE?
* (this speeds up mptcp_doit() in tcp_recvmsg)
*/
pf:1, /* Potentially Failed state: when this flag is set, we
* stop using the subflow
*/
mp_killed:1, /* Killed with a tcp_done in mptcp? */
is_master_sk:1,
close_it:1, /* Must close socket in mptcp_data_ready? */
closing:1,
mptcp_ver:4,
mptcp_sched_setsockopt:1,
mptcp_pm_setsockopt:1,
record_master_info:1,
tcp_disconnect:1;
struct mptcp_tcp_sock *mptcp;
#define MPTCP_SCHED_NAME_MAX 16
#define MPTCP_PM_NAME_MAX 16
struct hlist_nulls_node tk_table;
u32 mptcp_loc_token;
u64 mptcp_loc_key;
char mptcp_sched_name[MPTCP_SCHED_NAME_MAX];
char mptcp_pm_name[MPTCP_PM_NAME_MAX];
#endif /* CONFIG_MPTCP */
};
enum tsq_enum {
@ -412,6 +512,10 @@ enum tsq_enum {
TCP_MTU_REDUCED_DEFERRED, /* tcp_v{4|6}_err() could not call
* tcp_v{4|6}_mtu_reduced()
*/
#ifdef CONFIG_MPTCP
MPTCP_PATH_MANAGER_DEFERRED, /* MPTCP deferred creation of new subflows */
MPTCP_SUB_DEFERRED, /* A subflow got deferred - process them */
#endif
};
enum tsq_flags {
@ -421,6 +525,10 @@ enum tsq_flags {
TCPF_WRITE_TIMER_DEFERRED = (1UL << TCP_WRITE_TIMER_DEFERRED),
TCPF_DELACK_TIMER_DEFERRED = (1UL << TCP_DELACK_TIMER_DEFERRED),
TCPF_MTU_REDUCED_DEFERRED = (1UL << TCP_MTU_REDUCED_DEFERRED),
#ifdef CONFIG_MPTCP
TCPF_PATH_MANAGER_DEFERRED = (1UL << MPTCP_PATH_MANAGER_DEFERRED),
TCPF_SUB_DEFERRED = (1UL << MPTCP_SUB_DEFERRED),
#endif
};
static inline struct tcp_sock *tcp_sk(const struct sock *sk)
@ -443,6 +551,9 @@ struct tcp_timewait_sock {
#ifdef CONFIG_TCP_MD5SIG
struct tcp_md5sig_key *tw_md5_key;
#endif
#ifdef CONFIG_MPTCP
struct mptcp_tw *mptcp_tw;
#endif
};
static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk)


@ -2,6 +2,10 @@
#ifndef _INET_COMMON_H
#define _INET_COMMON_H
#ifdef CONFIG_MPTCP
#include <net/sock.h>
#endif
extern const struct proto_ops inet_stream_ops;
extern const struct proto_ops inet_dgram_ops;
@ -14,6 +18,11 @@ struct sock;
struct sockaddr;
struct socket;
#ifdef CONFIG_MPTCP
int inet_create(struct net *net, struct socket *sock, int protocol, int kern);
int inet6_create(struct net *net, struct socket *sock, int protocol, int kern);
#endif
int inet_release(struct socket *sock);
int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
int addr_len, int flags);


@ -29,6 +29,9 @@
struct inet_bind_bucket;
struct tcp_congestion_ops;
#ifdef CONFIG_MPTCP
struct tcp_options_received;
#endif
/*
* Pointers to address related TCP functions


@ -83,6 +83,20 @@ struct inet_request_sock {
#define ireq_state req.__req_common.skc_state
#define ireq_family req.__req_common.skc_family
#ifdef CONFIG_MPTCP
u32 snd_wscale : 4,
rcv_wscale : 4,
tstamp_ok : 1,
sack_ok : 1,
wscale_ok : 1,
ecn_ok : 1,
acked : 1,
no_srccheck: 1,
mptcp_rqsk : 1,
saw_mpc : 1,
smc_ok : 1;
u32 ir_mark;
#else
u16 snd_wscale : 4,
rcv_wscale : 4,
tstamp_ok : 1,
@ -93,6 +107,7 @@ struct inet_request_sock {
no_srccheck: 1,
smc_ok : 1;
u32 ir_mark;
#endif
union {
struct ip_options_rcu __rcu *ireq_opt;
#if IS_ENABLED(CONFIG_IPV6)

include/net/mptcp.h (new executable file, 1497 lines): diff suppressed because it is too large

include/net/mptcp_v4.h (new executable file, 76 lines)

@ -0,0 +1,76 @@
/*
* MPTCP implementation
*
* Initial Design & Implementation:
* Sébastien Barré <sebastien.barre@uclouvain.be>
*
* Current Maintainer & Author:
* Christoph Paasch <christoph.paasch@uclouvain.be>
*
* Additional authors:
* Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
* Gregory Detal <gregory.detal@uclouvain.be>
* Fabien Duchêne <fabien.duchene@uclouvain.be>
* Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
* Lavkesh Lahngir <lavkesh51@gmail.com>
* Andreas Ripke <ripke@neclab.eu>
* Vlad Dogaru <vlad.dogaru@intel.com>
* Octavian Purdila <octavian.purdila@intel.com>
* John Ronan <jronan@tssg.org>
* Catalin Nicutar <catalin.nicutar@gmail.com>
* Brandon Heller <brandonh@stanford.edu>
*
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#ifndef MPTCP_V4_H_
#define MPTCP_V4_H_
#include <linux/in.h>
#include <linux/skbuff.h>
#include <net/mptcp.h>
#include <net/request_sock.h>
#include <net/sock.h>
extern struct request_sock_ops mptcp_request_sock_ops;
extern const struct inet_connection_sock_af_ops mptcp_v4_specific;
extern struct tcp_request_sock_ops mptcp_request_sock_ipv4_ops;
extern struct tcp_request_sock_ops mptcp_join_request_sock_ipv4_ops;
#ifdef CONFIG_MPTCP
int mptcp_v4_do_rcv(struct sock *meta_sk, struct sk_buff *skb);
struct sock *mptcp_v4_search_req(const __be16 rport, const __be32 raddr,
const __be32 laddr, const struct net *net);
int __mptcp_init4_subsockets(struct sock *meta_sk, const struct mptcp_loc4 *loc,
__be16 sport, struct mptcp_rem4 *rem,
struct sock **subsk);
int mptcp_pm_v4_init(void);
void mptcp_pm_v4_undo(void);
u32 mptcp_v4_get_nonce(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport);
u64 mptcp_v4_get_key(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
u32 seed);
static inline int mptcp_init4_subsockets(struct sock *meta_sk,
const struct mptcp_loc4 *loc,
struct mptcp_rem4 *rem)
{
return __mptcp_init4_subsockets(meta_sk, loc, 0, rem, NULL);
}
#else
static inline int mptcp_v4_do_rcv(const struct sock *meta_sk,
const struct sk_buff *skb)
{
return 0;
}
#endif /* CONFIG_MPTCP */
#endif /* MPTCP_V4_H_ */

include/net/mptcp_v6.h (new executable file, 77 lines)

@ -0,0 +1,77 @@
/*
* MPTCP implementation
*
* Initial Design & Implementation:
* Sébastien Barré <sebastien.barre@uclouvain.be>
*
* Current Maintainer & Author:
* Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
*
* Additional authors:
* Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
* Gregory Detal <gregory.detal@uclouvain.be>
* Fabien Duchêne <fabien.duchene@uclouvain.be>
* Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
* Lavkesh Lahngir <lavkesh51@gmail.com>
* Andreas Ripke <ripke@neclab.eu>
* Vlad Dogaru <vlad.dogaru@intel.com>
* Octavian Purdila <octavian.purdila@intel.com>
* John Ronan <jronan@tssg.org>
* Catalin Nicutar <catalin.nicutar@gmail.com>
* Brandon Heller <brandonh@stanford.edu>
*
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#ifndef _MPTCP_V6_H
#define _MPTCP_V6_H
#include <linux/in6.h>
#include <net/if_inet6.h>
#include <net/mptcp.h>
#ifdef CONFIG_MPTCP
extern const struct inet_connection_sock_af_ops mptcp_v6_mapped;
extern const struct inet_connection_sock_af_ops mptcp_v6_specific;
extern struct request_sock_ops mptcp6_request_sock_ops;
extern struct tcp_request_sock_ops mptcp_request_sock_ipv6_ops;
extern struct tcp_request_sock_ops mptcp_join_request_sock_ipv6_ops;
int mptcp_v6_do_rcv(struct sock *meta_sk, struct sk_buff *skb);
struct sock *mptcp_v6_search_req(const __be16 rport, const struct in6_addr *raddr,
const struct in6_addr *laddr, const struct net *net);
int __mptcp_init6_subsockets(struct sock *meta_sk, const struct mptcp_loc6 *loc,
__be16 sport, struct mptcp_rem6 *rem,
struct sock **subsk);
int mptcp_pm_v6_init(void);
void mptcp_pm_v6_undo(void);
__u32 mptcp_v6_get_nonce(const __be32 *saddr, const __be32 *daddr,
__be16 sport, __be16 dport);
u64 mptcp_v6_get_key(const __be32 *saddr, const __be32 *daddr,
__be16 sport, __be16 dport, u32 seed);
static inline int mptcp_init6_subsockets(struct sock *meta_sk,
const struct mptcp_loc6 *loc,
struct mptcp_rem6 *rem)
{
return __mptcp_init6_subsockets(meta_sk, loc, 0, rem, NULL);
}
#else /* CONFIG_MPTCP */
#define mptcp_v6_mapped ipv6_mapped
static inline int mptcp_v6_do_rcv(struct sock *meta_sk, struct sk_buff *skb)
{
return 0;
}
#endif /* CONFIG_MPTCP */
#endif /* _MPTCP_V6_H */


@ -19,6 +19,9 @@
#include <net/netns/packet.h>
#include <net/netns/ipv4.h>
#include <net/netns/ipv6.h>
#ifdef CONFIG_MPTCP
#include <net/netns/mptcp.h>
#endif
#include <net/netns/ieee802154_6lowpan.h>
#include <net/netns/sctp.h>
#include <net/netns/dccp.h>
@ -110,6 +113,9 @@ struct net {
#if IS_ENABLED(CONFIG_IPV6)
struct netns_ipv6 ipv6;
#endif
#if IS_ENABLED(CONFIG_MPTCP)
struct netns_mptcp mptcp;
#endif
#if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN)
struct netns_ieee802154_lowpan ieee802154_lowpan;
#endif

include/net/netns/mptcp.h (new executable file, 52 lines)

@ -0,0 +1,52 @@
/*
* MPTCP implementation - MPTCP namespace
*
* Initial Design & Implementation:
* Sébastien Barré <sebastien.barre@uclouvain.be>
*
* Current Maintainer:
* Christoph Paasch <christoph.paasch@uclouvain.be>
*
* Additional authors:
* Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
* Gregory Detal <gregory.detal@uclouvain.be>
* Fabien Duchêne <fabien.duchene@uclouvain.be>
* Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
* Lavkesh Lahngir <lavkesh51@gmail.com>
* Andreas Ripke <ripke@neclab.eu>
* Vlad Dogaru <vlad.dogaru@intel.com>
* Octavian Purdila <octavian.purdila@intel.com>
* John Ronan <jronan@tssg.org>
* Catalin Nicutar <catalin.nicutar@gmail.com>
* Brandon Heller <brandonh@stanford.edu>
*
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#ifndef __NETNS_MPTCP_H__
#define __NETNS_MPTCP_H__
#include <linux/compiler.h>
enum {
MPTCP_PM_FULLMESH = 0,
MPTCP_PM_MAX
};
struct mptcp_mib;
struct netns_mptcp {
DEFINE_SNMP_STAT(struct mptcp_mib, mptcp_statistics);
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *proc_net_mptcp;
#endif
void *path_managers[MPTCP_PM_MAX];
};
#endif /* __NETNS_MPTCP_H__ */


@ -835,6 +835,9 @@ enum sock_flags {
SOCK_SELECT_ERR_QUEUE, /* Wake select on error queue */
SOCK_RCU_FREE, /* wait rcu grace period in sk_destruct() */
SOCK_TXTIME,
#ifdef CONFIG_MPTCP
SOCK_MPTCP, /* MPTCP set on this socket */
#endif
};
#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
@ -1143,6 +1146,10 @@ struct proto {
void (*rehash)(struct sock *sk);
int (*get_port)(struct sock *sk, unsigned short snum);
#ifdef CONFIG_MPTCP
void (*clear_sk)(struct sock *sk, int size);
#endif
/* Keeping track of sockets in use */
#ifdef CONFIG_PROC_FS
unsigned int inuse_idx;


@ -188,6 +188,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
#define TCPOPT_SACK 5 /* SACK Block */
#define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */
#define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */
#ifdef CONFIG_MPTCP
#define TCPOPT_MPTCP 30
#endif
#define TCPOPT_FASTOPEN 34 /* Fast open (RFC7413) */
#define TCPOPT_EXP 254 /* Experimental */
/* Magic number to be after the option value for sharing TCP
@ -244,6 +247,33 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
*/
#define TFO_SERVER_WO_SOCKOPT1 0x400
#ifdef CONFIG_MPTCP
/* Flags from tcp_input.c for tcp_ack */
#define FLAG_DATA 0x01 /* Incoming frame contained data. */
#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
#define FLAG_DATA_ACKED 0x04 /* This ACK acknowledged new data. */
#define FLAG_RETRANS_DATA_ACKED 0x08 /* "" "" some of which was retransmitted. */
#define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */
#define FLAG_DATA_SACKED 0x20 /* New SACK. */
#define FLAG_ECE 0x40 /* ECE in this ACK */
#define FLAG_LOST_RETRANS 0x80 /* This ACK marks some retransmission lost */
#define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/
#define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */
#define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
#define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */
#define FLAG_SET_XMIT_TIMER 0x1000 /* Set TLP or RTO timer */
#define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */
#define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */
#define FLAG_NO_CHALLENGE_ACK 0x8000 /* do not call tcp_send_challenge_ack() */
#define FLAG_ACK_MAYBE_DELAYED 0x10000 /* Likely a delayed ACK */
#define MPTCP_FLAG_DATA_ACKED 0x20000
#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE|FLAG_DSACKING_ACK)
#define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED)
#endif
/* sysctl variables for tcp */
extern int sysctl_tcp_max_orphans;
@ -321,6 +351,98 @@ extern struct proto tcp_prot;
#define TCP_DEC_STATS(net, field) SNMP_DEC_STATS((net)->mib.tcp_statistics, field)
#define TCP_ADD_STATS(net, field, val) SNMP_ADD_STATS((net)->mib.tcp_statistics, field, val)
#ifdef CONFIG_MPTCP
/**** START - Exports needed for MPTCP ****/
extern const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops;
extern const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops;
struct mptcp_options_received;
void tcp_cleanup_rbuf(struct sock *sk, int copied);
void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited);
int tcp_close_state(struct sock *sk);
void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now,
const struct sk_buff *skb);
int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib);
void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb);
int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
gfp_t gfp_mask);
unsigned int tcp_mss_split_point(const struct sock *sk,
const struct sk_buff *skb,
unsigned int mss_now,
unsigned int max_segs,
int nonagle);
bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb,
unsigned int cur_mss, int nonagle);
bool tcp_snd_wnd_test(const struct tcp_sock *tp, const struct sk_buff *skb,
unsigned int cur_mss);
unsigned int tcp_cwnd_test(const struct tcp_sock *tp, const struct sk_buff *skb);
int tcp_init_tso_segs(struct sk_buff *skb, unsigned int mss_now);
int __pskb_trim_head(struct sk_buff *skb, int len);
void tcp_queue_skb(struct sock *sk, struct sk_buff *skb);
void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags);
void tcp_reset(struct sock *sk);
bool tcp_may_update_window(const struct tcp_sock *tp, const u32 ack,
const u32 ack_seq, const u32 nwin);
bool tcp_urg_mode(const struct tcp_sock *tp);
void tcp_ack_probe(struct sock *sk);
void tcp_rearm_rto(struct sock *sk);
int tcp_write_timeout(struct sock *sk);
bool retransmits_timed_out(struct sock *sk,
unsigned int boundary,
unsigned int timeout);
void tcp_write_err(struct sock *sk);
void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr);
void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb);
void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now);
void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req);
void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb);
struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb);
void tcp_v4_reqsk_destructor(struct request_sock *req);
void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req);
void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb);
int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
void tcp_v6_destroy_sock(struct sock *sk);
void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb);
void tcp_v6_hash(struct sock *sk);
struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb);
struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
struct dst_entry *dst,
struct request_sock *req_unhash,
bool *own_req);
void tcp_v6_reqsk_destructor(struct request_sock *req);
unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
int large_allowed);
u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb);
void skb_clone_fraglist(struct sk_buff *skb);
void inet_twsk_free(struct inet_timewait_sock *tw);
int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb);
/* These states need RST on ABORT according to RFC793 */
static inline bool tcp_need_reset(int state)
{
return (1 << state) &
(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 |
TCPF_FIN_WAIT2 | TCPF_SYN_RECV);
}
int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,
bool *fragstolen);
void tcp_ofo_queue(struct sock *sk);
void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb);
int linear_payload_sz(bool first_skb);
/**** END - Exports needed for MPTCP ****/
#endif
void tcp_tasklet_init(void);
void tcp_v4_err(struct sk_buff *skb, u32);
@ -428,7 +550,14 @@ int tcp_mmap(struct file *file, struct socket *sock,
struct vm_area_struct *vma);
void tcp_parse_options(const struct net *net, const struct sk_buff *skb,
struct tcp_options_received *opt_rx,
#ifdef CONFIG_MPTCP
struct mptcp_options_received *mopt_rx,
#endif
int estab, struct tcp_fastopen_cookie *foc
#ifdef CONFIG_MPTCP
, struct tcp_sock *tp
#endif
);
const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);
/*
@ -437,6 +566,9 @@ const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb);
void tcp_v4_mtu_reduced(struct sock *sk);
#ifdef CONFIG_MPTCP
void tcp_v6_mtu_reduced(struct sock *sk);
#endif
void tcp_req_err(struct sock *sk, u32 seq, bool abort);
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
struct sock *tcp_create_openreq_child(const struct sock *sk,
@ -554,7 +686,12 @@ static inline u32 tcp_cookie_time(void)
u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th,
u16 *mssp);
#ifdef CONFIG_MPTCP
__u32 cookie_v4_init_sequence(struct request_sock *req, const struct sock *sk,
const struct sk_buff *skb, __u16 *mss);
#else
__u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mss);
#endif
u64 cookie_init_timestamp(struct request_sock *req);
bool cookie_timestamp_decode(const struct net *net,
struct tcp_options_received *opt);
@ -568,8 +705,13 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb);
u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph,
const struct tcphdr *th, u16 *mssp);
#ifdef CONFIG_MPTCP
__u32 cookie_v6_init_sequence(struct request_sock *req, const struct sock *sk,
const struct sk_buff *skb, __u16 *mss);
#else
__u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mss);
#endif
#endif
/* tcp_output.c */
void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
@ -604,10 +746,20 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto);
void tcp_skb_collapse_tstamp(struct sk_buff *skb,
const struct sk_buff *next_skb);
#ifdef CONFIG_MPTCP
u16 tcp_select_window(struct sock *sk);
bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
int push_one, gfp_t gfp);
#endif
/* tcp_input.c */
void tcp_rearm_rto(struct sock *sk);
void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req);
void tcp_reset(struct sock *sk);
#ifdef CONFIG_MPTCP
void tcp_set_rto(struct sock *sk);
bool tcp_should_expand_sndbuf(const struct sock *sk);
#endif
void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb);
void tcp_fin(struct sock *sk);
void tcp_check_space(struct sock *sk);
@ -652,7 +804,11 @@ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
}
/* tcp.c */
#ifdef CONFIG_MPTCP
void tcp_get_info(struct sock *, struct tcp_info *, bool no_lock);
#else
void tcp_get_info(struct sock *, struct tcp_info *);
#endif
/* Read 'sendfile()'-style from a TCP socket */
int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
@ -840,6 +996,12 @@ struct tcp_skb_cb {
u16 tcp_gso_size;
};
};
#ifdef CONFIG_MPTCP
__u8 mptcp_flags; /* flags for the MPTCP layer */
__u8 dss_off; /* Number of 4-byte words until
* seq-number
*/
#endif
__u8 tcp_flags; /* TCP header flags. (tcp[13]) */
__u8 sacked; /* State flags for SACK. */
@ -858,6 +1020,12 @@ struct tcp_skb_cb {
has_rxtstamp:1, /* SKB has a RX timestamp */
unused:5;
__u32 ack_seq; /* Sequence number ACK'd */
#ifdef CONFIG_MPTCP
union { /* For MPTCP outgoing frames */
__u32 path_mask; /* paths that tried to send this skb */
__u32 dss[6]; /* DSS options */
};
#endif
union {
struct {
/* There is space for up to 24 bytes */
@ -1381,6 +1549,19 @@ static inline int tcp_win_from_space(const struct sock *sk, int space)
space - (space>>tcp_adv_win_scale);
}
#ifdef CONFIG_MPTCP
extern struct static_key mptcp_static_key;
static inline bool mptcp(const struct tcp_sock *tp)
{
return static_key_false(&mptcp_static_key) && tp->mpc;
}
#else
static inline bool mptcp(const struct tcp_sock *tp)
{
return 0;
}
#endif
/* Note: caller must be prepared to deal with negative returns */
static inline int tcp_space(const struct sock *sk)
{
@ -1932,6 +2113,32 @@ struct tcp_sock_af_ops {
#endif
};
#ifdef CONFIG_MPTCP
/* TCP/MPTCP-specific functions */
struct tcp_sock_ops {
u32 (*__select_window)(struct sock *sk);
u16 (*select_window)(struct sock *sk);
void (*select_initial_window)(const struct sock *sk, int __space,
__u32 mss, __u32 *rcv_wnd,
__u32 *window_clamp, int wscale_ok,
__u8 *rcv_wscale, __u32 init_rcv_wnd);
int (*select_size)(const struct sock *sk, bool first_skb, bool zc);
void (*init_buffer_space)(struct sock *sk);
void (*set_rto)(struct sock *sk);
bool (*should_expand_sndbuf)(const struct sock *sk);
void (*send_fin)(struct sock *sk);
bool (*write_xmit)(struct sock *sk, unsigned int mss_now, int nonagle,
int push_one, gfp_t gfp);
void (*send_active_reset)(struct sock *sk, gfp_t priority);
int (*write_wakeup)(struct sock *sk, int mib);
void (*retransmit_timer)(struct sock *sk);
void (*time_wait)(struct sock *sk, int state, int timeo);
void (*cleanup_rbuf)(struct sock *sk, int copied);
void (*cwnd_validate)(struct sock *sk, bool is_cwnd_limited);
};
extern const struct tcp_sock_ops tcp_specific;
#endif
struct tcp_request_sock_ops {
u16 mss_clamp;
#ifdef CONFIG_TCP_MD5SIG
@ -1942,12 +2149,26 @@ struct tcp_request_sock_ops {
const struct sock *sk,
const struct sk_buff *skb);
#endif
#ifdef CONFIG_MPTCP
int (*init_req)(struct request_sock *req,
const struct sock *sk_listener,
struct sk_buff *skb,
bool want_cookie);
#else
void (*init_req)(struct request_sock *req,
const struct sock *sk_listener,
struct sk_buff *skb);
#endif
#ifdef CONFIG_SYN_COOKIES
#ifdef CONFIG_MPTCP
__u32 (*cookie_init_seq)(struct request_sock *req, const struct sock *sk,
const struct sk_buff *skb, __u16 *mss);
#else
__u32 (*cookie_init_seq)(const struct sk_buff *skb,
__u16 *mss);
#endif
#endif
struct dst_entry *(*route_req)(const struct sock *sk, struct flowi *fl,
const struct request_sock *req);
@ -1965,18 +2186,36 @@ extern const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops;
#endif
#ifdef CONFIG_SYN_COOKIES
#ifdef CONFIG_MPTCP
static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops,
struct request_sock *req,
const struct sock *sk, struct sk_buff *skb,
__u16 *mss)
#else
static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops,
const struct sock *sk, struct sk_buff *skb,
__u16 *mss)
#endif
{
tcp_synq_overflow(sk);
__NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
#ifdef CONFIG_MPTCP
return ops->cookie_init_seq(req, sk, skb, mss);
#else
return ops->cookie_init_seq(skb, mss);
#endif
}
#else
#ifdef CONFIG_MPTCP
static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops,
struct request_sock *req,
const struct sock *sk, struct sk_buff *skb,
__u16 *mss)
#else
static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops,
const struct sock *sk, struct sk_buff *skb,
__u16 *mss)
#endif
{
return 0;
}


@ -26,6 +26,9 @@ enum {
TCP_LISTEN,
TCP_CLOSING, /* Now a valid state */
TCP_NEW_SYN_RECV,
#ifdef CONFIG_MPTCP
TCP_RST_WAIT,
#endif
TCP_MAX_STATES /* Leave at the end! */
};
@ -47,6 +50,9 @@ enum {
TCPF_LISTEN = (1 << TCP_LISTEN),
TCPF_CLOSING = (1 << TCP_CLOSING),
TCPF_NEW_SYN_RECV = (1 << TCP_NEW_SYN_RECV),
#ifdef CONFIG_MPTCP
TCPF_RST_WAIT = (1 << TCP_RST_WAIT),
#endif
};
#endif /* _LINUX_TCP_STATES_H */


@ -58,6 +58,10 @@ ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp, __u16 srcp,
/* address family specific functions */
extern const struct inet_connection_sock_af_ops ipv4_specific;
#ifdef CONFIG_MPTCP
extern const struct inet_connection_sock_af_ops ipv6_mapped;
extern const struct inet_connection_sock_af_ops ipv6_specific;
#endif
void inet6_destroy_sock(struct sock *sk);


@ -10,6 +10,9 @@
#include <linux/tracepoint.h>
#include <net/ipv6.h>
#include <net/tcp.h>
#ifdef CONFIG_MPTCP
#include <net/mptcp.h>
#endif
#include <linux/sock_diag.h>
#define TP_STORE_V4MAPPED(__entry, saddr, daddr) \
@ -178,6 +181,15 @@ DEFINE_EVENT(tcp_event_sk, tcp_rcv_space_adjust,
TP_ARGS(sk)
);
#ifdef CONFIG_MPTCP
DEFINE_EVENT(tcp_event_sk_skb, mptcp_retransmit,
TP_PROTO(const struct sock *sk, const struct sk_buff *skb),
TP_ARGS(sk, skb)
);
#endif
TRACE_EVENT(tcp_retransmit_synack,
TP_PROTO(const struct sock *sk, const struct request_sock *req),
@ -245,6 +257,9 @@ TRACE_EVENT(tcp_probe,
__field(__u32, srtt)
__field(__u32, rcv_wnd)
__field(__u64, sock_cookie)
#ifdef CONFIG_MPTCP
__field(__u8, mptcp)
#endif
),
TP_fast_assign(
@ -271,13 +286,25 @@ TRACE_EVENT(tcp_probe,
__entry->ssthresh = tcp_current_ssthresh(sk);
__entry->srtt = tp->srtt_us >> 3;
__entry->sock_cookie = sock_gen_cookie(sk);
#ifdef CONFIG_MPTCP
__entry->mptcp = mptcp(tp) ? tp->mptcp->path_index : 0;
#endif
),
#ifdef CONFIG_MPTCP
TP_printk("src=%pISpc dest=%pISpc mark=%#x data_len=%d snd_nxt=%#x snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u sock_cookie=%llx mptcp=%d",
__entry->saddr, __entry->daddr, __entry->mark,
__entry->data_len, __entry->snd_nxt, __entry->snd_una,
__entry->snd_cwnd, __entry->ssthresh, __entry->snd_wnd,
__entry->srtt, __entry->rcv_wnd, __entry->sock_cookie,
__entry->mptcp)
#else
TP_printk("src=%pISpc dest=%pISpc mark=%#x data_len=%d snd_nxt=%#x snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u sock_cookie=%llx",
__entry->saddr, __entry->daddr, __entry->mark,
__entry->data_len, __entry->snd_nxt, __entry->snd_una,
__entry->snd_cwnd, __entry->ssthresh, __entry->snd_wnd,
__entry->srtt, __entry->rcv_wnd, __entry->sock_cookie)
#endif
);
#endif /* _TRACE_TCP_H */


@ -2740,6 +2740,9 @@ enum {
BPF_TCP_LISTEN,
BPF_TCP_CLOSING, /* Now a valid state */
BPF_TCP_NEW_SYN_RECV,
#ifdef CONFIG_MPTCP
BPF_TCP_RST_WAIT,
#endif
BPF_TCP_MAX_STATES /* Leave at the end! */
};


@ -132,6 +132,11 @@ enum net_device_flags {
#define IFF_ECHO IFF_ECHO
#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO */
#ifdef CONFIG_MPTCP
#define IFF_NOMULTIPATH 0x80000 /* Disable for MPTCP */
#define IFF_MPBACKUP 0x100000 /* Use as backup path for MPTCP */
#endif
#define IFF_VOLATILE (IFF_LOOPBACK|IFF_POINTOPOINT|IFF_BROADCAST|IFF_ECHO|\
IFF_MASTER|IFF_SLAVE|IFF_RUNNING|IFF_LOWER_UP|IFF_DORMANT)
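
IFF_NOMULTIPATH and IFF_MPBACKUP sit above bit 15, so the legacy SIOCSIFFLAGS ioctl (whose ifr_flags is only 16 bits wide) cannot reach them; userspace has to go through rtnetlink, whose 32-bit ifi_flags/ifi_change feed the __dev_change_flags() hunk in net/core/dev.c further down. A hedged sketch of flagging an interface as an MPTCP backup path follows (requires CAP_NET_ADMIN); how the path manager then interprets the bit is outside this diff.

    /* Sketch, not part of this commit: set IFF_MPBACKUP on an interface
     * via RTM_NEWLINK. The flag value is taken from the hunk above.
     */
    #include <linux/netlink.h>
    #include <linux/rtnetlink.h>
    #include <net/if.h>          /* if_nametoindex() */
    #include <string.h>
    #include <sys/socket.h>
    #include <unistd.h>

    #ifndef IFF_MPBACKUP
    #define IFF_MPBACKUP 0x100000
    #endif

    static int mptcp_set_backup(const char *ifname)
    {
        struct {
            struct nlmsghdr nh;
            struct ifinfomsg ifi;
        } req;
        int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

        if (fd < 0)
            return -1;

        memset(&req, 0, sizeof(req));
        req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifi));
        req.nh.nlmsg_type = RTM_NEWLINK;
        req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
        req.ifi.ifi_family = AF_UNSPEC;
        req.ifi.ifi_index = if_nametoindex(ifname);
        req.ifi.ifi_flags = IFF_MPBACKUP;   /* desired value of the bit */
        req.ifi.ifi_change = IFF_MPBACKUP;  /* only touch this bit */

        if (send(fd, &req, req.nh.nlmsg_len, 0) < 0) {
            close(fd);
            return -1;
        }
        close(fd);  /* a real client would also read and check the ACK */
        return 0;
    }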

include/uapi/linux/mptcp.h (new executable file, 149 lines)

@ -0,0 +1,149 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
* Netlink API for Multipath TCP
*
* Author: Gregory Detal <gregory.detal@tessares.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#ifndef _LINUX_MPTCP_H
#define _LINUX_MPTCP_H
#define MPTCP_GENL_NAME "mptcp"
#define MPTCP_GENL_EV_GRP_NAME "mptcp_events"
#define MPTCP_GENL_CMD_GRP_NAME "mptcp_commands"
#define MPTCP_GENL_VER 0x1
/*
* ATTR types defined for MPTCP
*/
enum {
MPTCP_ATTR_UNSPEC = 0,
MPTCP_ATTR_TOKEN, /* u32 */
MPTCP_ATTR_FAMILY, /* u16 */
MPTCP_ATTR_LOC_ID, /* u8 */
MPTCP_ATTR_REM_ID, /* u8 */
MPTCP_ATTR_SADDR4, /* u32 */
MPTCP_ATTR_SADDR6, /* struct in6_addr */
MPTCP_ATTR_DADDR4, /* u32 */
MPTCP_ATTR_DADDR6, /* struct in6_addr */
MPTCP_ATTR_SPORT, /* u16 */
MPTCP_ATTR_DPORT, /* u16 */
MPTCP_ATTR_BACKUP, /* u8 */
MPTCP_ATTR_ERROR, /* u8 */
MPTCP_ATTR_FLAGS, /* u16 */
MPTCP_ATTR_TIMEOUT, /* u32 */
MPTCP_ATTR_IF_IDX, /* s32 */
__MPTCP_ATTR_AFTER_LAST
};
#define MPTCP_ATTR_MAX (__MPTCP_ATTR_AFTER_LAST - 1)
/*
* Events generated by MPTCP:
* - MPTCP_EVENT_CREATED: token, family, saddr4 | saddr6, daddr4 | daddr6,
* sport, dport
* A new connection has been created. It is the good time to allocate
* memory and send ADD_ADDR if needed. Depending on the traffic-patterns
* it can take a long time until the MPTCP_EVENT_ESTABLISHED is sent.
*
* - MPTCP_EVENT_ESTABLISHED: token, family, saddr4 | saddr6, daddr4 | daddr6,
* sport, dport
* A connection is established (can start new subflows).
*
* - MPTCP_EVENT_CLOSED: token
* A connection has stopped.
*
* - MPTCP_EVENT_ANNOUNCED: token, rem_id, family, daddr4 | daddr6 [, dport]
* A new address has been announced by the peer.
*
* - MPTCP_EVENT_REMOVED: token, rem_id
* An address has been lost by the peer.
*
* - MPTCP_EVENT_SUB_ESTABLISHED: token, family, saddr4 | saddr6,
* daddr4 | daddr6, sport, dport, backup,
* if_idx [, error]
* A new subflow has been established. 'error' should not be set.
*
* - MPTCP_EVENT_SUB_CLOSED: token, family, saddr4 | saddr6, daddr4 | daddr6,
* sport, dport, backup, if_idx [, error]
* A subflow has been closed. An error (copy of sk_err) could be set if an
* error has been detected for this subflow.
*
* - MPTCP_EVENT_SUB_PRIORITY: token, family, saddr4 | saddr6, daddr4 | daddr6,
* sport, dport, backup, if_idx [, error]
* The priority of a subflow has changed. 'error' should not be set.
*
* Commands for MPTCP:
* - MPTCP_CMD_ANNOUNCE: token, loc_id, family, saddr4 | saddr6 [, sport]
* Announce a new address to the peer.
*
* - MPTCP_CMD_REMOVE: token, loc_id
* Announce that an address has been lost to the peer.
*
* - MPTCP_CMD_SUB_CREATE: token, family, loc_id, rem_id, [saddr4 | saddr6,
* daddr4 | daddr6, dport [, sport, backup, if_idx]]
* Create a new subflow.
*
* - MPTCP_CMD_SUB_DESTROY: token, family, saddr4 | saddr6, daddr4 | daddr6,
* sport, dport
* Close a subflow.
*
* - MPTCP_CMD_SUB_PRIORITY: token, family, saddr4 | saddr6, daddr4 | daddr6,
* sport, dport, backup
* Change the priority of a subflow.
*
* - MPTCP_CMD_SET_FILTER: flags
* Set the filter on events. Set MPTCPF_* flags to only receive specific
* events. Default is to receive all events.
*
* - MPTCP_CMD_EXIST: token
* Check if this token is linked to an existing socket.
*/
enum {
MPTCP_CMD_UNSPEC = 0,
MPTCP_EVENT_CREATED,
MPTCP_EVENT_ESTABLISHED,
MPTCP_EVENT_CLOSED,
MPTCP_CMD_ANNOUNCE,
MPTCP_CMD_REMOVE,
MPTCP_EVENT_ANNOUNCED,
MPTCP_EVENT_REMOVED,
MPTCP_CMD_SUB_CREATE,
MPTCP_CMD_SUB_DESTROY,
MPTCP_EVENT_SUB_ESTABLISHED,
MPTCP_EVENT_SUB_CLOSED,
MPTCP_CMD_SUB_PRIORITY,
MPTCP_EVENT_SUB_PRIORITY,
MPTCP_CMD_SET_FILTER,
MPTCP_CMD_EXIST,
__MPTCP_CMD_AFTER_LAST
};
#define MPTCP_CMD_MAX (__MPTCP_CMD_AFTER_LAST - 1)
enum {
MPTCPF_EVENT_CREATED = (1 << 1),
MPTCPF_EVENT_ESTABLISHED = (1 << 2),
MPTCPF_EVENT_CLOSED = (1 << 3),
MPTCPF_EVENT_ANNOUNCED = (1 << 4),
MPTCPF_EVENT_REMOVED = (1 << 5),
MPTCPF_EVENT_SUB_ESTABLISHED = (1 << 6),
MPTCPF_EVENT_SUB_CLOSED = (1 << 7),
MPTCPF_EVENT_SUB_PRIORITY = (1 << 8),
};
#endif /* _LINUX_MPTCP_H */
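
A sketch, not part of this commit, of how a userspace path-manager daemon might consume these events with libnl-genl-3, assuming the path manager that emits them is loaded and active; only MPTCP_ATTR_TOKEN is decoded here. Commands such as MPTCP_CMD_SUB_CREATE would be sent on the same generic-netlink family using the attributes documented above.

    /* Sketch: subscribe to the "mptcp_events" multicast group defined above.
     * Build against libnl-genl-3.0 (pkg-config libnl-genl-3.0).
     */
    #include <linux/mptcp.h>        /* the header added by this commit */
    #include <netlink/attr.h>
    #include <netlink/genl/ctrl.h>
    #include <netlink/genl/genl.h>
    #include <netlink/msg.h>
    #include <netlink/netlink.h>
    #include <netlink/socket.h>
    #include <stdio.h>

    static int handle_event(struct nl_msg *msg, void *arg)
    {
        struct genlmsghdr *ghdr = nlmsg_data(nlmsg_hdr(msg));
        struct nlattr *attrs[MPTCP_ATTR_MAX + 1];

        nla_parse(attrs, MPTCP_ATTR_MAX, genlmsg_attrdata(ghdr, 0),
                  genlmsg_attrlen(ghdr, 0), NULL);

        if (attrs[MPTCP_ATTR_TOKEN])
            printf("event %u, token %08x\n", ghdr->cmd,
                   nla_get_u32(attrs[MPTCP_ATTR_TOKEN]));
        return NL_OK;
    }

    int main(void)
    {
        struct nl_sock *sk = nl_socket_alloc();
        int grp;

        if (!sk || genl_connect(sk))
            return 1;

        grp = genl_ctrl_resolve_grp(sk, MPTCP_GENL_NAME, MPTCP_GENL_EV_GRP_NAME);
        if (grp < 0)
            return 1;   /* family or event group not registered */

        nl_socket_disable_seq_check(sk);
        nl_socket_modify_cb(sk, NL_CB_VALID, NL_CB_CUSTOM, handle_event, NULL);
        nl_socket_add_membership(sk, grp);

        for (;;)
            nl_recvmsgs_default(sk);
    }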


@ -18,7 +18,17 @@
#ifndef _UAPI_LINUX_TCP_H
#define _UAPI_LINUX_TCP_H
#ifndef CONFIG_MPTCP
#include <linux/types.h>
#endif
#ifdef CONFIG_MPTCP
#ifndef __KERNEL__
#include <sys/socket.h>
#endif
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/types.h>
#endif
#include <asm/byteorder.h>
#include <linux/socket.h>
@ -130,6 +140,14 @@ enum {
#define TCP_REPAIR_ON 1
#define TCP_REPAIR_OFF 0
#define TCP_REPAIR_OFF_NO_WP -1 /* Turn off without window probes */
#ifdef CONFIG_MPTCP
#define MPTCP_ENABLED 42
#define MPTCP_SCHEDULER 43
#define MPTCP_PATH_MANAGER 44
#define MPTCP_INFO 45
#define MPTCP_INFO_FLAG_SAVE_MASTER 0x01
#endif
struct tcp_repair_opt {
__u32 opt_code;
@ -268,6 +286,55 @@ enum {
TCP_NLA_REORD_SEEN, /* reordering events seen */
};
#ifdef CONFIG_MPTCP
struct mptcp_meta_info {
__u8 mptcpi_state;
__u8 mptcpi_retransmits;
__u8 mptcpi_probes;
__u8 mptcpi_backoff;
__u32 mptcpi_rto;
__u32 mptcpi_unacked;
/* Times. */
__u32 mptcpi_last_data_sent;
__u32 mptcpi_last_data_recv;
__u32 mptcpi_last_ack_recv;
__u32 mptcpi_total_retrans;
__u64 mptcpi_bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked */
__u64 mptcpi_bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived */
};
struct mptcp_sub_info {
union {
struct sockaddr src;
struct sockaddr_in src_v4;
struct sockaddr_in6 src_v6;
};
union {
struct sockaddr dst;
struct sockaddr_in dst_v4;
struct sockaddr_in6 dst_v6;
};
};
struct mptcp_info {
__u32 tcp_info_len; /* Length of each struct tcp_info in subflows pointer */
__u32 sub_len; /* Total length of memory pointed to by subflows pointer */
__u32 meta_len; /* Length of memory pointed to by meta_info */
__u32 sub_info_len; /* Length of each struct mptcp_sub_info in subflow_info pointer */
__u32 total_sub_info_len; /* Total length of memory pointed to by subflow_info */
struct mptcp_meta_info *meta_info;
struct tcp_info *initial;
struct tcp_info *subflows; /* Pointer to array of tcp_info structs */
struct mptcp_sub_info *subflow_info;
};
#endif
/* for TCP_MD5SIG socket option */
#define TCP_MD5SIG_MAXKEYLEN 80
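
A sketch, not part of this commit, of how userspace might read the structures above through the MPTCP_INFO getsockopt: the caller provides the buffers and records their sizes, and the kernel is expected to fill them in. Treating MPTCP_INFO as an IPPROTO_TCP option follows the out-of-tree MPTCP convention, and because this port guards the struct definitions with #ifdef CONFIG_MPTCP, a real program may have to copy them out of the patched <linux/tcp.h>; both points are assumptions here.

    #include <netinet/in.h>      /* IPPROTO_TCP; keep before linux/tcp.h */
    #include <linux/tcp.h>       /* patched header: struct mptcp_info & friends */
    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>

    #ifndef MPTCP_INFO
    #define MPTCP_INFO 45
    #endif
    #define MAX_SUBFLOWS 8

    static void dump_mptcp_info(int fd)
    {
        struct mptcp_meta_info meta;
        struct tcp_info initial, subflows[MAX_SUBFLOWS];
        struct mptcp_sub_info sub_info[MAX_SUBFLOWS];
        struct mptcp_info minfo;
        socklen_t len = sizeof(minfo);

        /* Tell the kernel how big each caller-supplied buffer is. */
        memset(&minfo, 0, sizeof(minfo));
        minfo.tcp_info_len = sizeof(struct tcp_info);
        minfo.sub_len = sizeof(subflows);
        minfo.meta_len = sizeof(meta);
        minfo.sub_info_len = sizeof(struct mptcp_sub_info);
        minfo.total_sub_info_len = sizeof(sub_info);
        minfo.meta_info = &meta;
        minfo.initial = &initial;
        minfo.subflows = subflows;
        minfo.subflow_info = sub_info;

        if (getsockopt(fd, IPPROTO_TCP, MPTCP_INFO, &minfo, &len) < 0)
            perror("MPTCP_INFO (not an MPTCP socket, or CONFIG_MPTCP off?)");
        else
            printf("meta state %u, %u unacked segments\n",
                   meta.mptcpi_state, meta.mptcpi_unacked);
    }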


@ -89,6 +89,9 @@ if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"
source "net/netlabel/Kconfig"
#ifdef CONFIG_MPTCP
source "net/mptcp/Kconfig"
#endif
endif # if INET


@ -20,6 +20,7 @@ obj-$(CONFIG_TLS) += tls/
obj-$(CONFIG_XFRM) += xfrm/
obj-$(CONFIG_UNIX_SCM) += unix/
obj-$(CONFIG_NET) += ipv6/
obj-$(CONFIG_MPTCP) += mptcp/
obj-$(CONFIG_BPFILTER) += bpfilter/
obj-$(CONFIG_PACKET) += packet/
obj-$(CONFIG_NET_KEY) += key/


@ -7686,7 +7686,11 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags)
dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
IFF_AUTOMEDIA
#ifdef CONFIG_MPTCP
| IFF_NOMULTIPATH | IFF_MPBACKUP
#endif
)) |
(dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
IFF_ALLMULTI));


@ -548,7 +548,10 @@ static inline void skb_drop_fraglist(struct sk_buff *skb)
skb_drop_list(&skb_shinfo(skb)->frag_list);
}
#ifndef CONFIG_MPTCP
static
#endif
void skb_clone_fraglist(struct sk_buff *skb)
{
struct sk_buff *list;


@ -140,6 +140,11 @@
#include <trace/events/sock.h>
#ifdef CONFIG_MPTCP
#include <net/mptcp.h>
#include <net/inet_common.h>
#endif
#include <net/tcp.h>
#include <net/busy_poll.h>
@ -404,11 +409,17 @@ int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
unsigned long flags;
struct sk_buff_head *list = &sk->sk_receive_queue;
if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
if (sk->sk_rcvbuf < sysctl_rmem_max) {
/* increase sk_rcvbuf twice */
sk->sk_rcvbuf = min(sk->sk_rcvbuf * 2, (int)sysctl_rmem_max);
}
if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
atomic_inc(&sk->sk_drops);
trace_sock_rcvqueue_full(sk, skb);
return -ENOMEM;
}
}
if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
atomic_inc(&sk->sk_drops);
@ -1429,6 +1440,23 @@ lenout:
*/
static inline void sock_lock_init(struct sock *sk)
{
#ifdef CONFIG_MPTCP
/* Reclassify the lock-class for subflows */
if (sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP)
if (mptcp(tcp_sk(sk)) || tcp_sk(sk)->is_master_sk) {
sock_lock_init_class_and_name(sk, meta_slock_key_name,
&meta_slock_key,
meta_key_name,
&meta_key);
/* We don't yet have the mptcp-point.
* Thus we still need inet_sock_destruct
*/
sk->sk_destruct = inet_sock_destruct;
return;
}
#endif
if (sk->sk_kern_sock)
sock_lock_init_class_and_name(
sk,
@ -1478,7 +1506,16 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
if (!sk)
return sk;
if (want_init_on_alloc(priority))
#ifdef CONFIG_MPTCP
{
if (prot->clear_sk)
prot->clear_sk(sk, prot->obj_size);
else
#endif
sk_prot_clear_nulls(sk, prot->obj_size);
#ifdef CONFIG_MPTCP
}
#endif
} else
sk = kmalloc(prot->obj_size, priority);
@ -1708,6 +1745,9 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
atomic_set(&newsk->sk_zckey, 0);
sock_reset_flag(newsk, SOCK_DONE);
#ifdef CONFIG_MPTCP
sock_reset_flag(newsk, SOCK_MPTCP);
#endif
/* sk->sk_memcg will be populated at accept() time */
newsk->sk_memcg = NULL;


@ -681,6 +681,51 @@ config TCP_CONG_BBR
bufferbloat, policers, or AQM schemes that do not provide a delay
signal. It requires the fq ("Fair Queue") pacing packet scheduler.
config TCP_CONG_LIA
tristate "MPTCP Linked Increase"
depends on MPTCP
default n
---help---
MultiPath TCP Linked Increase Congestion Control
To enable it, just put 'lia' in tcp_congestion_control
config TCP_CONG_OLIA
tristate "MPTCP Opportunistic Linked Increase"
depends on MPTCP
default n
---help---
MultiPath TCP Opportunistic Linked Increase Congestion Control
To enable it, just put 'olia' in tcp_congestion_control
config TCP_CONG_WVEGAS
tristate "MPTCP WVEGAS CONGESTION CONTROL"
depends on MPTCP
default n
---help---
wVegas congestion control for MPTCP
To enable it, just put 'wvegas' in tcp_congestion_control
config TCP_CONG_BALIA
tristate "MPTCP BALIA CONGESTION CONTROL"
depends on MPTCP
default n
---help---
Multipath TCP Balanced Linked Adaptation Congestion Control
To enable it, just put 'balia' in tcp_congestion_control
config TCP_CONG_MCTCPDESYNC
tristate "DESYNCHRONIZED MCTCP CONGESTION CONTROL (EXPERIMENTAL)"
depends on MPTCP
default n
---help---
Desynchronized MultiChannel TCP Congestion Control. This is experimental
code that only supports single path and must have set mptcp_ndiffports
larger than one.
To enable it, just put 'mctcpdesync' in tcp_congestion_control
For further details see:
http://ieeexplore.ieee.org/abstract/document/6911722/
https://doi.org/10.1016/j.comcom.2015.07.010
choice
prompt "Default TCP congestion control"
default DEFAULT_CUBIC
@ -718,6 +763,21 @@ choice
config DEFAULT_BBR
bool "BBR" if TCP_CONG_BBR=y
config DEFAULT_LIA
bool "Lia" if TCP_CONG_LIA=y
config DEFAULT_OLIA
bool "Olia" if TCP_CONG_OLIA=y
config DEFAULT_WVEGAS
bool "Wvegas" if TCP_CONG_WVEGAS=y
config DEFAULT_BALIA
bool "Balia" if TCP_CONG_BALIA=y
config DEFAULT_MCTCPDESYNC
bool "Mctcpdesync (EXPERIMENTAL)" if TCP_CONG_MCTCPDESYNC=y
config DEFAULT_RENO
bool "Reno"
endchoice
@ -738,6 +798,10 @@ config DEFAULT_TCP_CONG
default "vegas" if DEFAULT_VEGAS
default "westwood" if DEFAULT_WESTWOOD
default "veno" if DEFAULT_VENO
default "lia" if DEFAULT_LIA
default "olia" if DEFAULT_OLIA
default "wvegas" if DEFAULT_WVEGAS
default "balia" if DEFAULT_BALIA
default "reno" if DEFAULT_RENO
default "dctcp" if DEFAULT_DCTCP
default "cdg" if DEFAULT_CDG
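
The help texts above set the system-wide default through the net.ipv4.tcp_congestion_control sysctl; an individual connection can also opt in with the standard TCP_CONGESTION socket option, as in this sketch (the chosen algorithm must be built in or loaded as a module, e.g. CONFIG_TCP_CONG_LIA here):

    #include <netinet/in.h>
    #include <netinet/tcp.h>     /* TCP_CONGESTION */
    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>

    static int use_lia(int fd)
    {
        const char name[] = "lia";   /* or "olia", "wvegas", "balia" */

        if (setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION,
                       name, strlen(name)) < 0) {
            perror("TCP_CONGESTION");    /* algorithm not available? */
            return -1;
        }
        return 0;
    }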


@ -105,6 +105,9 @@
#include <net/ip_fib.h>
#include <net/inet_connection_sock.h>
#include <net/tcp.h>
#ifdef CONFIG_MPTCP
#include <net/mptcp.h>
#endif
#include <net/udp.h>
#include <net/udplite.h>
#include <net/ping.h>
@ -121,6 +124,9 @@
#include <linux/mroute.h>
#endif
#include <net/l3mdev.h>
#ifdef CONFIG_NET_ANALYTICS
#include <net/analytics.h>
#endif
#include <trace/events/sock.h>
@ -167,6 +173,11 @@ void inet_sock_destruct(struct sock *sk)
return;
}
#ifdef CONFIG_MPTCP
if (sock_flag(sk, SOCK_MPTCP))
mptcp_disable_static_key();
#endif
WARN_ON(atomic_read(&sk->sk_rmem_alloc));
WARN_ON(refcount_read(&sk->sk_wmem_alloc));
WARN_ON(sk->sk_wmem_queued);
@ -261,8 +272,12 @@ EXPORT_SYMBOL(inet_listen);
* Create an inet socket.
*/
#ifdef CONFIG_MPTCP
int inet_create(struct net *net, struct socket *sock, int protocol, int kern)
#else
static int inet_create(struct net *net, struct socket *sock, int protocol,
int kern)
#endif
{
struct sock *sk;
struct inet_protosw *answer;
@ -761,6 +776,24 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags,
lock_sock(sk2);
sock_rps_record_flow(sk2);
#ifdef CONFIG_MPTCP
if (sk2->sk_protocol == IPPROTO_TCP && mptcp(tcp_sk(sk2))) {
struct mptcp_tcp_sock *mptcp;
mptcp_for_each_sub(tcp_sk(sk2)->mpcb, mptcp) {
sock_rps_record_flow(mptcp_to_sock(mptcp));
}
if (tcp_sk(sk2)->mpcb->master_sk) {
struct sock *sk_it = tcp_sk(sk2)->mpcb->master_sk;
write_lock_bh(&sk_it->sk_callback_lock);
sk_it->sk_wq = newsock->wq;
sk_it->sk_socket = newsock;
write_unlock_bh(&sk_it->sk_callback_lock);
}
}
#endif
WARN_ON(!((1 << sk2->sk_state) &
(TCPF_ESTABLISHED | TCPF_SYN_RECV |
TCPF_CLOSE_WAIT | TCPF_CLOSE)));
@ -809,6 +842,9 @@ EXPORT_SYMBOL(inet_getname);
int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
{
struct sock *sk = sock->sk;
#ifdef CONFIG_NET_ANALYTICS
int err;
#endif
sock_rps_record_flow(sk);
@ -817,7 +853,14 @@ int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
inet_autobind(sk))
return -EAGAIN;
#ifdef CONFIG_NET_ANALYTICS
err = sk->sk_prot->sendmsg(sk, msg, size);
net_usr_tx(sk, err);
return err;
#else
return sk->sk_prot->sendmsg(sk, msg, size);
#endif
}
EXPORT_SYMBOL(inet_sendmsg);
@ -853,6 +896,11 @@ int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
flags & ~MSG_DONTWAIT, &addr_len);
if (err >= 0)
msg->msg_namelen = addr_len;
#ifdef CONFIG_NET_ANALYTICS
net_usr_rx(sk, err);
#endif
return err;
}
EXPORT_SYMBOL(inet_recvmsg);
@ -1967,6 +2015,10 @@ static int __init inet_init(void)
*/
ip_init();
#ifdef CONFIG_MPTCP
/* We must initialize MPTCP before TCP. */
mptcp_init();
#endif
/* Initialise per-cpu ipv4 mibs */
if (init_ipv4_mibs())


@ -23,6 +23,9 @@
#include <net/route.h>
#include <net/tcp_states.h>
#include <net/xfrm.h>
#ifdef CONFIG_MPTCP
#include <net/mptcp.h>
#endif
#include <net/tcp.h>
#include <net/sock_reuseport.h>
#include <net/addrconf.h>
@ -735,9 +738,18 @@ static void reqsk_timer_handler(struct timer_list *t)
int max_retries, thresh;
u8 defer_accept;
#ifdef CONFIG_MPTCP
if (!is_meta_sk(sk_listener) && inet_sk_state_load(sk_listener) != TCP_LISTEN)
#else
if (inet_sk_state_load(sk_listener) != TCP_LISTEN)
#endif
goto drop;
#ifdef CONFIG_MPTCP
if (is_meta_sk(sk_listener) && !mptcp_can_new_subflow(sk_listener))
goto drop;
#endif
max_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries;
thresh = max_retries;
/* Normally all the openreqs are young and become mature
@ -1028,6 +1040,16 @@ void inet_csk_listen_stop(struct sock *sk)
*/
while ((req = reqsk_queue_remove(queue, sk)) != NULL) {
struct sock *child = req->sk;
#ifdef CONFIG_MPTCP
bool mutex_taken = false;
struct mptcp_cb *mpcb = tcp_sk(child)->mpcb;
if (is_meta_sk(child)) {
WARN_ON(refcount_inc_not_zero(&mpcb->mpcb_refcnt) == 0);
mutex_lock(&mpcb->mpcb_mutex);
mutex_taken = true;
}
#endif
local_bh_disable();
bh_lock_sock(child);
@ -1038,6 +1060,12 @@ void inet_csk_listen_stop(struct sock *sk)
reqsk_put(req);
bh_unlock_sock(child);
local_bh_enable();
#ifdef CONFIG_MPTCP
if (mutex_taken) {
mutex_unlock(&mpcb->mpcb_mutex);
mptcp_mpcb_put(mpcb);
}
#endif
sock_put(child);
cond_resched();


@ -43,6 +43,9 @@
#include <net/transp_v6.h>
#endif
#include <net/ip_fib.h>
#ifdef CONFIG_MPTCP
#include <net/mptcp.h>
#endif
#include <linux/errqueue.h>
#include <linux/uaccess.h>
@ -343,6 +346,10 @@ int ip_ra_control(struct sock *sk, unsigned char on,
return -EINVAL;
new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
#ifdef CONFIG_MPTCP
if (on && !new_ra)
return -ENOMEM;
#endif
mutex_lock(&net->ipv4.ra_mutex);
for (rap = &net->ipv4.ra_chain;
@ -655,7 +662,11 @@ static int do_ip_setsockopt(struct sock *sk, int level,
break;
old = rcu_dereference_protected(inet->inet_opt,
lockdep_sock_is_held(sk));
#ifdef CONFIG_MPTCP
if (inet->is_icsk && !is_meta_sk(sk)) {
#else
if (inet->is_icsk) {
#endif
struct inet_connection_sock *icsk = inet_csk(sk);
#if IS_ENABLED(CONFIG_IPV6)
if (sk->sk_family == PF_INET ||
@ -749,6 +760,23 @@ static int do_ip_setsockopt(struct sock *sk, int level,
inet->tos = val;
sk->sk_priority = rt_tos2priority(val);
sk_dst_reset(sk);
#ifdef CONFIG_MPTCP
/* Update TOS on mptcp subflow */
if (is_meta_sk(sk)) {
struct mptcp_tcp_sock *mptcp;
mptcp_for_each_sub(tcp_sk(sk)->mpcb, mptcp) {
struct sock *sk_it = mptcp_to_sock(mptcp);
if (inet_sk(sk_it)->tos != inet_sk(sk)->tos) {
inet_sk(sk_it)->tos = inet_sk(sk)->tos;
sk_it->sk_priority = sk->sk_priority;
sk_dst_reset(sk_it);
}
}
}
#endif
}
break;
case IP_TTL:


@ -16,6 +16,10 @@
#include <linux/siphash.h>
#include <linux/kernel.h>
#include <linux/export.h>
#ifdef CONFIG_MPTCP
#include <net/mptcp.h>
#include <net/mptcp_v4.h>
#endif
#include <net/secure_seq.h>
#include <net/tcp.h>
#include <net/route.h>
@ -179,7 +183,12 @@ u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th,
}
EXPORT_SYMBOL_GPL(__cookie_v4_init_sequence);
#ifdef CONFIG_MPTCP
__u32 cookie_v4_init_sequence(struct request_sock *req, const struct sock *sk,
const struct sk_buff *skb, __u16 *mssp)
#else
__u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mssp)
#endif
{
const struct iphdr *iph = ip_hdr(skb);
const struct tcphdr *th = tcp_hdr(skb);
@ -209,9 +218,27 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
struct inet_connection_sock *icsk = inet_csk(sk);
struct sock *child;
bool own_req;
#ifdef CONFIG_MPTCP
int ret;
#endif
child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst,
NULL, &own_req);
#ifdef CONFIG_MPTCP
if (!child)
goto listen_overflow;
ret = mptcp_check_req_master(sk, child, req, skb, 0, tsoff);
if (ret < 0)
return NULL;
if (!ret)
return tcp_sk(child)->mpcb->master_sk;
listen_overflow:
#endif
if (child) {
refcount_set(&req->rsk_refcnt, 1);
tcp_sk(child)->tsoffset = tsoff;
@ -289,6 +316,9 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
{
struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt;
struct tcp_options_received tcp_opt;
#ifdef CONFIG_MPTCP
struct mptcp_options_received mopt;
#endif
struct inet_request_sock *ireq;
struct tcp_request_sock *treq;
struct tcp_sock *tp = tcp_sk(sk);
@ -318,7 +348,12 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
/* check for timestamp cookie support */
memset(&tcp_opt, 0, sizeof(tcp_opt));
#ifdef CONFIG_MPTCP
mptcp_init_mp_opt(&mopt);
tcp_parse_options(sock_net(sk), skb, &tcp_opt, &mopt, 0, NULL, NULL);
#else
tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL);
#endif
if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
tsoff = secure_tcp_ts_off(sock_net(sk),
@ -331,6 +366,11 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
goto out;
ret = NULL;
#ifdef CONFIG_MPTCP
if (mopt.saw_mpc)
req = inet_reqsk_alloc(&mptcp_request_sock_ops, sk, false); /* for safety */
else
#endif
req = inet_reqsk_alloc(&tcp_request_sock_ops, sk, false); /* for safety */
if (!req)
goto out;
@ -352,6 +392,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
ireq->sack_ok = tcp_opt.sack_ok;
ireq->wscale_ok = tcp_opt.wscale_ok;
ireq->tstamp_ok = tcp_opt.saw_tstamp;
#ifdef CONFIG_MPTCP
ireq->mptcp_rqsk = 0;
ireq->saw_mpc = 0;
#endif
req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
treq->snt_synack = 0;
treq->tfo_listener = false;
@ -360,6 +404,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
ireq->ir_iif = inet_request_bound_dev_if(sk, skb);
#ifdef CONFIG_MPTCP
if (mopt.saw_mpc)
mptcp_cookies_reqsk_init(req, &mopt, skb);
#endif
/* We throwed the options of the initial SYN away, so we hope
* the ACK carries the same options again (see RFC1122 4.2.3.8)
*/
@ -398,10 +446,18 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
(req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0))
req->rsk_window_clamp = full_space;
#ifdef CONFIG_MPTCP
tp->ops->select_initial_window(sk, full_space, req->mss,
&req->rsk_rcv_wnd, &req->rsk_window_clamp,
ireq->wscale_ok, &rcv_wscale,
dst_metric(&rt->dst, RTAX_INITRWND));
#else
tcp_select_initial_window(sk, full_space, req->mss,
&req->rsk_rcv_wnd, &req->rsk_window_clamp,
ireq->wscale_ok, &rcv_wscale,
dst_metric(&rt->dst, RTAX_INITRWND));
#endif
ireq->rcv_wscale = rcv_wscale;
ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), &rt->dst);


@ -274,6 +274,9 @@
#include <net/icmp.h>
#include <net/inet_common.h>
#ifdef CONFIG_MPTCP
#include <net/mptcp.h>
#endif
#include <net/tcp.h>
#include <net/xfrm.h>
#include <net/ip.h>
@ -404,6 +407,30 @@ static u64 tcp_compute_delivery_rate(const struct tcp_sock *tp)
return rate64;
}
#ifdef CONFIG_MPTCP
static int select_size(const struct sock *sk, bool first_skb, bool zc);
#endif
#ifdef CONFIG_MPTCP
const struct tcp_sock_ops tcp_specific = {
.__select_window = __tcp_select_window,
.select_window = tcp_select_window,
.select_initial_window = tcp_select_initial_window,
.select_size = select_size,
.init_buffer_space = tcp_init_buffer_space,
.set_rto = tcp_set_rto,
.should_expand_sndbuf = tcp_should_expand_sndbuf,
.send_fin = tcp_send_fin,
.write_xmit = tcp_write_xmit,
.send_active_reset = tcp_send_active_reset,
.write_wakeup = tcp_write_wakeup,
.retransmit_timer = tcp_retransmit_timer,
.time_wait = tcp_time_wait,
.cleanup_rbuf = tcp_cleanup_rbuf,
.cwnd_validate = tcp_cwnd_validate,
};
#endif
/* Address-family independent initialization for a tcp_sock.
*
* NOTE: A lot of things set to zero explicitly by call to
@ -457,6 +484,12 @@ void tcp_init_sock(struct sock *sk)
sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1];
sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
#ifdef CONFIG_MPTCP
tp->ops = &tcp_specific;
/* Initialize MPTCP-specific stuff and function-pointers */
mptcp_init_tcp_sock(sk);
#endif
sk_sockets_allocated_inc(sk);
sk->sk_route_forced_caps = NETIF_F_GSO;
}
@ -471,7 +504,11 @@ void tcp_init_transfer(struct sock *sk, int bpf_op)
tcp_init_metrics(sk);
tcp_call_bpf(sk, bpf_op, 0, NULL);
tcp_init_congestion_control(sk);
#ifdef CONFIG_MPTCP
tcp_sk(sk)->ops->init_buffer_space(sk);
#else
tcp_init_buffer_space(sk);
#endif
}
static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
@ -811,6 +848,16 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
lock_sock(sk);
#ifdef CONFIG_MPTCP
if (mptcp(tcp_sk(sk))) {
struct mptcp_tcp_sock *mptcp;
mptcp_for_each_sub(tcp_sk(sk)->mpcb, mptcp) {
sock_rps_record_flow(mptcp_to_sock(mptcp));
}
}
#endif
timeo = sock_rcvtimeo(sk, sock->file->f_flags & O_NONBLOCK);
while (tss.len) {
ret = __tcp_splice_read(sk, &tss);
@ -914,7 +961,10 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
return NULL;
}
#ifndef CONFIG_MPTCP
static
#endif
unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
int large_allowed)
{
struct tcp_sock *tp = tcp_sk(sk);
@ -943,8 +993,17 @@ static int tcp_send_mss(struct sock *sk, int *size_goal, int flags)
{
int mss_now;
#ifdef CONFIG_MPTCP
if (mptcp(tcp_sk(sk))) {
mss_now = mptcp_current_mss(sk);
*size_goal = mptcp_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB));
} else {
#endif
mss_now = tcp_current_mss(sk);
*size_goal = tcp_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB));
#ifdef CONFIG_MPTCP
}
#endif
return mss_now;
}
@ -979,12 +1038,39 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
* is fully established.
*/
if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) &&
#ifdef CONFIG_MPTCP
!tcp_passive_fastopen(mptcp(tp) && tp->mpcb->master_sk ?
tp->mpcb->master_sk : sk)) {
#else
!tcp_passive_fastopen(sk)) {
#endif
err = sk_stream_wait_connect(sk, &timeo);
if (err != 0)
goto out_err;
}
#ifdef CONFIG_MPTCP
if (mptcp(tp)) {
struct mptcp_tcp_sock *mptcp;
/* We must check this with socket-lock hold because we iterate
* over the subflows.
*/
if (!mptcp_can_sendpage(sk)) {
ssize_t ret;
release_sock(sk);
ret = sock_no_sendpage(sk->sk_socket, page, offset,
size, flags);
lock_sock(sk);
return ret;
}
mptcp_for_each_sub(tp->mpcb, mptcp) {
sock_rps_record_flow(mptcp_to_sock(mptcp));
}
}
#endif
sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
mss_now = tcp_send_mss(sk, &size_goal, flags);
@ -1103,7 +1189,12 @@ EXPORT_SYMBOL_GPL(do_tcp_sendpages);
int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
size_t size, int flags)
{
/* If MPTCP is enabled, we check it later after establishment */
#ifdef CONFIG_MPTCP
if (!mptcp(tcp_sk(sk)) && !(sk->sk_route_caps & NETIF_F_SG))
#else
if (!(sk->sk_route_caps & NETIF_F_SG))
#endif
return sock_no_sendpage_locked(sk, page, offset, size, flags);
tcp_rate_check_app_limited(sk); /* is sending application-limited? */
@ -1135,14 +1226,21 @@ EXPORT_SYMBOL(tcp_sendpage);
* This also speeds up tso_fragment(), since it won't fall back
* to tcp_fragment().
*/
static int linear_payload_sz(bool first_skb)
#ifndef CONFIG_MPTCP
static
#endif
int linear_payload_sz(bool first_skb)
{
if (first_skb)
return SKB_WITH_OVERHEAD(2048 - MAX_TCP_HEADER);
return 0;
}
static int select_size(bool first_skb, bool zc)
#ifdef CONFIG_MPTCP
static int select_size(const struct sock *sk, bool first_skb, bool zc)
#else
int select_size(bool first_skb, bool zc)
#endif
{
if (zc)
return 0;
@ -1253,12 +1351,27 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
* is fully established.
*/
if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) &&
#ifdef CONFIG_MPTCP
!tcp_passive_fastopen(mptcp(tp) && tp->mpcb->master_sk ?
tp->mpcb->master_sk : sk)) {
#else
!tcp_passive_fastopen(sk)) {
#endif
err = sk_stream_wait_connect(sk, &timeo);
if (err != 0)
goto do_error;
}
#ifdef CONFIG_MPTCP
if (mptcp(tp)) {
struct mptcp_tcp_sock *mptcp;
mptcp_for_each_sub(tp->mpcb, mptcp) {
sock_rps_record_flow(mptcp_to_sock(mptcp));
}
}
#endif
if (unlikely(tp->repair)) {
if (tp->repair_queue == TCP_RECV_QUEUE) {
copied = tcp_send_rcvq(sk, msg, size);
@ -1314,7 +1427,11 @@ new_segment:
goto restart;
}
first_skb = tcp_rtx_and_write_queues_empty(sk);
#ifdef CONFIG_MPTCP
linear = tp->ops->select_size(sk, first_skb, zc);
#else
linear = select_size(first_skb, zc);
#endif
skb = sk_stream_alloc_skb(sk, linear, sk->sk_allocation,
first_skb);
if (!skb)
@ -1552,7 +1669,10 @@ static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len)
* calculation of whether or not we must ACK for the sake of
* a window update.
*/
static void tcp_cleanup_rbuf(struct sock *sk, int copied)
#ifndef CONFIG_MPTCP
static
#endif
void tcp_cleanup_rbuf(struct sock *sk, int copied)
{
struct tcp_sock *tp = tcp_sk(sk);
bool time_to_ack = false;
@ -1598,7 +1718,11 @@ static void tcp_cleanup_rbuf(struct sock *sk, int copied)
/* Optimize, __tcp_select_window() is not cheap. */
if (2*rcv_window_now <= tp->window_clamp) {
#ifdef CONFIG_MPTCP
__u32 new_window = tp->ops->__select_window(sk);
#else
__u32 new_window = __tcp_select_window(sk);
#endif
/* Send ACK now, if this read freed lots of space
* in our buffer. Certainly, new_window is new window.
@ -1716,7 +1840,11 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
/* Clean up data we have read: This will do ACK frames. */
if (copied > 0) {
tcp_recv_skb(sk, seq, &offset);
#ifdef CONFIG_MPTCP
tp->ops->cleanup_rbuf(sk, copied);
#else
tcp_cleanup_rbuf(sk, copied);
#endif
}
return copied;
}
@ -1974,6 +2102,16 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
lock_sock(sk);
#ifdef CONFIG_MPTCP
if (mptcp(tp)) {
struct mptcp_tcp_sock *mptcp;
mptcp_for_each_sub(tp->mpcb, mptcp) {
sock_rps_record_flow(mptcp_to_sock(mptcp));
}
}
#endif
err = -ENOTCONN;
if (sk->sk_state == TCP_LISTEN)
goto out;
@ -2092,7 +2230,11 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
}
}
#ifdef CONFIG_MPTCP
tp->ops->cleanup_rbuf(sk, copied);
#else
tcp_cleanup_rbuf(sk, copied);
#endif
if (copied >= target) {
/* Do not sleep, just process backlog. */
@ -2185,7 +2327,11 @@ skip_copy:
*/
/* Clean up data we have read: This will do ACK frames. */
#ifdef CONFIG_MPTCP
tp->ops->cleanup_rbuf(sk, copied);
#else
tcp_cleanup_rbuf(sk, copied);
#endif
release_sock(sk);
@ -2297,7 +2443,10 @@ static const unsigned char new_state[16] = {
[TCP_NEW_SYN_RECV] = TCP_CLOSE, /* should not happen ! */
};
static int tcp_close_state(struct sock *sk)
#ifndef CONFIG_MPTCP
static
#endif
int tcp_close_state(struct sock *sk)
{
int next = (int)new_state[sk->sk_state];
int ns = next & TCP_STATE_MASK;
@ -2327,7 +2476,11 @@ void tcp_shutdown(struct sock *sk, int how)
TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) {
/* Clear out any half completed packets. FIN if needed. */
if (tcp_close_state(sk))
#ifdef CONFIG_MPTCP
tcp_sk(sk)->ops->send_fin(sk);
#else
tcp_send_fin(sk);
#endif
}
}
EXPORT_SYMBOL(tcp_shutdown);
@ -2351,6 +2504,18 @@ void tcp_close(struct sock *sk, long timeout)
struct sk_buff *skb;
int data_was_unread = 0;
int state;
#ifdef CONFIG_MPTCP
if (is_meta_sk(sk)) {
/* TODO: Currently forcing the timeout to 0 because
* sk_stream_wait_close() would trigger a lockdep splat
* due to the mpcb_mutex (circular lock dependency through
* inet_csk_listen_stop()).
* We should find a way to get rid of the mpcb_mutex.
*/
mptcp_close(sk, 0);
return;
}
#endif
lock_sock(sk);
sk->sk_shutdown = SHUTDOWN_MASK;
@ -2396,7 +2561,11 @@ void tcp_close(struct sock *sk, long timeout)
/* Unread data was tossed, zap the connection. */
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
tcp_set_state(sk, TCP_CLOSE);
#ifdef CONFIG_MPTCP
tcp_sk(sk)->ops->send_active_reset(sk, sk->sk_allocation);
#else
tcp_send_active_reset(sk, sk->sk_allocation);
#endif
} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
/* Check zero linger _after_ checking for unread data. */
sk->sk_prot->disconnect(sk, 0);
@ -2470,7 +2639,11 @@ adjudge_to_death:
struct tcp_sock *tp = tcp_sk(sk);
if (tp->linger2 < 0) {
tcp_set_state(sk, TCP_CLOSE);
#ifdef CONFIG_MPTCP
tp->ops->send_active_reset(sk, GFP_ATOMIC);
#else
tcp_send_active_reset(sk, GFP_ATOMIC);
#endif
__NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPABORTONLINGER);
} else {
@ -2480,7 +2653,12 @@ adjudge_to_death:
inet_csk_reset_keepalive_timer(sk,
tmo - TCP_TIMEWAIT_LEN);
} else {
#ifdef CONFIG_MPTCP
tcp_sk(sk)->ops->time_wait(sk, TCP_FIN_WAIT2,
tmo);
#else
tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
#endif
goto out;
}
}
@ -2489,7 +2667,11 @@ adjudge_to_death:
sk_mem_reclaim(sk);
if (tcp_check_oom(sk, 0)) {
tcp_set_state(sk, TCP_CLOSE);
#ifdef CONFIG_MPTCP
tcp_sk(sk)->ops->send_active_reset(sk, GFP_ATOMIC);
#else
tcp_send_active_reset(sk, GFP_ATOMIC);
#endif
__NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPABORTONMEMORY);
} else if (!check_net(sock_net(sk))) {
@ -2518,6 +2700,7 @@ out:
}
EXPORT_SYMBOL(tcp_close);
#ifndef CONFIG_MPTCP
/* These states need RST on ABORT according to RFC793 */
static inline bool tcp_need_reset(int state)
@ -2526,7 +2709,7 @@ static inline bool tcp_need_reset(int state)
(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 |
TCPF_FIN_WAIT2 | TCPF_SYN_RECV | TCPF_SYN_SENT);
}
#endif
static void tcp_rtx_queue_purge(struct sock *sk)
{
struct rb_node *p = rb_first(&sk->tcp_rtx_queue);
@ -2547,7 +2730,11 @@ static void tcp_rtx_queue_purge(struct sock *sk)
void tcp_write_queue_purge(struct sock *sk)
{
struct sk_buff *skb;
#ifdef CONFIG_MPTCP
if (mptcp(tcp_sk(sk)) && !is_meta_sk(sk) &&
!tcp_rtx_and_write_queues_empty(sk))
mptcp_reinject_data(sk, 0);
#endif
tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
tcp_skb_tsorted_anchor_cleanup(skb);
@ -2586,7 +2773,11 @@ int tcp_disconnect(struct sock *sk, int flags)
/* The last check adjusts for discrepancy of Linux wrt. RFC
* states
*/
#ifdef CONFIG_MPTCP
tp->ops->send_active_reset(sk, gfp_any());
#else
tcp_send_active_reset(sk, gfp_any());
#endif
sk->sk_err = ECONNRESET;
}
@ -2603,6 +2794,15 @@ int tcp_disconnect(struct sock *sk, int flags)
if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
inet_reset_saddr(sk);
#ifdef CONFIG_MPTCP
if (is_meta_sk(sk)) {
mptcp_disconnect(sk);
} else {
if (tp->inside_tk_table)
mptcp_hash_remove_bh(tp);
}
#endif
sk->sk_shutdown = 0;
sock_reset_flag(sk, SOCK_DONE);
tp->srtt_us = 0;
@ -2669,8 +2869,13 @@ EXPORT_SYMBOL(tcp_disconnect);
static inline bool tcp_can_repair_sock(const struct sock *sk)
{
#ifdef CONFIG_MPTCP
return ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN) &&
(sk->sk_state != TCP_LISTEN) && !sock_flag(sk, SOCK_MPTCP);
#else
return ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN) &&
(sk->sk_state != TCP_LISTEN);
#endif
}
static int tcp_repair_set_window(struct tcp_sock *tp, char __user *optbuf, int len)
@ -2816,6 +3021,61 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
return tcp_fastopen_reset_cipher(net, sk, key, sizeof(key));
}
#ifdef CONFIG_MPTCP
case MPTCP_SCHEDULER: {
char name[MPTCP_SCHED_NAME_MAX];
if (optlen < 1)
return -EINVAL;
/* Cannot be used if MPTCP is disabled or an MPTCP connection
* has already been established.
*/
if (mptcp_init_failed || !sysctl_mptcp_enabled ||
sk->sk_state != TCP_CLOSE)
return -EPERM;
val = strncpy_from_user(name, optval,
min_t(long, MPTCP_SCHED_NAME_MAX - 1,
optlen));
if (val < 0)
return -EFAULT;
name[val] = 0;
lock_sock(sk);
err = mptcp_set_scheduler(sk, name);
release_sock(sk);
return err;
}
case MPTCP_PATH_MANAGER: {
char name[MPTCP_PM_NAME_MAX];
if (optlen < 1)
return -EINVAL;
/* Cannot be used if MPTCP is disabled or an MPTCP connection
* has already been established.
*/
if (mptcp_init_failed || !sysctl_mptcp_enabled ||
sk->sk_state != TCP_CLOSE)
return -EPERM;
val = strncpy_from_user(name, optval,
min_t(long, MPTCP_PM_NAME_MAX - 1,
optlen));
if (val < 0)
return -EFAULT;
name[val] = 0;
lock_sock(sk);
err = mptcp_set_path_manager(sk, name);
release_sock(sk);
return err;
}
#endif
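The MPTCP_SCHEDULER and MPTCP_PATH_MANAGER cases above accept a scheduler/path-manager name from userspace while the socket is still in TCP_CLOSE. A minimal userspace sketch of setting the scheduler follows; it assumes the MPTCP_SCHEDULER constant is exported by this tree's uapi <linux/tcp.h> and that a scheduler named "roundrobin" is registered (both are assumptions, not visible in this diff):

#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>
/* MPTCP_SCHEDULER is assumed to come from this tree's uapi headers. */
#include <linux/tcp.h>

int main(void)
{
        const char sched[] = "roundrobin";   /* assumed scheduler name */
        int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);

        if (fd < 0) {
                perror("socket");
                return 1;
        }
        /* Must happen while the socket is still in TCP_CLOSE; the handler
         * above returns -EPERM once a connection exists or when MPTCP is
         * globally disabled via sysctl.
         */
        if (setsockopt(fd, IPPROTO_TCP, MPTCP_SCHEDULER,
                       sched, strlen(sched)) < 0)
                perror("setsockopt(MPTCP_SCHEDULER)");

        /* ... connect() and use the socket as usual ... */
        close(fd);
        return 0;
}

Setting MPTCP_PATH_MANAGER works the same way, with a path-manager name such as "fullmesh" (again an assumption about what this tree registers).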
default:
/* fallthru */
break;
@ -3005,6 +3265,14 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
break;
case TCP_DEFER_ACCEPT:
#ifdef CONFIG_MPTCP
/* An established MPTCP-connection (mptcp(tp) only returns true
* if the socket is established) should not use DEFER on new
* subflows.
*/
if (mptcp(tp))
break;
#endif
/* Translate value in seconds to number of retransmits */
icsk->icsk_accept_queue.rskq_defer_accept =
secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
@ -3032,7 +3300,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
inet_csk_ack_scheduled(sk)) {
icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
#ifdef CONFIG_MPTCP
tp->ops->cleanup_rbuf(sk, 1);
#else
tcp_cleanup_rbuf(sk, 1);
#endif
if (!(val & 1))
icsk->icsk_ack.pingpong = 1;
}
@ -3099,6 +3371,32 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
tp->notsent_lowat = val;
sk->sk_write_space(sk);
break;
#ifdef CONFIG_MPTCP
case MPTCP_ENABLED:
if (mptcp_init_failed || !sysctl_mptcp_enabled ||
sk->sk_state != TCP_CLOSE
#ifdef CONFIG_TCP_MD5SIG
|| tp->md5sig_info
#endif
) {
err = -EPERM;
break;
}
if (val)
mptcp_enable_sock(sk);
else
mptcp_disable_sock(sk);
break;
case MPTCP_INFO:
if (mptcp_init_failed || !sysctl_mptcp_enabled) {
err = -EPERM;
break;
}
tp->record_master_info = !!(val & MPTCP_INFO_FLAG_SAVE_MASTER);
break;
#endif
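The MPTCP_ENABLED case above is a per-socket opt-in that is only accepted while the socket is still in TCP_CLOSE, MPTCP is enabled via sysctl, and no TCP-MD5 key is installed. A hedged userspace sketch, again assuming MPTCP_ENABLED is exported by this tree's uapi <linux/tcp.h>:

#include <netinet/in.h>
#include <sys/socket.h>
/* MPTCP_ENABLED is assumed to come from this tree's uapi headers. */
#include <linux/tcp.h>

/* Opt a socket in to (or out of) MPTCP before connect()/listen(); the
 * setsockopt handler above returns -EPERM once the socket has left
 * TCP_CLOSE, when MPTCP is disabled via sysctl, or when a TCP-MD5 key
 * is already installed.
 */
static int enable_mptcp(int fd, int on)
{
        return setsockopt(fd, IPPROTO_TCP, MPTCP_ENABLED, &on, sizeof(on));
}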
case TCP_INQ:
if (val > 1 || val < 0)
err = -EINVAL;
@ -3158,7 +3456,11 @@ static void tcp_get_info_chrono_stats(const struct tcp_sock *tp,
}
/* Return information about state of tcp endpoint in API format. */
#ifdef CONFIG_MPTCP
void tcp_get_info(struct sock *sk, struct tcp_info *info, bool no_lock)
#else
void tcp_get_info(struct sock *sk, struct tcp_info *info)
#endif
{
const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */
const struct inet_connection_sock *icsk = inet_csk(sk);
@ -3195,7 +3497,12 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
return;
}
#ifdef CONFIG_MPTCP
if (!no_lock)
slow = lock_sock_fast(sk);
#else
slow = lock_sock_fast(sk);
#endif
info->tcpi_ca_state = icsk->icsk_ca_state;
info->tcpi_retransmits = icsk->icsk_retransmits;
@ -3269,7 +3576,12 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_bytes_retrans = tp->bytes_retrans;
info->tcpi_dsack_dups = tp->dsack_dups;
info->tcpi_reord_seen = tp->reord_seen;
#ifdef CONFIG_MPTCP
if (!no_lock)
unlock_sock_fast(sk, slow);
#else
unlock_sock_fast(sk, slow);
#endif
}
EXPORT_SYMBOL_GPL(tcp_get_info);
@ -3414,7 +3726,11 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
if (get_user(len, optlen))
return -EFAULT;
#ifdef CONFIG_MPTCP
tcp_get_info(sk, &info, false);
#else
tcp_get_info(sk, &info);
#endif
len = min_t(unsigned int, len, sizeof(info));
if (put_user(len, optlen))
@ -3605,6 +3921,87 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
}
return 0;
}
#ifdef CONFIG_MPTCP
case MPTCP_SCHEDULER:
if (get_user(len, optlen))
return -EFAULT;
len = min_t(unsigned int, len, MPTCP_SCHED_NAME_MAX);
if (put_user(len, optlen))
return -EFAULT;
lock_sock(sk);
if (mptcp(tcp_sk(sk))) {
struct mptcp_cb *mpcb = tcp_sk(mptcp_meta_sk(sk))->mpcb;
if (copy_to_user(optval, mpcb->sched_ops->name, len)) {
release_sock(sk);
return -EFAULT;
}
} else {
if (copy_to_user(optval, tcp_sk(sk)->mptcp_sched_name,
len)) {
release_sock(sk);
return -EFAULT;
}
}
release_sock(sk);
return 0;
case MPTCP_PATH_MANAGER:
if (get_user(len, optlen))
return -EFAULT;
len = min_t(unsigned int, len, MPTCP_PM_NAME_MAX);
if (put_user(len, optlen))
return -EFAULT;
lock_sock(sk);
if (mptcp(tcp_sk(sk))) {
struct mptcp_cb *mpcb = tcp_sk(mptcp_meta_sk(sk))->mpcb;
if (copy_to_user(optval, mpcb->pm_ops->name, len)) {
release_sock(sk);
return -EFAULT;
}
} else {
if (copy_to_user(optval, tcp_sk(sk)->mptcp_pm_name,
len)) {
release_sock(sk);
return -EFAULT;
}
}
release_sock(sk);
return 0;
case MPTCP_ENABLED:
if (sk->sk_state != TCP_SYN_SENT)
val = mptcp(tp) ? 1 : 0;
else
val = sock_flag(sk, SOCK_MPTCP) ? 1 : 0;
break;
case MPTCP_INFO:
{
int ret;
if (!mptcp(tp))
return -EINVAL;
if (get_user(len, optlen))
return -EFAULT;
len = min_t(unsigned int, len, sizeof(struct mptcp_info));
lock_sock(sk);
ret = mptcp_get_info(sk, optval, len);
release_sock(sk);
if (ret)
return ret;
if (put_user(len, optlen))
return -EFAULT;
return 0;
}
#endif
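On the read side, the MPTCP_ENABLED getsockopt case above reports whether MPTCP was actually negotiated once the handshake is done (the peer may have fallen back to plain TCP), while during TCP_SYN_SENT it reflects the SOCK_MPTCP flag. A small sketch of that query, with the same assumption about where MPTCP_ENABLED is defined:

#include <netinet/in.h>
#include <sys/socket.h>
/* MPTCP_ENABLED is assumed to come from this tree's uapi headers. */
#include <linux/tcp.h>

/* Returns 1 if the connection negotiated MPTCP, 0 if it fell back to
 * plain TCP, -1 on error.
 */
static int mptcp_negotiated(int fd)
{
        int val = 0;
        socklen_t len = sizeof(val);

        if (getsockopt(fd, IPPROTO_TCP, MPTCP_ENABLED, &val, &len) < 0)
                return -1;
        return val;
}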
#ifdef CONFIG_MMU
case TCP_ZEROCOPY_RECEIVE: {
struct tcp_zerocopy_receive zc;
@ -3807,6 +4204,9 @@ void tcp_done(struct sock *sk)
if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
#ifdef CONFIG_MPTCP
//WARN_ON(sk->sk_state == TCP_CLOSE);
#endif
tcp_set_state(sk, TCP_CLOSE);
tcp_clear_xmit_timers(sk);
if (req)
@ -3823,6 +4223,9 @@ EXPORT_SYMBOL_GPL(tcp_done);
int tcp_abort(struct sock *sk, int err)
{
#ifdef CONFIG_MPTCP
struct sock *meta_sk = mptcp(tcp_sk(sk)) ? mptcp_meta_sk(sk) : sk;
#endif
if (!sk_fullsock(sk)) {
if (sk->sk_state == TCP_NEW_SYN_RECV) {
struct request_sock *req = inet_reqsk(sk);
@ -3836,7 +4239,11 @@ int tcp_abort(struct sock *sk, int err)
}
/* Don't race with userspace socket closes such as tcp_close. */
#ifdef CONFIG_MPTCP
lock_sock(meta_sk);
#else
lock_sock(sk);
#endif
if (sk->sk_state == TCP_LISTEN) {
tcp_set_state(sk, TCP_CLOSE);
@ -3845,22 +4252,39 @@ int tcp_abort(struct sock *sk, int err)
/* Don't race with BH socket closes such as inet_csk_listen_stop. */
local_bh_disable();
#ifdef CONFIG_MPTCP
bh_lock_sock(meta_sk);
#else
bh_lock_sock(sk);
#endif
if (!sock_flag(sk, SOCK_DEAD)) {
sk->sk_err = err;
/* This barrier is coupled with smp_rmb() in tcp_poll() */
smp_wmb();
sk->sk_error_report(sk);
#ifdef CONFIG_MPTCP
if (tcp_need_reset(sk->sk_state))
tcp_sk(sk)->ops->send_active_reset(sk, GFP_ATOMIC);
#else
if (tcp_need_reset(sk->sk_state))
tcp_send_active_reset(sk, GFP_ATOMIC);
#endif
tcp_done(sk);
}
#ifdef CONFIG_MPTCP
bh_unlock_sock(meta_sk);
#else
bh_unlock_sock(sk);
#endif
local_bh_enable();
tcp_write_queue_purge(sk);
#ifdef CONFIG_MPTCP
release_sock(meta_sk);
#else
release_sock(sk);
#endif
return 0;
}
EXPORT_SYMBOL_GPL(tcp_abort);

View File

@ -34,8 +34,15 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
READ_ONCE(tp->copied_seq), 0);
r->idiag_wqueue = READ_ONCE(tp->write_seq) - tp->snd_una;
}
#ifdef CONFIG_MPTCP
if (info)
tcp_get_info(sk, info, false);
#else
if (info)
tcp_get_info(sk, info);
#endif
}
#ifdef CONFIG_TCP_MD5SIG

View File

@ -9,6 +9,9 @@
#include <linux/rculist.h>
#include <net/inetpeer.h>
#include <net/tcp.h>
#ifdef CONFIG_MPTCP
#include <net/mptcp.h>
#endif
void tcp_fastopen_init_key_once(struct net *net)
{
@ -219,6 +222,10 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
struct tcp_sock *tp;
struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
struct sock *child;
#ifdef CONFIG_MPTCP
struct sock *meta_sk;
int ret;
#endif
bool own_req;
req->num_retrans = 0;
@ -258,8 +265,10 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
refcount_set(&req->rsk_refcnt, 2);
#ifndef CONFIG_MPTCP
/* Now finish processing the fastopen child socket. */
tcp_init_transfer(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
#endif
tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
@ -267,6 +276,20 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
tcp_rsk(req)->rcv_nxt = tp->rcv_nxt;
tp->rcv_wup = tp->rcv_nxt;
#ifdef CONFIG_MPTCP
meta_sk = child;
ret = mptcp_check_req_fastopen(meta_sk, req);
if (ret < 0)
return NULL;
if (ret == 0) {
child = tcp_sk(meta_sk)->mpcb->master_sk;
tp = tcp_sk(child);
}
/* Now finish processing the fastopen child socket. */
tcp_init_transfer(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
#endif
/* tcp_conn_request() is sending the SYNACK,
* and queues the child into listener accept queue.
*/

View File

@ -76,12 +76,18 @@
#include <linux/ipsec.h>
#include <asm/unaligned.h>
#include <linux/errqueue.h>
#ifdef CONFIG_MPTCP
#include <net/mptcp.h>
#include <net/mptcp_v4.h>
#include <net/mptcp_v6.h>
#endif
#include <trace/events/tcp.h>
#include <linux/static_key.h>
#include <net/busy_poll.h>
int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
#ifndef CONFIG_MPTCP
#define FLAG_DATA 0x01 /* Incoming frame contained data. */
#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
#define FLAG_DATA_ACKED 0x04 /* This ACK acknowledged new data. */
@ -104,6 +110,7 @@ int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE|FLAG_DSACKING_ACK)
#define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED)
#endif
#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))
@ -343,8 +350,16 @@ static void tcp_sndbuf_expand(struct sock *sk)
per_mss = roundup_pow_of_two(per_mss) +
SKB_DATA_ALIGN(sizeof(struct sk_buff));
#ifdef CONFIG_MPTCP
if (mptcp(tp)) {
nr_segs = mptcp_check_snd_buf(tp);
} else {
#endif
nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd);
nr_segs = max_t(u32, nr_segs, tp->reordering + 1);
#ifdef CONFIG_MPTCP
}
#endif
/* Fast Recovery (RFC 5681 3.2) :
* Cubic needs 1.7 factor, rounded to 2 to include
@ -353,8 +368,23 @@ static void tcp_sndbuf_expand(struct sock *sk)
sndmem = ca_ops->sndbuf_expand ? ca_ops->sndbuf_expand(sk) : 2;
sndmem *= nr_segs * per_mss;
/* MPTCP: after this point, sndmem is the new contribution of
* the current subflow to the aggregated sndbuf.
*/
if (sk->sk_sndbuf < sndmem)
#ifdef CONFIG_MPTCP
{
int old_sndbuf = sk->sk_sndbuf;
#endif
sk->sk_sndbuf = min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]);
#ifdef CONFIG_MPTCP
/* MPTCP: ok, the subflow sndbuf has grown, reflect
* this in the aggregate buffer.
*/
if (mptcp(tp) && old_sndbuf != sk->sk_sndbuf)
mptcp_update_sndbuf(tp);
}
#endif
}
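The two MPTCP comments above treat the per-subflow sndmem as a contribution to an aggregated, meta-level send buffer. A rough, hypothetical sketch of that aggregation idea follows; the real mptcp_update_sndbuf() lives outside this diff, and the clamping and iteration details here are assumptions:

#include <net/tcp.h>
#include <net/mptcp.h>

/* Conceptual sketch only: sum the per-subflow send buffers into the
 * meta-socket's sk_sndbuf, clamped to the global tcp_wmem[2] limit.
 * Iterator and field names mirror this diff; the exact logic of the
 * real mptcp_update_sndbuf() may differ.
 */
static void mptcp_update_sndbuf_sketch(struct sock *meta_sk)
{
        struct tcp_sock *meta_tp = tcp_sk(meta_sk);
        struct mptcp_tcp_sock *mptcp;
        int new_sndbuf = 0;

        mptcp_for_each_sub(meta_tp->mpcb, mptcp) {
                struct sock *sub_sk = mptcp_to_sock(mptcp);

                new_sndbuf += sub_sk->sk_sndbuf;
        }

        meta_sk->sk_sndbuf = min(new_sndbuf,
                                 sock_net(meta_sk)->ipv4.sysctl_tcp_wmem[2]);
}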
/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
@ -403,10 +433,20 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
#ifdef CONFIG_MPTCP
struct sock *meta_sk = mptcp(tp) ? mptcp_meta_sk(sk) : sk;
struct tcp_sock *meta_tp = tcp_sk(meta_sk);
#endif
int room;
room = min_t(int, tp->window_clamp, tcp_space(sk)) - tp->rcv_ssthresh;
#ifdef CONFIG_MPTCP
if (is_meta_sk(sk))
return;
room = min_t(int, meta_tp->window_clamp, tcp_space(meta_sk)) - meta_tp->rcv_ssthresh;
#else
room = min_t(int, tp->window_clamp, tcp_space(sk)) - tp->rcv_ssthresh;
#endif
/* Check #1 */
if (room > 0 && !tcp_under_memory_pressure(sk)) {
int incr;
@ -415,13 +455,22 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
* will fit to rcvbuf in future.
*/
if (tcp_win_from_space(sk, skb->truesize) <= skb->len)
#ifdef CONFIG_MPTCP
incr = 2 * meta_tp->advmss;
else
incr = __tcp_grow_window(meta_sk, skb);
#else
incr = 2 * tp->advmss;
else
incr = __tcp_grow_window(sk, skb);
#endif
if (incr) {
incr = max_t(int, incr, 2 * skb->len);
#ifdef CONFIG_MPTCP
meta_tp->rcv_ssthresh += min(room, incr);
#else
tp->rcv_ssthresh += min(room, incr);
#endif
inet_csk(sk)->icsk_ack.quick |= 1;
}
}
@ -604,7 +653,14 @@ void tcp_rcv_space_adjust(struct sock *sk)
tcp_mstamp_refresh(tp);
time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time);
#ifdef CONFIG_MPTCP
if (mptcp(tp)) {
if (mptcp_check_rtt(tp, time))
return;
} else if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
#else
if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
#endif
return;
/* Number of bytes copied to user in last RTT */
@ -823,7 +879,10 @@ static void tcp_update_pacing_rate(struct sock *sk)
/* Calculate rto without backoff. This is the second half of Van Jacobson's
* routine referred to above.
*/
static void tcp_set_rto(struct sock *sk)
#ifndef CONFIG_MPTCP
static
#endif
void tcp_set_rto(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
/* Old crap is replaced with new one. 8)
@ -1395,6 +1454,14 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
int len;
int in_sack;
#ifdef CONFIG_MPTCP
/* For MPTCP we cannot shift skb-data and remove one skb from the
* send-queue, because this will make us lose the DSS-option (which
* is stored in TCP_SKB_CB(skb)->dss) of the skb we are removing.
*/
if (mptcp(tp))
goto fallback;
#endif
/* Normally R but no L won't result in plain S */
if (!dup_sack &&
(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS)
@ -2968,7 +3035,11 @@ static bool tcp_ack_update_rtt(struct sock *sk, const int flag,
*/
tcp_update_rtt_min(sk, ca_rtt_us, flag);
tcp_rtt_estimator(sk, seq_rtt_us);
#ifdef CONFIG_MPTCP
tp->ops->set_rto(sk);
#else
tcp_set_rto(sk);
#endif
/* RFC6298: only reset backoff on valid RTT measurement. */
inet_csk(sk)->icsk_backoff = 0;
@ -3036,7 +3107,10 @@ static void tcp_set_xmit_timer(struct sock *sk)
}
/* If we get here, the whole TSO packet has not been acked. */
static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
#ifndef CONFIG_MPTCP
static
#endif
u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
u32 packets_acked;
@ -3162,6 +3236,10 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
*/
if (likely(!(scb->tcp_flags & TCPHDR_SYN))) {
flag |= FLAG_DATA_ACKED;
#ifdef CONFIG_MPTCP
if (mptcp(tp) && mptcp_is_data_seq(skb))
flag |= MPTCP_FLAG_DATA_ACKED;
#endif
} else {
flag |= FLAG_SYN_ACKED;
tp->retrans_stamp = 0;
@ -3281,7 +3359,10 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
return flag;
}
static void tcp_ack_probe(struct sock *sk)
#ifndef CONFIG_MPTCP
static
#endif
void tcp_ack_probe(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct sk_buff *head = tcp_send_head(sk);
@ -3354,9 +3435,14 @@ static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked,
/* Check that window update is acceptable.
* The function assumes that snd_una<=ack<=snd_next.
*/
#ifdef CONFIG_MPTCP
bool tcp_may_update_window(const struct tcp_sock *tp, const u32 ack,
const u32 ack_seq, const u32 nwin)
#else
static inline bool tcp_may_update_window(const struct tcp_sock *tp,
const u32 ack, const u32 ack_seq,
const u32 nwin)
#endif
{
return after(ack, tp->snd_una) ||
after(ack_seq, tp->snd_wl1) ||
@ -3595,7 +3681,11 @@ static u32 tcp_newly_delivered(struct sock *sk, u32 prior_delivered, int flag)
}
/* This routine deals with incoming acks, but not outgoing ones. */
#ifdef CONFIG_MPTCP
static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
#else
static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
#endif
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
@ -3717,6 +3807,18 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
tcp_rack_update_reo_wnd(sk, &rs);
#ifdef CONFIG_MPTCP
if (mptcp(tp)) {
if (mptcp_fallback_infinite(sk, flag)) {
pr_debug("%s resetting flow\n", __func__);
mptcp_send_reset(sk);
goto invalid_ack;
}
mptcp_clean_rtx_infinite(skb, sk);
}
#endif
if (tp->tlp_high_seq)
tcp_process_tlp_ack(sk, ack, flag);
@ -3817,8 +3919,16 @@ static void smc_parse_options(const struct tcphdr *th,
*/
void tcp_parse_options(const struct net *net,
const struct sk_buff *skb,
struct tcp_options_received *opt_rx, int estab,
struct tcp_fastopen_cookie *foc)
struct tcp_options_received *opt_rx,
#ifdef CONFIG_MPTCP
struct mptcp_options_received *mopt,
#endif
int estab,
struct tcp_fastopen_cookie *foc
#ifdef CONFIG_MPTCP
, struct tcp_sock *tp
#endif
)
{
const unsigned char *ptr;
const struct tcphdr *th = tcp_hdr(skb);
@ -3901,6 +4011,11 @@ void tcp_parse_options(const struct net *net,
* checked (see tcp_v{4,6}_do_rcv()).
*/
break;
#endif
#ifdef CONFIG_MPTCP
case TCPOPT_MPTCP:
mptcp_parse_options(ptr - 2, opsize, mopt, skb, tp);
break;
#endif
case TCPOPT_FASTOPEN:
tcp_parse_fastopen_option(
@ -3969,7 +4084,12 @@ static bool tcp_fast_parse_options(const struct net *net,
return true;
}
#ifdef CONFIG_MPTCP
tcp_parse_options(net, skb, &tp->rx_opt,
mptcp(tp) ? &tp->mptcp->rx_opt : NULL, 1, NULL, tp);
#else
tcp_parse_options(net, skb, &tp->rx_opt, 1, NULL);
#endif
if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
tp->rx_opt.rcv_tsecr -= tp->tsoffset;
@ -4128,6 +4248,13 @@ void tcp_fin(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
#ifdef CONFIG_MPTCP
if (is_meta_sk(sk)) {
mptcp_fin(sk);
return;
}
#endif
inet_csk_schedule_ack(sk);
sk->sk_shutdown |= RCV_SHUTDOWN;
@ -4138,6 +4265,11 @@ void tcp_fin(struct sock *sk)
case TCP_ESTABLISHED:
/* Move to CLOSE_WAIT */
tcp_set_state(sk, TCP_CLOSE_WAIT);
#ifdef CONFIG_MPTCP
if (mptcp(tp))
mptcp_sub_close_passive(sk);
#endif
inet_csk(sk)->icsk_ack.pingpong = 1;
break;
@ -4160,9 +4292,22 @@ void tcp_fin(struct sock *sk)
tcp_set_state(sk, TCP_CLOSING);
break;
case TCP_FIN_WAIT2:
#ifdef CONFIG_MPTCP
if (mptcp(tp)) {
/* The socket will get closed by mptcp_data_ready.
* We first have to process all data-sequences.
*/
tp->close_it = 1;
break;
}
#endif
/* Received a FIN -- send ACK and enter TIME_WAIT. */
tcp_send_ack(sk);
#ifdef CONFIG_MPTCP
tp->ops->time_wait(sk, TCP_TIME_WAIT, 0);
#else
tcp_time_wait(sk, TCP_TIME_WAIT, 0);
#endif
break;
default:
/* Only TCP_LISTEN and TCP_CLOSE are left, in these
@ -4183,6 +4328,11 @@ void tcp_fin(struct sock *sk)
if (!sock_flag(sk, SOCK_DEAD)) {
sk->sk_state_change(sk);
#ifdef CONFIG_MPTCP
/* Don't wake up MPTCP-subflows */
if (mptcp(tp))
return;
#endif
/* Do not send POLL_HUP for half duplex close. */
if (sk->sk_shutdown == SHUTDOWN_MASK ||
@ -4386,6 +4536,11 @@ static bool tcp_try_coalesce(struct sock *sk,
*fragstolen = false;
#ifdef CONFIG_MPTCP
if (mptcp(tcp_sk(sk)) && !is_meta_sk(sk))
return false;
#endif
/* Its possible this segment overlaps with prior segment in queue */
if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq)
return false;
@ -4440,7 +4595,10 @@ static void tcp_drop(struct sock *sk, struct sk_buff *skb)
/* This one checks to see if we can put data from the
* out_of_order queue into the receive_queue.
*/
static void tcp_ofo_queue(struct sock *sk)
#ifndef CONFIG_MPTCP
static
#endif
void tcp_ofo_queue(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
__u32 dsack_high = tp->rcv_nxt;
@ -4463,7 +4621,18 @@ static void tcp_ofo_queue(struct sock *sk)
p = rb_next(p);
rb_erase(&skb->rbnode, &tp->out_of_order_queue);
#ifdef CONFIG_MPTCP
/* In case of MPTCP, the segment may be empty if it's a
* non-data DATA_FIN. (see beginning of tcp_data_queue)
*
* But this only holds true for subflows, not for the
* meta-socket.
*/
if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt) &&
(is_meta_sk(sk) || !mptcp(tp) || TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq))) {
#else
if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
#endif
SOCK_DEBUG(sk, "ofo packet was already received\n");
tcp_drop(sk, skb);
continue;
@ -4497,6 +4666,10 @@ static int tcp_prune_queue(struct sock *sk);
static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
unsigned int size)
{
#ifdef CONFIG_MPTCP
if (mptcp(tcp_sk(sk)))
sk = mptcp_meta_sk(sk);
#endif
if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
!sk_rmem_schedule(sk, skb, size)) {
@ -4511,7 +4684,10 @@ static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
return 0;
}
static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
#ifndef CONFIG_MPTCP
static
#endif
void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
struct rb_node **p, *parent;
@ -4584,7 +4760,11 @@ coalesce_done:
continue;
}
if (before(seq, TCP_SKB_CB(skb1)->end_seq)) {
if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)
#ifdef CONFIG_MPTCP
&& (is_meta_sk(sk) || !mptcp(tp) || end_seq != seq)
#endif
) {
/* All the bits are present. Drop. */
NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPOFOMERGE);
@ -4631,6 +4811,13 @@ merge_right:
end_seq);
break;
}
#ifdef CONFIG_MPTCP
/* MPTCP allows non-data data-fin to be in the ofo-queue */
if (mptcp(tp) && !is_meta_sk(sk) && TCP_SKB_CB(skb1)->seq == TCP_SKB_CB(skb1)->end_seq) {
skb = skb1;
continue;
}
#endif
rb_erase(&skb1->rbnode, &tp->out_of_order_queue);
tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
TCP_SKB_CB(skb1)->end_seq);
@ -4642,7 +4829,11 @@ merge_right:
tp->ooo_last_skb = skb;
add_sack:
#ifdef CONFIG_MPTCP
if (tcp_is_sack(tp) && seq != end_seq)
#else
if (tcp_is_sack(tp))
#endif
tcp_sack_new_ofo_skb(sk, seq, end_seq);
end:
if (skb) {
@ -4656,7 +4847,10 @@ end:
}
}
static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,
#ifndef CONFIG_MPTCP
static
#endif
int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,
bool *fragstolen)
{
int eaten;
@ -4733,6 +4927,9 @@ void tcp_data_ready(struct sock *sk)
if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) &&
!sock_flag(sk, SOCK_DONE) &&
#ifdef CONFIG_MPTCP
!mptcp(tp) &&
#endif
tcp_receive_window(tp) > inet_csk(sk)->icsk_ack.rcv_mss)
return;
@ -4745,7 +4942,14 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
bool fragstolen;
int eaten;
if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
/* If no data is present, but a data_fin is in the options, we still
* have to call mptcp_queue_skb later on.
*/
if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq
#ifdef CONFIG_MPTCP
&& !(mptcp(tp) && mptcp_is_data_fin(skb))
#endif
) {
__kfree_skb(skb);
return;
}
@ -4775,7 +4979,11 @@ queue_and_out:
}
eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
if (skb->len)
if (skb->len
#ifdef CONFIG_MPTCP
|| mptcp_is_data_fin(skb)
#endif
)
tcp_event_data_recv(sk, skb);
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
tcp_fin(sk);
@ -4797,7 +5005,15 @@ queue_and_out:
if (eaten > 0)
kfree_skb_partial(skb, fragstolen);
if (!sock_flag(sk, SOCK_DEAD))
if (!sock_flag(sk, SOCK_DEAD)
#ifdef CONFIG_MPTCP
|| mptcp(tp)
#endif
)
/* MPTCP: we always have to call data_ready, because
* we may be about to receive a data-fin, which still
* must get queued.
*/
tcp_data_ready(sk);
return;
}
@ -5145,7 +5361,10 @@ static int tcp_prune_queue(struct sock *sk)
return -1;
}
static bool tcp_should_expand_sndbuf(const struct sock *sk)
#ifndef CONFIG_MPTCP
static
#endif
bool tcp_should_expand_sndbuf(const struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
@ -5180,7 +5399,11 @@ static void tcp_new_space(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
#ifdef CONFIG_MPTCP
if (tp->ops->should_expand_sndbuf(sk)) {
#else
if (tcp_should_expand_sndbuf(sk)) {
#endif
tcp_sndbuf_expand(sk);
tp->snd_cwnd_stamp = tcp_jiffies32;
}
@ -5204,11 +5427,26 @@ void tcp_check_space(struct sock *sk)
sock_reset_flag(sk, SOCK_QUEUE_SHRUNK);
/* pairs with tcp_poll() */
smp_mb();
if (sk->sk_socket &&
test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
if
#ifdef CONFIG_MPTCP
(mptcp(tcp_sk(sk)) ||
#endif
(sk->sk_socket &&
test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
#ifdef CONFIG_MPTCP
){
#else
{
#endif
tcp_new_space(sk);
#ifdef CONFIG_MPTCP
if (sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
#else
if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
#endif
}
}
}
@ -5226,6 +5464,10 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
{
struct tcp_sock *tp = tcp_sk(sk);
unsigned long rtt, delay;
#ifdef CONFIG_MPTCP
struct sock *meta_sk = mptcp(tp) ? mptcp_meta_sk(sk) : sk;
struct tcp_sock *meta_tp = tcp_sk(meta_sk);
#endif
/* More than one full frame received... */
if (((tp->rcv_nxt - tp->rcv_wup) > (inet_csk(sk)->icsk_ack.rcv_mss) *
@ -5235,8 +5477,14 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
* If application uses SO_RCVLOWAT, we want send ack now if
* we have not received enough bytes to satisfy the condition.
*/
#ifdef CONFIG_MPTCP
(meta_tp->rcv_nxt - meta_tp->copied_seq < meta_sk->sk_rcvlowat ||
tp->ops->__select_window(sk) >= tp->rcv_wnd)) ||
#else
(tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat ||
__tcp_select_window(sk) >= tp->rcv_wnd)) ||
#endif
/* We ACK each frame or... */
tcp_in_quickack_mode(sk) ||
/* Protocol state mandates a one-time immediate ACK */
@ -5371,6 +5619,11 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th)
{
struct tcp_sock *tp = tcp_sk(sk);
#ifdef CONFIG_MPTCP
/* MPTCP urgent data is not yet supported */
if (mptcp(tp))
return;
#endif
/* Check if we get a new urgent pointer - normally not. */
if (th->urg)
@ -5514,9 +5767,18 @@ syn_challenge:
goto discard;
}
#ifdef CONFIG_MPTCP
/* If valid: post process the received MPTCP options. */
if (mptcp(tp) && mptcp_handle_options(sk, th, skb))
goto discard;
#endif
return true;
discard:
#ifdef CONFIG_MPTCP
if (mptcp(tp))
mptcp_reset_mopt(tp);
#endif
tcp_drop(sk, skb);
return false;
}
@ -5572,6 +5834,11 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
*/
tp->rx_opt.saw_tstamp = 0;
#ifdef CONFIG_MPTCP
/* MPTCP: force slowpath. */
if (mptcp(tp))
goto slow_path;
#endif
/* pred_flags is 0xS?10 << 16 + snd_wnd
* if header_prediction is to be made
@ -5758,17 +6025,34 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
struct tcp_fastopen_cookie *cookie)
{
struct tcp_sock *tp = tcp_sk(sk);
#ifdef CONFIG_MPTCP
struct sk_buff *data = NULL;
#else
struct sk_buff *data = tp->syn_data ? tcp_rtx_queue_head(sk) : NULL;
#endif
u16 mss = tp->rx_opt.mss_clamp, try_exp = 0;
bool syn_drop = false;
#ifdef CONFIG_MPTCP
if (tp->syn_data) {
if (mptcp(tp))
data = tcp_write_queue_head(mptcp_meta_sk(sk));
else
data = tcp_rtx_queue_head(sk);
}
#endif
if (mss == tp->rx_opt.user_mss) {
struct tcp_options_received opt;
/* Get original SYNACK MSS value if user MSS sets mss_clamp */
tcp_clear_options(&opt);
opt.user_mss = opt.mss_clamp = 0;
#ifdef CONFIG_MPTCP
tcp_parse_options(sock_net(sk), synack, &opt, NULL, 0, NULL, NULL);
#else
tcp_parse_options(sock_net(sk), synack, &opt, 0, NULL);
#endif
mss = opt.mss_clamp;
}
@ -5792,7 +6076,15 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp);
if (data) { /* Retransmit unacked data in SYN */
/* In mptcp case, we do not rely on "retransmit", but instead on
* "transmit", because if fastopen data is not acked, the retransmission
* becomes the first MPTCP data (see mptcp_rcv_synsent_fastopen).
*/
if (data
#ifdef CONFIG_MPTCP
&& !mptcp(tp)
#endif
) { /* Retransmit unacked data in SYN */
skb_rbtree_walk_from(data) {
if (__tcp_retransmit_skb(sk, data, 1))
break;
@ -5832,9 +6124,18 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_fastopen_cookie foc = { .len = -1 };
int saved_clamp = tp->rx_opt.mss_clamp;
#ifdef CONFIG_MPTCP
struct mptcp_options_received mopt;
bool fastopen_fail;
mptcp_init_mp_opt(&mopt);
tcp_parse_options(sock_net(sk), skb, &tp->rx_opt,
mptcp(tp) ? &tp->mptcp->rx_opt : &mopt, 0, &foc, tp);
#else
bool fastopen_fail;
tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, 0, &foc);
#endif
if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
tp->rx_opt.rcv_tsecr -= tp->tsoffset;
@ -5894,6 +6195,36 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
tcp_ack(sk, skb, FLAG_SLOWPATH);
#ifdef CONFIG_MPTCP
if (tp->request_mptcp || mptcp(tp)) {
int ret;
rcu_read_lock();
local_bh_disable();
ret = mptcp_rcv_synsent_state_process(sk, &sk,
skb, &mopt);
local_bh_enable();
rcu_read_unlock();
/* May have changed if we support MPTCP */
tp = tcp_sk(sk);
icsk = inet_csk(sk);
if (ret == 1)
goto reset_and_undo;
if (ret == 2)
goto discard;
}
if (mptcp(tp) && !is_master_tp(tp)) {
/* Timer for repeating the ACK until an answer
* arrives. Used only when establishing an additional
* subflow inside of an MPTCP connection.
*/
sk_reset_timer(sk, &tp->mptcp->mptcp_ack_timer,
jiffies + icsk->icsk_rto);
}
#endif
/* Ok.. it's good. Set up sequence numbers and
* move to established.
*/
@ -5920,6 +6251,12 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
tp->tcp_header_len = sizeof(struct tcphdr);
}
#ifdef CONFIG_MPTCP
if (mptcp(tp)) {
tp->tcp_header_len += MPTCP_SUB_LEN_DSM_ALIGN;
tp->advmss -= MPTCP_SUB_LEN_DSM_ALIGN;
}
#endif
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
tcp_initialize_rcv_mss(sk);
@ -5943,9 +6280,21 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
}
if (fastopen_fail)
return -1;
if (sk->sk_write_pending ||
/* With MPTCP we cannot send data on the third ack due to the
* lack of option-space to combine with an MP_CAPABLE.
*/
if (
#ifdef CONFIG_MPTCP
!mptcp(tp) && (
#endif
sk->sk_write_pending ||
icsk->icsk_accept_queue.rskq_defer_accept ||
icsk->icsk_ack.pingpong) {
icsk->icsk_ack.pingpong
#ifdef CONFIG_MPTCP
)
#endif
) {
/* Save one ACK. Data will be ready after
* several ticks, if write_pending is set.
*
@ -5984,6 +6333,7 @@ discard:
tcp_paws_reject(&tp->rx_opt, 0))
goto discard_and_undo;
/* TODO - check this here for MPTCP */
if (th->syn) {
/* We see SYN without ACK. It is attempt of
* simultaneous connect with crossed SYNs.
@ -6000,6 +6350,12 @@ discard:
tp->tcp_header_len = sizeof(struct tcphdr);
}
#ifdef CONFIG_MPTCP
if (mptcp(tp)) {
tp->tcp_header_len += MPTCP_SUB_LEN_DSM_ALIGN;
tp->advmss -= MPTCP_SUB_LEN_DSM_ALIGN;
}
#endif
WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1);
WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
@ -6058,6 +6414,9 @@ reset_and_undo:
*/
int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
#ifdef CONFIG_MPTCP
__releases(&sk->sk_lock.slock)
#endif
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
@ -6100,6 +6459,18 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
tp->rx_opt.saw_tstamp = 0;
tcp_mstamp_refresh(tp);
queued = tcp_rcv_synsent_state_process(sk, skb, th);
#ifdef CONFIG_MPTCP
if (is_meta_sk(sk)) {
sk = tcp_sk(sk)->mpcb->master_sk;
tp = tcp_sk(sk);
/* Need to call it here, because it will announce new
* addresses, which can only be done after the third ack
* of the 3-way handshake.
*/
mptcp_update_metasocket(tp->meta_sk);
}
#endif
if (queued >= 0)
return queued;
@ -6182,6 +6553,10 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
if (tp->rx_opt.tstamp_ok)
tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_MPTCP
if (mptcp(tp))
tp->advmss -= MPTCP_SUB_LEN_DSM_ALIGN;
#endif
if (!inet_csk(sk)->icsk_ca_ops->cong_control)
tcp_update_pacing_rate(sk);
@ -6191,6 +6566,32 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
tcp_initialize_rcv_mss(sk);
tcp_fast_path_on(tp);
#ifdef CONFIG_MPTCP
/* Send an ACK when establishing a new MPTCP subflow, i.e.
* using an MP_JOIN subtype.
*/
if (mptcp(tp)) {
if (is_master_tp(tp)) {
mptcp_update_metasocket(mptcp_meta_sk(sk));
} else {
struct sock *meta_sk = mptcp_meta_sk(sk);
tcp_send_ack(sk);
/* Update RTO as it might be worse/better */
mptcp_set_rto(sk);
/* If the new RTO would fire earlier, pull it in! */
if (tcp_sk(meta_sk)->packets_out &&
icsk->icsk_timeout > inet_csk(meta_sk)->icsk_rto + jiffies) {
tcp_rearm_rto(meta_sk);
}
mptcp_push_pending_frames(mptcp_meta_sk(sk));
}
}
#endif
break;
case TCP_FIN_WAIT1: {
@ -6238,7 +6639,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
tmo = tcp_fin_time(sk);
if (tmo > TCP_TIMEWAIT_LEN) {
inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
} else if (th->fin || sock_owned_by_user(sk)) {
} else if (th->fin ||
#ifdef CONFIG_MPTCP
mptcp_is_data_fin(skb) ||
#endif
sock_owned_by_user(sk)) {
/* Bad case. We could lose such FIN otherwise.
* It is not a big problem, but it looks confusing
* and not so rare event. We still can lose it now,
@ -6247,7 +6652,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
*/
inet_csk_reset_keepalive_timer(sk, tmo);
} else {
#ifdef CONFIG_MPTCP
tp->ops->time_wait(sk, TCP_FIN_WAIT2, tmo);
#else
tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
#endif
goto discard;
}
break;
@ -6255,7 +6664,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
case TCP_CLOSING:
if (tp->snd_una == tp->write_seq) {
#ifdef CONFIG_MPTCP
tp->ops->time_wait(sk, TCP_TIME_WAIT, 0);
#else
tcp_time_wait(sk, TCP_TIME_WAIT, 0);
#endif
goto discard;
}
break;
@ -6267,6 +6680,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
goto discard;
}
break;
#ifdef CONFIG_MPTCP
case TCP_CLOSE:
if (tp->mp_killed)
goto discard;
#endif
}
/* step 6: check the URG bit */
@ -6288,7 +6706,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
*/
if (sk->sk_shutdown & RCV_SHUTDOWN) {
if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)
#ifdef CONFIG_MPTCP
&& !mptcp(tp)
#endif
) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
tcp_reset(sk);
return 1;
@ -6385,6 +6807,10 @@ static void tcp_openreq_init(struct request_sock *req,
ireq->wscale_ok = rx_opt->wscale_ok;
ireq->acked = 0;
ireq->ecn_ok = 0;
#ifdef CONFIG_MPTCP
ireq->mptcp_rqsk = 0;
ireq->saw_mpc = 0;
#endif
ireq->ir_rmt_port = tcp_hdr(skb)->source;
ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
ireq->ir_mark = inet_request_mark(sk, skb);
@ -6482,12 +6908,23 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
/* TW buckets are converted to open requests without
* limitations, they conserve resources and peer is
* evidently real one.
*
* MPTCP: new subflows cannot be established in a stateless manner.
*/
#ifdef CONFIG_MPTCP
if (((!is_meta_sk(sk) && net->ipv4.sysctl_tcp_syncookies == 2) ||
inet_csk_reqsk_queue_is_full(sk)) && !isn) {
#else
if ((net->ipv4.sysctl_tcp_syncookies == 2 ||
inet_csk_reqsk_queue_is_full(sk)) && !isn) {
#endif
want_cookie = tcp_syn_flood_action(sk, skb, rsk_ops->slab_name);
if (!want_cookie)
goto drop;
#ifdef CONFIG_MPTCP
if (is_meta_sk(sk))
goto drop;
#endif
}
if (sk_acceptq_is_full(sk)) {
@ -6505,8 +6942,13 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
tcp_clear_options(&tmp_opt);
tmp_opt.mss_clamp = af_ops->mss_clamp;
tmp_opt.user_mss = tp->rx_opt.user_mss;
#ifdef CONFIG_MPTCP
tcp_parse_options(sock_net(sk), skb, &tmp_opt, NULL, 0,
want_cookie ? NULL : &foc, NULL);
#else
tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0,
want_cookie ? NULL : &foc);
#endif
if (want_cookie && !tmp_opt.saw_tstamp)
tcp_clear_options(&tmp_opt);
@ -6521,7 +6963,12 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
/* Note: tcp_v6_init_req() might override ir_iif for link locals */
inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb);
#ifdef CONFIG_MPTCP
if (af_ops->init_req(req, sk, skb, want_cookie))
goto drop_and_free;
#else
af_ops->init_req(req, sk, skb);
#endif
if (security_inet_conn_request(sk, skb, req))
goto drop_and_free;
@ -6557,7 +7004,11 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
tcp_ecn_create_request(req, skb, sk, dst);
if (want_cookie) {
#ifdef CONFIG_MPTCP
isn = cookie_init_sequence(af_ops, req, sk, skb, &req->mss);
#else
isn = cookie_init_sequence(af_ops, sk, skb, &req->mss);
#endif
req->cookie_ts = tmp_opt.tstamp_ok;
if (!tmp_opt.tstamp_ok)
inet_rsk(req)->ecn_ok = 0;
@ -6572,9 +7023,26 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
}
if (fastopen_sk) {
#ifdef CONFIG_MPTCP
struct sock *meta_sk = fastopen_sk;
if (mptcp(tcp_sk(fastopen_sk)))
meta_sk = mptcp_meta_sk(fastopen_sk);
#endif
af_ops->send_synack(fastopen_sk, dst, &fl, req,
&foc, TCP_SYNACK_FASTOPEN);
/* Add the child socket directly into the accept queue */
#ifdef CONFIG_MPTCP
if (!inet_csk_reqsk_queue_add(sk, req, meta_sk)) {
reqsk_fastopen_remove(fastopen_sk, req, false);
bh_unlock_sock(fastopen_sk);
if (meta_sk != fastopen_sk)
bh_unlock_sock(meta_sk);
sock_put(fastopen_sk);
reqsk_put(req);
goto drop;
}
#else
if (!inet_csk_reqsk_queue_add(sk, req, fastopen_sk)) {
reqsk_fastopen_remove(fastopen_sk, req, false);
bh_unlock_sock(fastopen_sk);
@ -6582,8 +7050,13 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
reqsk_put(req);
goto drop;
}
#endif
sk->sk_data_ready(sk);
bh_unlock_sock(fastopen_sk);
#ifdef CONFIG_MPTCP
if (meta_sk != fastopen_sk)
bh_unlock_sock(meta_sk);
#endif
sock_put(fastopen_sk);
} else {
tcp_rsk(req)->tfo_listener = false;

View File

@ -67,6 +67,10 @@
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#ifdef CONFIG_MPTCP
#include <net/mptcp.h>
#include <net/mptcp_v4.h>
#endif
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
@ -436,6 +440,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
const int type = icmp_hdr(icmp_skb)->type;
const int code = icmp_hdr(icmp_skb)->code;
struct sock *sk;
#ifdef CONFIG_MPTCP
struct sock *meta_sk;
#endif
struct sk_buff *skb;
struct request_sock *fastopen;
u32 seq, snd_una;
@ -464,13 +471,27 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
(code == ICMP_NET_UNREACH ||
code == ICMP_HOST_UNREACH)));
#ifdef CONFIG_MPTCP
tp = tcp_sk(sk);
if (mptcp(tp))
meta_sk = mptcp_meta_sk(sk);
else
meta_sk = sk;
bh_lock_sock(meta_sk);
#else
bh_lock_sock(sk);
#endif
/* If too many ICMPs get dropped on busy
* servers this needs to be solved differently.
* We do take care of PMTU discovery (RFC1191) special case :
* we can receive locally generated ICMP messages while socket is held.
*/
#ifdef CONFIG_MPTCP
if (sock_owned_by_user(meta_sk)) {
#else
if (sock_owned_by_user(sk)) {
#endif
if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
}
@ -483,7 +504,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
}
icsk = inet_csk(sk);
#ifndef CONFIG_MPTCP
tp = tcp_sk(sk);
#endif
/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
fastopen = tp->fastopen_rsk;
snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
@ -517,11 +540,19 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
goto out;
WRITE_ONCE(tp->mtu_info, info);
#ifdef CONFIG_MPTCP
if (!sock_owned_by_user(meta_sk)) {
#else
if (!sock_owned_by_user(sk)) {
#endif
tcp_v4_mtu_reduced(sk);
} else {
if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &sk->sk_tsq_flags))
sock_hold(sk);
#ifdef CONFIG_MPTCP
if (mptcp(tp))
mptcp_tsq_flags(sk);
#endif
}
goto out;
}
@ -535,7 +566,11 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
!icsk->icsk_backoff || fastopen)
break;
#ifdef CONFIG_MPTCP
if (sock_owned_by_user(meta_sk))
#else
if (sock_owned_by_user(sk))
#endif
break;
skb = tcp_rtx_queue_head(sk);
@ -558,7 +593,11 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
} else {
/* RTO revert clocked out retransmission.
* Will retransmit now */
#ifdef CONFIG_MPTCP
tcp_sk(sk)->ops->retransmit_timer(sk);
#else
tcp_retransmit_timer(sk);
#endif
}
break;
@ -578,7 +617,11 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
if (fastopen && !fastopen->sk)
break;
#ifdef CONFIG_MPTCP
if (!sock_owned_by_user(meta_sk)) {
#else
if (!sock_owned_by_user(sk)) {
#endif
sk->sk_err = err;
sk->sk_error_report(sk);
@ -607,7 +650,11 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
*/
inet = inet_sk(sk);
#ifdef CONFIG_MPTCP
if (!sock_owned_by_user(meta_sk) && inet->recverr) {
#else
if (!sock_owned_by_user(sk) && inet->recverr) {
#endif
sk->sk_err = err;
sk->sk_error_report(sk);
} else { /* Only an error on timeout */
@ -615,7 +662,11 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
}
out:
#ifdef CONFIG_MPTCP
bh_unlock_sock(meta_sk);
#else
bh_unlock_sock(sk);
#endif
sock_put(sk);
}
@ -650,7 +701,10 @@ EXPORT_SYMBOL(tcp_v4_send_check);
* Exception: precedence violation. We do not implement it in any case.
*/
static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
#ifndef CONFIG_MPTCP
static
#endif
void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
{
const struct tcphdr *th = tcp_hdr(skb);
struct {
@ -794,12 +848,19 @@ out:
/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
outside socket context is ugly, certainly. What can I do?
*/
#ifdef CONFIG_MPTCP
static void tcp_v4_send_ack(const struct sock *sk,
struct sk_buff *skb, u32 seq, u32 ack, u32 data_ack,
u32 win, u32 tsval, u32 tsecr, int oif,
struct tcp_md5sig_key *key,
int reply_flags, u8 tos, int mptcp)
#else
static void tcp_v4_send_ack(const struct sock *sk,
struct sk_buff *skb, u32 seq, u32 ack,
u32 win, u32 tsval, u32 tsecr, int oif,
struct tcp_md5sig_key *key,
int reply_flags, u8 tos)
#endif
{
const struct tcphdr *th = tcp_hdr(skb);
struct {
@ -807,6 +868,10 @@ static void tcp_v4_send_ack(const struct sock *sk,
__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
+ (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
#ifdef CONFIG_MPTCP
+ ((MPTCP_SUB_LEN_DSS >> 2) +
(MPTCP_SUB_LEN_ACK >> 2))
#endif
];
} rep;
@ -853,6 +918,21 @@ static void tcp_v4_send_ack(const struct sock *sk,
ip_hdr(skb)->daddr, &rep.th);
}
#endif
#ifdef CONFIG_MPTCP
if (mptcp) {
int offset = (tsecr) ? 3 : 0;
/* Construction of 32-bit data_ack */
rep.opt[offset++] = htonl((TCPOPT_MPTCP << 24) |
((MPTCP_SUB_LEN_DSS + MPTCP_SUB_LEN_ACK) << 16) |
(0x20 << 8) |
(0x01));
rep.opt[offset] = htonl(data_ack);
arg.iov[0].iov_len += MPTCP_SUB_LEN_DSS + MPTCP_SUB_LEN_ACK;
rep.th.doff = arg.iov[0].iov_len / 4;
}
#endif /* CONFIG_MPTCP */
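The htonl() above packs the leading 32 bits of a DSS option that carries only a 32-bit DATA_ACK: option kind, option length, the DSS subtype nibble, and the 'A' flag. A standalone reproduction of that arithmetic is sketched below; the MPTCP_SUB_LEN_* values are assumptions (verify against this tree's <net/mptcp.h>), only TCPOPT_MPTCP == 30 is the IANA-assigned kind:

#include <stdint.h>
#include <stdio.h>

#define TCPOPT_MPTCP            30      /* IANA-assigned MPTCP option kind */
#define MPTCP_SUB_LEN_DSS       4       /* assumed: fixed DSS header */
#define MPTCP_SUB_LEN_ACK       4       /* assumed: 32-bit data ack */

int main(void)
{
        uint32_t word = (TCPOPT_MPTCP << 24) |
                        ((MPTCP_SUB_LEN_DSS + MPTCP_SUB_LEN_ACK) << 16) |
                        (0x20 << 8) |   /* subtype DSS (2) in the upper nibble */
                        0x01;           /* flags: 'A' - DATA_ACK present */

        /* On the wire (after htonl in the kernel code): kind, length,
         * subtype/flags, then the 32-bit data_ack in the following word.
         */
        printf("DSS+ACK option header: 0x%08x\n", (unsigned int)word);
        return 0;
}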
arg.flags = reply_flags;
arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
ip_hdr(skb)->saddr, /* XXX */
@ -881,9 +961,20 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
struct inet_timewait_sock *tw = inet_twsk(sk);
struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
#ifdef CONFIG_MPTCP
u32 data_ack = 0;
int mptcp = 0;
if (tcptw->mptcp_tw) {
data_ack = (u32)tcptw->mptcp_tw->rcv_nxt;
mptcp = 1;
}
#endif
tcp_v4_send_ack(sk, skb,
tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
#ifdef CONFIG_MPTCP
data_ack,
#endif
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_time_stamp_raw() + tcptw->tw_ts_offset,
tcptw->tw_ts_recent,
@ -891,19 +982,31 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
tcp_twsk_md5_key(tcptw),
tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
tw->tw_tos
#ifdef CONFIG_MPTCP
, mptcp
#endif
);
inet_twsk_put(tw);
}
static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
#ifndef CONFIG_MPTCP
static
#endif
void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req)
{
/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
* sk->sk_state == TCP_SYN_RECV -> for Fast Open.
*/
#ifdef CONFIG_MPTCP
u32 seq = (sk->sk_state == TCP_LISTEN || is_meta_sk(sk)) ?
tcp_rsk(req)->snt_isn + 1 :
tcp_sk(sk)->snd_nxt;
#else
u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
tcp_sk(sk)->snd_nxt;
#endif
/* RFC 7323 2.3
* The window field (SEG.WND) of every outgoing segment, with the
@ -912,6 +1015,9 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
*/
tcp_v4_send_ack(sk, skb, seq,
tcp_rsk(req)->rcv_nxt,
#ifdef CONFIG_MPTCP
0,
#endif
req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
req->ts_recent,
@ -919,7 +1025,11 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr,
AF_INET),
inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
ip_hdr(skb)->tos);
ip_hdr(skb)->tos
#ifdef CONFIG_MPTCP
, 0
#endif
);
}
/*
@ -927,7 +1037,10 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
* This still operates on a request_sock only, not on a big
* socket.
*/
static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
#ifndef CONFIG_MPTCP
static
#endif
int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
struct flowi *fl,
struct request_sock *req,
struct tcp_fastopen_cookie *foc,
@ -961,7 +1074,10 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
/*
* IPv4 request_sock destructor.
*/
static void tcp_v4_reqsk_destructor(struct request_sock *req)
#ifndef CONFIG_MPTCP
static
#endif
void tcp_v4_reqsk_destructor(struct request_sock *req)
{
kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
}
@ -1343,9 +1459,14 @@ static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
return false;
}
#ifdef CONFIG_MPTCP
static int tcp_v4_init_req(struct request_sock *req, const struct sock *sk_listener,
struct sk_buff *skb, bool want_cookie)
#else
static void tcp_v4_init_req(struct request_sock *req,
const struct sock *sk_listener,
struct sk_buff *skb)
#endif
{
struct inet_request_sock *ireq = inet_rsk(req);
struct net *net = sock_net(sk_listener);
@ -1353,6 +1474,9 @@ static void tcp_v4_init_req(struct request_sock *req,
sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb));
#ifdef CONFIG_MPTCP
return 0;
#endif
}
static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
@ -1372,6 +1496,9 @@ struct request_sock_ops tcp_request_sock_ops __read_mostly = {
.syn_ack_timeout = tcp_syn_ack_timeout,
};
#ifndef CONFIG_MPTCP
static
#endif
const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
.mss_clamp = TCP_MSS_DEFAULT,
#ifdef CONFIG_TCP_MD5SIG
@ -1520,7 +1647,10 @@ put_and_exit:
}
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
#ifndef CONFIG_MPTCP
static
#endif
struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
const struct tcphdr *th = tcp_hdr(skb);
@ -1542,6 +1672,10 @@ static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
struct sock *rsk;
#ifdef CONFIG_MPTCP
if (is_meta_sk(sk))
return mptcp_v4_do_rcv(sk, skb);
#endif
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
struct dst_entry *dst;
@ -1697,6 +1831,10 @@ static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
skb->len - th->doff * 4);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
#ifdef CONFIG_MPTCP
TCP_SKB_CB(skb)->mptcp_flags = 0;
TCP_SKB_CB(skb)->dss_off = 0;
#endif
TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
TCP_SKB_CB(skb)->tcp_tw_isn = 0;
TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
@ -1717,6 +1855,9 @@ int tcp_v4_rcv(struct sk_buff *skb)
const struct tcphdr *th;
bool refcounted;
struct sock *sk;
#ifdef CONFIG_MPTCP
struct sock *meta_sk = NULL;
#endif
int ret;
if (skb->pkt_type != PACKET_HOST)
@ -1770,15 +1911,26 @@ process:
reqsk_put(req);
goto csum_error;
}
if (unlikely(sk->sk_state != TCP_LISTEN)) {
if (unlikely(sk->sk_state != TCP_LISTEN
#ifdef CONFIG_MPTCP
&& !is_meta_sk(sk)
#endif
)) {
inet_csk_reqsk_queue_drop_and_put(sk, req);
goto lookup;
}
#ifdef CONFIG_MPTCP
if (unlikely(is_meta_sk(sk) && !mptcp_can_new_subflow(sk))) {
inet_csk_reqsk_queue_drop_and_put(sk, req);
goto lookup;
}
#endif
/* We own a reference on the listener, increase it again
* as we might lose it too soon.
*/
sock_hold(sk);
refcounted = true;
nsk = NULL;
if (!tcp_filter(sk, skb)) {
th = (const struct tcphdr *)skb->data;
@ -1839,15 +1991,38 @@ process:
sk_incoming_cpu_update(sk);
#ifdef CONFIG_MPTCP
if (mptcp(tcp_sk(sk))) {
meta_sk = mptcp_meta_sk(sk);
bh_lock_sock_nested(meta_sk);
if (sock_owned_by_user(meta_sk))
mptcp_prepare_for_backlog(sk, skb);
} else {
meta_sk = sk;
#endif
bh_lock_sock_nested(sk);
#ifdef CONFIG_MPTCP
}
#endif
tcp_segs_in(tcp_sk(sk), skb);
ret = 0;
#ifdef CONFIG_MPTCP
if (!sock_owned_by_user(meta_sk)) {
ret = tcp_v4_do_rcv(sk, skb);
} else if (tcp_add_backlog(meta_sk, skb)) {
#else
if (!sock_owned_by_user(sk)) {
ret = tcp_v4_do_rcv(sk, skb);
} else if (tcp_add_backlog(sk, skb)) {
#endif
goto discard_and_relse;
}
#ifdef CONFIG_MPTCP
bh_unlock_sock(meta_sk);
#else
bh_unlock_sock(sk);
#endif
put_and_return:
if (refcounted)
@ -1861,6 +2036,19 @@ no_tcp_socket:
tcp_v4_fill_cb(skb, iph, th);
#ifdef CONFIG_MPTCP
if (!sk && th->syn && !th->ack) {
int ret = mptcp_lookup_join(skb, NULL);
if (ret < 0) {
tcp_v4_send_reset(NULL, skb);
goto discard_it;
} else if (ret > 0) {
return 0;
}
}
#endif
if (tcp_checksum_complete(skb)) {
csum_error:
__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
@ -1909,6 +2097,18 @@ do_time_wait:
refcounted = false;
goto process;
}
#ifdef CONFIG_MPTCP
if (th->syn && !th->ack) {
int ret = mptcp_lookup_join(skb, inet_twsk(sk));
if (ret < 0) {
tcp_v4_send_reset(NULL, skb);
goto discard_it;
} else if (ret > 0) {
return 0;
}
}
#endif
}
/* to ACK */
/* fall through */
@ -1978,6 +2178,11 @@ static int tcp_v4_init_sock(struct sock *sk)
tcp_init_sock(sk);
#ifdef CONFIG_MPTCP
if (sock_flag(sk, SOCK_MPTCP))
icsk->icsk_af_ops = &mptcp_v4_specific;
else
#endif
icsk->icsk_af_ops = &ipv4_specific;
#ifdef CONFIG_TCP_MD5SIG
@ -1996,7 +2201,12 @@ void tcp_v4_destroy_sock(struct sock *sk)
tcp_clear_xmit_timers(sk);
tcp_cleanup_congestion_control(sk);
#ifdef CONFIG_MPTCP
if (mptcp(tp))
mptcp_destroy_sock(sk);
if (tp->inside_tk_table)
mptcp_hash_remove_bh(tp);
#endif
tcp_cleanup_ulp(sk);
/* Cleanup up the write buffer. */
@ -2506,6 +2716,11 @@ struct proto tcp_prot = {
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
.max_header = MAX_TCP_HEADER,
.obj_size = sizeof(struct tcp_sock),
#ifdef CONFIG_MPTCP
.useroffset = offsetof(struct tcp_sock, mptcp_sched_name),
.usersize = sizeof_field(struct tcp_sock, mptcp_sched_name) +
sizeof_field(struct tcp_sock, mptcp_pm_name),
#endif
.slab_flags = SLAB_TYPESAFE_BY_RCU,
.twsk_prot = &tcp_timewait_sock_ops,
.rsk_prot = &tcp_request_sock_ops,
@ -2516,6 +2731,9 @@ struct proto tcp_prot = {
.compat_getsockopt = compat_tcp_getsockopt,
#endif
.diag_destroy = tcp_abort,
#ifdef CONFIG_MPTCP
.clear_sk = mptcp_clear_sk,
#endif
};
EXPORT_SYMBOL(tcp_prot);
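
The tcp_v4_rcv() hunks above funnel locking and backlog queueing through the MPTCP meta socket whenever the segment arrives on a subflow: the meta socket is locked, and if it is owned by user context the skb is prepared for and queued to the meta backlog instead of being processed on the subflow. A simplified userspace model of that decision, not kernel code (struct demo_sock, lock_target and receive_segment are invented for illustration):

/* rcv_path_demo.c -- simplified model of the receive-path rule added to
 * tcp_v4_rcv()/tcp_v6_rcv(): for an MPTCP subflow, locking and backlog
 * queueing happen on the meta (connection-level) socket.
 */
#include <stdbool.h>
#include <stdio.h>

struct demo_sock {
	bool is_mptcp_subflow;
	bool owned_by_user;      /* models sock_owned_by_user() */
	struct demo_sock *meta;  /* models mptcp_meta_sk() */
	int backlog;
};

/* Returns the socket whose lock/backlog must be used for this segment. */
static struct demo_sock *lock_target(struct demo_sock *sk)
{
	return sk->is_mptcp_subflow ? sk->meta : sk;
}

static void receive_segment(struct demo_sock *sk)
{
	struct demo_sock *target = lock_target(sk);

	if (!target->owned_by_user) {
		printf("process segment directly on the subflow\n");
	} else {
		target->backlog++;
		printf("meta socket busy: queued to meta backlog (%d)\n",
		       target->backlog);
	}
}

int main(void)
{
	struct demo_sock meta = { .owned_by_user = true };
	struct demo_sock sub = { .is_mptcp_subflow = true, .meta = &meta };

	receive_segment(&sub);   /* goes to the meta backlog */
	meta.owned_by_user = false;
	receive_segment(&sub);   /* processed immediately */
	return 0;
}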


@ -18,11 +18,17 @@
* Jorge Cwik, <jorge@laser.satlink.net>
*/
#ifdef CONFIG_MPTCP
#include <linux/kconfig.h>
#endif
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/workqueue.h>
#ifdef CONFIG_MPTCP
#include <net/mptcp.h>
#endif
#include <linux/static_key.h>
#include <net/tcp.h>
#include <net/inet_common.h>
@ -94,10 +100,25 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
struct tcp_options_received tmp_opt;
struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
bool paws_reject = false;
#ifdef CONFIG_MPTCP
struct mptcp_options_received mopt;
#endif
tmp_opt.saw_tstamp = 0;
#ifdef CONFIG_MPTCP
if (th->doff > (sizeof(*th) >> 2) &&
(tcptw->tw_ts_recent_stamp || tcptw->mptcp_tw)) {
#else
if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
#endif
#ifdef CONFIG_MPTCP
mptcp_init_mp_opt(&mopt);
tcp_parse_options(twsk_net(tw), skb, &tmp_opt, &mopt, 0, NULL, NULL);
#else
tcp_parse_options(twsk_net(tw), skb, &tmp_opt, 0, NULL);
#endif
if (tmp_opt.saw_tstamp) {
if (tmp_opt.rcv_tsecr)
@ -106,6 +127,12 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
tmp_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
}
#ifdef CONFIG_MPTCP
if (unlikely(mopt.mp_fclose) && tcptw->mptcp_tw) {
if (mopt.mptcp_sender_key == tcptw->mptcp_tw->loc_key)
return TCP_TW_RST;
}
#endif
}
if (tw->tw_substate == TCP_FIN_WAIT2) {
@ -129,6 +156,17 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
if (!th->ack ||
!after(TCP_SKB_CB(skb)->end_seq, tcptw->tw_rcv_nxt) ||
TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) {
#ifdef CONFIG_MPTCP
/* If mptcp_is_data_fin() returns true, we are sure that
* mopt has been initialized - otherwise it would not
* be a DATA_FIN.
*/
if (tcptw->mptcp_tw && tcptw->mptcp_tw->meta_tw &&
mptcp_is_data_fin(skb) &&
TCP_SKB_CB(skb)->seq == tcptw->tw_rcv_nxt &&
mopt.data_seq + 1 == (u32)tcptw->mptcp_tw->rcv_nxt)
return TCP_TW_ACK;
#endif
inet_twsk_put(tw);
return TCP_TW_SUCCESS;
}
@ -274,6 +312,16 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
tcptw->tw_ts_offset = tp->tsoffset;
tcptw->tw_last_oow_ack_time = 0;
#ifdef CONFIG_MPTCP
if (mptcp(tp)) {
if (mptcp_init_tw_sock(sk, tcptw)) {
inet_twsk_free(tw);
goto exit;
}
} else {
tcptw->mptcp_tw = NULL;
}
#endif
#if IS_ENABLED(CONFIG_IPV6)
if (tw->tw_family == PF_INET6) {
struct ipv6_pinfo *np = inet6_sk(sk);
@ -330,6 +378,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEWAITOVERFLOW);
}
#ifdef CONFIG_MPTCP
exit:
#endif
tcp_update_metrics(sk);
tcp_done(sk);
}
@ -337,9 +388,16 @@ EXPORT_SYMBOL(tcp_time_wait);
void tcp_twsk_destructor(struct sock *sk)
{
#ifdef CONFIG_TCP_MD5SIG
#ifdef CONFIG_MPTCP
struct tcp_timewait_sock *twsk = tcp_twsk(sk);
if (twsk->mptcp_tw)
mptcp_twsk_destructor(twsk);
#endif
#ifdef CONFIG_TCP_MD5SIG
#ifndef CONFIG_MPTCP
struct tcp_timewait_sock *twsk = tcp_twsk(sk);
#endif
if (twsk->tw_md5_key)
kfree_rcu(twsk->tw_md5_key, rcu);
#endif
@ -378,8 +436,14 @@ void tcp_openreq_init_rwin(struct request_sock *req,
full_space = rcv_wnd * mss;
/* tcp_full_space because it is guaranteed to be the first packet */
#ifdef CONFIG_MPTCP
tp->ops->select_initial_window(sk_listener, full_space,
mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) -
(ireq->saw_mpc ? MPTCP_SUB_LEN_DSM_ALIGN : 0),
#else
tcp_select_initial_window(sk_listener, full_space,
mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
#endif
&req->rsk_rcv_wnd,
&req->rsk_window_clamp,
ireq->wscale_ok,
@ -477,6 +541,10 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->snd_sml = newtp->snd_una =
newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
#ifdef CONFIG_MPTCP
newtp->out_of_order_queue = RB_ROOT;
newsk->tcp_rtx_queue = RB_ROOT;
#endif
INIT_LIST_HEAD(&newtp->tsq_node);
INIT_LIST_HEAD(&newtp->tsorted_sent_queue);
@ -547,6 +615,10 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->rx_opt.ts_recent_stamp = 0;
newtp->tcp_header_len = sizeof(struct tcphdr);
}
#ifdef CONFIG_MPTCP
if (ireq->saw_mpc)
newtp->tcp_header_len += MPTCP_SUB_LEN_DSM_ALIGN;
#endif
newtp->tsoffset = treq->ts_off;
#ifdef CONFIG_TCP_MD5SIG
newtp->md5sig_info = NULL; /*XXX*/
@ -589,6 +661,9 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
bool fastopen, bool *req_stolen)
{
struct tcp_options_received tmp_opt;
#ifdef CONFIG_MPTCP
struct mptcp_options_received mopt;
#endif
struct sock *child;
const struct tcphdr *th = tcp_hdr(skb);
__be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
@ -596,8 +671,15 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
bool own_req;
tmp_opt.saw_tstamp = 0;
#ifdef CONFIG_MPTCP
mptcp_init_mp_opt(&mopt);
#endif
if (th->doff > (sizeof(struct tcphdr)>>2)) {
#ifdef CONFIG_MPTCP
tcp_parse_options(sock_net(sk), skb, &tmp_opt, &mopt, 0, NULL, NULL);
#else
tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, NULL);
#endif
if (tmp_opt.saw_tstamp) {
tmp_opt.ts_recent = req->ts_recent;
@ -638,7 +720,14 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
*
* Reset timer after retransmitting SYNACK, similar to
* the idea of fast retransmit in recovery.
*
* Fall back to TCP if MP_CAPABLE is not set.
*/
#ifdef CONFIG_MPTCP
if (inet_rsk(req)->saw_mpc && !mopt.saw_mpc)
inet_rsk(req)->saw_mpc = false;
#endif
if (!tcp_oow_rate_limited(sock_net(sk), skb,
LINUX_MIB_TCPACKSKIPPEDSYNRECV,
&tcp_rsk(req)->last_oow_ack_time) &&
@ -791,6 +880,19 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
if (!child)
goto listen_overflow;
#ifdef CONFIG_MPTCP
if (own_req && !is_meta_sk(sk)) {
int ret = mptcp_check_req_master(sk, child, req, skb, 1, 0);
if (ret < 0)
goto listen_overflow;
/* MPTCP-supported */
if (!ret)
return tcp_sk(child)->mpcb->master_sk;
} else if (own_req) {
return mptcp_check_req_child(sk, child, req, skb, &mopt);
}
#endif
sock_rps_save_rxhash(child, skb);
tcp_synack_rtt_meas(child, req);
*req_stolen = !own_req;
@ -842,12 +944,24 @@ int tcp_child_process(struct sock *parent, struct sock *child,
{
int ret = 0;
int state = child->sk_state;
#ifdef CONFIG_MPTCP
struct sock *meta_sk = mptcp(tcp_sk(child)) ? mptcp_meta_sk(child) : child;
#endif
/* record NAPI ID of child */
sk_mark_napi_id(child, skb);
tcp_segs_in(tcp_sk(child), skb);
#ifdef CONFIG_MPTCP
/* The following will be removed when we allow lockless data-reception
* on the subflows.
*/
if (mptcp(tcp_sk(child)))
bh_lock_sock_nested(meta_sk);
if (!sock_owned_by_user(meta_sk)) {
#else
if (!sock_owned_by_user(child)) {
#endif
ret = tcp_rcv_state_process(child, skb);
/* Wakeup parent, send SIGIO */
if (state == TCP_SYN_RECV && child->sk_state != state)
@ -857,10 +971,20 @@ int tcp_child_process(struct sock *parent, struct sock *child,
* in main socket hash table and lock on listening
* socket does not protect us more.
*/
#ifdef CONFIG_MPTCP
if (mptcp(tcp_sk(child)))
mptcp_prepare_for_backlog(child, skb);
__sk_add_backlog(meta_sk, skb);
#else
__sk_add_backlog(child, skb);
#endif
}
bh_unlock_sock(child);
#ifdef CONFIG_MPTCP
if (mptcp(tcp_sk(child)))
bh_unlock_sock(meta_sk);
#endif
sock_put(child);
return ret;
}


@ -36,6 +36,14 @@
#define pr_fmt(fmt) "TCP: " fmt
#ifdef CONFIG_MPTCP
#include <net/mptcp.h>
#include <net/mptcp_v4.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/mptcp_v6.h>
#endif
#include <net/ipv6.h>
#endif
#include <net/tcp.h>
#include <linux/compiler.h>
@ -45,11 +53,16 @@
#include <trace/events/tcp.h>
#ifndef CONFIG_MPTCP
static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
int push_one, gfp_t gfp);
#endif
/* Account for new data that has been sent to the network. */
static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
#ifndef CONFIG_MPTCP
static
#endif
void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
@ -243,12 +256,24 @@ EXPORT_SYMBOL(tcp_select_initial_window);
* value can be stuffed directly into th->window for an outgoing
* frame.
*/
static u16 tcp_select_window(struct sock *sk)
#ifndef CONFIG_MPTCP
static
#endif
u16 tcp_select_window(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
u32 old_win = tp->rcv_wnd;
/* The window must never shrink at the meta-level. At the subflow we
* have to allow this. Otherwise we may announce a window too large
* for the current meta-level sk_rcvbuf.
*/
#ifdef CONFIG_MPTCP
u32 cur_win = tcp_receive_window(mptcp(tp) ? tcp_sk(mptcp_meta_sk(sk)) : tp);
u32 new_win = tp->ops->__select_window(sk);
#else
u32 cur_win = tcp_receive_window(tp);
u32 new_win = __tcp_select_window(sk);
#endif
/* Never shrink the offered window */
if (new_win < cur_win) {
@ -264,6 +289,7 @@ static u16 tcp_select_window(struct sock *sk)
LINUX_MIB_TCPWANTZEROWINDOWADV);
new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale);
}
tp->rcv_wnd = new_win;
tp->rcv_wup = tp->rcv_nxt;
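
The comment added to tcp_select_window() restates the rule this function has always enforced: a window that was already advertised is never shrunk, and under MPTCP the currently advertised window is simply taken from the meta-level socket before the same clamp runs. A standalone illustration of the clamp, not kernel code (values are made up; the rounding mirrors the ALIGN() fallback shown above):

/* win_demo.c -- illustration of the "never shrink the offered window"
 * rule applied by tcp_select_window().
 */
#include <stdint.h>
#include <stdio.h>

/* Round cur_win up to a multiple of 1 << wscale when the newly computed
 * window would otherwise be smaller than what was already advertised. */
static uint32_t clamp_window(uint32_t cur_win, uint32_t new_win, int wscale)
{
	if (new_win < cur_win) {
		uint32_t unit = 1u << wscale;

		new_win = (cur_win + unit - 1) & ~(unit - 1);
	}
	return new_win;
}

int main(void)
{
	printf("%u\n", clamp_window(65536, 60000, 7)); /* -> 65536, no shrink */
	printf("%u\n", clamp_window(65536, 90112, 7)); /* -> 90112, grows */
	return 0;
}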
@ -376,7 +402,10 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb,
/* Constructs common control bits of non-data skb. If SYN/FIN is present,
* auto increment end seqno.
*/
static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
#ifndef CONFIG_MPTCP
static
#endif
void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
{
skb->ip_summed = CHECKSUM_PARTIAL;
@ -391,7 +420,10 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
TCP_SKB_CB(skb)->end_seq = seq;
}
static inline bool tcp_urg_mode(const struct tcp_sock *tp)
#ifndef CONFIG_MPTCP
static inline
#endif
bool tcp_urg_mode(const struct tcp_sock *tp)
{
return tp->snd_una != tp->snd_up;
}
@ -402,6 +434,7 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp)
#define OPTION_WSCALE (1 << 3)
#define OPTION_FAST_OPEN_COOKIE (1 << 8)
#define OPTION_SMC (1 << 9)
/* Before adding here - take a look at OPTION_MPTCP in include/net/mptcp.h */
static void smc_options_write(__be32 *ptr, u16 *options)
{
@ -418,6 +451,7 @@ static void smc_options_write(__be32 *ptr, u16 *options)
#endif
}
#ifndef CONFIG_MPTCP
struct tcp_out_options {
u16 options; /* bit field of OPTION_* */
u16 mss; /* 0 to disable */
@ -428,6 +462,7 @@ struct tcp_out_options {
__u32 tsval, tsecr; /* need to include OPTION_TS */
struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */
};
#endif
/* Write previously computed TCP options to the packet.
*
@ -443,7 +478,11 @@ struct tcp_out_options {
* (but it may well be that other scenarios fail similarly).
*/
static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
struct tcp_out_options *opts)
struct tcp_out_options *opts
#ifdef CONFIG_MPTCP
, struct sk_buff *skb
#endif
)
{
u16 options = opts->options; /* mungable copy */
@ -537,6 +576,10 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
}
smc_options_write(ptr, &options);
#ifdef CONFIG_MPTCP
if (unlikely(OPTION_MPTCP & opts->options))
mptcp_options_write(ptr, tp, opts, skb);
#endif
}
static void smc_set_option(const struct tcp_sock *tp,
@ -622,7 +665,10 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
if (unlikely(!(OPTION_TS & opts->options)))
remaining -= TCPOLEN_SACKPERM_ALIGNED;
}
#ifdef CONFIG_MPTCP
if (tp->request_mptcp || mptcp(tp))
mptcp_syn_options(sk, opts, &remaining);
#endif
if (fastopen && fastopen->cookie.len >= 0) {
u32 need = fastopen->cookie.len;
@ -704,7 +750,10 @@ static unsigned int tcp_synack_options(const struct sock *sk,
}
smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
#ifdef CONFIG_MPTCP
if (ireq->saw_mpc)
mptcp_synack_options(req, opts, &remaining);
#endif
return MAX_TCP_OPTION_SPACE - remaining;
}
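
mptcp_syn_options()/mptcp_synack_options() work against the same 40-byte TCP option budget as every other option in tcp_syn_options(): each option is subtracted from the remaining space, and MPTCP options are emitted only if they still fit. A back-of-the-envelope sketch of that accounting, not kernel code (the 12-byte MP_CAPABLE SYN size refers to MPTCP v0 and is an assumption of this sketch, not taken from the patch):

/* optspace_demo.c -- option-space accounting in the spirit of
 * tcp_syn_options(); sizes below are the aligned on-wire sizes.
 */
#include <stdio.h>

#define MAX_TCP_OPTION_SPACE 40
#define OPT_MSS_ALIGNED       4
#define OPT_TSTAMP_ALIGNED   12   /* timestamps also carry SACK-permitted */
#define OPT_WSCALE_ALIGNED    4
#define OPT_MP_CAPABLE_SYN   12   /* assumed MPTCP v0 MP_CAPABLE SYN size */

int main(void)
{
	int remaining = MAX_TCP_OPTION_SPACE;

	remaining -= OPT_MSS_ALIGNED;
	remaining -= OPT_TSTAMP_ALIGNED;
	remaining -= OPT_WSCALE_ALIGNED;

	if (remaining >= OPT_MP_CAPABLE_SYN) {
		remaining -= OPT_MP_CAPABLE_SYN;
		printf("MP_CAPABLE fits, %d bytes left\n", remaining);
	} else {
		printf("no room for MP_CAPABLE\n");
	}
	return 0;
}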
@ -738,10 +787,22 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
opts->tsecr = tp->rx_opt.ts_recent;
size += TCPOLEN_TSTAMP_ALIGNED;
}
#ifdef CONFIG_MPTCP
if (mptcp(tp))
mptcp_established_options(sk, skb, opts, &size);
#endif
eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
if (unlikely(eff_sacks)) {
#ifdef CONFIG_MPTCP
const unsigned remaining = MAX_TCP_OPTION_SPACE - size;
if (remaining < TCPOLEN_SACK_BASE_ALIGNED)
opts->num_sack_blocks = 0;
else
#else
const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
#endif
opts->num_sack_blocks =
min_t(unsigned int, eff_sacks,
(remaining - TCPOLEN_SACK_BASE_ALIGNED) /
@ -787,20 +848,45 @@ static void tcp_tsq_write(struct sock *sk)
tcp_mstamp_refresh(tp);
tcp_xmit_retransmit_queue(sk);
}
#ifdef CONFIG_MPTCP
tcp_sk(sk)->ops->write_xmit(sk, tcp_current_mss(sk),
tcp_sk(sk)->nonagle, 0, GFP_ATOMIC);
#else
tcp_write_xmit(sk, tcp_current_mss(sk), tp->nonagle,
0, GFP_ATOMIC);
#endif
}
}
static void tcp_tsq_handler(struct sock *sk)
{
#ifdef CONFIG_MPTCP
struct tcp_sock *tp = tcp_sk(sk);
struct sock *meta_sk = mptcp(tp) ? mptcp_meta_sk(sk) : sk;
bh_lock_sock(meta_sk);
if (!sock_owned_by_user(meta_sk)) {
tcp_tsq_write(sk);
if (mptcp(tp))
tcp_tsq_write(meta_sk);
} else {
if (!test_and_set_bit(TCP_TSQ_DEFERRED, &meta_sk->sk_tsq_flags))
sock_hold(meta_sk);
if ((mptcp(tp)) && (sk->sk_state != TCP_CLOSE))
mptcp_tsq_flags(sk);
}
bh_unlock_sock(meta_sk);
#else
bh_lock_sock(sk);
if (!sock_owned_by_user(sk))
tcp_tsq_write(sk);
else if (!test_and_set_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
sock_hold(sk);
bh_unlock_sock(sk);
#endif
}
/*
* One tasklet per cpu tries to send more skbs.
@ -834,10 +920,19 @@ static void tcp_tasklet_func(unsigned long data)
}
}
#ifdef CONFIG_MPTCP
#define TCP_DEFERRED_ALL (TCPF_TSQ_DEFERRED | \
TCPF_WRITE_TIMER_DEFERRED | \
TCPF_DELACK_TIMER_DEFERRED | \
TCPF_MTU_REDUCED_DEFERRED | \
TCPF_PATH_MANAGER_DEFERRED |\
TCPF_SUB_DEFERRED)
#else
#define TCP_DEFERRED_ALL (TCPF_TSQ_DEFERRED | \
TCPF_WRITE_TIMER_DEFERRED | \
TCPF_DELACK_TIMER_DEFERRED | \
TCPF_MTU_REDUCED_DEFERRED)
#endif
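
TCP_DEFERRED_ALL gains two MPTCP bits, TCPF_PATH_MANAGER_DEFERRED and TCPF_SUB_DEFERRED, which tcp_release_cb() consumes exactly like the existing deferred-work bits: fetch-and-clear the whole mask once, then run whichever jobs were pending. A simplified userspace model, not kernel code (the bit positions below are illustrative, not the kernel's TCPF_* values):

/* deferred_demo.c -- model of the deferred-work bitmask handling in
 * tcp_release_cb(), extended with the two MPTCP bits.
 */
#include <stdatomic.h>
#include <stdio.h>

#define TSQ_DEFERRED          (1u << 0)
#define WRITE_TIMER_DEFERRED  (1u << 1)
#define DELACK_TIMER_DEFERRED (1u << 2)
#define MTU_REDUCED_DEFERRED  (1u << 3)
#define PATH_MANAGER_DEFERRED (1u << 4)  /* MPTCP-only bit */
#define SUB_DEFERRED          (1u << 5)  /* MPTCP-only bit */

#define DEFERRED_ALL (TSQ_DEFERRED | WRITE_TIMER_DEFERRED | \
		      DELACK_TIMER_DEFERRED | MTU_REDUCED_DEFERRED | \
		      PATH_MANAGER_DEFERRED | SUB_DEFERRED)

int main(void)
{
	_Atomic unsigned int tsq_flags = WRITE_TIMER_DEFERRED |
					 PATH_MANAGER_DEFERRED;

	/* release_sock() path: grab and clear all deferred bits at once. */
	unsigned int flags = atomic_fetch_and(&tsq_flags, ~DEFERRED_ALL);

	if (flags & WRITE_TIMER_DEFERRED)
		printf("run deferred retransmit-timer work\n");
	if (flags & PATH_MANAGER_DEFERRED)
		printf("run deferred MPTCP path-manager work\n");
	if (flags & SUB_DEFERRED)
		printf("flush deferred subflow work\n");
	return 0;
}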
/**
* tcp_release_cb - tcp release_sock() callback
* @sk: socket
@ -860,6 +955,10 @@ void tcp_release_cb(struct sock *sk)
if (flags & TCPF_TSQ_DEFERRED) {
tcp_tsq_write(sk);
__sock_put(sk);
#ifdef CONFIG_MPTCP
if (mptcp(tcp_sk(sk)))
tcp_tsq_write(mptcp_meta_sk(sk));
#endif
}
/* Here begins the tricky part :
* We are called from release_sock() with :
@ -884,6 +983,15 @@ void tcp_release_cb(struct sock *sk)
inet_csk(sk)->icsk_af_ops->mtu_reduced(sk);
__sock_put(sk);
}
#ifdef CONFIG_MPTCP
if (flags & TCPF_PATH_MANAGER_DEFERRED) {
if (tcp_sk(sk)->mpcb->pm_ops->release_sock)
tcp_sk(sk)->mpcb->pm_ops->release_sock(sk);
__sock_put(sk);
}
if (flags & TCPF_SUB_DEFERRED)
mptcp_tsq_sub_deferred(sk);
#endif
}
EXPORT_SYMBOL(tcp_release_cb);
@ -1004,7 +1112,10 @@ static bool tcp_pacing_check(const struct sock *sk)
hrtimer_is_queued(&tcp_sk(sk)->pacing_timer);
}
static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb)
#ifndef CONFIG_MPTCP
static
#endif
void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb)
{
skb->skb_mstamp = tp->tcp_mstamp;
list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
@ -1115,11 +1226,18 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
th->urg = 1;
}
}
#ifdef CONFIG_MPTCP
tcp_options_write((__be32 *)(th + 1), tp, &opts, skb);
#else
tcp_options_write((__be32 *)(th + 1), tp, &opts);
#endif
skb_shinfo(skb)->gso_type = sk->sk_gso_type;
if (likely(!(tcb->tcp_flags & TCPHDR_SYN))) {
#ifdef CONFIG_MPTCP
th->window = htons(tp->ops->select_window(sk));
#else
th->window = htons(tcp_select_window(sk));
#endif
tcp_ecn_send(sk, skb, th, tcp_header_size);
} else {
/* RFC1323: The window in SYN & SYN/ACK segments
@ -1177,7 +1295,10 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
return err;
}
static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
#ifndef CONFIG_MPTCP
static
#endif
int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
gfp_t gfp_mask)
{
return __tcp_transmit_skb(sk, skb, clone_it, gfp_mask,
@ -1189,7 +1310,10 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
* NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames,
* otherwise socket can stall.
*/
static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
#ifndef CONFIG_MPTCP
static
#endif
void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
@ -1202,7 +1326,10 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
}
/* Initialize TSO segments for a packet. */
static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
#ifndef CONFIG_MPTCP
static
#endif
void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
{
if (skb->len <= mss_now) {
/* Avoid the costly divide in the normal
@ -1219,7 +1346,10 @@ static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
/* Pcount in the middle of the write queue got changed, we need to do various
* tweaks to fix counters
*/
static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr)
#ifndef CONFIG_MPTCP
static
#endif
void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr)
{
struct tcp_sock *tp = tcp_sk(sk);
@ -1387,7 +1517,10 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
/* This is similar to __pskb_pull_tail(). The difference is that pulled
* data is not copied, but immediately discarded.
*/
static int __pskb_trim_head(struct sk_buff *skb, int len)
#ifndef CONFIG_MPTCP
static
#endif
int __pskb_trim_head(struct sk_buff *skb, int len)
{
struct skb_shared_info *shinfo;
int i, k, eat;
@ -1611,6 +1744,10 @@ unsigned int tcp_current_mss(struct sock *sk)
return mss_now;
}
#ifdef CONFIG_MPTCP
EXPORT_SYMBOL(tcp_current_mss);
#endif
/* RFC2861, slow part. Adjust cwnd, after it was not full during one rto.
* As additional protections, we do not touch cwnd in retransmission phases,
* and if application hit its sndbuf limit recently.
@ -1633,7 +1770,10 @@ static void tcp_cwnd_application_limited(struct sock *sk)
tp->snd_cwnd_stamp = tcp_jiffies32;
}
static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
#ifndef CONFIG_MPTCP
static
#endif
void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
{
const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
struct tcp_sock *tp = tcp_sk(sk);
@ -1697,7 +1837,10 @@ static bool tcp_minshall_check(const struct tcp_sock *tp)
* But we can avoid doing the divide again given we already have
* skb_pcount = skb->len / mss_now
*/
static void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now,
#ifndef CONFIG_MPTCP
static
#endif
void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now,
const struct sk_buff *skb)
{
if (skb->len < tcp_skb_pcount(skb) * mss_now)
@ -1757,7 +1900,10 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
}
/* Returns the portion of skb which can be sent right away */
static unsigned int tcp_mss_split_point(const struct sock *sk,
#ifndef CONFIG_MPTCP
static
#endif
unsigned int tcp_mss_split_point(const struct sock *sk,
const struct sk_buff *skb,
unsigned int mss_now,
unsigned int max_segs,
@ -1791,13 +1937,20 @@ static unsigned int tcp_mss_split_point(const struct sock *sk,
/* Can at least one segment of SKB be sent right now, according to the
* congestion window rules? If so, return how many segments are allowed.
*/
static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
#ifndef CONFIG_MPTCP
static inline
#endif
unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
const struct sk_buff *skb)
{
u32 in_flight, cwnd, halfcwnd;
/* Don't be strict about the congestion window for the final FIN. */
if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
if (
#ifdef CONFIG_MPTCP
skb &&
#endif
(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
tcp_skb_pcount(skb) == 1)
return 1;
@ -1812,12 +1965,18 @@ static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
halfcwnd = max(cwnd >> 1, 1U);
return min(halfcwnd, cwnd - in_flight);
}
#ifdef CONFIG_MPTCP
EXPORT_SYMBOL(tcp_cwnd_test);
#endif
/* Initialize TSO state of a skb.
* This must be invoked the first time we consider transmitting
* SKB onto the wire.
*/
static int tcp_init_tso_segs(struct sk_buff *skb, unsigned int mss_now)
#ifndef CONFIG_MPTCP
static
#endif
int tcp_init_tso_segs(struct sk_buff *skb, unsigned int mss_now)
{
int tso_segs = tcp_skb_pcount(skb);
@ -1832,7 +1991,10 @@ static int tcp_init_tso_segs(struct sk_buff *skb, unsigned int mss_now)
/* Return true if the Nagle test allows this packet to be
* sent now.
*/
static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb,
#ifndef CONFIG_MPTCP
static inline
#endif
bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb,
unsigned int cur_mss, int nonagle)
{
/* Nagle rule does not apply to frames, which sit in the middle of the
@ -1845,7 +2007,11 @@ static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buf
return true;
/* Don't use the nagle rule for urgent data (or for the final FIN). */
if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
#ifdef CONFIG_MPTCP
|| mptcp_is_data_fin(skb)
#endif
)
return true;
if (!tcp_nagle_check(skb->len < cur_mss, tp, nonagle))
@ -1855,7 +2021,10 @@ static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buf
}
/* Does at least the first segment of SKB fit into the send window? */
static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
#ifndef CONFIG_MPTCP
static
#endif
bool tcp_snd_wnd_test(const struct tcp_sock *tp,
const struct sk_buff *skb,
unsigned int cur_mss)
{
@ -1866,6 +2035,9 @@ static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
return !after(end_seq, tcp_wnd_end(tp));
}
#ifdef CONFIG_MPTCP
EXPORT_SYMBOL(tcp_snd_wnd_test);
#endif
/* Trim TSO SKB to LEN bytes, put the remaining data into a new packet
* which is put after SKB on the list. It is very much like
@ -2017,8 +2189,12 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
}
}
#ifdef CONFIG_MPTCP
/* If this packet won't get more data, do not wait. */
if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) || mptcp_is_data_fin(skb))
#else
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
#endif
goto send_now;
return true;
@ -2322,7 +2498,10 @@ void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type)
* Returns true, if no segments are in flight and we have queued segments,
* but cannot send anything now because of SWS or another problem.
*/
static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
#ifndef CONFIG_MPTCP
static
#endif
bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
int push_one, gfp_t gfp)
{
struct tcp_sock *tp = tcp_sk(sk);
@ -2336,7 +2515,16 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
sent_pkts = 0;
tcp_mstamp_refresh(tp);
if (!push_one) {
/* pmtu not yet supported with MPTCP. Should be possible, by early
* exiting the loop inside tcp_mtu_probe, making sure that only one
* single DSS-mapping gets probed.
*/
if (!push_one
#ifdef CONFIG_MPTCP
&& !mptcp(tp)
#endif
) {
/* Do MTU probing. */
result = tcp_mtu_probe(sk);
if (!result) {
@ -2435,7 +2623,12 @@ repair:
is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd);
if (likely(sent_pkts || is_cwnd_limited))
#ifdef CONFIG_MPTCP
if (tp->ops->cwnd_validate)
tp->ops->cwnd_validate(sk, is_cwnd_limited);
#else
tcp_cwnd_validate(sk, is_cwnd_limited);
#endif
if (likely(sent_pkts)) {
if (tcp_in_cwnd_reduction(sk))
@ -2531,7 +2724,11 @@ void tcp_send_loss_probe(struct sock *sk)
skb = tcp_send_head(sk);
if (skb && tcp_snd_wnd_test(tp, skb, mss)) {
pcount = tp->packets_out;
#ifdef CONFIG_MPTCP
tp->ops->write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
#else
tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
#endif
if (tp->packets_out > pcount)
goto probe_sent;
goto rearm_timer;
@ -2593,8 +2790,13 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
if (unlikely(sk->sk_state == TCP_CLOSE))
return;
#ifdef CONFIG_MPTCP
if (tcp_sk(sk)->ops->write_xmit(sk, cur_mss, nonagle, 0,
sk_gfp_mask(sk, GFP_ATOMIC)))
#else
if (tcp_write_xmit(sk, cur_mss, nonagle, 0,
sk_gfp_mask(sk, GFP_ATOMIC)))
#endif
tcp_check_probe_timer(sk);
}
@ -2607,7 +2809,12 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
BUG_ON(!skb || skb->len < mss_now);
#ifdef CONFIG_MPTCP
tcp_sk(sk)->ops->write_xmit(sk, mss_now, TCP_NAGLE_PUSH, 1,
sk->sk_allocation);
#else
tcp_write_xmit(sk, mss_now, TCP_NAGLE_PUSH, 1, sk->sk_allocation);
#endif
}
/* This function returns the amount that we can raise the
@ -2829,6 +3036,11 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
return;
#ifdef CONFIG_MPTCP
/* Currently not supported for MPTCP - but it should be possible */
if (mptcp(tp))
return;
#endif
skb_rbtree_walk_from_safe(skb, tmp) {
if (!tcp_can_collapse(sk, skb))
break;
@ -3308,7 +3520,11 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
th->window = htons(min(req->rsk_rcv_wnd, 65535U));
#ifdef CONFIG_MPTCP
tcp_options_write((__be32 *)(th + 1), NULL, &opts, skb);
#else
tcp_options_write((__be32 *)(th + 1), NULL, &opts);
#endif
th->doff = (tcp_header_size >> 2);
__TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
@ -3389,6 +3605,15 @@ static void tcp_connect_init(struct sock *sk)
if (rcv_wnd == 0)
rcv_wnd = dst_metric(dst, RTAX_INITRWND);
#ifdef CONFIG_MPTCP
tp->ops->select_initial_window(sk, tcp_full_space(sk),
tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
&tp->rcv_wnd,
&tp->window_clamp,
sock_net(sk)->ipv4.sysctl_tcp_window_scaling,
&rcv_wscale,
rcv_wnd);
#else
tcp_select_initial_window(sk, tcp_full_space(sk),
tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
&tp->rcv_wnd,
@ -3396,6 +3621,7 @@ static void tcp_connect_init(struct sock *sk)
sock_net(sk)->ipv4.sysctl_tcp_window_scaling,
&rcv_wscale,
rcv_wnd);
#endif
tp->rx_opt.rcv_wscale = rcv_wscale;
tp->rcv_ssthresh = tp->rcv_wnd;
@ -3420,6 +3646,36 @@ static void tcp_connect_init(struct sock *sk)
inet_csk(sk)->icsk_rto = tcp_timeout_init(sk);
inet_csk(sk)->icsk_retransmits = 0;
tcp_clear_retrans(tp);
#ifdef CONFIG_MPTCP
if (sock_flag(sk, SOCK_MPTCP) && mptcp_doit(sk)) {
if (is_master_tp(tp)) {
tp->request_mptcp = 1;
mptcp_connect_init(sk);
} else if (tp->mptcp) {
struct inet_sock *inet = inet_sk(sk);
tp->mptcp->snt_isn = tp->write_seq;
tp->mptcp->init_rcv_wnd = tp->rcv_wnd;
/* Set nonce for new subflows */
if (sk->sk_family == AF_INET)
tp->mptcp->mptcp_loc_nonce = mptcp_v4_get_nonce(
inet->inet_saddr,
inet->inet_daddr,
inet->inet_sport,
inet->inet_dport);
#if IS_ENABLED(CONFIG_IPV6)
else
tp->mptcp->mptcp_loc_nonce = mptcp_v6_get_nonce(
inet6_sk(sk)->saddr.s6_addr32,
sk->sk_v6_daddr.s6_addr32,
inet->inet_sport,
inet->inet_dport);
#endif
}
}
#endif
}
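
For a joining subflow, tcp_connect_init() records a per-subflow nonce derived from the connection 4-tuple via mptcp_v4_get_nonce()/mptcp_v6_get_nonce(); the real helpers use a keyed hash with a boot-time secret. A conceptual stand-in, not the patch's implementation (demo_get_nonce and the mixing constant are invented for illustration):

/* nonce_demo.c -- conceptual sketch of deriving a per-subflow nonce
 * from the 4-tuple; the mixing function is a stand-in, not the kernel's.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t mix(uint32_t h, uint32_t v)
{
	h ^= v;
	h *= 0x9e3779b1u;            /* arbitrary odd constant */
	return (h << 13) | (h >> 19);
}

static uint32_t demo_get_nonce(uint32_t saddr, uint32_t daddr,
			       uint16_t sport, uint16_t dport,
			       uint32_t secret)
{
	uint32_t h = secret;

	h = mix(h, saddr);
	h = mix(h, daddr);
	h = mix(h, ((uint32_t)sport << 16) | dport);
	return h;
}

int main(void)
{
	/* Two subflows of the same connection (different source ports)
	 * end up with different nonces under the same secret. */
	printf("0x%08x\n", (unsigned)demo_get_nonce(0x0a000001, 0x0a000002,
						    40001, 443, 0x12345678));
	printf("0x%08x\n", (unsigned)demo_get_nonce(0x0a000001, 0x0a000002,
						    40002, 443, 0x12345678));
	return 0;
}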
static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
@ -3685,6 +3941,9 @@ void tcp_send_ack(struct sock *sk)
{
__tcp_send_ack(sk, tcp_sk(sk)->rcv_nxt);
}
#ifdef CONFIG_MPTCP
EXPORT_SYMBOL_GPL(tcp_send_ack);
#endif
/* This routine sends a packet with an out of date sequence
* number. It assumes the other end will try to ack it.
@ -3697,7 +3956,10 @@ void tcp_send_ack(struct sock *sk)
* one is with SEG.SEQ=SND.UNA to deliver urgent pointer, another is
* out-of-date with SND.UNA-1 to probe window.
*/
static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib)
#ifndef CONFIG_MPTCP
static
#endif
int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
@ -3784,7 +4046,11 @@ void tcp_send_probe0(struct sock *sk)
unsigned long probe_max;
int err;
#ifdef CONFIG_MPTCP
err = tp->ops->write_wakeup(sk, LINUX_MIB_TCPWINPROBE);
#else
err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE);
#endif
if (tp->packets_out || tcp_write_queue_empty(sk)) {
/* Cancel probe timer, if it is not required. */


@ -20,6 +20,9 @@
#include <linux/module.h>
#include <linux/gfp.h>
#ifdef CONFIG_MPTCP
#include <net/mptcp.h>
#endif
#include <net/tcp.h>
static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
@ -78,7 +81,10 @@ int tcp_use_userconfig_sysctl_handler(struct ctl_table *table, int write,
*
* Returns: Nothing (void)
*/
static void tcp_write_err(struct sock *sk)
#ifndef CONFIG_MPTCP
static
#endif
void tcp_write_err(struct sock *sk)
{
sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
sk->sk_error_report(sk);
@ -134,7 +140,11 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
(!tp->snd_wnd && !tp->packets_out))
do_reset = true;
if (do_reset)
#ifdef CONFIG_MPTCP
tp->ops->send_active_reset(sk, GFP_ATOMIC);
#else
tcp_send_active_reset(sk, GFP_ATOMIC);
#endif
tcp_done(sk);
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY);
return 1;
@ -219,7 +229,10 @@ static unsigned int tcp_model_timeout(struct sock *sk,
* after "boundary" unsuccessful, exponentially backed-off
* retransmissions with an initial RTO of TCP_RTO_MIN.
*/
static bool retransmits_timed_out(struct sock *sk,
#ifndef CONFIG_MPTCP
static
#endif
bool retransmits_timed_out(struct sock *sk,
unsigned int boundary,
unsigned int timeout)
{
@ -241,7 +254,10 @@ static bool retransmits_timed_out(struct sock *sk,
}
/* A write timeout has occurred. Process the after effects. */
static int tcp_write_timeout(struct sock *sk)
#ifndef CONFIG_MPTCP
static
#endif
int tcp_write_timeout(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
@ -256,6 +272,17 @@ static int tcp_write_timeout(struct sock *sk)
sk_rethink_txhash(sk);
}
retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
#ifdef CONFIG_MPTCP
/* Stop retransmitting MP_CAPABLE options in SYN if timed out. */
if (tcp_sk(sk)->request_mptcp &&
icsk->icsk_retransmits >= sysctl_mptcp_syn_retries) {
tcp_sk(sk)->request_mptcp = 0;
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLERETRANSFALLBACK);
}
#endif /* CONFIG_MPTCP */
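
The tcp_write_timeout() hunk above implements MPTCP's SYN fallback: once a SYN carrying MP_CAPABLE has been retransmitted sysctl_mptcp_syn_retries times, request_mptcp is cleared and the handshake continues as plain TCP. A toy model of that rule, not kernel code (the retry limit of 3 is an assumed example value):

/* syn_fallback_demo.c -- model of the MP_CAPABLE retransmission
 * fallback added to tcp_write_timeout().
 */
#include <stdbool.h>
#include <stdio.h>

struct conn {
	bool request_mptcp;
	int syn_retransmits;
};

static void on_syn_timeout(struct conn *c, int mptcp_syn_retries)
{
	c->syn_retransmits++;
	if (c->request_mptcp && c->syn_retransmits >= mptcp_syn_retries) {
		c->request_mptcp = false;   /* fall back to regular TCP */
		printf("MP_CAPABLE dropped after %d retries\n",
		       c->syn_retransmits);
	}
}

int main(void)
{
	struct conn c = { .request_mptcp = true };

	for (int i = 0; i < 4; i++)
		on_syn_timeout(&c, 3);      /* assumed retry limit */
	return 0;
}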
expired = icsk->icsk_retransmits >= retry_until;
} else {
if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1), 0)) {
@ -351,18 +378,36 @@ static void tcp_delack_timer(struct timer_list *t)
struct inet_connection_sock *icsk =
from_timer(icsk, t, icsk_delack_timer);
struct sock *sk = &icsk->icsk_inet.sk;
#ifdef CONFIG_MPTCP
struct tcp_sock *tp = tcp_sk(sk);
struct sock *meta_sk = mptcp(tp) ? mptcp_meta_sk(sk) : sk;
bh_lock_sock(meta_sk);
if (!sock_owned_by_user(meta_sk)) {
#else
bh_lock_sock(sk);
if (!sock_owned_by_user(sk)) {
#endif
tcp_delack_timer_handler(sk);
} else {
icsk->icsk_ack.blocked = 1;
#ifdef CONFIG_MPTCP
__NET_INC_STATS(sock_net(meta_sk), LINUX_MIB_DELAYEDACKLOCKED);
#else
__NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
#endif
/* delegate our work to tcp_release_cb() */
if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &sk->sk_tsq_flags))
sock_hold(sk);
#ifdef CONFIG_MPTCP
if (mptcp(tp))
mptcp_tsq_flags(sk);
}
bh_unlock_sock(meta_sk);
#else
}
bh_unlock_sock(sk);
#endif
sock_put(sk);
}
@ -406,7 +451,16 @@ static void tcp_probe_timer(struct sock *sk)
}
if (icsk->icsk_probes_out >= max_probes) {
#ifdef CONFIG_MPTCP
abort:
tcp_write_err(sk);
if (is_meta_sk(sk) &&
mptcp_in_infinite_mapping_weak(tp->mpcb)) {
mptcp_sub_force_close_all(tp->mpcb, NULL);
}
#else
abort: tcp_write_err(sk);
#endif
} else {
/* Only send another probe if we didn't close things up. */
tcp_send_probe0(sk);
@ -620,7 +674,11 @@ void tcp_write_timer_handler(struct sock *sk)
break;
case ICSK_TIME_RETRANS:
icsk->icsk_pending = 0;
#ifdef CONFIG_MPTCP
tcp_sk(sk)->ops->retransmit_timer(sk);
#else
tcp_retransmit_timer(sk);
#endif
break;
case ICSK_TIME_PROBE0:
icsk->icsk_pending = 0;
@ -637,16 +695,29 @@ static void tcp_write_timer(struct timer_list *t)
struct inet_connection_sock *icsk =
from_timer(icsk, t, icsk_retransmit_timer);
struct sock *sk = &icsk->icsk_inet.sk;
#ifdef CONFIG_MPTCP
struct sock *meta_sk = mptcp(tcp_sk(sk)) ? mptcp_meta_sk(sk) : sk;
bh_lock_sock(meta_sk);
if (!sock_owned_by_user(meta_sk)) {
#else
bh_lock_sock(sk);
if (!sock_owned_by_user(sk)) {
#endif
tcp_write_timer_handler(sk);
} else {
/* delegate our work to tcp_release_cb() */
if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &sk->sk_tsq_flags))
sock_hold(sk);
#ifdef CONFIG_MPTCP
if (mptcp(tcp_sk(sk)))
mptcp_tsq_flags(sk);
}
bh_unlock_sock(meta_sk);
#else
}
bh_unlock_sock(sk);
#endif
sock_put(sk);
}
@ -676,11 +747,19 @@ static void tcp_keepalive_timer (struct timer_list *t)
struct sock *sk = from_timer(sk, t, sk_timer);
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
#ifdef CONFIG_MPTCP
struct sock *meta_sk = mptcp(tp) ? mptcp_meta_sk(sk) : sk;
#endif
u32 elapsed;
/* Only process if socket is not in use. */
#ifdef CONFIG_MPTCP
bh_lock_sock(meta_sk);
if (sock_owned_by_user(meta_sk)) {
#else
bh_lock_sock(sk);
if (sock_owned_by_user(sk)) {
#endif
/* Try again later. */
inet_csk_reset_keepalive_timer (sk, HZ/20);
goto out;
@ -692,16 +771,39 @@ static void tcp_keepalive_timer (struct timer_list *t)
}
tcp_mstamp_refresh(tp);
#ifdef CONFIG_MPTCP
if (tp->send_mp_fclose) {
if (icsk->icsk_retransmits >= MPTCP_FASTCLOSE_RETRIES) {
tcp_write_err(sk);
goto out;
}
tcp_send_ack(sk);
icsk->icsk_retransmits++;
icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
elapsed = icsk->icsk_rto;
goto resched;
}
#endif
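
The keepalive-timer hunk above retransmits the MP_FASTCLOSE-bearing ACK with the usual exponential RTO backoff until MPTCP_FASTCLOSE_RETRIES attempts have failed, at which point the socket is torn down with tcp_write_err(). An illustration of that schedule, not kernel code (the initial RTO, the retry count and the 120 s cap are assumptions of this sketch):

/* fclose_backoff_demo.c -- retry schedule in the spirit of the
 * MP_FASTCLOSE handling in the keepalive timer.
 */
#include <stdio.h>

#define HZ                1000
#define TCP_RTO_MAX       (120 * HZ)
#define FASTCLOSE_RETRIES 3          /* assumed value */

int main(void)
{
	unsigned int rto = HZ / 5;      /* e.g. an initial 200 ms RTO */

	for (int attempt = 1; attempt <= FASTCLOSE_RETRIES; attempt++) {
		printf("attempt %d: wait %u ms, then resend FASTCLOSE ACK\n",
		       attempt, rto);
		rto <<= 1;
		if (rto > TCP_RTO_MAX)
			rto = TCP_RTO_MAX;
	}
	printf("give up: report write error on the socket\n");
	return 0;
}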
if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
if (tp->linger2 >= 0) {
const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN;
if (tmo > 0) {
#ifdef CONFIG_MPTCP
tp->ops->time_wait(sk, TCP_FIN_WAIT2, tmo);
#else
tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
#endif
goto out;
}
}
#ifdef CONFIG_MPTCP
tp->ops->send_active_reset(sk, GFP_ATOMIC);
#else
tcp_send_active_reset(sk, GFP_ATOMIC);
#endif
goto death;
}
@ -726,11 +828,20 @@ static void tcp_keepalive_timer (struct timer_list *t)
icsk->icsk_probes_out > 0) ||
(icsk->icsk_user_timeout == 0 &&
icsk->icsk_probes_out >= keepalive_probes(tp))) {
#ifdef CONFIG_MPTCP
tp->ops->send_active_reset(sk, GFP_ATOMIC);
#else
tcp_send_active_reset(sk, GFP_ATOMIC);
#endif
tcp_write_err(sk);
goto out;
}
#ifdef CONFIG_MPTCP
if (tp->ops->write_wakeup(sk, LINUX_MIB_TCPKEEPALIVE) <= 0) {
#else
if (tcp_write_wakeup(sk, LINUX_MIB_TCPKEEPALIVE) <= 0) {
#endif
icsk->icsk_probes_out++;
elapsed = keepalive_intvl_when(tp);
} else {
@ -754,7 +865,11 @@ death:
tcp_done(sk);
out:
#ifdef CONFIG_MPTCP
bh_unlock_sock(meta_sk);
#else
bh_unlock_sock(sk);
#endif
sock_put(sk);
}


@ -931,6 +931,10 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
kfree_rcu(ifp, rcu);
}
#ifdef CONFIG_MPTCP
EXPORT_SYMBOL(inet6_ifa_finish_destroy);
#endif
static void
ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp)
{


@ -121,7 +121,10 @@ static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}
static int inet6_create(struct net *net, struct socket *sock, int protocol,
#ifndef CONFIG_MPTCP
static
#endif
int inet6_create(struct net *net, struct socket *sock, int protocol,
int kern)
{
struct inet_sock *inet;


@ -48,6 +48,10 @@
#include <net/addrconf.h>
#include <net/inet_common.h>
#include <net/tcp.h>
#ifdef CONFIG_MPTCP
#include <net/mptcp.h>
#include <net/mptcp_v4.h>
#endif
#include <net/udp.h>
#include <net/udplite.h>
#include <net/xfrm.h>
@ -68,6 +72,10 @@ int ip6_ra_control(struct sock *sk, int sel)
return -ENOPROTOOPT;
new_ra = (sel >= 0) ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
#ifdef CONFIG_MPTCP
if (sel >= 0 && !new_ra)
return -ENOMEM;
#endif
write_lock_bh(&ip6_ra_lock);
for (rap = &ip6_ra_chain; (ra = *rap) != NULL; rap = &ra->next) {
@ -223,6 +231,11 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
sock_prot_inuse_add(net, &tcp_prot, 1);
local_bh_enable();
sk->sk_prot = &tcp_prot;
#ifdef CONFIG_MPTCP
if (sock_flag(sk, SOCK_MPTCP))
icsk->icsk_af_ops = &mptcp_v4_specific;
else
#endif
icsk->icsk_af_ops = &ipv4_specific;
sk->sk_socket->ops = &inet_stream_ops;
sk->sk_family = PF_INET;


@ -20,6 +20,10 @@
#include <linux/kernel.h>
#include <net/secure_seq.h>
#include <net/ipv6.h>
#ifdef CONFIG_MPTCP
#include <net/mptcp.h>
#include <net/mptcp_v6.h>
#endif
#include <net/tcp.h>
#define COOKIEBITS 24 /* Upper bits store count */
@ -111,7 +115,12 @@ u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph,
}
EXPORT_SYMBOL_GPL(__cookie_v6_init_sequence);
#ifdef CONFIG_MPTCP
__u32 cookie_v6_init_sequence(struct request_sock *req, const struct sock *sk,
const struct sk_buff *skb, __u16 *mssp)
#else
__u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mssp)
#endif
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
const struct tcphdr *th = tcp_hdr(skb);
@ -133,6 +142,9 @@ EXPORT_SYMBOL_GPL(__cookie_v6_check);
struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
{
struct tcp_options_received tcp_opt;
#ifdef CONFIG_MPTCP
struct mptcp_options_received mopt;
#endif
struct inet_request_sock *ireq;
struct tcp_request_sock *treq;
struct ipv6_pinfo *np = inet6_sk(sk);
@ -162,7 +174,12 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
/* check for timestamp cookie support */
memset(&tcp_opt, 0, sizeof(tcp_opt));
#ifdef CONFIG_MPTCP
mptcp_init_mp_opt(&mopt);
tcp_parse_options(sock_net(sk), skb, &tcp_opt, &mopt, 0, NULL, NULL);
#else
tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL);
#endif
if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
tsoff = secure_tcpv6_ts_off(sock_net(sk),
@ -175,15 +192,32 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
goto out;
ret = NULL;
#ifdef CONFIG_MPTCP
if (mopt.saw_mpc)
req = inet_reqsk_alloc(&mptcp6_request_sock_ops, sk, false);
else
#endif
req = inet_reqsk_alloc(&tcp6_request_sock_ops, sk, false);
if (!req)
goto out;
ireq = inet_rsk(req);
#ifdef CONFIG_MPTCP
ireq->mptcp_rqsk = 0;
ireq->saw_mpc = 0;
#endif
treq = tcp_rsk(req);
treq->af_specific = &tcp_request_sock_ipv6_ops;
treq->tfo_listener = false;
#ifdef CONFIG_MPTCP
/* Must be done before anything else, as it initializes
* hash_entry of the MPTCP request-sock.
*/
if (mopt.saw_mpc)
mptcp_cookies_reqsk_init(req, &mopt, skb);
#endif
if (security_inet_conn_request(sk, skb, req))
goto out_free;
@ -253,10 +287,17 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
(req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0))
req->rsk_window_clamp = full_space;
#ifdef CONFIG_MPTCP
tp->ops->select_initial_window(sk, full_space, req->mss,
&req->rsk_rcv_wnd, &req->rsk_window_clamp,
ireq->wscale_ok, &rcv_wscale,
dst_metric(dst, RTAX_INITRWND));
#else
tcp_select_initial_window(sk, full_space, req->mss,
&req->rsk_rcv_wnd, &req->rsk_window_clamp,
ireq->wscale_ok, &rcv_wscale,
dst_metric(dst, RTAX_INITRWND));
#endif
ireq->rcv_wscale = rcv_wscale;
ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), dst);


@ -61,6 +61,10 @@
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#ifdef CONFIG_MPTCP
#include <net/mptcp.h>
#include <net/mptcp_v6.h>
#endif
#include <net/busy_poll.h>
#include <linux/proc_fs.h>
@ -71,6 +75,7 @@
#include <trace/events/tcp.h>
#ifndef CONFIG_MPTCP
static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req);
@ -79,6 +84,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
static const struct inet_connection_sock_af_ops ipv6_mapped;
static const struct inet_connection_sock_af_ops ipv6_specific;
#endif
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
@ -90,7 +96,10 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
}
#endif
static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
#ifndef CONFIG_MPTCP
static
#endif
void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
@ -132,7 +141,10 @@ static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
}
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
#ifndef CONFIG_MPTCP
static
#endif
int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
int addr_len)
{
struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
@ -229,6 +241,11 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
sin.sin_port = usin->sin6_port;
sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
#ifdef CONFIG_MPTCP
if (sock_flag(sk, SOCK_MPTCP))
icsk->icsk_af_ops = &mptcp_v6_mapped;
else
#endif
icsk->icsk_af_ops = &ipv6_mapped;
sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
@ -239,6 +256,11 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
if (err) {
icsk->icsk_ext_hdr_len = exthdrlen;
#ifdef CONFIG_MPTCP
if (sock_flag(sk, SOCK_MPTCP))
icsk->icsk_af_ops = &mptcp_v6_specific;
else
#endif
icsk->icsk_af_ops = &ipv6_specific;
sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
@ -333,7 +355,10 @@ failure:
return err;
}
static void tcp_v6_mtu_reduced(struct sock *sk)
#ifndef CONFIG_MPTCP
static
#endif
void tcp_v6_mtu_reduced(struct sock *sk)
{
struct dst_entry *dst;
u32 mtu;
@ -370,6 +395,9 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct tcp_sock *tp;
__u32 seq, snd_una;
struct sock *sk;
#ifdef CONFIG_MPTCP
struct sock *meta_sk;
#endif
bool fatal;
int err;
@ -393,8 +421,19 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (sk->sk_state == TCP_NEW_SYN_RECV)
return tcp_req_err(sk, seq, fatal);
#ifdef CONFIG_MPTCP
tp = tcp_sk(sk);
if (mptcp(tp))
meta_sk = mptcp_meta_sk(sk);
else
meta_sk = sk;
bh_lock_sock(meta_sk);
if (sock_owned_by_user(meta_sk) && type != ICMPV6_PKT_TOOBIG)
#else
bh_lock_sock(sk);
if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
#endif
__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
if (sk->sk_state == TCP_CLOSE)
@ -405,7 +444,9 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
goto out;
}
#ifndef CONFIG_MPTCP
tp = tcp_sk(sk);
#endif
/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
fastopen = tp->fastopen_rsk;
snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
@ -445,11 +486,27 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
WRITE_ONCE(tp->mtu_info, mtu);
#ifdef CONFIG_MPTCP
if (!sock_owned_by_user(meta_sk)) {
#else
if (!sock_owned_by_user(sk))
#endif
tcp_v6_mtu_reduced(sk);
#ifdef CONFIG_MPTCP
} else {
if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
&sk->sk_tsq_flags))
sock_hold(sk);
if (mptcp(tp))
mptcp_tsq_flags(sk);
}
#else
else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
&sk->sk_tsq_flags))
sock_hold(sk);
#endif
goto out;
}
@ -463,8 +520,11 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
*/
if (fastopen && !fastopen->sk)
break;
#ifdef CONFIG_MPTCP
if (!sock_owned_by_user(meta_sk)) {
#else
if (!sock_owned_by_user(sk)) {
#endif
sk->sk_err = err;
sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
@ -474,14 +534,22 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
goto out;
}
#ifdef CONFIG_MPTCP
if (!sock_owned_by_user(meta_sk) && np->recverr) {
#else
if (!sock_owned_by_user(sk) && np->recverr) {
#endif
sk->sk_err = err;
sk->sk_error_report(sk);
} else
sk->sk_err_soft = err;
out:
#ifdef CONFIG_MPTCP
bh_unlock_sock(meta_sk);
#else
bh_unlock_sock(sk);
#endif
sock_put(sk);
}
@ -529,7 +597,10 @@ done:
}
static void tcp_v6_reqsk_destructor(struct request_sock *req)
#ifndef CONFIG_MPTCP
static
#endif
void tcp_v6_reqsk_destructor(struct request_sock *req)
{
kfree(inet_rsk(req)->ipv6_opt);
kfree_skb(inet_rsk(req)->pktopts);
@ -747,9 +818,14 @@ static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
return false;
}
#ifdef CONFIG_MPTCP
static int tcp_v6_init_req(struct request_sock *req, const struct sock *sk_listener,
struct sk_buff *skb, bool want_cookie)
#else
static void tcp_v6_init_req(struct request_sock *req,
const struct sock *sk_listener,
struct sk_buff *skb)
#endif
{
struct inet_request_sock *ireq = inet_rsk(req);
const struct ipv6_pinfo *np = inet6_sk(sk_listener);
@ -770,6 +846,9 @@ static void tcp_v6_init_req(struct request_sock *req,
refcount_inc(&skb->users);
ireq->pktopts = skb;
}
#ifdef CONFIG_MPTCP
return 0;
#endif
}
static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
@ -789,6 +868,9 @@ struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
.syn_ack_timeout = tcp_syn_ack_timeout,
};
#ifndef CONFIG_MPTCP
static
#endif
const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
sizeof(struct ipv6hdr),
@ -806,10 +888,17 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
.send_synack = tcp_v6_send_synack,
};
#ifdef CONFIG_MPTCP
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
u32 ack, u32 data_ack, u32 win, u32 tsval, u32 tsecr,
int oif, struct tcp_md5sig_key *key, int rst,
u8 tclass, __be32 label, int mptcp)
#else
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
u32 ack, u32 win, u32 tsval, u32 tsecr,
int oif, struct tcp_md5sig_key *key, int rst,
u8 tclass, __be32 label)
#endif
{
const struct tcphdr *th = tcp_hdr(skb);
struct tcphdr *t1;
@ -828,7 +917,10 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
if (key)
tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif
#ifdef CONFIG_MPTCP
if (mptcp)
tot_len += MPTCP_SUB_LEN_DSS + MPTCP_SUB_LEN_ACK;
#endif
buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
GFP_ATOMIC);
if (!buff)
@ -866,6 +958,19 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
tcp_v6_md5_hash_hdr((__u8 *)topt, key,
&ipv6_hdr(skb)->saddr,
&ipv6_hdr(skb)->daddr, t1);
#ifdef CONFIG_MPTCP
topt += 4;
#endif
}
#endif
#ifdef CONFIG_MPTCP
if (mptcp) {
/* Construction of 32-bit data_ack */
*topt++ = htonl((TCPOPT_MPTCP << 24) |
((MPTCP_SUB_LEN_DSS + MPTCP_SUB_LEN_ACK) << 16) |
(0x20 << 8) |
(0x01));
*topt++ = htonl(data_ack);
}
#endif
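
The DATA_ACK-only option appended in tcp_v6_send_response() is two 32-bit words: a header word packing the option kind, the length, the DSS subtype nibble and the 'A' (data ACK present) flag, followed by the 32-bit data_ack itself. A standalone check of the header-word packing, not kernel code (the kind/length constants are the usual MPTCP v0 values and are stated here as assumptions, not copied from the patch):

/* dss_ack_demo.c -- packing of the header word built for a bare DATA_ACK. */
#include <stdint.h>
#include <stdio.h>

#define TCPOPT_MPTCP       30  /* assumed option kind for MPTCP */
#define MPTCP_SUB_LEN_DSS   4  /* assumed: DSS header portion */
#define MPTCP_SUB_LEN_ACK   4  /* assumed: 32-bit data ACK portion */

int main(void)
{
	uint32_t word = ((uint32_t)TCPOPT_MPTCP << 24) |
			((MPTCP_SUB_LEN_DSS + MPTCP_SUB_LEN_ACK) << 16) |
			(0x20 << 8) |   /* subtype DSS, reserved bits zero */
			0x01;           /* 'A' flag: 4-byte data ACK follows */

	printf("option word = 0x%08x\n", (unsigned)word); /* 0x1e082001 */
	printf("kind=%u len=%u subtype=%u flags=0x%02x\n",
	       (unsigned)(word >> 24), (unsigned)((word >> 16) & 0xff),
	       (unsigned)((word >> 12) & 0xf), (unsigned)(word & 0xff));
	return 0;
}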
@ -915,7 +1020,10 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
kfree_skb(buff);
}
static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
#ifndef CONFIG_MPTCP
static
#endif
void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
const struct tcphdr *th = tcp_hdr(skb);
u32 seq = 0, ack_seq = 0;
@ -983,7 +1091,11 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
trace_tcp_send_reset(sk, skb);
}
#ifdef CONFIG_MPTCP
tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, 0, oif, key, 1, 0, 0, 0);
#else
tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
#endif
#ifdef CONFIG_TCP_MD5SIG
out:
@ -991,6 +1103,16 @@ out:
#endif
}
#ifdef CONFIG_MPTCP
static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
u32 ack, u32 data_ack, u32 win, u32 tsval, u32 tsecr, int oif,
struct tcp_md5sig_key *key, u8 tclass,
__be32 label, int mptcp)
{
tcp_v6_send_response(sk, skb, seq, ack, data_ack, win, tsval, tsecr, oif,
key, 0, tclass, label, mptcp);
}
#else
static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
struct tcp_md5sig_key *key, u8 tclass,
@ -999,22 +1121,42 @@ static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
tclass, label);
}
#endif
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
struct inet_timewait_sock *tw = inet_twsk(sk);
struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
#ifdef CONFIG_MPTCP
u32 data_ack = 0;
int mptcp = 0;
if (tcptw->mptcp_tw) {
data_ack = (u32)tcptw->mptcp_tw->rcv_nxt;
mptcp = 1;
}
tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
data_ack,
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_time_stamp_raw() + tcptw->tw_ts_offset,
tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), mptcp);
#else
tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_time_stamp_raw() + tcptw->tw_ts_offset,
tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel));
#endif
inet_twsk_put(tw);
}
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
#ifndef CONFIG_MPTCP
static
#endif
void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req)
{
/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
@ -1025,6 +1167,17 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
* exception of <SYN> segments, MUST be right-shifted by
* Rcv.Wind.Shift bits:
*/
#ifdef CONFIG_MPTCP
tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN || is_meta_sk(sk)) ?
tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
tcp_rsk(req)->rcv_nxt, 0,
req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
req->ts_recent, sk->sk_bound_dev_if,
tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
0, 0, 0);
#else
tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
tcp_rsk(req)->rcv_nxt,
@ -1033,10 +1186,14 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
req->ts_recent, sk->sk_bound_dev_if,
tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
0, 0);
#endif
}
static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
#ifndef CONFIG_MPTCP
static
#endif
struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
const struct tcphdr *th = tcp_hdr(skb);
@ -1047,7 +1204,10 @@ static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
return sk;
}
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
#ifndef CONFIG_MPTCP
static
#endif
int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
if (skb->protocol == htons(ETH_P_IP))
return tcp_v4_conn_request(sk, skb);
@ -1078,7 +1238,10 @@ static void tcp_v6_restore_cb(struct sk_buff *skb)
sizeof(struct inet6_skb_parm));
}
static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
#ifndef CONFIG_MPTCP
static
#endif
struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
struct dst_entry *dst,
struct request_sock *req_unhash,
@ -1120,6 +1283,14 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
newnp->saddr = newsk->sk_v6_rcv_saddr;
#ifdef CONFIG_MPTCP
/* We must check on the request-socket because the listener
* socket's flag may have been changed halfway through.
*/
if (!inet_rsk(req)->saw_mpc)
inet_csk(newsk)->icsk_af_ops = &mptcp_v6_mapped;
else
#endif
inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
@ -1167,6 +1338,14 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
if (!newsk)
goto out_nonewsk;
#ifdef CONFIG_MPTCP
/* If the meta_sk is v6-mapped we can end up here with the wrong af_ops.
* Just make sure that this subflow is v6.
*/
if (is_meta_sk(sk))
inet_csk(newsk)->icsk_af_ops = &mptcp_v6_specific;
#endif
/*
* No need to charge this sock to the relevant IPv6 refcnt debug socks
* count here, tcp_create_openreq_child now does this for us, see the
@ -1305,7 +1484,10 @@ out:
* This is because we cannot sleep with the original spinlock
* held.
*/
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
#ifndef CONFIG_MPTCP
static
#endif
int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct tcp_sock *tp;
@ -1321,6 +1503,11 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
if (skb->protocol == htons(ETH_P_IP))
return tcp_v4_do_rcv(sk, skb);
#ifdef CONFIG_MPTCP
if (is_meta_sk(sk))
return mptcp_v6_do_rcv(sk, skb);
#endif
/*
* socket locking is here for SMP purposes as backlog rcv
@ -1452,6 +1639,10 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
skb->len - th->doff*4);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
#ifdef CONFIG_MPTCP
TCP_SKB_CB(skb)->mptcp_flags = 0;
TCP_SKB_CB(skb)->dss_off = 0;
#endif
TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
TCP_SKB_CB(skb)->tcp_tw_isn = 0;
TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
@ -1467,6 +1658,9 @@ static int tcp_v6_rcv(struct sk_buff *skb)
const struct ipv6hdr *hdr;
bool refcounted;
struct sock *sk;
#ifdef CONFIG_MPTCP
struct sock *meta_sk = NULL;
#endif
int ret;
struct net *net = dev_net(skb->dev);
@ -1520,10 +1714,20 @@ process:
reqsk_put(req);
goto csum_error;
}
if (unlikely(sk->sk_state != TCP_LISTEN)) {
if (unlikely(sk->sk_state != TCP_LISTEN
#ifdef CONFIG_MPTCP
&& !is_meta_sk(sk)
#endif
)) {
inet_csk_reqsk_queue_drop_and_put(sk, req);
goto lookup;
}
#ifdef CONFIG_MPTCP
if (unlikely(is_meta_sk(sk) && !mptcp_can_new_subflow(sk))) {
inet_csk_reqsk_queue_drop_and_put(sk, req);
goto lookup;
}
#endif
sock_hold(sk);
refcounted = true;
nsk = NULL;
@ -1583,16 +1787,42 @@ process:
}
sk_incoming_cpu_update(sk);
#ifdef CONFIG_MPTCP
if (mptcp(tcp_sk(sk))) {
meta_sk = mptcp_meta_sk(sk);
bh_lock_sock_nested(meta_sk);
if (sock_owned_by_user(meta_sk))
mptcp_prepare_for_backlog(sk, skb);
} else {
meta_sk = sk;
#endif
bh_lock_sock_nested(sk);
#ifdef CONFIG_MPTCP
}
#endif
tcp_segs_in(tcp_sk(sk), skb);
ret = 0;
#ifdef CONFIG_MPTCP
if (!sock_owned_by_user(meta_sk)) {
#else
if (!sock_owned_by_user(sk)) {
#endif
ret = tcp_v6_do_rcv(sk, skb);
#ifdef CONFIG_MPTCP
} else if (tcp_add_backlog(meta_sk, skb)) {
#else
} else if (tcp_add_backlog(sk, skb)) {
#endif
goto discard_and_relse;
}
#ifdef CONFIG_MPTCP
bh_unlock_sock(meta_sk);
#else
bh_unlock_sock(sk);
#endif
put_and_return:
if (refcounted)
@ -1605,6 +1835,19 @@ no_tcp_socket:
tcp_v6_fill_cb(skb, hdr, th);
#ifdef CONFIG_MPTCP
if (!sk && th->syn && !th->ack) {
int ret = mptcp_lookup_join(skb, NULL);
if (ret < 0) {
tcp_v6_send_reset(NULL, skb);
goto discard_it;
} else if (ret > 0) {
return 0;
}
}
#endif
if (tcp_checksum_complete(skb)) {
csum_error:
__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
@ -1657,6 +1900,18 @@ do_time_wait:
refcounted = false;
goto process;
}
#ifdef CONFIG_MPTCP
if (th->syn && !th->ack) {
int ret = mptcp_lookup_join(skb, inet_twsk(sk));
if (ret < 0) {
tcp_v6_send_reset(NULL, skb);
goto discard_it;
} else if (ret > 0) {
return 0;
}
}
#endif
}
/* to ACK */
/* fall through */
@ -1711,13 +1966,19 @@ static void tcp_v6_early_demux(struct sk_buff *skb)
}
}
static struct timewait_sock_ops tcp6_timewait_sock_ops = {
#ifndef CONFIG_MPTCP
static
#endif
struct timewait_sock_ops tcp6_timewait_sock_ops = {
.twsk_obj_size = sizeof(struct tcp6_timewait_sock),
.twsk_unique = tcp_twsk_unique,
.twsk_destructor = tcp_twsk_destructor,
};
static const struct inet_connection_sock_af_ops ipv6_specific = {
#ifndef CONFIG_MPTCP
static
#endif
const struct inet_connection_sock_af_ops ipv6_specific = {
.queue_xmit = inet6_csk_xmit,
.send_check = tcp_v6_send_check,
.rebuild_header = inet6_sk_rebuild_header,
@ -1748,7 +2009,10 @@ static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
/*
* TCP over IPv4 via INET6 API
*/
static const struct inet_connection_sock_af_ops ipv6_mapped = {
#ifndef CONFIG_MPTCP
static
#endif
const struct inet_connection_sock_af_ops ipv6_mapped = {
.queue_xmit = ip_queue_xmit,
.send_check = tcp_v4_send_check,
.rebuild_header = inet_sk_rebuild_header,
@ -1784,6 +2048,11 @@ static int tcp_v6_init_sock(struct sock *sk)
tcp_init_sock(sk);
#ifdef CONFIG_MPTCP
if (sock_flag(sk, SOCK_MPTCP))
icsk->icsk_af_ops = &mptcp_v6_specific;
else
#endif
icsk->icsk_af_ops = &ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
@ -1793,7 +2062,10 @@ static int tcp_v6_init_sock(struct sock *sk)
return 0;
}
static void tcp_v6_destroy_sock(struct sock *sk)
#ifndef CONFIG_MPTCP
static
#endif
void tcp_v6_destroy_sock(struct sock *sk)
{
tcp_v4_destroy_sock(sk);
inet6_destroy_sock(sk);
@ -2020,6 +2292,11 @@ struct proto tcpv6_prot = {
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
.max_header = MAX_TCP_HEADER,
.obj_size = sizeof(struct tcp6_sock),
#ifdef CONFIG_MPTCP
.useroffset = offsetof(struct tcp_sock, mptcp_sched_name),
.usersize = sizeof_field(struct tcp_sock, mptcp_sched_name) +
sizeof_field(struct tcp_sock, mptcp_pm_name),
#endif
.slab_flags = SLAB_TYPESAFE_BY_RCU,
.twsk_prot = &tcp6_timewait_sock_ops,
.rsk_prot = &tcp6_request_sock_ops,
@ -2030,6 +2307,9 @@ struct proto tcpv6_prot = {
.compat_getsockopt = compat_tcp_getsockopt,
#endif
.diag_destroy = tcp_abort,
#ifdef CONFIG_MPTCP
.clear_sk = mptcp_clear_sk,
#endif
};
/* thinking of making this const? Don't.

146
net/mptcp/Kconfig Executable file
View File

@ -0,0 +1,146 @@
#
# MPTCP configuration
#
config MPTCP
bool "MPTCP protocol"
depends on (IPV6=y || IPV6=n)
---help---
This replaces the normal TCP stack with a Multipath TCP stack,
able to use several paths at once.
menuconfig MPTCP_PM_ADVANCED
bool "MPTCP: advanced path-manager control"
depends on MPTCP=y
---help---
Support for selection of different path-managers. You should choose 'Y' here,
because otherwise no new MPTCP subflows will be actively created.
if MPTCP_PM_ADVANCED
config MPTCP_FULLMESH
tristate "MPTCP Full-Mesh Path-Manager"
depends on MPTCP=y
---help---
This path-management module will create a full-mesh among all IP-addresses.
config MPTCP_NDIFFPORTS
tristate "MPTCP ndiff-ports"
depends on MPTCP=y
---help---
This path-management module will create multiple subflows between the same
pair of IP-addresses, modifying the source-port. You can set the number
of subflows via the mptcp_ndiffports-sysctl.
config MPTCP_BINDER
tristate "MPTCP Binder"
depends on (MPTCP=y)
---help---
This path-management module works like ndiffports, and adds a sysctl
option to set the gateway (and/or path) to use for each additional subflow
via Loose Source Routing (IPv4 only).
config MPTCP_NETLINK
tristate "MPTCP Netlink Path-Manager"
depends on MPTCP=y
---help---
This path-management module is controlled over a Netlink interface. A userspace
module can therefore control the establishment of new subflows and the policy
to apply over those new subflows for every connection.
choice
prompt "Default MPTCP Path-Manager"
default DEFAULT_FULLMESH
help
Select the Path-Manager of your choice
config DEFAULT_FULLMESH
bool "Full mesh" if MPTCP_FULLMESH=y
config DEFAULT_NDIFFPORTS
bool "ndiff-ports" if MPTCP_NDIFFPORTS=y
config DEFAULT_BINDER
bool "binder" if MPTCP_BINDER=y
config DEFAULT_NETLINK
bool "Netlink" if MPTCP_NETLINK=y
config DEFAULT_DUMMY
bool "Default"
endchoice
endif
config DEFAULT_MPTCP_PM
string
default "default" if DEFAULT_DUMMY
default "fullmesh" if DEFAULT_FULLMESH
default "ndiffports" if DEFAULT_NDIFFPORTS
default "binder" if DEFAULT_BINDER
default "default"
menuconfig MPTCP_SCHED_ADVANCED
bool "MPTCP: advanced scheduler control"
depends on MPTCP=y
---help---
Support for selection of different schedulers. You should choose 'Y' here
if you want to use a scheduler other than the default one.
if MPTCP_SCHED_ADVANCED
config MPTCP_BLEST
tristate "MPTCP BLEST"
depends on MPTCP=y
---help---
This is an experimental BLocking ESTimation-based (BLEST) scheduler.
config MPTCP_ROUNDROBIN
tristate "MPTCP Round-Robin"
depends on (MPTCP=y)
---help---
This is a very simple round-robin scheduler. It probably has poor performance
but might be interesting for researchers.
config MPTCP_REDUNDANT
tristate "MPTCP Redundant"
depends on (MPTCP=y)
---help---
This scheduler sends all packets redundantly over all subflows to decrease
latency and jitter, at the cost of lower throughput.
choice
prompt "Default MPTCP Scheduler"
default DEFAULT_SCHEDULER
help
Select the Scheduler of your choice
config DEFAULT_SCHEDULER
bool "Default"
---help---
This is the default scheduler, sending first on the subflow
with the lowest RTT.
config DEFAULT_ROUNDROBIN
bool "Round-Robin" if MPTCP_ROUNDROBIN=y
---help---
This is the round-robin scheduler, sending in a round-robin
fashion.
config DEFAULT_REDUNDANT
bool "Redundant" if MPTCP_REDUNDANT=y
---help---
This is the redundant scheduler, sending packets redundantly over
all the subflows.
endchoice
endif
config DEFAULT_MPTCP_SCHED
string
depends on (MPTCP=y)
default "default" if DEFAULT_SCHEDULER
default "roundrobin" if DEFAULT_ROUNDROBIN
default "redundant" if DEFAULT_REDUNDANT
default "default"

24
net/mptcp/Makefile Executable file
View File

@ -0,0 +1,24 @@
#
## Makefile for MultiPath TCP support code.
#
#
obj-$(CONFIG_MPTCP) += mptcp.o
mptcp-y := mptcp_ctrl.o mptcp_ipv4.o mptcp_pm.o \
mptcp_output.o mptcp_input.o mptcp_sched.o
obj-$(CONFIG_TCP_CONG_LIA) += mptcp_coupled.o
obj-$(CONFIG_TCP_CONG_OLIA) += mptcp_olia.o
obj-$(CONFIG_TCP_CONG_WVEGAS) += mptcp_wvegas.o
obj-$(CONFIG_TCP_CONG_BALIA) += mptcp_balia.o
obj-$(CONFIG_TCP_CONG_MCTCPDESYNC) += mctcp_desync.o
obj-$(CONFIG_MPTCP_FULLMESH) += mptcp_fullmesh.o
obj-$(CONFIG_MPTCP_NDIFFPORTS) += mptcp_ndiffports.o
obj-$(CONFIG_MPTCP_BINDER) += mptcp_binder.o
obj-$(CONFIG_MPTCP_NETLINK) += mptcp_netlink.o
obj-$(CONFIG_MPTCP_ROUNDROBIN) += mptcp_rr.o
obj-$(CONFIG_MPTCP_REDUNDANT) += mptcp_redundant.o
obj-$(CONFIG_MPTCP_BLEST) += mptcp_blest.o
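# mptcp_ipv6.o is linked into mptcp.o whenever IPv6 is enabled at all:
# $(subst m,y,...) maps CONFIG_IPV6=m to y, so the IPv6 subflow code is
# built in even when IPv6 itself is configured as a module.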
mptcp-$(subst m,y,$(CONFIG_IPV6)) += mptcp_ipv6.o

193
net/mptcp/mctcp_desync.c Executable file
View File

@ -0,0 +1,193 @@
/*
* Desynchronized Multi-Channel TCP Congestion Control Algorithm
*
* Implementation based on the publications "DMCTCP: Desynchronized Multi-Channel
* TCP for high speed access networks with tiny buffers", 23rd International
* Conference on Computer Communication and Networks (ICCCN), 2014, and
* "Exploring parallelism and desynchronization of TCP over high speed networks
* with tiny buffers", Computer Communications (Elsevier), 2015.
*
* http://ieeexplore.ieee.org/abstract/document/6911722/
* https://doi.org/10.1016/j.comcom.2015.07.010
*
* This prototype is for research purposes and is currently experimental code
* that only supports a single path. Future support of multi-channel over
* multi-path requires channel grouping.
*
* Initial Design and Implementation:
* Cheng Cui <Cheng.Cui@netapp.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*/
#include <net/tcp.h>
#include <net/mptcp.h>
#include <linux/module.h>
enum {
MASTER_CHANNEL = 1,
INI_MIN_CWND = 2,
};
/* private congestion control structure:
* off_tstamp: timestamp of the last backoff for a loss-synchronization event
* off_subfid: the subflow that backed off at off_tstamp
*/
struct mctcp_desync {
u64 off_tstamp;
u8 off_subfid;
};
static inline int mctcp_cc_sk_can_send(const struct sock *sk)
{
return mptcp_sk_can_send(sk) && tcp_sk(sk)->srtt_us;
}
static void mctcp_desync_init(struct sock *sk)
{
if (mptcp(tcp_sk(sk))) {
struct mctcp_desync *ca = inet_csk_ca(mptcp_meta_sk(sk));
ca->off_tstamp = 0;
ca->off_subfid = 0;
}
/* If we do not mptcp, behave like reno: return */
}
static void mctcp_desync_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
if (!mptcp(tp)) {
tcp_reno_cong_avoid(sk, ack, acked);
return;
} else if (!tcp_is_cwnd_limited(sk)) {
return;
} else {
const struct mctcp_desync *ca = inet_csk_ca(mptcp_meta_sk(sk));
const u8 subfid = tp->mptcp->path_index;
/* current aggregated cwnd */
u32 agg_cwnd = 0;
u32 min_cwnd = 0xffffffff;
u8 min_cwnd_subfid = 0;
/* In "safe" area, increase */
if (tcp_in_slow_start(tp)) {
if (ca->off_subfid) {
/* passed initial phase, allow slow start */
tcp_slow_start(tp, acked);
} else if (MASTER_CHANNEL == tp->mptcp->path_index) {
/* the master channel does normal slow start in
* the initial phase */
tcp_slow_start(tp, acked);
} else {
/* secondary channels increase slowly until
* the initial phase has passed
*/
tp->snd_ssthresh = tp->snd_cwnd = INI_MIN_CWND;
}
return;
} else {
/* In dangerous area, increase slowly and linearly. */
const struct mptcp_tcp_sock *mptcp;
/* get total cwnd and the subflow that has min cwnd */
mptcp_for_each_sub(tp->mpcb, mptcp) {
const struct sock *sub_sk = mptcp_to_sock(mptcp);
if (mctcp_cc_sk_can_send(sub_sk)) {
const struct tcp_sock *sub_tp =
tcp_sk(sub_sk);
agg_cwnd += sub_tp->snd_cwnd;
if(min_cwnd > sub_tp->snd_cwnd) {
min_cwnd = sub_tp->snd_cwnd;
min_cwnd_subfid =
sub_tp->mptcp->path_index;
}
}
}
/* the smallest subflow grows faster than others */
if (subfid == min_cwnd_subfid) {
tcp_cong_avoid_ai(tp, min_cwnd, acked);
} else {
tcp_cong_avoid_ai(tp, agg_cwnd - min_cwnd,
acked);
}
}
}
}
static u32 mctcp_desync_ssthresh(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
if (!mptcp(tp)) {
return max(tp->snd_cwnd >> 1U, 2U);
} else {
struct mctcp_desync *ca = inet_csk_ca(mptcp_meta_sk(sk));
const u8 subfid = tp->mptcp->path_index;
const struct mptcp_tcp_sock *mptcp;
u32 max_cwnd = 0;
u8 max_cwnd_subfid = 0;
/* Find the subflow that has the max cwnd. */
mptcp_for_each_sub(tp->mpcb, mptcp) {
const struct sock *sub_sk = mptcp_to_sock(mptcp);
if (mctcp_cc_sk_can_send(sub_sk)) {
const struct tcp_sock *sub_tp = tcp_sk(sub_sk);
if (max_cwnd < sub_tp->snd_cwnd) {
max_cwnd = sub_tp->snd_cwnd;
max_cwnd_subfid =
sub_tp->mptcp->path_index;
}
}
}
/* Use high resolution clock. */
if (subfid == max_cwnd_subfid) {
u64 now = tcp_clock_us();
u32 delta = tcp_stamp_us_delta(now, ca->off_tstamp);
if (delta < (tp->srtt_us >> 3)) {
/* desynchronize */
return tp->snd_cwnd;
} else {
ca->off_tstamp = now;
ca->off_subfid = subfid;
return max(max_cwnd >> 1U, 2U);
}
} else {
return tp->snd_cwnd;
}
}
}
static struct tcp_congestion_ops mctcp_desync = {
.init = mctcp_desync_init,
.ssthresh = mctcp_desync_ssthresh,
.undo_cwnd = tcp_reno_undo_cwnd,
.cong_avoid = mctcp_desync_cong_avoid,
.owner = THIS_MODULE,
.name = "mctcpdesync",
};
static int __init mctcp_desync_register(void)
{
BUILD_BUG_ON(sizeof(struct mctcp_desync) > ICSK_CA_PRIV_SIZE);
return tcp_register_congestion_control(&mctcp_desync);
}
static void __exit mctcp_desync_unregister(void)
{
tcp_unregister_congestion_control(&mctcp_desync);
}
module_init(mctcp_desync_register);
module_exit(mctcp_desync_unregister);
MODULE_AUTHOR("Cheng Cui");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("MCTCP: DESYNCHRONIZED MULTICHANNEL TCP CONGESTION CONTROL");
MODULE_VERSION("1.0");

261
net/mptcp/mptcp_balia.c Executable file
View File

@ -0,0 +1,261 @@
/*
* MPTCP implementation - Balia Congestion Control
* (Balanced Linked Adaptation Algorithm)
*
* Analysis, Design and Implementation:
* Qiuyu Peng <qpeng@caltech.edu>
* Anwar Walid <anwar@research.bell-labs.com>
* Jaehyun Hwang <jhyun.hwang@samsung.com>
* Steven H. Low <slow@caltech.edu>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <net/tcp.h>
#include <net/mptcp.h>
#include <linux/module.h>
/* The variable 'rate' (i.e., x_r) will be scaled
* e.g., from B/s to KB/s, MB/s, or GB/s
* if max_rate > 2^rate_scale_limit
*/
static int rate_scale_limit = 25;
static int alpha_scale = 10;
static int scale_num = 5;
struct mptcp_balia {
u64 ai;
u64 md;
bool forced_update;
};
static inline int mptcp_balia_sk_can_send(const struct sock *sk)
{
return mptcp_sk_can_send(sk) && tcp_sk(sk)->srtt_us;
}
static inline u64 mptcp_get_ai(const struct sock *meta_sk)
{
return ((struct mptcp_balia *)inet_csk_ca(meta_sk))->ai;
}
static inline void mptcp_set_ai(const struct sock *meta_sk, u64 ai)
{
((struct mptcp_balia *)inet_csk_ca(meta_sk))->ai = ai;
}
static inline u64 mptcp_get_md(const struct sock *meta_sk)
{
return ((struct mptcp_balia *)inet_csk_ca(meta_sk))->md;
}
static inline void mptcp_set_md(const struct sock *meta_sk, u64 md)
{
((struct mptcp_balia *)inet_csk_ca(meta_sk))->md = md;
}
static inline u64 mptcp_balia_scale(u64 val, int scale)
{
return (u64) val << scale;
}
static inline bool mptcp_get_forced(const struct sock *meta_sk)
{
return ((struct mptcp_balia *)inet_csk_ca(meta_sk))->forced_update;
}
static inline void mptcp_set_forced(const struct sock *meta_sk, bool force)
{
((struct mptcp_balia *)inet_csk_ca(meta_sk))->forced_update = force;
}
static void mptcp_balia_recalc_ai(const struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
const struct mptcp_cb *mpcb = tp->mpcb;
struct mptcp_tcp_sock *mptcp;
u64 max_rate = 0, rate = 0, sum_rate = 0;
u64 alpha, ai = tp->snd_cwnd, md = (tp->snd_cwnd >> 1);
int num_scale_down = 0;
if (!mpcb)
return;
/* Find max_rate first */
mptcp_for_each_sub(mpcb, mptcp) {
const struct sock *sub_sk = mptcp_to_sock(mptcp);
struct tcp_sock *sub_tp = tcp_sk(sub_sk);
u64 tmp;
if (!mptcp_balia_sk_can_send(sub_sk))
continue;
tmp = div_u64((u64)tp->mss_cache * sub_tp->snd_cwnd
* (USEC_PER_SEC << 3), sub_tp->srtt_us);
sum_rate += tmp;
if (tp == sub_tp)
rate = tmp;
if (tmp >= max_rate)
max_rate = tmp;
}
/* At least, the current subflow should be able to send */
if (unlikely(!rate))
goto exit;
alpha = div64_u64(max_rate, rate);
/* Scale down max_rate if it is too high (e.g., >2^25) */
while (max_rate > mptcp_balia_scale(1, rate_scale_limit)) {
max_rate >>= scale_num;
num_scale_down++;
}
if (num_scale_down) {
sum_rate = 0;
mptcp_for_each_sub(mpcb, mptcp) {
const struct sock *sub_sk = mptcp_to_sock(mptcp);
struct tcp_sock *sub_tp = tcp_sk(sub_sk);
u64 tmp;
if (!mptcp_balia_sk_can_send(sub_sk))
continue;
tmp = div_u64((u64)tp->mss_cache * sub_tp->snd_cwnd
* (USEC_PER_SEC << 3), sub_tp->srtt_us);
tmp >>= (scale_num * num_scale_down);
sum_rate += tmp;
}
rate >>= (scale_num * num_scale_down);
}
/* (sum_rate)^2 * 10 * w_r
* ai = ------------------------------------
* (x_r + max_rate) * (4x_r + max_rate)
*/
sum_rate *= sum_rate;
ai = div64_u64(sum_rate * 10, rate + max_rate);
ai = div64_u64(ai * tp->snd_cwnd, (rate << 2) + max_rate);
if (unlikely(!ai))
ai = tp->snd_cwnd;
md = ((tp->snd_cwnd >> 1) * min(mptcp_balia_scale(alpha, alpha_scale),
mptcp_balia_scale(3, alpha_scale) >> 1))
>> alpha_scale;
exit:
mptcp_set_ai(sk, ai);
mptcp_set_md(sk, md);
}
static void mptcp_balia_init(struct sock *sk)
{
if (mptcp(tcp_sk(sk))) {
mptcp_set_forced(sk, 0);
mptcp_set_ai(sk, 0);
mptcp_set_md(sk, 0);
}
}
static void mptcp_balia_cwnd_event(struct sock *sk, enum tcp_ca_event event)
{
if (event == CA_EVENT_COMPLETE_CWR || event == CA_EVENT_LOSS)
mptcp_balia_recalc_ai(sk);
}
static void mptcp_balia_set_state(struct sock *sk, u8 ca_state)
{
if (!mptcp(tcp_sk(sk)))
return;
mptcp_set_forced(sk, 1);
}
static void mptcp_balia_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
int snd_cwnd;
if (!mptcp(tp)) {
tcp_reno_cong_avoid(sk, ack, acked);
return;
}
if (!tcp_is_cwnd_limited(sk))
return;
if (tcp_in_slow_start(tp)) {
/* In "safe" area, increase. */
tcp_slow_start(tp, acked);
mptcp_balia_recalc_ai(sk);
return;
}
if (mptcp_get_forced(mptcp_meta_sk(sk))) {
mptcp_balia_recalc_ai(sk);
mptcp_set_forced(sk, 0);
}
snd_cwnd = (int)mptcp_get_ai(sk);
if (tp->snd_cwnd_cnt >= snd_cwnd) {
if (tp->snd_cwnd < tp->snd_cwnd_clamp) {
tp->snd_cwnd++;
mptcp_balia_recalc_ai(sk);
}
tp->snd_cwnd_cnt = 0;
} else {
tp->snd_cwnd_cnt++;
}
}
static u32 mptcp_balia_ssthresh(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
if (unlikely(!mptcp(tp)))
return tcp_reno_ssthresh(sk);
else
return max((u32)(tp->snd_cwnd - mptcp_get_md(sk)), 1U);
}
static struct tcp_congestion_ops mptcp_balia = {
.init = mptcp_balia_init,
.ssthresh = mptcp_balia_ssthresh,
.cong_avoid = mptcp_balia_cong_avoid,
.undo_cwnd = tcp_reno_undo_cwnd,
.cwnd_event = mptcp_balia_cwnd_event,
.set_state = mptcp_balia_set_state,
.owner = THIS_MODULE,
.name = "balia",
};
static int __init mptcp_balia_register(void)
{
BUILD_BUG_ON(sizeof(struct mptcp_balia) > ICSK_CA_PRIV_SIZE);
return tcp_register_congestion_control(&mptcp_balia);
}
static void __exit mptcp_balia_unregister(void)
{
tcp_unregister_congestion_control(&mptcp_balia);
}
module_init(mptcp_balia_register);
module_exit(mptcp_balia_unregister);
MODULE_AUTHOR("Jaehyun Hwang, Anwar Walid, Qiuyu Peng, Steven H. Low");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("MPTCP BALIA CONGESTION CONTROL ALGORITHM");
MODULE_VERSION("0.1");

494
net/mptcp/mptcp_binder.c Executable file
View File

@ -0,0 +1,494 @@
#include <linux/module.h>
#include <net/mptcp.h>
#include <net/mptcp_v4.h>
#include <linux/route.h>
#include <linux/inet.h>
#include <linux/mroute.h>
#include <linux/spinlock_types.h>
#include <net/inet_ecn.h>
#include <net/route.h>
#include <net/xfrm.h>
#include <net/compat.h>
#include <linux/slab.h>
#define MPTCP_GW_MAX_LISTS 10
#define MPTCP_GW_LIST_MAX_LEN 6
#define MPTCP_GW_SYSCTL_MAX_LEN (15 * MPTCP_GW_LIST_MAX_LEN * \
MPTCP_GW_MAX_LISTS)
struct mptcp_gw_list {
struct in_addr list[MPTCP_GW_MAX_LISTS][MPTCP_GW_LIST_MAX_LEN];
u8 len[MPTCP_GW_MAX_LISTS];
};
struct binder_priv {
/* Worker struct for subflow establishment */
struct work_struct subflow_work;
struct mptcp_cb *mpcb;
/* Prevent multiple sub-sockets concurrently iterating over sockets */
spinlock_t *flow_lock;
};
static struct mptcp_gw_list *mptcp_gws;
static rwlock_t mptcp_gws_lock;
static int mptcp_binder_ndiffports __read_mostly = 1;
static char sysctl_mptcp_binder_gateways[MPTCP_GW_SYSCTL_MAX_LEN] __read_mostly;
static int mptcp_get_avail_list_ipv4(struct sock *sk)
{
int i, j, list_taken, opt_ret, opt_len;
unsigned char *opt_ptr, *opt_end_ptr, opt[MAX_IPOPTLEN];
for (i = 0; i < MPTCP_GW_MAX_LISTS; ++i) {
struct mptcp_tcp_sock *mptcp;
if (mptcp_gws->len[i] == 0)
goto error;
mptcp_debug("mptcp_get_avail_list_ipv4: List %i\n", i);
list_taken = 0;
/* Loop through all sub-sockets in this connection */
mptcp_for_each_sub(tcp_sk(sk)->mpcb, mptcp) {
sk = mptcp_to_sock(mptcp);
mptcp_debug("mptcp_get_avail_list_ipv4: Next sock\n");
/* Reset length and options buffer, then retrieve
* from socket
*/
opt_len = MAX_IPOPTLEN;
memset(opt, 0, MAX_IPOPTLEN);
opt_ret = ip_getsockopt(sk, IPPROTO_IP,
IP_OPTIONS, (char __user *)opt, (int __user *)&opt_len);
if (opt_ret < 0) {
mptcp_debug("%s: MPTCP subsocket getsockopt() IP_OPTIONS failed, error %d\n",
__func__, opt_ret);
goto error;
}
/* If socket has no options, it has no stake in this list */
if (opt_len <= 0)
continue;
/* Iterate options buffer */
for (opt_ptr = &opt[0]; opt_ptr < &opt[opt_len]; opt_ptr++) {
if (*opt_ptr == IPOPT_LSRR) {
mptcp_debug("mptcp_get_avail_list_ipv4: LSRR options found\n");
goto sock_lsrr;
}
}
continue;
sock_lsrr:
/* Pointer to the 2nd to last address */
opt_end_ptr = opt_ptr+(*(opt_ptr+1))-4;
/* Addresses start 3 bytes after type offset */
opt_ptr += 3;
j = 0;
/* Different length lists cannot be the same */
if ((opt_end_ptr-opt_ptr)/4 != mptcp_gws->len[i])
continue;
/* Iterate if we are still inside options list
* and sysctl list
*/
while (opt_ptr < opt_end_ptr && j < mptcp_gws->len[i]) {
/* If there is a different address, this list must
* not be set on this socket
*/
if (memcmp(&mptcp_gws->list[i][j], opt_ptr, 4))
break;
/* Jump 4 bytes to next address */
opt_ptr += 4;
j++;
}
/* Reached the end without a differing address, lists
* are therefore identical.
*/
if (j == mptcp_gws->len[i]) {
mptcp_debug("mptcp_get_avail_list_ipv4: List already used\n");
list_taken = 1;
break;
}
}
/* Free list found if not taken by a socket */
if (!list_taken) {
mptcp_debug("mptcp_get_avail_list_ipv4: List free\n");
break;
}
}
if (i >= MPTCP_GW_MAX_LISTS)
goto error;
return i;
error:
return -1;
}
/* The list of addresses is parsed each time a new connection is opened,
* to make sure it's up to date. In case of error, all the lists are
* marked as unavailable and the subflow's fingerprint is set to 0.
*/
static void mptcp_v4_add_lsrr(struct sock *sk, struct in_addr addr)
{
int i, j, ret;
unsigned char opt[MAX_IPOPTLEN] = {0};
struct tcp_sock *tp = tcp_sk(sk);
struct binder_priv *fmp = (struct binder_priv *)&tp->mpcb->mptcp_pm[0];
/* Read lock: multiple sockets can read LSRR addresses at the same
* time, but writes are done in mutual exclusion.
* Spin lock: must search for a free list for one socket at a time, or
* multiple sockets could take the same list.
*/
read_lock(&mptcp_gws_lock);
spin_lock(fmp->flow_lock);
i = mptcp_get_avail_list_ipv4(sk);
/* Execution enters here only if a free path is found.
*/
if (i >= 0) {
opt[0] = IPOPT_NOP;
opt[1] = IPOPT_LSRR;
opt[2] = sizeof(mptcp_gws->list[i][0].s_addr) *
(mptcp_gws->len[i] + 1) + 3;
opt[3] = IPOPT_MINOFF;
for (j = 0; j < mptcp_gws->len[i]; ++j)
memcpy(opt + 4 +
(j * sizeof(mptcp_gws->list[i][0].s_addr)),
&mptcp_gws->list[i][j].s_addr,
sizeof(mptcp_gws->list[i][0].s_addr));
/* Final destination must be part of IP_OPTIONS parameter. */
memcpy(opt + 4 + (j * sizeof(addr.s_addr)), &addr.s_addr,
sizeof(addr.s_addr));
/* setsockopt must be inside the lock, otherwise another
* subflow could fail to see that we have taken a list.
*/
ret = ip_setsockopt(sk, IPPROTO_IP, IP_OPTIONS, (char __user *)opt,
4 + sizeof(mptcp_gws->list[i][0].s_addr) * (mptcp_gws->len[i] + 1));
if (ret < 0) {
mptcp_debug("%s: MPTCP subsock setsockopt() IP_OPTIONS failed, error %d\n",
__func__, ret);
}
}
spin_unlock(fmp->flow_lock);
read_unlock(&mptcp_gws_lock);
return;
}
/* Parses gateways string for a list of paths to different
* gateways, and stores them for use with the Loose Source Routing (LSRR)
* socket option. Each list must have "," separated addresses, and the lists
* themselves must be separated by "-". Returns -1 in case one or more of the
* addresses is not a valid ipv4/6 address.
*/
static int mptcp_parse_gateway_ipv4(char *gateways)
{
int i, j, k, ret;
char *tmp_string = NULL;
struct in_addr tmp_addr;
tmp_string = kzalloc(16, GFP_KERNEL);
if (tmp_string == NULL)
return -ENOMEM;
write_lock(&mptcp_gws_lock);
memset(mptcp_gws, 0, sizeof(struct mptcp_gw_list));
/* A temporary string is used since in4_pton needs a NULL-terminated string,
* but we do not want to modify the sysctl string itself.
* i iterates over the sysctl string, j over the temporary string into
* which each IP is copied, and k over the gateway lists.
*/
for (i = j = k = 0;
i < MPTCP_GW_SYSCTL_MAX_LEN && k < MPTCP_GW_MAX_LISTS;
++i) {
if (gateways[i] == '-' || gateways[i] == ',' || gateways[i] == '\0') {
/* If the temp IP is empty and the current list is
* empty, we are done.
*/
if (j == 0 && mptcp_gws->len[k] == 0)
break;
/* Terminate the temp IP string, then if it is
* non-empty parse the IP and copy it.
*/
tmp_string[j] = '\0';
if (j > 0) {
mptcp_debug("mptcp_parse_gateway_list tmp: %s i: %d\n", tmp_string, i);
ret = in4_pton(tmp_string, strlen(tmp_string),
(u8 *)&tmp_addr.s_addr, '\0',
NULL);
if (ret) {
mptcp_debug("mptcp_parse_gateway_list ret: %d s_addr: %pI4\n",
ret,
&tmp_addr.s_addr);
memcpy(&mptcp_gws->list[k][mptcp_gws->len[k]].s_addr,
&tmp_addr.s_addr,
sizeof(tmp_addr.s_addr));
mptcp_gws->len[k]++;
j = 0;
tmp_string[j] = '\0';
/* Since we can't impose a limit to
* what the user can input, make sure
* there are not too many IPs in the
* SYSCTL string.
*/
if (mptcp_gws->len[k] > MPTCP_GW_LIST_MAX_LEN) {
mptcp_debug("mptcp_parse_gateway_list too many members in list %i: max %i\n",
k,
MPTCP_GW_LIST_MAX_LEN);
goto error;
}
} else {
goto error;
}
}
if (gateways[i] == '-' || gateways[i] == '\0')
++k;
} else {
tmp_string[j] = gateways[i];
++j;
}
}
/* Number of flows is number of gateway lists plus master flow */
mptcp_binder_ndiffports = k+1;
write_unlock(&mptcp_gws_lock);
kfree(tmp_string);
return 0;
error:
memset(mptcp_gws, 0, sizeof(struct mptcp_gw_list));
memset(gateways, 0, sizeof(char) * MPTCP_GW_SYSCTL_MAX_LEN);
write_unlock(&mptcp_gws_lock);
kfree(tmp_string);
return -1;
}
/**
* Create all new subflows, by calling mptcp_initX_subsockets.
*
* This function uses a goto to next_subflow to allow releasing the lock between
* new subflows, giving other processes a chance to do some work on the
* socket and potentially finish the communication.
**/
static void create_subflow_worker(struct work_struct *work)
{
const struct binder_priv *pm_priv = container_of(work,
struct binder_priv,
subflow_work);
struct mptcp_cb *mpcb = pm_priv->mpcb;
struct sock *meta_sk = mpcb->meta_sk;
int iter = 0;
next_subflow:
if (iter) {
release_sock(meta_sk);
mutex_unlock(&mpcb->mpcb_mutex);
cond_resched();
}
mutex_lock(&mpcb->mpcb_mutex);
lock_sock_nested(meta_sk, SINGLE_DEPTH_NESTING);
if (!mptcp(tcp_sk(meta_sk)))
goto exit;
iter++;
if (sock_flag(meta_sk, SOCK_DEAD))
goto exit;
if (mpcb->master_sk &&
!tcp_sk(mpcb->master_sk)->mptcp->fully_established)
goto exit;
if (mptcp_binder_ndiffports > iter &&
mptcp_binder_ndiffports > mptcp_subflow_count(mpcb)) {
struct mptcp_loc4 loc;
struct mptcp_rem4 rem;
loc.addr.s_addr = inet_sk(meta_sk)->inet_saddr;
loc.loc4_id = 0;
loc.low_prio = 0;
rem.addr.s_addr = inet_sk(meta_sk)->inet_daddr;
rem.port = inet_sk(meta_sk)->inet_dport;
rem.rem4_id = 0; /* Default 0 */
mptcp_init4_subsockets(meta_sk, &loc, &rem);
goto next_subflow;
}
exit:
release_sock(meta_sk);
mutex_unlock(&mpcb->mpcb_mutex);
mptcp_mpcb_put(mpcb);
sock_put(meta_sk);
}
static void binder_new_session(const struct sock *meta_sk)
{
struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
struct binder_priv *fmp = (struct binder_priv *)&mpcb->mptcp_pm[0];
static DEFINE_SPINLOCK(flow_lock);
#if IS_ENABLED(CONFIG_IPV6)
if (meta_sk->sk_family == AF_INET6 &&
!mptcp_v6_is_v4_mapped(meta_sk)) {
mptcp_fallback_default(mpcb);
return;
}
#endif
/* Initialize workqueue-struct */
INIT_WORK(&fmp->subflow_work, create_subflow_worker);
fmp->mpcb = mpcb;
fmp->flow_lock = &flow_lock;
}
static void binder_create_subflows(struct sock *meta_sk)
{
struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
struct binder_priv *pm_priv = (struct binder_priv *)&mpcb->mptcp_pm[0];
if (mptcp_in_infinite_mapping_weak(mpcb) ||
mpcb->server_side || sock_flag(meta_sk, SOCK_DEAD))
return;
if (!work_pending(&pm_priv->subflow_work)) {
sock_hold(meta_sk);
refcount_inc(&mpcb->mpcb_refcnt);
queue_work(mptcp_wq, &pm_priv->subflow_work);
}
}
static int binder_get_local_id(const struct sock *meta_sk, sa_family_t family,
union inet_addr *addr, bool *low_prio)
{
return 0;
}
/* Callback function, executed when the sysctl net.mptcp.mptcp_binder_gateways is updated.
* Inspired by proc_tcp_congestion_control().
*/
static int proc_mptcp_gateways(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
int ret;
struct ctl_table tbl = {
.maxlen = MPTCP_GW_SYSCTL_MAX_LEN,
};
if (write) {
tbl.data = kzalloc(MPTCP_GW_SYSCTL_MAX_LEN, GFP_KERNEL);
if (tbl.data == NULL)
return -ENOMEM;
ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
if (ret == 0) {
ret = mptcp_parse_gateway_ipv4(tbl.data);
memcpy(ctl->data, tbl.data, MPTCP_GW_SYSCTL_MAX_LEN);
}
kfree(tbl.data);
} else {
ret = proc_dostring(ctl, write, buffer, lenp, ppos);
}
return ret;
}
static struct mptcp_pm_ops binder __read_mostly = {
.new_session = binder_new_session,
.fully_established = binder_create_subflows,
.get_local_id = binder_get_local_id,
.init_subsocket_v4 = mptcp_v4_add_lsrr,
.name = "binder",
.owner = THIS_MODULE,
};
static struct ctl_table binder_table[] = {
{
.procname = "mptcp_binder_gateways",
.data = &sysctl_mptcp_binder_gateways,
.maxlen = sizeof(char) * MPTCP_GW_SYSCTL_MAX_LEN,
.mode = 0644,
.proc_handler = &proc_mptcp_gateways
},
{ }
};
static struct ctl_table_header *mptcp_sysctl_binder;
/* General initialization of MPTCP_PM */
static int __init binder_register(void)
{
mptcp_gws = kzalloc(sizeof(*mptcp_gws), GFP_KERNEL);
if (!mptcp_gws)
return -ENOMEM;
rwlock_init(&mptcp_gws_lock);
BUILD_BUG_ON(sizeof(struct binder_priv) > MPTCP_PM_SIZE);
mptcp_sysctl_binder = register_net_sysctl(&init_net, "net/mptcp",
binder_table);
if (!mptcp_sysctl_binder)
goto sysctl_fail;
if (mptcp_register_path_manager(&binder))
goto pm_failed;
return 0;
pm_failed:
unregister_net_sysctl_table(mptcp_sysctl_binder);
sysctl_fail:
kfree(mptcp_gws);
return -1;
}
static void binder_unregister(void)
{
mptcp_unregister_path_manager(&binder);
unregister_net_sysctl_table(mptcp_sysctl_binder);
kfree(mptcp_gws);
}
module_init(binder_register);
module_exit(binder_unregister);
MODULE_AUTHOR("Luca Boccassi, Duncan Eastoe, Christoph Paasch (ndiffports)");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("BINDER MPTCP");
MODULE_VERSION("0.1");

481
net/mptcp/mptcp_blest.c Executable file
View File

@ -0,0 +1,481 @@
// SPDX-License-Identifier: GPL-2.0
/* MPTCP Scheduler to reduce HoL-blocking and spurious retransmissions.
*
* Algorithm Design:
* Simone Ferlin <ferlin@simula.no>
* Ozgu Alay <ozgu@simula.no>
* Olivier Mehani <olivier.mehani@nicta.com.au>
* Roksana Boreli <roksana.boreli@nicta.com.au>
*
* Initial Implementation:
* Simone Ferlin <ferlin@simula.no>
*
* Additional Authors:
* Daniel Weber <weberd@cs.uni-bonn.de>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/module.h>
#include <net/mptcp.h>
#include <trace/events/tcp.h>
static unsigned char lambda __read_mostly = 12;
module_param(lambda, byte, 0644);
MODULE_PARM_DESC(lambda, "Divided by 10 for scaling factor of fast flow rate estimation");
static unsigned char max_lambda __read_mostly = 13;
module_param(max_lambda, byte, 0644);
MODULE_PARM_DESC(max_lambda, "Divided by 10 for maximum scaling factor of fast flow rate estimation");
static unsigned char min_lambda __read_mostly = 10;
module_param(min_lambda, byte, 0644);
MODULE_PARM_DESC(min_lambda, "Divided by 10 for minimum scaling factor of fast flow rate estimation");
static unsigned char dyn_lambda_good = 10; /* 1% */
module_param(dyn_lambda_good, byte, 0644);
MODULE_PARM_DESC(dyn_lambda_good, "Decrease of lambda in positive case.");
static unsigned char dyn_lambda_bad = 40; /* 4% */
module_param(dyn_lambda_bad, byte, 0644);
MODULE_PARM_DESC(dyn_lambda_bad, "Increase of lambda in negative case.");
struct blestsched_priv {
u32 last_rbuf_opti;
u32 min_srtt_us;
u32 max_srtt_us;
};
struct blestsched_cb {
bool retrans_flag;
s16 lambda_1000; /* values range from min_lambda * 100 to max_lambda * 100 */
u32 last_lambda_update;
};
static struct blestsched_priv *blestsched_get_priv(const struct tcp_sock *tp)
{
return (struct blestsched_priv *)&tp->mptcp->mptcp_sched[0];
}
static struct blestsched_cb *blestsched_get_cb(const struct tcp_sock *tp)
{
return (struct blestsched_cb *)&tp->mpcb->mptcp_sched[0];
}
static void blestsched_update_lambda(struct sock *meta_sk, struct sock *sk)
{
struct blestsched_cb *blest_cb = blestsched_get_cb(tcp_sk(meta_sk));
struct blestsched_priv *blest_p = blestsched_get_priv(tcp_sk(sk));
if (tcp_jiffies32 - blest_cb->last_lambda_update < usecs_to_jiffies(blest_p->min_srtt_us >> 3))
return;
/* if there have been retransmissions of packets of the slow flow
* during the slow flow's last RTT => increase lambda
* otherwise decrease
*/
if (blest_cb->retrans_flag) {
/* need to slow down on the slow flow */
blest_cb->lambda_1000 += dyn_lambda_bad;
} else {
/* use the slow flow more */
blest_cb->lambda_1000 -= dyn_lambda_good;
}
blest_cb->retrans_flag = false;
/* cap lambda_1000 to its value range */
blest_cb->lambda_1000 = min_t(s16, blest_cb->lambda_1000, max_lambda * 100);
blest_cb->lambda_1000 = max_t(s16, blest_cb->lambda_1000, min_lambda * 100);
blest_cb->last_lambda_update = tcp_jiffies32;
}
/* how many bytes will sk send during the rtt of another, slower flow? */
static u32 blestsched_estimate_bytes(struct sock *sk, u32 time_8)
{
struct tcp_sock *tp = tcp_sk(sk);
struct blestsched_priv *blest_p = blestsched_get_priv(tp);
struct blestsched_cb *blest_cb = blestsched_get_cb(mptcp_meta_tp(tp));
u32 avg_rtt, num_rtts, ca_cwnd, packets;
avg_rtt = (blest_p->min_srtt_us + blest_p->max_srtt_us) / 2;
if (avg_rtt == 0)
num_rtts = 1; /* sanity */
else
num_rtts = (time_8 / avg_rtt) + 1; /* round up */
/* during num_rtts, how many bytes will be sent on the flow?
* assumes for simplification that Reno is applied as congestion-control
*/
if (tp->snd_ssthresh == TCP_INFINITE_SSTHRESH) {
/* we are in initial slow start */
if (num_rtts > 16)
num_rtts = 16; /* cap for sanity */
packets = tp->snd_cwnd * ((1 << num_rtts) - 1); /* cwnd + 2*cwnd + 4*cwnd */
} else {
ca_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh + 1); /* assume we jump to CA already */
packets = (ca_cwnd + (num_rtts - 1) / 2) * num_rtts;
}
return div_u64(((u64)packets) * tp->mss_cache * blest_cb->lambda_1000, 1000);
}
static u32 blestsched_estimate_linger_time(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct blestsched_priv *blest_p = blestsched_get_priv(tp);
u32 estimate, slope, inflight, cwnd;
inflight = tcp_packets_in_flight(tp) + 1; /* take into account the new one */
cwnd = tp->snd_cwnd;
if (inflight >= cwnd) {
estimate = blest_p->max_srtt_us;
} else {
slope = blest_p->max_srtt_us - blest_p->min_srtt_us;
if (cwnd == 0)
cwnd = 1; /* sanity */
estimate = blest_p->min_srtt_us + (slope * inflight) / cwnd;
}
return (tp->srtt_us > estimate) ? tp->srtt_us : estimate;
}
/* This is the BLEST scheduler. This function decides on which flow to send
* a given MSS. If all subflows are found to be busy or the currently best
* subflow is estimated to possibly cause HoL-blocking, NULL is returned.
*/
struct sock *blest_get_available_subflow(struct sock *meta_sk, struct sk_buff *skb,
bool zero_wnd_test)
{
struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
struct sock *bestsk, *minsk = NULL;
struct tcp_sock *meta_tp, *besttp;
struct mptcp_tcp_sock *mptcp;
struct blestsched_priv *blest_p;
u32 min_srtt = U32_MAX;
/* Answer data_fin on same subflow!!! */
if (meta_sk->sk_shutdown & RCV_SHUTDOWN &&
skb && mptcp_is_data_fin(skb)) {
mptcp_for_each_sub(mpcb, mptcp) {
bestsk = mptcp_to_sock(mptcp);
if (tcp_sk(bestsk)->mptcp->path_index == mpcb->dfin_path_index &&
mptcp_is_available(bestsk, skb, zero_wnd_test))
return bestsk;
}
}
/* First, find the overall best subflow */
mptcp_for_each_sub(mpcb, mptcp) {
bestsk = mptcp_to_sock(mptcp);
besttp = tcp_sk(bestsk);
blest_p = blestsched_get_priv(besttp);
/* Set of states for which we are allowed to send data */
if (!mptcp_sk_can_send(bestsk))
continue;
/* We do not send data on this subflow unless it is
* fully established, i.e. the 4th ack has been received.
*/
if (besttp->mptcp->pre_established)
continue;
blest_p->min_srtt_us = min(blest_p->min_srtt_us, besttp->srtt_us);
blest_p->max_srtt_us = max(blest_p->max_srtt_us, besttp->srtt_us);
/* record minimal rtt */
if (besttp->srtt_us < min_srtt) {
min_srtt = besttp->srtt_us;
minsk = bestsk;
}
}
/* find the current best subflow according to the default scheduler */
bestsk = get_available_subflow(meta_sk, skb, zero_wnd_test);
/* if we decided to use a slower flow, we have the option of not using it at all */
if (bestsk && minsk && bestsk != minsk) {
u32 slow_linger_time, fast_bytes, slow_inflight_bytes, slow_bytes, avail_space;
u32 buffered_bytes = 0;
meta_tp = tcp_sk(meta_sk);
besttp = tcp_sk(bestsk);
blestsched_update_lambda(meta_sk, bestsk);
/* if we send this SKB now, it will be acked in besttp->srtt seconds;
* during this time, how many bytes will we send on the fast flow?
*/
slow_linger_time = blestsched_estimate_linger_time(bestsk);
fast_bytes = blestsched_estimate_bytes(minsk, slow_linger_time);
if (skb)
buffered_bytes = skb->len;
/* is the required space available in the mptcp meta send window?
* we assume that all bytes inflight on the slow path will be acked in besttp->srtt seconds
* (just like the SKB if it was sent now) -> that means that those inflight bytes will
* keep occupying space in the meta window until then
*/
slow_inflight_bytes = besttp->write_seq - besttp->snd_una;
slow_bytes = buffered_bytes + slow_inflight_bytes; // bytes of this SKB plus those in flight already
avail_space = (slow_bytes < meta_tp->snd_wnd) ? (meta_tp->snd_wnd - slow_bytes) : 0;
if (fast_bytes > avail_space) {
/* sending this SKB on the slow flow means
* we wouldn't be able to send all the data we'd like to send on the fast flow
* so don't do that
*/
return NULL;
}
}
return bestsk;
}
/* copy from mptcp_sched.c: mptcp_rcv_buf_optimization */
static struct sk_buff *mptcp_blest_rcv_buf_optimization(struct sock *sk, int penal)
{
struct sock *meta_sk;
const struct tcp_sock *tp = tcp_sk(sk);
struct mptcp_tcp_sock *mptcp;
struct sk_buff *skb_head;
struct blestsched_priv *blest_p = blestsched_get_priv(tp);
struct blestsched_cb *blest_cb;
meta_sk = mptcp_meta_sk(sk);
skb_head = tcp_rtx_queue_head(meta_sk);
if (!skb_head)
return NULL;
/* If penalization is optional (coming from mptcp_next_segment()) and
* we are not send-buffer-limited, we do not penalize. The retransmission
* is just an optimization to fix the idle-time due to the delay before
* we wake up the application.
*/
if (!penal && sk_stream_memory_free(meta_sk))
goto retrans;
/* Record the occurrence of a retransmission to update the lambda value */
blest_cb = blestsched_get_cb(tcp_sk(meta_sk));
blest_cb->retrans_flag = true;
/* Only penalize again after an RTT has elapsed */
if (tcp_jiffies32 - blest_p->last_rbuf_opti < usecs_to_jiffies(tp->srtt_us >> 3))
goto retrans;
/* Half the cwnd of the slow flows */
mptcp_for_each_sub(tp->mpcb, mptcp) {
struct tcp_sock *tp_it = mptcp->tp;
if (tp_it != tp &&
TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp_it->mptcp->path_index)) {
if (tp->srtt_us < tp_it->srtt_us && inet_csk((struct sock *)tp_it)->icsk_ca_state == TCP_CA_Open) {
u32 prior_cwnd = tp_it->snd_cwnd;
tp_it->snd_cwnd = max(tp_it->snd_cwnd >> 1U, 1U);
/* If in slow start, do not reduce the ssthresh */
if (prior_cwnd >= tp_it->snd_ssthresh)
tp_it->snd_ssthresh = max(tp_it->snd_ssthresh >> 1U, 2U);
blest_p->last_rbuf_opti = tcp_jiffies32;
}
}
}
retrans:
/* Segment not yet injected into this path? Take it!!! */
if (!(TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp->mptcp->path_index))) {
bool do_retrans = false;
mptcp_for_each_sub(tp->mpcb, mptcp) {
struct tcp_sock *tp_it = mptcp->tp;
if (tp_it != tp &&
TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp_it->mptcp->path_index)) {
if (tp_it->snd_cwnd <= 4) {
do_retrans = true;
break;
}
if (4 * tp->srtt_us >= tp_it->srtt_us) {
do_retrans = false;
break;
} else {
do_retrans = true;
}
}
}
if (do_retrans && mptcp_is_available(sk, skb_head, false)) {
trace_mptcp_retransmit(sk, skb_head);
return skb_head;
}
}
return NULL;
}
/* copy from mptcp_sched.c: __mptcp_next_segment */
/* Returns the next segment to be sent from the mptcp meta-queue.
* (chooses the reinject queue if any segment is waiting in it, otherwise,
* chooses the normal write queue).
* Sets *@reinject to 1 if the returned segment comes from the
* reinject queue. Sets it to 0 if it is the regular send-head of the meta-sk,
* and sets it to -1 if it is a meta-level retransmission to optimize the
* receive-buffer.
*/
static struct sk_buff *__mptcp_blest_next_segment(struct sock *meta_sk, int *reinject)
{
const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
struct sk_buff *skb = NULL;
*reinject = 0;
/* If we are in fallback-mode, just take from the meta-send-queue */
if (mpcb->infinite_mapping_snd || mpcb->send_infinite_mapping)
return tcp_send_head(meta_sk);
skb = skb_peek(&mpcb->reinject_queue);
if (skb) {
*reinject = 1;
} else {
skb = tcp_send_head(meta_sk);
if (!skb && meta_sk->sk_socket &&
test_bit(SOCK_NOSPACE, &meta_sk->sk_socket->flags) &&
sk_stream_wspace(meta_sk) < sk_stream_min_wspace(meta_sk)) {
struct sock *subsk = blest_get_available_subflow(meta_sk, NULL,
false);
if (!subsk)
return NULL;
skb = mptcp_blest_rcv_buf_optimization(subsk, 0);
if (skb)
*reinject = -1;
}
}
return skb;
}
/* copy from mptcp_sched.c: mptcp_next_segment */
static struct sk_buff *mptcp_blest_next_segment(struct sock *meta_sk,
int *reinject,
struct sock **subsk,
unsigned int *limit)
{
struct sk_buff *skb = __mptcp_blest_next_segment(meta_sk, reinject);
unsigned int mss_now;
struct tcp_sock *subtp;
u16 gso_max_segs;
u32 max_len, max_segs, window, needed;
/* As we set it, we have to reset it as well. */
*limit = 0;
if (!skb)
return NULL;
*subsk = blest_get_available_subflow(meta_sk, skb, false);
if (!*subsk)
return NULL;
subtp = tcp_sk(*subsk);
mss_now = tcp_current_mss(*subsk);
if (!*reinject && unlikely(!tcp_snd_wnd_test(tcp_sk(meta_sk), skb, mss_now))) {
skb = mptcp_blest_rcv_buf_optimization(*subsk, 1);
if (skb)
*reinject = -1;
else
return NULL;
}
/* No splitting required, as we will only send one single segment */
if (skb->len <= mss_now)
return skb;
/* The following is similar to tcp_mss_split_point, but
* we do not care about Nagle, because we will anyway
* use TCP_NAGLE_PUSH, which overrides this.
*
* So, we first limit according to the cwnd/gso-size and then according
* to the subflow's window.
*/
gso_max_segs = (*subsk)->sk_gso_max_segs;
if (!gso_max_segs) /* No gso supported on the subflow's NIC */
gso_max_segs = 1;
max_segs = min_t(unsigned int, tcp_cwnd_test(subtp, skb), gso_max_segs);
if (!max_segs)
return NULL;
max_len = mss_now * max_segs;
window = tcp_wnd_end(subtp) - subtp->write_seq;
needed = min(skb->len, window);
if (max_len <= skb->len)
/* Take max_win, which is actually the cwnd/gso-size */
*limit = max_len;
else
/* Or, take the window */
*limit = needed;
return skb;
}
static void blestsched_init(struct sock *sk)
{
struct blestsched_priv *blest_p = blestsched_get_priv(tcp_sk(sk));
struct blestsched_cb *blest_cb = blestsched_get_cb(tcp_sk(mptcp_meta_sk(sk)));
blest_p->last_rbuf_opti = tcp_jiffies32;
blest_p->min_srtt_us = U32_MAX;
blest_p->max_srtt_us = 0;
if (!blest_cb->lambda_1000) {
blest_cb->lambda_1000 = lambda * 100;
blest_cb->last_lambda_update = tcp_jiffies32;
}
}
static struct mptcp_sched_ops mptcp_sched_blest = {
.get_subflow = blest_get_available_subflow,
.next_segment = mptcp_blest_next_segment,
.init = blestsched_init,
.name = "blest",
.owner = THIS_MODULE,
};
static int __init blest_register(void)
{
BUILD_BUG_ON(sizeof(struct blestsched_priv) > MPTCP_SCHED_SIZE);
BUILD_BUG_ON(sizeof(struct blestsched_cb) > MPTCP_SCHED_DATA_SIZE);
if (mptcp_register_scheduler(&mptcp_sched_blest))
return -1;
return 0;
}
static void blest_unregister(void)
{
mptcp_unregister_scheduler(&mptcp_sched_blest);
}
module_init(blest_register);
module_exit(blest_unregister);
MODULE_AUTHOR("Simone Ferlin, Daniel Weber");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("BLEST scheduler for MPTCP, based on default minimum RTT scheduler");
MODULE_VERSION("0.95");

262
net/mptcp/mptcp_coupled.c Executable file
View File

@ -0,0 +1,262 @@
/*
* MPTCP implementation - Linked Increase congestion control Algorithm (LIA)
*
* Initial Design & Implementation:
* Sébastien Barré <sebastien.barre@uclouvain.be>
*
* Current Maintainer & Author:
* Christoph Paasch <christoph.paasch@uclouvain.be>
*
* Additional authors:
* Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
* Gregory Detal <gregory.detal@uclouvain.be>
* Fabien Duchêne <fabien.duchene@uclouvain.be>
* Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
* Lavkesh Lahngir <lavkesh51@gmail.com>
* Andreas Ripke <ripke@neclab.eu>
* Vlad Dogaru <vlad.dogaru@intel.com>
* Octavian Purdila <octavian.purdila@intel.com>
* John Ronan <jronan@tssg.org>
* Catalin Nicutar <catalin.nicutar@gmail.com>
* Brandon Heller <brandonh@stanford.edu>
*
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <net/tcp.h>
#include <net/mptcp.h>
#include <linux/module.h>
/* Scaling is done in the numerator with alpha_scale_num and in the denominator
* with alpha_scale_den.
*
* To downscale, we just need to use alpha_scale.
*
* We have: alpha_scale = alpha_scale_num / (alpha_scale_den ^ 2)
*/
static int alpha_scale_den = 10;
static int alpha_scale_num = 32;
static int alpha_scale = 12;
struct mptcp_ccc {
u64 alpha;
bool forced_update;
};
static inline int mptcp_ccc_sk_can_send(const struct sock *sk)
{
return mptcp_sk_can_send(sk) && tcp_sk(sk)->srtt_us;
}
static inline u64 mptcp_get_alpha(const struct sock *meta_sk)
{
return ((struct mptcp_ccc *)inet_csk_ca(meta_sk))->alpha;
}
static inline void mptcp_set_alpha(const struct sock *meta_sk, u64 alpha)
{
((struct mptcp_ccc *)inet_csk_ca(meta_sk))->alpha = alpha;
}
static inline u64 mptcp_ccc_scale(u32 val, int scale)
{
return (u64) val << scale;
}
static inline bool mptcp_get_forced(const struct sock *meta_sk)
{
return ((struct mptcp_ccc *)inet_csk_ca(meta_sk))->forced_update;
}
static inline void mptcp_set_forced(const struct sock *meta_sk, bool force)
{
((struct mptcp_ccc *)inet_csk_ca(meta_sk))->forced_update = force;
}
static void mptcp_ccc_recalc_alpha(const struct sock *sk)
{
const struct mptcp_cb *mpcb = tcp_sk(sk)->mpcb;
const struct mptcp_tcp_sock *mptcp;
int best_cwnd = 0, best_rtt = 0, can_send = 0;
u64 max_numerator = 0, sum_denominator = 0, alpha = 1;
if (!mpcb)
return;
/* Do regular alpha-calculation for multiple subflows */
/* Find the max numerator of the alpha-calculation */
mptcp_for_each_sub(mpcb, mptcp) {
const struct sock *sub_sk = mptcp_to_sock(mptcp);
struct tcp_sock *sub_tp = tcp_sk(sub_sk);
u64 tmp;
if (!mptcp_ccc_sk_can_send(sub_sk))
continue;
can_send++;
/* We need to look for the path that provides the max value.
* Integer overflow is not possible here, because
* tmp is a u64.
*/
tmp = div64_u64(mptcp_ccc_scale(sub_tp->snd_cwnd,
alpha_scale_num), (u64)sub_tp->srtt_us * sub_tp->srtt_us);
if (tmp >= max_numerator) {
max_numerator = tmp;
best_cwnd = sub_tp->snd_cwnd;
best_rtt = sub_tp->srtt_us;
}
}
/* No subflow is able to send - we don't care anymore */
if (unlikely(!can_send))
goto exit;
/* Calculate the denominator */
mptcp_for_each_sub(mpcb, mptcp) {
const struct sock *sub_sk = mptcp_to_sock(mptcp);
struct tcp_sock *sub_tp = tcp_sk(sub_sk);
if (!mptcp_ccc_sk_can_send(sub_sk))
continue;
sum_denominator += div_u64(
mptcp_ccc_scale(sub_tp->snd_cwnd,
alpha_scale_den) * best_rtt,
sub_tp->srtt_us);
}
sum_denominator *= sum_denominator;
if (unlikely(!sum_denominator)) {
pr_err("%s: sum_denominator == 0\n", __func__);
mptcp_for_each_sub(mpcb, mptcp) {
const struct sock *sub_sk = mptcp_to_sock(mptcp);
struct tcp_sock *sub_tp = tcp_sk(sub_sk);
pr_err("%s: pi:%d, state:%d\n, rtt:%u, cwnd: %u",
__func__, sub_tp->mptcp->path_index,
sub_sk->sk_state, sub_tp->srtt_us,
sub_tp->snd_cwnd);
}
}
alpha = div64_u64(mptcp_ccc_scale(best_cwnd, alpha_scale_num), sum_denominator);
if (unlikely(!alpha))
alpha = 1;
exit:
mptcp_set_alpha(mptcp_meta_sk(sk), alpha);
}
static void mptcp_ccc_init(struct sock *sk)
{
if (mptcp(tcp_sk(sk))) {
mptcp_set_forced(mptcp_meta_sk(sk), 0);
mptcp_set_alpha(mptcp_meta_sk(sk), 1);
}
/* If we do not mptcp, behave like reno: return */
}
static void mptcp_ccc_cwnd_event(struct sock *sk, enum tcp_ca_event event)
{
if (event == CA_EVENT_LOSS)
mptcp_ccc_recalc_alpha(sk);
}
static void mptcp_ccc_set_state(struct sock *sk, u8 ca_state)
{
if (!mptcp(tcp_sk(sk)))
return;
mptcp_set_forced(mptcp_meta_sk(sk), 1);
}
static void mptcp_ccc_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
int snd_cwnd;
u64 alpha;
if (!mptcp(tp)) {
tcp_reno_cong_avoid(sk, ack, acked);
return;
}
if (!tcp_is_cwnd_limited(sk))
return;
if (tcp_in_slow_start(tp)) {
/* In "safe" area, increase. */
tcp_slow_start(tp, acked);
mptcp_ccc_recalc_alpha(sk);
return;
}
if (mptcp_get_forced(mptcp_meta_sk(sk))) {
mptcp_ccc_recalc_alpha(sk);
mptcp_set_forced(mptcp_meta_sk(sk), 0);
}
alpha = mptcp_get_alpha(mptcp_meta_sk(sk));
/* This may happen if, at initialization time, the mpcb
* was not yet attached to the sock, and thus
* initializing alpha failed.
*/
if (unlikely(!alpha))
alpha = 1;
snd_cwnd = (int)div_u64((u64)mptcp_ccc_scale(1, alpha_scale), alpha);
/* snd_cwnd_cnt >= max (scale * tot_cwnd / alpha, cwnd)
* Thus, we select here the max value.
*/
if (snd_cwnd < tp->snd_cwnd)
snd_cwnd = tp->snd_cwnd;
if (tp->snd_cwnd_cnt >= snd_cwnd) {
if (tp->snd_cwnd < tp->snd_cwnd_clamp) {
tp->snd_cwnd++;
mptcp_ccc_recalc_alpha(sk);
}
tp->snd_cwnd_cnt = 0;
} else {
tp->snd_cwnd_cnt++;
}
}
static struct tcp_congestion_ops mptcp_ccc = {
.init = mptcp_ccc_init,
.ssthresh = tcp_reno_ssthresh,
.cong_avoid = mptcp_ccc_cong_avoid,
.undo_cwnd = tcp_reno_undo_cwnd,
.cwnd_event = mptcp_ccc_cwnd_event,
.set_state = mptcp_ccc_set_state,
.owner = THIS_MODULE,
.name = "lia",
};
static int __init mptcp_ccc_register(void)
{
BUILD_BUG_ON(sizeof(struct mptcp_ccc) > ICSK_CA_PRIV_SIZE);
return tcp_register_congestion_control(&mptcp_ccc);
}
static void __exit mptcp_ccc_unregister(void)
{
tcp_unregister_congestion_control(&mptcp_ccc);
}
module_init(mptcp_ccc_register);
module_exit(mptcp_ccc_unregister);
MODULE_AUTHOR("Christoph Paasch, Sébastien Barré");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("MPTCP LINKED INCREASE CONGESTION CONTROL ALGORITHM");
MODULE_VERSION("0.1");

3135
net/mptcp/mptcp_ctrl.c Executable file

File diff suppressed because it is too large

1963
net/mptcp/mptcp_fullmesh.c Executable file

File diff suppressed because it is too large

2431
net/mptcp/mptcp_input.c Executable file

File diff suppressed because it is too large

427
net/mptcp/mptcp_ipv4.c Executable file
View File

@ -0,0 +1,427 @@
/*
* MPTCP implementation - IPv4-specific functions
*
* Initial Design & Implementation:
* Sébastien Barré <sebastien.barre@uclouvain.be>
*
* Current Maintainer:
* Christoph Paasch <christoph.paasch@uclouvain.be>
*
* Additional authors:
* Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
* Gregory Detal <gregory.detal@uclouvain.be>
* Fabien Duchêne <fabien.duchene@uclouvain.be>
* Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
* Lavkesh Lahngir <lavkesh51@gmail.com>
* Andreas Ripke <ripke@neclab.eu>
* Vlad Dogaru <vlad.dogaru@intel.com>
* Octavian Purdila <octavian.purdila@intel.com>
* John Ronan <jronan@tssg.org>
* Catalin Nicutar <catalin.nicutar@gmail.com>
* Brandon Heller <brandonh@stanford.edu>
*
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/export.h>
#include <linux/ip.h>
#include <linux/list.h>
#include <linux/skbuff.h>
#include <linux/spinlock.h>
#include <linux/tcp.h>
#include <net/inet_common.h>
#include <net/inet_connection_sock.h>
#include <net/mptcp.h>
#include <net/mptcp_v4.h>
#include <net/request_sock.h>
#include <net/tcp.h>
u32 mptcp_v4_get_nonce(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport)
{
return siphash_4u32((__force u32)saddr, (__force u32)daddr,
(__force u32)sport << 16 | (__force u32)dport,
mptcp_seed++, &mptcp_secret);
}
u64 mptcp_v4_get_key(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
u32 seed)
{
return siphash_2u64((__force u64)saddr << 32 | (__force u64)daddr,
(__force u64)seed << 32 | (__force u64)sport << 16 | (__force u64)dport,
&mptcp_secret);
}
static void mptcp_v4_reqsk_destructor(struct request_sock *req)
{
mptcp_reqsk_destructor(req);
tcp_v4_reqsk_destructor(req);
}
static int mptcp_v4_init_req(struct request_sock *req, const struct sock *sk,
struct sk_buff *skb, bool want_cookie)
{
tcp_request_sock_ipv4_ops.init_req(req, sk, skb, want_cookie);
mptcp_rsk(req)->hash_entry.pprev = NULL;
mptcp_rsk(req)->is_sub = 0;
inet_rsk(req)->mptcp_rqsk = 1;
/* In case of SYN-cookies, we wait for the isn to be generated - it is
* input to the key-generation.
*/
if (!want_cookie)
mptcp_reqsk_init(req, sk, skb, false);
return 0;
}
#ifdef CONFIG_SYN_COOKIES
static u32 mptcp_v4_cookie_init_seq(struct request_sock *req, const struct sock *sk,
const struct sk_buff *skb, __u16 *mssp)
{
__u32 isn = cookie_v4_init_sequence(req, sk, skb, mssp);
tcp_rsk(req)->snt_isn = isn;
mptcp_reqsk_init(req, sk, skb, true);
return isn;
}
#endif
/* May be called without holding the meta-level lock */
static int mptcp_v4_join_init_req(struct request_sock *req, const struct sock *meta_sk,
struct sk_buff *skb, bool want_cookie)
{
struct mptcp_request_sock *mtreq = mptcp_rsk(req);
const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
union inet_addr addr;
int loc_id;
bool low_prio = false;
/* We need to do this as early as possible, because if we fail later
* (e.g., in get_local_id), reqsk_free tries to remove the
* request-socket from the hashtable in mptcp_hash_request_remove, as pprev
* may be different from NULL.
*/
mtreq->hash_entry.pprev = NULL;
tcp_request_sock_ipv4_ops.init_req(req, meta_sk, skb, want_cookie);
mtreq->mptcp_loc_nonce = mptcp_v4_get_nonce(ip_hdr(skb)->saddr,
ip_hdr(skb)->daddr,
tcp_hdr(skb)->source,
tcp_hdr(skb)->dest);
addr.ip = inet_rsk(req)->ir_loc_addr;
loc_id = mpcb->pm_ops->get_local_id(meta_sk, AF_INET, &addr, &low_prio);
if (loc_id == -1)
return -1;
mtreq->loc_id = loc_id;
mtreq->low_prio = low_prio;
mptcp_join_reqsk_init(mpcb, req, skb);
return 0;
}
/* Similar to tcp_request_sock_ops */
struct request_sock_ops mptcp_request_sock_ops __read_mostly = {
.family = PF_INET,
.obj_size = sizeof(struct mptcp_request_sock),
.rtx_syn_ack = tcp_rtx_synack,
.send_ack = tcp_v4_reqsk_send_ack,
.destructor = mptcp_v4_reqsk_destructor,
.send_reset = tcp_v4_send_reset,
.syn_ack_timeout = tcp_syn_ack_timeout,
};
/* Similar to: tcp_v4_conn_request
* May be called without holding the meta-level lock
*/
static int mptcp_v4_join_request(struct sock *meta_sk, struct sk_buff *skb)
{
return tcp_conn_request(&mptcp_request_sock_ops,
&mptcp_join_request_sock_ipv4_ops,
meta_sk, skb);
}
/* Similar to: tcp_v4_do_rcv
* We only process join requests here (either the SYN or the final ACK).
*/
int mptcp_v4_do_rcv(struct sock *meta_sk, struct sk_buff *skb)
{
const struct tcphdr *th = tcp_hdr(skb);
const struct iphdr *iph = ip_hdr(skb);
struct sock *child, *rsk = NULL, *sk;
int ret;
sk = inet_lookup_established(sock_net(meta_sk), &tcp_hashinfo,
iph->saddr, th->source, iph->daddr,
th->dest, inet_iif(skb));
if (!sk)
goto new_subflow;
if (is_meta_sk(sk)) {
WARN("%s Did not find a sub-sk - did found the meta!\n", __func__);
sock_put(sk);
goto discard;
}
if (sk->sk_state == TCP_TIME_WAIT) {
inet_twsk_put(inet_twsk(sk));
goto discard;
}
if (sk->sk_state == TCP_NEW_SYN_RECV) {
struct request_sock *req = inet_reqsk(sk);
bool req_stolen;
if (!mptcp_can_new_subflow(meta_sk))
goto reset_and_discard;
local_bh_disable();
child = tcp_check_req(meta_sk, skb, req, false, &req_stolen);
if (!child) {
reqsk_put(req);
local_bh_enable();
goto discard;
}
if (child != meta_sk) {
ret = mptcp_finish_handshake(child, skb);
if (ret) {
rsk = child;
local_bh_enable();
goto reset_and_discard;
}
local_bh_enable();
return 0;
}
/* tcp_check_req failed */
reqsk_put(req);
local_bh_enable();
goto discard;
}
ret = tcp_v4_do_rcv(sk, skb);
sock_put(sk);
return ret;
new_subflow:
if (!mptcp_can_new_subflow(meta_sk))
goto reset_and_discard;
child = tcp_v4_cookie_check(meta_sk, skb);
if (!child)
goto discard;
if (child != meta_sk) {
ret = mptcp_finish_handshake(child, skb);
if (ret) {
rsk = child;
goto reset_and_discard;
}
}
if (tcp_hdr(skb)->syn) {
local_bh_disable();
mptcp_v4_join_request(meta_sk, skb);
local_bh_enable();
}
discard:
kfree_skb(skb);
return 0;
reset_and_discard:
tcp_v4_send_reset(rsk, skb);
goto discard;
}
/* Create a new IPv4 subflow.
*
* We are in user-context and the meta-socket lock is held.
*/
int __mptcp_init4_subsockets(struct sock *meta_sk, const struct mptcp_loc4 *loc,
__be16 sport, struct mptcp_rem4 *rem,
struct sock **subsk)
{
struct tcp_sock *tp;
struct sock *sk;
struct sockaddr_in loc_in, rem_in;
struct socket_alloc sock_full;
struct socket *sock = (struct socket *)&sock_full;
int ret;
/** First, create and prepare the new socket */
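/* Work on a stack copy of the meta's struct socket: inet_create() needs
* a struct socket to attach the new sk to, while the meta's own socket
* must stay untouched. The subflow sk is attached to the real meta
* socket via sk_set_socket() below, once the connect has been issued.
*/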
memcpy(&sock_full, meta_sk->sk_socket, sizeof(sock_full));
sock->state = SS_UNCONNECTED;
sock->ops = NULL;
ret = inet_create(sock_net(meta_sk), sock, IPPROTO_TCP, 1);
if (unlikely(ret < 0)) {
net_err_ratelimited("%s inet_create failed ret: %d\n",
__func__, ret);
return ret;
}
sk = sock->sk;
tp = tcp_sk(sk);
/* All subsockets need the MPTCP-lock-class */
lockdep_set_class_and_name(&(sk)->sk_lock.slock, &meta_slock_key, meta_slock_key_name);
lockdep_init_map(&(sk)->sk_lock.dep_map, meta_key_name, &meta_key, 0);
ret = mptcp_add_sock(meta_sk, sk, loc->loc4_id, rem->rem4_id, GFP_KERNEL);
if (ret) {
net_err_ratelimited("%s mptcp_add_sock failed ret: %d\n",
__func__, ret);
goto error;
}
tp->mptcp->slave_sk = 1;
tp->mptcp->low_prio = loc->low_prio;
/* Initializing the timer for an MPTCP subflow */
timer_setup(&tp->mptcp->mptcp_ack_timer, mptcp_ack_handler, 0);
/** Then, connect the socket to the peer */
loc_in.sin_family = AF_INET;
rem_in.sin_family = AF_INET;
loc_in.sin_port = sport;
if (rem->port)
rem_in.sin_port = rem->port;
else
rem_in.sin_port = inet_sk(meta_sk)->inet_dport;
loc_in.sin_addr = loc->addr;
rem_in.sin_addr = rem->addr;
if (loc->if_idx)
sk->sk_bound_dev_if = loc->if_idx;
ret = kernel_bind(sock, (struct sockaddr *)&loc_in,
sizeof(struct sockaddr_in));
if (ret < 0) {
net_err_ratelimited("%s: token %#x bind() to %pI4 index %d failed, error %d\n",
__func__, tcp_sk(meta_sk)->mpcb->mptcp_loc_token,
&loc_in.sin_addr, loc->if_idx, ret);
goto error;
}
mptcp_debug("%s: token %#x pi %d src_addr:%pI4:%d dst_addr:%pI4:%d ifidx: %d\n",
__func__, tcp_sk(meta_sk)->mpcb->mptcp_loc_token,
tp->mptcp->path_index, &loc_in.sin_addr,
ntohs(loc_in.sin_port), &rem_in.sin_addr,
ntohs(rem_in.sin_port), loc->if_idx);
if (tcp_sk(meta_sk)->mpcb->pm_ops->init_subsocket_v4)
tcp_sk(meta_sk)->mpcb->pm_ops->init_subsocket_v4(sk, rem->addr);
ret = kernel_connect(sock, (struct sockaddr *)&rem_in,
sizeof(struct sockaddr_in), O_NONBLOCK);
if (ret < 0 && ret != -EINPROGRESS) {
net_err_ratelimited("%s: MPTCP subsocket connect() failed, error %d\n",
__func__, ret);
goto error;
}
MPTCP_INC_STATS(sock_net(meta_sk), MPTCP_MIB_JOINSYNTX);
sk_set_socket(sk, meta_sk->sk_socket);
sk->sk_wq = meta_sk->sk_wq;
if (subsk)
*subsk = sk;
return 0;
error:
/* May happen if mptcp_add_sock fails first */
if (!mptcp(tp)) {
tcp_close(sk, 0);
} else {
local_bh_disable();
mptcp_sub_force_close(sk);
local_bh_enable();
}
return ret;
}
EXPORT_SYMBOL(__mptcp_init4_subsockets);
const struct inet_connection_sock_af_ops mptcp_v4_specific = {
.queue_xmit = ip_queue_xmit,
.send_check = tcp_v4_send_check,
.rebuild_header = inet_sk_rebuild_header,
.sk_rx_dst_set = inet_sk_rx_dst_set,
.conn_request = mptcp_conn_request,
.syn_recv_sock = tcp_v4_syn_recv_sock,
.net_header_len = sizeof(struct iphdr),
.setsockopt = ip_setsockopt,
.getsockopt = ip_getsockopt,
.addr2sockaddr = inet_csk_addr2sockaddr,
.sockaddr_len = sizeof(struct sockaddr_in),
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_ip_setsockopt,
.compat_getsockopt = compat_ip_getsockopt,
#endif
.mtu_reduced = tcp_v4_mtu_reduced,
};
struct tcp_request_sock_ops mptcp_request_sock_ipv4_ops;
struct tcp_request_sock_ops mptcp_join_request_sock_ipv4_ops;
/* General initialization of IPv4 for MPTCP */
int mptcp_pm_v4_init(void)
{
int ret = 0;
struct request_sock_ops *ops = &mptcp_request_sock_ops;
mptcp_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops;
mptcp_request_sock_ipv4_ops.init_req = mptcp_v4_init_req;
#ifdef CONFIG_SYN_COOKIES
mptcp_request_sock_ipv4_ops.cookie_init_seq = mptcp_v4_cookie_init_seq;
#endif
mptcp_join_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops;
mptcp_join_request_sock_ipv4_ops.init_req = mptcp_v4_join_init_req;
ops->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", "MPTCP");
if (ops->slab_name == NULL) {
ret = -ENOMEM;
goto out;
}
ops->slab = kmem_cache_create(ops->slab_name, ops->obj_size, 0,
SLAB_TYPESAFE_BY_RCU|SLAB_HWCACHE_ALIGN,
NULL);
if (ops->slab == NULL) {
ret = -ENOMEM;
goto err_reqsk_create;
}
out:
return ret;
err_reqsk_create:
kfree(ops->slab_name);
ops->slab_name = NULL;
goto out;
}
void mptcp_pm_v4_undo(void)
{
kmem_cache_destroy(mptcp_request_sock_ops.slab);
kfree(mptcp_request_sock_ops.slab_name);
}

net/mptcp/mptcp_ipv6.c Executable file
View File

@ -0,0 +1,475 @@
/*
* MPTCP implementation - IPv6-specific functions
*
* Initial Design & Implementation:
* Sébastien Barré <sebastien.barre@uclouvain.be>
*
* Current Maintainer:
* Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
*
* Additional authors:
* Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
* Gregory Detal <gregory.detal@uclouvain.be>
* Fabien Duchêne <fabien.duchene@uclouvain.be>
* Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
* Lavkesh Lahngir <lavkesh51@gmail.com>
* Andreas Ripke <ripke@neclab.eu>
* Vlad Dogaru <vlad.dogaru@intel.com>
* Octavian Purdila <octavian.purdila@intel.com>
* John Ronan <jronan@tssg.org>
* Catalin Nicutar <catalin.nicutar@gmail.com>
* Brandon Heller <brandonh@stanford.edu>
*
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/export.h>
#include <linux/in6.h>
#include <linux/kernel.h>
#include <net/addrconf.h>
#include <net/flow.h>
#include <net/inet6_connection_sock.h>
#include <net/inet6_hashtables.h>
#include <net/inet_common.h>
#include <net/ipv6.h>
#include <net/ip6_checksum.h>
#include <net/ip6_route.h>
#include <net/mptcp.h>
#include <net/mptcp_v6.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
__u32 mptcp_v6_get_nonce(const __be32 *saddr, const __be32 *daddr,
__be16 sport, __be16 dport)
{
const struct {
struct in6_addr saddr;
struct in6_addr daddr;
u32 seed;
__be16 sport;
__be16 dport;
} __aligned(SIPHASH_ALIGNMENT) combined = {
.saddr = *(struct in6_addr *)saddr,
.daddr = *(struct in6_addr *)daddr,
.seed = mptcp_seed++,
.sport = sport,
.dport = dport
};
return siphash(&combined, offsetofend(typeof(combined), dport),
&mptcp_secret);
}
u64 mptcp_v6_get_key(const __be32 *saddr, const __be32 *daddr,
__be16 sport, __be16 dport, u32 seed)
{
const struct {
struct in6_addr saddr;
struct in6_addr daddr;
u32 seed;
__be16 sport;
__be16 dport;
} __aligned(SIPHASH_ALIGNMENT) combined = {
.saddr = *(struct in6_addr *)saddr,
.daddr = *(struct in6_addr *)daddr,
.seed = seed,
.sport = sport,
.dport = dport
};
return siphash(&combined, offsetofend(typeof(combined), dport),
&mptcp_secret);
}
static void mptcp_v6_reqsk_destructor(struct request_sock *req)
{
mptcp_reqsk_destructor(req);
tcp_v6_reqsk_destructor(req);
}
static int mptcp_v6_init_req(struct request_sock *req, const struct sock *sk,
struct sk_buff *skb, bool want_cookie)
{
tcp_request_sock_ipv6_ops.init_req(req, sk, skb, want_cookie);
mptcp_rsk(req)->hash_entry.pprev = NULL;
mptcp_rsk(req)->is_sub = 0;
inet_rsk(req)->mptcp_rqsk = 1;
/* In case of SYN-cookies, we wait for the isn to be generated - it is
* input to the key-generation.
*/
if (!want_cookie)
mptcp_reqsk_init(req, sk, skb, false);
return 0;
}
#ifdef CONFIG_SYN_COOKIES
static u32 mptcp_v6_cookie_init_seq(struct request_sock *req, const struct sock *sk,
const struct sk_buff *skb, __u16 *mssp)
{
__u32 isn = cookie_v6_init_sequence(req, sk, skb, mssp);
tcp_rsk(req)->snt_isn = isn;
mptcp_reqsk_init(req, sk, skb, true);
return isn;
}
#endif
/* May be called without holding the meta-level lock */
static int mptcp_v6_join_init_req(struct request_sock *req, const struct sock *meta_sk,
struct sk_buff *skb, bool want_cookie)
{
struct mptcp_request_sock *mtreq = mptcp_rsk(req);
const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
union inet_addr addr;
int loc_id;
bool low_prio = false;
/* We need to do this as early as possible: if we fail later
* (e.g., in get_local_id), reqsk_free tries to remove the
* request-socket from the hash table in mptcp_hash_request_remove
* whenever pprev is not NULL.
*/
mtreq->hash_entry.pprev = NULL;
tcp_request_sock_ipv6_ops.init_req(req, meta_sk, skb, want_cookie);
mtreq->mptcp_loc_nonce = mptcp_v6_get_nonce(ipv6_hdr(skb)->saddr.s6_addr32,
ipv6_hdr(skb)->daddr.s6_addr32,
tcp_hdr(skb)->source,
tcp_hdr(skb)->dest);
addr.in6 = inet_rsk(req)->ir_v6_loc_addr;
loc_id = mpcb->pm_ops->get_local_id(meta_sk, AF_INET6, &addr, &low_prio);
if (loc_id == -1)
return -1;
mtreq->loc_id = loc_id;
mtreq->low_prio = low_prio;
mptcp_join_reqsk_init(mpcb, req, skb);
return 0;
}
/* Similar to tcp6_request_sock_ops */
struct request_sock_ops mptcp6_request_sock_ops __read_mostly = {
.family = AF_INET6,
.obj_size = sizeof(struct mptcp_request_sock),
.rtx_syn_ack = tcp_rtx_synack,
.send_ack = tcp_v6_reqsk_send_ack,
.destructor = mptcp_v6_reqsk_destructor,
.send_reset = tcp_v6_send_reset,
.syn_ack_timeout = tcp_syn_ack_timeout,
};
/* Similar to: tcp_v6_conn_request
* May be called without holding the meta-level lock
*/
static int mptcp_v6_join_request(struct sock *meta_sk, struct sk_buff *skb)
{
return tcp_conn_request(&mptcp6_request_sock_ops,
&mptcp_join_request_sock_ipv6_ops,
meta_sk, skb);
}
int mptcp_v6_do_rcv(struct sock *meta_sk, struct sk_buff *skb)
{
const struct tcphdr *th = tcp_hdr(skb);
const struct ipv6hdr *ip6h = ipv6_hdr(skb);
struct sock *child, *rsk = NULL, *sk;
int ret;
sk = __inet6_lookup_established(sock_net(meta_sk),
&tcp_hashinfo,
&ip6h->saddr, th->source,
&ip6h->daddr, ntohs(th->dest),
tcp_v6_iif(skb), tcp_v6_sdif(skb));
if (!sk)
goto new_subflow;
if (is_meta_sk(sk)) {
WARN("%s Did not find a sub-sk - found the meta instead!\n", __func__);
sock_put(sk);
goto discard;
}
if (sk->sk_state == TCP_TIME_WAIT) {
inet_twsk_put(inet_twsk(sk));
goto discard;
}
if (sk->sk_state == TCP_NEW_SYN_RECV) {
struct request_sock *req = inet_reqsk(sk);
bool req_stolen;
if (!mptcp_can_new_subflow(meta_sk))
goto reset_and_discard;
local_bh_disable();
child = tcp_check_req(meta_sk, skb, req, false, &req_stolen);
if (!child) {
reqsk_put(req);
local_bh_enable();
goto discard;
}
if (child != meta_sk) {
ret = mptcp_finish_handshake(child, skb);
if (ret) {
rsk = child;
local_bh_enable();
goto reset_and_discard;
}
local_bh_enable();
return 0;
}
/* tcp_check_req failed */
reqsk_put(req);
local_bh_enable();
goto discard;
}
ret = tcp_v6_do_rcv(sk, skb);
sock_put(sk);
return ret;
new_subflow:
if (!mptcp_can_new_subflow(meta_sk))
goto reset_and_discard;
child = tcp_v6_cookie_check(meta_sk, skb);
if (!child)
goto discard;
if (child != meta_sk) {
ret = mptcp_finish_handshake(child, skb);
if (ret) {
rsk = child;
goto reset_and_discard;
}
}
if (tcp_hdr(skb)->syn) {
local_bh_disable();
mptcp_v6_join_request(meta_sk, skb);
local_bh_enable();
}
discard:
kfree_skb(skb);
return 0;
reset_and_discard:
tcp_v6_send_reset(rsk, skb);
goto discard;
}
/* Create a new IPv6 subflow.
*
* We are in user-context and the meta-socket lock is held.
*/
int __mptcp_init6_subsockets(struct sock *meta_sk, const struct mptcp_loc6 *loc,
__be16 sport, struct mptcp_rem6 *rem,
struct sock **subsk)
{
struct tcp_sock *tp;
struct sock *sk;
struct sockaddr_in6 loc_in, rem_in;
struct socket_alloc sock_full;
struct socket *sock = (struct socket *)&sock_full;
int ret;
/** First, create and prepare the new socket */
memcpy(&sock_full, meta_sk->sk_socket, sizeof(sock_full));
sock->state = SS_UNCONNECTED;
sock->ops = NULL;
ret = inet6_create(sock_net(meta_sk), sock, IPPROTO_TCP, 1);
if (unlikely(ret < 0)) {
net_err_ratelimited("%s inet6_create failed ret: %d\n",
__func__, ret);
return ret;
}
sk = sock->sk;
tp = tcp_sk(sk);
/* All subsockets need the MPTCP-lock-class */
lockdep_set_class_and_name(&(sk)->sk_lock.slock, &meta_slock_key, meta_slock_key_name);
lockdep_init_map(&(sk)->sk_lock.dep_map, meta_key_name, &meta_key, 0);
ret = mptcp_add_sock(meta_sk, sk, loc->loc6_id, rem->rem6_id, GFP_KERNEL);
if (ret) {
net_err_ratelimited("%s mptcp_add_sock failed ret: %d\n",
__func__, ret);
goto error;
}
tp->mptcp->slave_sk = 1;
tp->mptcp->low_prio = loc->low_prio;
/* Initializing the timer for an MPTCP subflow */
timer_setup(&tp->mptcp->mptcp_ack_timer, mptcp_ack_handler, 0);
/** Then, connect the socket to the peer */
loc_in.sin6_family = AF_INET6;
rem_in.sin6_family = AF_INET6;
loc_in.sin6_port = sport;
if (rem->port)
rem_in.sin6_port = rem->port;
else
rem_in.sin6_port = inet_sk(meta_sk)->inet_dport;
loc_in.sin6_addr = loc->addr;
rem_in.sin6_addr = rem->addr;
if (loc->if_idx)
sk->sk_bound_dev_if = loc->if_idx;
ret = kernel_bind(sock, (struct sockaddr *)&loc_in,
sizeof(struct sockaddr_in6));
if (ret < 0) {
net_err_ratelimited("%s: token %#x bind() to %pI6 index %d failed, error %d\n",
__func__, tcp_sk(meta_sk)->mpcb->mptcp_loc_token,
&loc_in.sin6_addr, loc->if_idx, ret);
goto error;
}
mptcp_debug("%s: token %#x pi %d src_addr:%pI6:%d dst_addr:%pI6:%d ifidx: %u\n",
__func__, tcp_sk(meta_sk)->mpcb->mptcp_loc_token,
tp->mptcp->path_index, &loc_in.sin6_addr,
ntohs(loc_in.sin6_port), &rem_in.sin6_addr,
ntohs(rem_in.sin6_port), loc->if_idx);
if (tcp_sk(meta_sk)->mpcb->pm_ops->init_subsocket_v6)
tcp_sk(meta_sk)->mpcb->pm_ops->init_subsocket_v6(sk, rem->addr);
ret = kernel_connect(sock, (struct sockaddr *)&rem_in,
sizeof(struct sockaddr_in6), O_NONBLOCK);
if (ret < 0 && ret != -EINPROGRESS) {
net_err_ratelimited("%s: MPTCP subsocket connect() failed, error %d\n",
__func__, ret);
goto error;
}
MPTCP_INC_STATS(sock_net(meta_sk), MPTCP_MIB_JOINSYNTX);
sk_set_socket(sk, meta_sk->sk_socket);
sk->sk_wq = meta_sk->sk_wq;
if (subsk)
*subsk = sk;
return 0;
error:
/* May happen if mptcp_add_sock fails first */
if (!mptcp(tp)) {
tcp_close(sk, 0);
} else {
local_bh_disable();
mptcp_sub_force_close(sk);
local_bh_enable();
}
return ret;
}
EXPORT_SYMBOL(__mptcp_init6_subsockets);
const struct inet_connection_sock_af_ops mptcp_v6_specific = {
.queue_xmit = inet6_csk_xmit,
.send_check = tcp_v6_send_check,
.rebuild_header = inet6_sk_rebuild_header,
.sk_rx_dst_set = inet6_sk_rx_dst_set,
.conn_request = mptcp_conn_request,
.syn_recv_sock = tcp_v6_syn_recv_sock,
.net_header_len = sizeof(struct ipv6hdr),
.net_frag_header_len = sizeof(struct frag_hdr),
.setsockopt = ipv6_setsockopt,
.getsockopt = ipv6_getsockopt,
.addr2sockaddr = inet6_csk_addr2sockaddr,
.sockaddr_len = sizeof(struct sockaddr_in6),
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_ipv6_setsockopt,
.compat_getsockopt = compat_ipv6_getsockopt,
#endif
.mtu_reduced = tcp_v6_mtu_reduced,
};
const struct inet_connection_sock_af_ops mptcp_v6_mapped = {
.queue_xmit = ip_queue_xmit,
.send_check = tcp_v4_send_check,
.rebuild_header = inet_sk_rebuild_header,
.sk_rx_dst_set = inet_sk_rx_dst_set,
.conn_request = mptcp_conn_request,
.syn_recv_sock = tcp_v6_syn_recv_sock,
.net_header_len = sizeof(struct iphdr),
.setsockopt = ipv6_setsockopt,
.getsockopt = ipv6_getsockopt,
.addr2sockaddr = inet6_csk_addr2sockaddr,
.sockaddr_len = sizeof(struct sockaddr_in6),
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_ipv6_setsockopt,
.compat_getsockopt = compat_ipv6_getsockopt,
#endif
.mtu_reduced = tcp_v4_mtu_reduced,
};
struct tcp_request_sock_ops mptcp_request_sock_ipv6_ops;
struct tcp_request_sock_ops mptcp_join_request_sock_ipv6_ops;
int mptcp_pm_v6_init(void)
{
int ret = 0;
struct request_sock_ops *ops = &mptcp6_request_sock_ops;
mptcp_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops;
mptcp_request_sock_ipv6_ops.init_req = mptcp_v6_init_req;
#ifdef CONFIG_SYN_COOKIES
mptcp_request_sock_ipv6_ops.cookie_init_seq = mptcp_v6_cookie_init_seq;
#endif
mptcp_join_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops;
mptcp_join_request_sock_ipv6_ops.init_req = mptcp_v6_join_init_req;
ops->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", "MPTCP6");
if (ops->slab_name == NULL) {
ret = -ENOMEM;
goto out;
}
ops->slab = kmem_cache_create(ops->slab_name, ops->obj_size, 0,
SLAB_TYPESAFE_BY_RCU|SLAB_HWCACHE_ALIGN,
NULL);
if (ops->slab == NULL) {
ret = -ENOMEM;
goto err_reqsk_create;
}
out:
return ret;
err_reqsk_create:
kfree(ops->slab_name);
ops->slab_name = NULL;
goto out;
}
void mptcp_pm_v6_undo(void)
{
kmem_cache_destroy(mptcp6_request_sock_ops.slab);
kfree(mptcp6_request_sock_ops.slab_name);
}

net/mptcp/mptcp_ndiffports.c Executable file
View File

@ -0,0 +1,174 @@
#include <linux/module.h>
#include <net/mptcp.h>
#include <net/mptcp_v4.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/mptcp_v6.h>
#endif
struct ndiffports_priv {
/* Worker struct for subflow establishment */
struct work_struct subflow_work;
struct mptcp_cb *mpcb;
};
static int num_subflows __read_mostly = 2;
module_param(num_subflows, int, 0644);
MODULE_PARM_DESC(num_subflows, "choose the number of subflows per MPTCP connection");
/**
* Create all new subflows, by doing calls to mptcp_initX_subsockets
*
* This function uses a goto next_subflow, to allow releasing the lock between
* new subflows and giving other processes a chance to do some work on the
* socket and potentially finishing the communication.
**/
static void create_subflow_worker(struct work_struct *work)
{
const struct ndiffports_priv *pm_priv = container_of(work,
struct ndiffports_priv,
subflow_work);
struct mptcp_cb *mpcb = pm_priv->mpcb;
struct sock *meta_sk = mpcb->meta_sk;
int iter = 0;
next_subflow:
if (iter) {
release_sock(meta_sk);
mutex_unlock(&mpcb->mpcb_mutex);
cond_resched();
}
mutex_lock(&mpcb->mpcb_mutex);
lock_sock_nested(meta_sk, SINGLE_DEPTH_NESTING);
if (!mptcp(tcp_sk(meta_sk)))
goto exit;
iter++;
if (sock_flag(meta_sk, SOCK_DEAD))
goto exit;
if (mpcb->master_sk &&
!tcp_sk(mpcb->master_sk)->mptcp->fully_established)
goto exit;
if (num_subflows > iter && num_subflows > mptcp_subflow_count(mpcb)) {
if (meta_sk->sk_family == AF_INET ||
mptcp_v6_is_v4_mapped(meta_sk)) {
struct mptcp_loc4 loc;
struct mptcp_rem4 rem;
loc.addr.s_addr = inet_sk(meta_sk)->inet_saddr;
loc.loc4_id = 0;
loc.low_prio = 0;
if (mpcb->master_sk)
loc.if_idx = mpcb->master_sk->sk_bound_dev_if;
else
loc.if_idx = 0;
rem.addr.s_addr = inet_sk(meta_sk)->inet_daddr;
rem.port = inet_sk(meta_sk)->inet_dport;
rem.rem4_id = 0; /* Default 0 */
mptcp_init4_subsockets(meta_sk, &loc, &rem);
} else {
#if IS_ENABLED(CONFIG_IPV6)
struct mptcp_loc6 loc;
struct mptcp_rem6 rem;
loc.addr = inet6_sk(meta_sk)->saddr;
loc.loc6_id = 0;
loc.low_prio = 0;
if (mpcb->master_sk)
loc.if_idx = mpcb->master_sk->sk_bound_dev_if;
else
loc.if_idx = 0;
rem.addr = meta_sk->sk_v6_daddr;
rem.port = inet_sk(meta_sk)->inet_dport;
rem.rem6_id = 0; /* Default 0 */
mptcp_init6_subsockets(meta_sk, &loc, &rem);
#endif
}
goto next_subflow;
}
exit:
release_sock(meta_sk);
mutex_unlock(&mpcb->mpcb_mutex);
mptcp_mpcb_put(mpcb);
sock_put(meta_sk);
}
static void ndiffports_new_session(const struct sock *meta_sk)
{
struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
struct ndiffports_priv *fmp = (struct ndiffports_priv *)&mpcb->mptcp_pm[0];
/* Initialize workqueue-struct */
INIT_WORK(&fmp->subflow_work, create_subflow_worker);
fmp->mpcb = mpcb;
}
static void ndiffports_create_subflows(struct sock *meta_sk)
{
struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
struct ndiffports_priv *pm_priv = (struct ndiffports_priv *)&mpcb->mptcp_pm[0];
if (mptcp_in_infinite_mapping_weak(mpcb) ||
mpcb->server_side || sock_flag(meta_sk, SOCK_DEAD))
return;
if (!work_pending(&pm_priv->subflow_work)) {
sock_hold(meta_sk);
refcount_inc(&mpcb->mpcb_refcnt);
queue_work(mptcp_wq, &pm_priv->subflow_work);
}
}
static int ndiffports_get_local_id(const struct sock *meta_sk,
sa_family_t family, union inet_addr *addr,
bool *low_prio)
{
return 0;
}
static struct mptcp_pm_ops ndiffports __read_mostly = {
.new_session = ndiffports_new_session,
.fully_established = ndiffports_create_subflows,
.get_local_id = ndiffports_get_local_id,
.name = "ndiffports",
.owner = THIS_MODULE,
};
/* General initialization of MPTCP_PM */
static int __init ndiffports_register(void)
{
BUILD_BUG_ON(sizeof(struct ndiffports_priv) > MPTCP_PM_SIZE);
if (mptcp_register_path_manager(&ndiffports))
goto exit;
return 0;
exit:
return -1;
}
static void ndiffports_unregister(void)
{
mptcp_unregister_path_manager(&ndiffports);
}
module_init(ndiffports_register);
module_exit(ndiffports_unregister);
MODULE_AUTHOR("Christoph Paasch");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("NDIFF-PORTS MPTCP");
MODULE_VERSION("0.88");

net/mptcp/mptcp_netlink.c Executable file

File diff suppressed because it is too large

net/mptcp/mptcp_olia.c Executable file
View File

@ -0,0 +1,318 @@
/*
* MPTCP implementation - OPPORTUNISTIC LINKED INCREASES CONGESTION CONTROL:
*
* Algorithm design:
* Ramin Khalili <ramin.khalili@epfl.ch>
* Nicolas Gast <nicolas.gast@epfl.ch>
* Jean-Yves Le Boudec <jean-yves.leboudec@epfl.ch>
*
* Implementation:
* Ramin Khalili <ramin.khalili@epfl.ch>
*
* Ported to the official MPTCP-kernel:
* Christoph Paasch <christoph.paasch@uclouvain.be>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <net/tcp.h>
#include <net/mptcp.h>
#include <linux/module.h>
static int scale = 10;
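/* Per-subflow OLIA state. mptcp_loss1..mptcp_loss3 record snd_una at the
* last loss events and give the number of bytes delivered between losses
* (the inter-loss distance). epsilon_num/epsilon_den encode the per-path
* epsilon of the OLIA algorithm, and mptcp_snd_cwnd_cnt accumulates the
* fractional cwnd change in fixed point ('scale' bits).
*/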
struct mptcp_olia {
u32 mptcp_loss1;
u32 mptcp_loss2;
u32 mptcp_loss3;
int epsilon_num;
u32 epsilon_den;
int mptcp_snd_cwnd_cnt;
};
static inline int mptcp_olia_sk_can_send(const struct sock *sk)
{
return mptcp_sk_can_send(sk) && tcp_sk(sk)->srtt_us;
}
static inline u64 mptcp_olia_scale(u64 val, int scale)
{
return (u64) val << scale;
}
/* Take care of the artificial inflation of cwnd (see RFC 5681)
* during the fast-retransmit phase.
*/
static u32 mptcp_get_crt_cwnd(struct sock *sk)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
if (icsk->icsk_ca_state == TCP_CA_Recovery)
return tcp_sk(sk)->snd_ssthresh;
else
return tcp_sk(sk)->snd_cwnd;
}
/* Return the denominator of the first term of the increase term. */
static u64 mptcp_get_rate(const struct mptcp_cb *mpcb, u32 path_rtt)
{
struct mptcp_tcp_sock *mptcp;
u64 rate = 1; /* We have to avoid a zero-rate because it is used as a divisor */
mptcp_for_each_sub(mpcb, mptcp) {
struct sock *sk = mptcp_to_sock(mptcp);
struct tcp_sock *tp = tcp_sk(sk);
u64 scaled_num;
u32 tmp_cwnd;
if (!mptcp_olia_sk_can_send(sk))
continue;
tmp_cwnd = mptcp_get_crt_cwnd(sk);
scaled_num = mptcp_olia_scale(tmp_cwnd, scale) * path_rtt;
rate += div_u64(scaled_num, tp->srtt_us);
}
rate *= rate;
return rate;
}
/* find the maximum cwnd, used to find set M */
static u32 mptcp_get_max_cwnd(const struct mptcp_cb *mpcb)
{
struct mptcp_tcp_sock *mptcp;
u32 best_cwnd = 0;
mptcp_for_each_sub(mpcb, mptcp) {
struct sock *sk = mptcp_to_sock(mptcp);
u32 tmp_cwnd;
if (!mptcp_olia_sk_can_send(sk))
continue;
tmp_cwnd = mptcp_get_crt_cwnd(sk);
if (tmp_cwnd > best_cwnd)
best_cwnd = tmp_cwnd;
}
return best_cwnd;
}
static void mptcp_get_epsilon(const struct mptcp_cb *mpcb)
{
struct mptcp_tcp_sock *mptcp;
struct mptcp_olia *ca;
struct tcp_sock *tp;
struct sock *sk;
u64 tmp_int, tmp_rtt, best_int = 0, best_rtt = 1;
u32 max_cwnd, tmp_cwnd, established_cnt = 0;
u8 M = 0, B_not_M = 0;
/* TODO - integrate this in the following loop - we just want to iterate once */
max_cwnd = mptcp_get_max_cwnd(mpcb);
/* find the best path */
mptcp_for_each_sub(mpcb, mptcp) {
sk = mptcp_to_sock(mptcp);
tp = tcp_sk(sk);
ca = inet_csk_ca(sk);
if (!mptcp_olia_sk_can_send(sk))
continue;
established_cnt++;
tmp_rtt = (u64)tp->srtt_us * tp->srtt_us;
/* TODO - check here and rename variables */
tmp_int = max(ca->mptcp_loss3 - ca->mptcp_loss2,
ca->mptcp_loss2 - ca->mptcp_loss1);
if ((u64)tmp_int * best_rtt >= (u64)best_int * tmp_rtt) {
best_rtt = tmp_rtt;
best_int = tmp_int;
}
}
/* TODO - integrate this here in mptcp_get_max_cwnd and in the previous loop */
/* find the size of M and B_not_M */
mptcp_for_each_sub(mpcb, mptcp) {
sk = mptcp_to_sock(mptcp);
tp = tcp_sk(sk);
ca = inet_csk_ca(sk);
if (!mptcp_olia_sk_can_send(sk))
continue;
tmp_cwnd = mptcp_get_crt_cwnd(sk);
if (tmp_cwnd == max_cwnd) {
M++;
} else {
tmp_rtt = (u64)tp->srtt_us * tp->srtt_us;
tmp_int = max(ca->mptcp_loss3 - ca->mptcp_loss2,
ca->mptcp_loss2 - ca->mptcp_loss1);
if ((u64)tmp_int * best_rtt == (u64)best_int * tmp_rtt)
B_not_M++;
}
}
/* check if the path is in M or B_not_M and set the value of epsilon accordingly */
mptcp_for_each_sub(mpcb, mptcp) {
sk = mptcp_to_sock(mptcp);
tp = tcp_sk(sk);
ca = inet_csk_ca(sk);
if (!mptcp_olia_sk_can_send(sk))
continue;
if (B_not_M == 0) {
ca->epsilon_num = 0;
ca->epsilon_den = 1;
} else {
tmp_rtt = (u64)tp->srtt_us * tp->srtt_us;
tmp_int = max(ca->mptcp_loss3 - ca->mptcp_loss2,
ca->mptcp_loss2 - ca->mptcp_loss1);
tmp_cwnd = mptcp_get_crt_cwnd(sk);
if (tmp_cwnd < max_cwnd &&
(u64)tmp_int * best_rtt == (u64)best_int * tmp_rtt) {
ca->epsilon_num = 1;
ca->epsilon_den = established_cnt * B_not_M;
} else if (tmp_cwnd == max_cwnd) {
ca->epsilon_num = -1;
ca->epsilon_den = established_cnt * M;
} else {
ca->epsilon_num = 0;
ca->epsilon_den = 1;
}
}
}
}
/* setting the initial values */
static void mptcp_olia_init(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct mptcp_olia *ca = inet_csk_ca(sk);
if (mptcp(tp)) {
ca->mptcp_loss1 = tp->snd_una;
ca->mptcp_loss2 = tp->snd_una;
ca->mptcp_loss3 = tp->snd_una;
ca->mptcp_snd_cwnd_cnt = 0;
ca->epsilon_num = 0;
ca->epsilon_den = 1;
}
}
/* updating inter-loss distance and ssthresh */
static void mptcp_olia_set_state(struct sock *sk, u8 new_state)
{
if (!mptcp(tcp_sk(sk)))
return;
if (new_state == TCP_CA_Loss ||
new_state == TCP_CA_Recovery || new_state == TCP_CA_CWR) {
struct mptcp_olia *ca = inet_csk_ca(sk);
if (ca->mptcp_loss3 != ca->mptcp_loss2 &&
!inet_csk(sk)->icsk_retransmits) {
ca->mptcp_loss1 = ca->mptcp_loss2;
ca->mptcp_loss2 = ca->mptcp_loss3;
}
}
}
/* main algorithm */
static void mptcp_olia_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
struct mptcp_olia *ca = inet_csk_ca(sk);
const struct mptcp_cb *mpcb = tp->mpcb;
u64 inc_num, inc_den, rate, cwnd_scaled;
if (!mptcp(tp)) {
tcp_reno_cong_avoid(sk, ack, acked);
return;
}
ca->mptcp_loss3 = tp->snd_una;
if (!tcp_is_cwnd_limited(sk))
return;
/* slow start if it is in the safe area */
if (tcp_in_slow_start(tp)) {
tcp_slow_start(tp, acked);
return;
}
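/* OLIA increase (Khalili et al.): per ACK, subflow r grows its window by
* roughly (w_r / rtt_r^2) / (sum_p w_p / rtt_p)^2 + epsilon_r / w_r.
* The fractional part is accumulated in mptcp_snd_cwnd_cnt using
* fixed-point arithmetic with 'scale' bits; snd_cwnd changes by one
* packet once the counter reaches +/- (2^scale - 1).
*/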
mptcp_get_epsilon(mpcb);
rate = mptcp_get_rate(mpcb, tp->srtt_us);
cwnd_scaled = mptcp_olia_scale(tp->snd_cwnd, scale);
inc_den = ca->epsilon_den * tp->snd_cwnd * rate ? : 1;
/* calculate the increasing term, scaling is used to reduce the rounding effect */
if (ca->epsilon_num == -1) {
if (ca->epsilon_den * cwnd_scaled * cwnd_scaled < rate) {
inc_num = rate - ca->epsilon_den *
cwnd_scaled * cwnd_scaled;
ca->mptcp_snd_cwnd_cnt -= div64_u64(
mptcp_olia_scale(inc_num, scale), inc_den);
} else {
inc_num = ca->epsilon_den *
cwnd_scaled * cwnd_scaled - rate;
ca->mptcp_snd_cwnd_cnt += div64_u64(
mptcp_olia_scale(inc_num, scale), inc_den);
}
} else {
inc_num = ca->epsilon_num * rate +
ca->epsilon_den * cwnd_scaled * cwnd_scaled;
ca->mptcp_snd_cwnd_cnt += div64_u64(
mptcp_olia_scale(inc_num, scale), inc_den);
}
if (ca->mptcp_snd_cwnd_cnt >= (1 << scale) - 1) {
if (tp->snd_cwnd < tp->snd_cwnd_clamp)
tp->snd_cwnd++;
ca->mptcp_snd_cwnd_cnt = 0;
} else if (ca->mptcp_snd_cwnd_cnt <= 0 - (1 << scale) + 1) {
tp->snd_cwnd = max((int)1, (int)tp->snd_cwnd - 1);
ca->mptcp_snd_cwnd_cnt = 0;
}
}
static struct tcp_congestion_ops mptcp_olia = {
.init = mptcp_olia_init,
.ssthresh = tcp_reno_ssthresh,
.cong_avoid = mptcp_olia_cong_avoid,
.undo_cwnd = tcp_reno_undo_cwnd,
.set_state = mptcp_olia_set_state,
.owner = THIS_MODULE,
.name = "olia",
};
static int __init mptcp_olia_register(void)
{
BUILD_BUG_ON(sizeof(struct mptcp_olia) > ICSK_CA_PRIV_SIZE);
return tcp_register_congestion_control(&mptcp_olia);
}
static void __exit mptcp_olia_unregister(void)
{
tcp_unregister_congestion_control(&mptcp_olia);
}
module_init(mptcp_olia_register);
module_exit(mptcp_olia_unregister);
MODULE_AUTHOR("Ramin Khalili, Nicolas Gast, Jean-Yves Le Boudec");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("MPTCP COUPLED CONGESTION CONTROL");
MODULE_VERSION("0.1");

net/mptcp/mptcp_output.c Executable file

File diff suppressed because it is too large

net/mptcp/mptcp_pm.c Executable file
View File

@ -0,0 +1,226 @@
/*
* MPTCP implementation - MPTCP-subflow-management
*
* Initial Design & Implementation:
* Sébastien Barré <sebastien.barre@uclouvain.be>
*
* Current Maintainer & Author:
* Christoph Paasch <christoph.paasch@uclouvain.be>
*
* Additional authors:
* Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
* Gregory Detal <gregory.detal@uclouvain.be>
* Fabien Duchêne <fabien.duchene@uclouvain.be>
* Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
* Lavkesh Lahngir <lavkesh51@gmail.com>
* Andreas Ripke <ripke@neclab.eu>
* Vlad Dogaru <vlad.dogaru@intel.com>
* Octavian Purdila <octavian.purdila@intel.com>
* John Ronan <jronan@tssg.org>
* Catalin Nicutar <catalin.nicutar@gmail.com>
* Brandon Heller <brandonh@stanford.edu>
*
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/module.h>
#include <net/mptcp.h>
static DEFINE_SPINLOCK(mptcp_pm_list_lock);
static LIST_HEAD(mptcp_pm_list);
static int mptcp_default_id(const struct sock *meta_sk, sa_family_t family,
union inet_addr *addr, bool *low_prio)
{
return 0;
}
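/* Default path-manager: it neither announces addresses nor opens
* additional subflows; it only supplies a local address id (always 0).
*/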
struct mptcp_pm_ops mptcp_pm_default = {
.get_local_id = mptcp_default_id, /* We do not care */
.name = "default",
.owner = THIS_MODULE,
};
static struct mptcp_pm_ops *mptcp_pm_find(const char *name)
{
struct mptcp_pm_ops *e;
list_for_each_entry_rcu(e, &mptcp_pm_list, list) {
if (strcmp(e->name, name) == 0)
return e;
}
return NULL;
}
int mptcp_register_path_manager(struct mptcp_pm_ops *pm)
{
int ret = 0;
if (!pm->get_local_id)
return -EINVAL;
spin_lock(&mptcp_pm_list_lock);
if (mptcp_pm_find(pm->name)) {
pr_notice("%s already registered\n", pm->name);
ret = -EEXIST;
} else {
list_add_tail_rcu(&pm->list, &mptcp_pm_list);
pr_info("%s registered\n", pm->name);
}
spin_unlock(&mptcp_pm_list_lock);
return ret;
}
EXPORT_SYMBOL_GPL(mptcp_register_path_manager);
void mptcp_unregister_path_manager(struct mptcp_pm_ops *pm)
{
spin_lock(&mptcp_pm_list_lock);
list_del_rcu(&pm->list);
spin_unlock(&mptcp_pm_list_lock);
/* Wait for outstanding readers to complete before the
* module gets removed entirely.
*
* A try_module_get() should fail by now, as our module is
* in the "going" state: no refs are held anymore and the
* module_exit() handler is being called.
*/
synchronize_rcu();
}
EXPORT_SYMBOL_GPL(mptcp_unregister_path_manager);
void mptcp_get_default_path_manager(char *name)
{
struct mptcp_pm_ops *pm;
BUG_ON(list_empty(&mptcp_pm_list));
rcu_read_lock();
pm = list_entry(mptcp_pm_list.next, struct mptcp_pm_ops, list);
strncpy(name, pm->name, MPTCP_PM_NAME_MAX);
rcu_read_unlock();
}
int mptcp_set_default_path_manager(const char *name)
{
struct mptcp_pm_ops *pm;
int ret = -ENOENT;
spin_lock(&mptcp_pm_list_lock);
pm = mptcp_pm_find(name);
#ifdef CONFIG_MODULES
if (!pm && capable(CAP_NET_ADMIN)) {
spin_unlock(&mptcp_pm_list_lock);
request_module("mptcp_%s", name);
spin_lock(&mptcp_pm_list_lock);
pm = mptcp_pm_find(name);
}
#endif
if (pm) {
list_move(&pm->list, &mptcp_pm_list);
ret = 0;
} else {
pr_info("%s is not available\n", name);
}
spin_unlock(&mptcp_pm_list_lock);
return ret;
}
static struct mptcp_pm_ops *__mptcp_pm_find_autoload(const char *name)
{
struct mptcp_pm_ops *pm = mptcp_pm_find(name);
#ifdef CONFIG_MODULES
if (!pm && capable(CAP_NET_ADMIN)) {
rcu_read_unlock();
request_module("mptcp_%s", name);
rcu_read_lock();
pm = mptcp_pm_find(name);
}
#endif
return pm;
}
void mptcp_init_path_manager(struct mptcp_cb *mpcb)
{
struct mptcp_pm_ops *pm;
struct sock *meta_sk = mpcb->meta_sk;
struct tcp_sock *meta_tp = tcp_sk(meta_sk);
rcu_read_lock();
/* if path manager was set using socket option */
if (meta_tp->mptcp_pm_setsockopt) {
pm = __mptcp_pm_find_autoload(meta_tp->mptcp_pm_name);
if (pm && try_module_get(pm->owner)) {
mpcb->pm_ops = pm;
goto out;
}
}
list_for_each_entry_rcu(pm, &mptcp_pm_list, list) {
if (try_module_get(pm->owner)) {
mpcb->pm_ops = pm;
break;
}
}
out:
rcu_read_unlock();
}
/* Change path manager for socket */
int mptcp_set_path_manager(struct sock *sk, const char *name)
{
struct mptcp_pm_ops *pm;
int err = 0;
rcu_read_lock();
pm = __mptcp_pm_find_autoload(name);
if (!pm) {
err = -ENOENT;
} else if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
err = -EPERM;
} else {
strcpy(tcp_sk(sk)->mptcp_pm_name, name);
tcp_sk(sk)->mptcp_pm_setsockopt = 1;
}
rcu_read_unlock();
return err;
}
/* Manage refcounts on socket close. */
void mptcp_cleanup_path_manager(struct mptcp_cb *mpcb)
{
module_put(mpcb->pm_ops->owner);
}
/* Fallback to the default path-manager. */
void mptcp_fallback_default(struct mptcp_cb *mpcb)
{
struct mptcp_pm_ops *pm;
mptcp_cleanup_path_manager(mpcb);
pm = mptcp_pm_find("default");
/* Cannot fail - it's the default module */
try_module_get(pm->owner);
mpcb->pm_ops = pm;
}
EXPORT_SYMBOL_GPL(mptcp_fallback_default);
/* Set default value from kernel configuration at bootup */
static int __init mptcp_path_manager_default(void)
{
return mptcp_set_default_path_manager("fullmesh");
}
late_initcall(mptcp_path_manager_default);

net/mptcp/mptcp_redundant.c Executable file
View File

@ -0,0 +1,389 @@
/*
* MPTCP Scheduler to reduce latency and jitter.
*
* This scheduler sends all packets redundantly on all available subflows.
*
* Initial Design & Implementation:
* Tobias Erbshaeusser <erbshauesser@dvs.tu-darmstadt.de>
* Alexander Froemmgen <froemmge@dvs.tu-darmstadt.de>
*
* Initial corrections & modifications:
* Christian Pinedo <christian.pinedo@ehu.eus>
* Igor Lopez <igor.lopez@ehu.eus>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/module.h>
#include <net/mptcp.h>
/* Struct to store the data of a single subflow */
struct redsched_priv {
/* The skb or NULL */
struct sk_buff *skb;
/* End sequence number of the skb. This number should be checked
* to be valid before the skb field is used
*/
u32 skb_end_seq;
};
/* Struct to store the data of the control block */
struct redsched_cb {
/* The next subflow where a skb should be sent or NULL */
struct tcp_sock *next_subflow;
};
/* Returns the socket data from a given subflow socket */
static struct redsched_priv *redsched_get_priv(struct tcp_sock *tp)
{
return (struct redsched_priv *)&tp->mptcp->mptcp_sched[0];
}
/* Returns the control block data from a given meta socket */
static struct redsched_cb *redsched_get_cb(struct tcp_sock *tp)
{
return (struct redsched_cb *)&tp->mpcb->mptcp_sched[0];
}
static bool redsched_get_active_valid_sks(struct sock *meta_sk)
{
struct tcp_sock *meta_tp = tcp_sk(meta_sk);
struct mptcp_cb *mpcb = meta_tp->mpcb;
struct mptcp_tcp_sock *mptcp;
int active_valid_sks = 0;
mptcp_for_each_sub(mpcb, mptcp) {
struct sock *sk = mptcp_to_sock(mptcp);
if (subflow_is_active((struct tcp_sock *)sk) &&
!mptcp_is_def_unavailable(sk))
active_valid_sks++;
}
return active_valid_sks;
}
static bool redsched_use_subflow(struct sock *meta_sk,
int active_valid_sks,
struct tcp_sock *tp,
struct sk_buff *skb)
{
if (!skb || !mptcp_is_available((struct sock *)tp, skb, false))
return false;
if (TCP_SKB_CB(skb)->path_mask != 0)
return subflow_is_active(tp);
if (TCP_SKB_CB(skb)->path_mask == 0) {
if (active_valid_sks == -1)
active_valid_sks = redsched_get_active_valid_sks(meta_sk);
if (subflow_is_backup(tp) && active_valid_sks > 0)
return false;
else
return true;
}
return false;
}
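/* Next subflow after __mptcp in mpcb->conn_list (or NULL at the end of
* the list); used to rotate the subflow that the redundant scheduler
* starts from on the next invocation.
*/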
#define mptcp_entry_next_rcu(__mptcp) \
hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu( \
&(__mptcp)->node)), struct mptcp_tcp_sock, node)
static void redsched_update_next_subflow(struct tcp_sock *tp,
struct redsched_cb *red_cb)
{
struct mptcp_tcp_sock *mptcp = mptcp_entry_next_rcu(tp->mptcp);
if (mptcp)
red_cb->next_subflow = mptcp->tp;
else
red_cb->next_subflow = NULL;
}
static struct sock *red_get_available_subflow(struct sock *meta_sk,
struct sk_buff *skb,
bool zero_wnd_test)
{
struct tcp_sock *meta_tp = tcp_sk(meta_sk);
struct mptcp_cb *mpcb = meta_tp->mpcb;
struct redsched_cb *red_cb = redsched_get_cb(meta_tp);
struct tcp_sock *first_tp = red_cb->next_subflow, *tp;
struct mptcp_tcp_sock *mptcp;
int found = 0;
/* Answer data_fin on same subflow */
if (meta_sk->sk_shutdown & RCV_SHUTDOWN &&
skb && mptcp_is_data_fin(skb)) {
mptcp_for_each_sub(mpcb, mptcp) {
struct sock *sk = mptcp_to_sock(mptcp);
if (tcp_sk(sk)->mptcp->path_index ==
mpcb->dfin_path_index &&
mptcp_is_available(sk, skb, zero_wnd_test))
return sk;
}
}
if (!first_tp && !hlist_empty(&mpcb->conn_list)) {
first_tp = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(&mpcb->conn_list)),
struct mptcp_tcp_sock, node)->tp;
}
tp = first_tp;
/* still NULL (no subflow in conn_list?) */
if (!first_tp)
return NULL;
/* Search for a subflow to send it.
*
* We want to pick a subflow that is after 'first_tp' in the list of subflows.
* Thus, the first mptcp_for_each_sub()-loop tries to walk the list up
* to the subflow 'tp' and then checks whether any one of the remaining
* ones is eligible to send.
* The second mptcp_for_each_sub()-loop then iterates from the
* beginning of the list up to 'first_tp'.
*/
mptcp_for_each_sub(mpcb, mptcp) {
/* We go up to the subflow 'tp' and start from there */
if (tp == mptcp->tp)
found = 1;
if (!found)
continue;
tp = mptcp->tp;
if (mptcp_is_available((struct sock *)tp, skb,
zero_wnd_test)) {
redsched_update_next_subflow(tp, red_cb);
return (struct sock *)tp;
}
}
mptcp_for_each_sub(mpcb, mptcp) {
tp = mptcp->tp;
if (tp == first_tp)
break;
if (mptcp_is_available((struct sock *)tp, skb,
zero_wnd_test)) {
redsched_update_next_subflow(tp, red_cb);
return (struct sock *)tp;
}
}
/* No space */
return NULL;
}
/* Corrects the stored skb pointers if they are invalid */
static void redsched_correct_skb_pointers(struct sock *meta_sk,
struct redsched_priv *red_p)
{
struct tcp_sock *meta_tp = tcp_sk(meta_sk);
if (red_p->skb && !after(red_p->skb_end_seq, meta_tp->snd_una))
red_p->skb = NULL;
}
/* Returns the next skb from the queue */
static struct sk_buff *redsched_next_skb_from_queue(struct sk_buff_head *queue,
struct sk_buff *previous,
struct sock *meta_sk)
{
struct sk_buff *skb;
if (!previous)
return skb_peek(queue);
/* sk_data->skb stores the last scheduled packet for this subflow.
* If sk_data->skb was scheduled but not sent (e.g., due to nagle),
* we have to schedule it again.
*
* For the redundant scheduler, there are two cases:
* 1. sk_data->skb was not sent on another subflow:
* we have to schedule it again to ensure that we do not
* skip this packet.
* 2. sk_data->skb was already sent on another subflow:
* with regard to the redundant semantic, we have to
* schedule it again. However, we keep it simple and ignore it,
* as it was already sent by another subflow.
* This might be changed in the future.
*
* For case 1, send_head is equal to previous, as only a single
* packet can be skipped.
*/
if (tcp_send_head(meta_sk) == previous)
return tcp_send_head(meta_sk);
skb = skb_rb_next(previous);
if (skb)
return skb;
return tcp_send_head(meta_sk);
}
static struct sk_buff *mptcp_red_next_segment(struct sock *meta_sk,
int *reinject,
struct sock **subsk,
unsigned int *limit)
{
struct tcp_sock *meta_tp = tcp_sk(meta_sk);
struct mptcp_cb *mpcb = meta_tp->mpcb;
struct redsched_cb *red_cb = redsched_get_cb(meta_tp);
struct tcp_sock *first_tp = red_cb->next_subflow, *tp;
struct mptcp_tcp_sock *mptcp;
int active_valid_sks = -1;
struct sk_buff *skb;
int found = 0;
/* As we set it, we have to reset it as well. */
*limit = 0;
if (skb_queue_empty(&mpcb->reinject_queue) &&
skb_queue_empty(&meta_sk->sk_write_queue))
/* Nothing to send */
return NULL;
/* First try reinjections */
skb = skb_peek(&mpcb->reinject_queue);
if (skb) {
*subsk = get_available_subflow(meta_sk, skb, false);
if (!*subsk)
return NULL;
*reinject = 1;
return skb;
}
/* Then try indistinctly redundant and normal skbs */
if (!first_tp && !hlist_empty(&mpcb->conn_list)) {
first_tp = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(&mpcb->conn_list)),
struct mptcp_tcp_sock, node)->tp;
}
/* still NULL (no subflow in conn_list?) */
if (!first_tp)
return NULL;
tp = first_tp;
*reinject = 0;
active_valid_sks = redsched_get_active_valid_sks(meta_sk);
/* We want to pick a subflow that is after 'first_tp' in the list of subflows.
* Thus, the first mptcp_for_each_sub()-loop tries to walk the list up
* to the subflow 'tp' and then checks whether any one of the remaining
* ones can send a segment.
* The second mptcp_for_each_sub()-loop then iterates from the
* beginning of the list up to 'first_tp'.
*/
mptcp_for_each_sub(mpcb, mptcp) {
struct redsched_priv *red_p;
if (tp == mptcp->tp)
found = 1;
if (!found)
continue;
tp = mptcp->tp;
/* Correct the skb pointers of the current subflow */
red_p = redsched_get_priv(tp);
redsched_correct_skb_pointers(meta_sk, red_p);
skb = redsched_next_skb_from_queue(&meta_sk->sk_write_queue,
red_p->skb, meta_sk);
if (skb && redsched_use_subflow(meta_sk, active_valid_sks, tp,
skb)) {
red_p->skb = skb;
red_p->skb_end_seq = TCP_SKB_CB(skb)->end_seq;
redsched_update_next_subflow(tp, red_cb);
*subsk = (struct sock *)tp;
if (TCP_SKB_CB(skb)->path_mask)
*reinject = -1;
return skb;
}
}
mptcp_for_each_sub(mpcb, mptcp) {
struct redsched_priv *red_p;
tp = mptcp->tp;
if (tp == first_tp)
break;
/* Correct the skb pointers of the current subflow */
red_p = redsched_get_priv(tp);
redsched_correct_skb_pointers(meta_sk, red_p);
skb = redsched_next_skb_from_queue(&meta_sk->sk_write_queue,
red_p->skb, meta_sk);
if (skb && redsched_use_subflow(meta_sk, active_valid_sks, tp,
skb)) {
red_p->skb = skb;
red_p->skb_end_seq = TCP_SKB_CB(skb)->end_seq;
redsched_update_next_subflow(tp, red_cb);
*subsk = (struct sock *)tp;
if (TCP_SKB_CB(skb)->path_mask)
*reinject = -1;
return skb;
}
}
/* Nothing to send */
return NULL;
}
static void redsched_release(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct redsched_cb *red_cb = redsched_get_cb(tp);
/* Check if the next subflow would be the released one. If yes correct
* the pointer
*/
if (red_cb->next_subflow == tp)
redsched_update_next_subflow(tp, red_cb);
}
static struct mptcp_sched_ops mptcp_sched_red = {
.get_subflow = red_get_available_subflow,
.next_segment = mptcp_red_next_segment,
.release = redsched_release,
.name = "redundant",
.owner = THIS_MODULE,
};
static int __init red_register(void)
{
BUILD_BUG_ON(sizeof(struct redsched_priv) > MPTCP_SCHED_SIZE);
BUILD_BUG_ON(sizeof(struct redsched_cb) > MPTCP_SCHED_DATA_SIZE);
if (mptcp_register_scheduler(&mptcp_sched_red))
return -1;
return 0;
}
static void red_unregister(void)
{
mptcp_unregister_scheduler(&mptcp_sched_red);
}
module_init(red_register);
module_exit(red_unregister);
MODULE_AUTHOR("Tobias Erbshaeusser, Alexander Froemmgen");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("REDUNDANT MPTCP");
MODULE_VERSION("0.90");

net/mptcp/mptcp_rr.c Executable file
View File

@ -0,0 +1,309 @@
/* MPTCP Scheduler module selector. Highly inspired by tcp_cong.c */
#include <linux/module.h>
#include <net/mptcp.h>
static unsigned char num_segments __read_mostly = 1;
module_param(num_segments, byte, 0644);
MODULE_PARM_DESC(num_segments, "The number of consecutive segments that are part of a burst");
static bool cwnd_limited __read_mostly = 1;
module_param(cwnd_limited, bool, 0644);
MODULE_PARM_DESC(cwnd_limited, "if set to 1, the scheduler tries to fill the congestion-window on all subflows");
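/* Per-subflow scheduling state: 'quota' counts how many segments of the
* current num_segments burst have already been sent on this subflow; it
* is reset to zero once every available subflow has used up its burst.
*/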
struct rrsched_priv {
unsigned char quota;
};
static struct rrsched_priv *rrsched_get_priv(const struct tcp_sock *tp)
{
return (struct rrsched_priv *)&tp->mptcp->mptcp_sched[0];
}
/* Is the sub-socket sk available to send the skb? */
static bool mptcp_rr_is_available(const struct sock *sk, const struct sk_buff *skb,
bool zero_wnd_test, bool cwnd_test)
{
const struct tcp_sock *tp = tcp_sk(sk);
unsigned int space, in_flight;
/* Set of states for which we are allowed to send data */
if (!mptcp_sk_can_send(sk))
return false;
/* We do not send data on this subflow unless it is
* fully established, i.e. the 4th ack has been received.
*/
if (tp->mptcp->pre_established)
return false;
if (tp->pf)
return false;
if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) {
/* If SACK is disabled, and we got a loss, TCP does not exit
* the loss-state until something above high_seq has been acked.
* (see tcp_try_undo_recovery)
*
* high_seq is the snd_nxt at the moment of the RTO. As soon
* as we have an RTO, we won't push data on the subflow.
* Thus, snd_una can never go beyond high_seq.
*/
if (!tcp_is_reno(tp))
return false;
else if (tp->snd_una != tp->high_seq)
return false;
}
if (!tp->mptcp->fully_established) {
/* Make sure that we send in-order data */
if (skb && tp->mptcp->second_packet &&
tp->mptcp->last_end_data_seq != TCP_SKB_CB(skb)->seq)
return false;
}
if (!cwnd_test)
goto zero_wnd_test;
in_flight = tcp_packets_in_flight(tp);
/* Not even a single spot in the cwnd */
if (in_flight >= tp->snd_cwnd)
return false;
/* Now, check if what is queued in the subflow's send-queue
* already fills the cwnd.
*/
space = (tp->snd_cwnd - in_flight) * tp->mss_cache;
if (tp->write_seq - tp->snd_nxt > space)
return false;
zero_wnd_test:
if (zero_wnd_test && !before(tp->write_seq, tcp_wnd_end(tp)))
return false;
return true;
}
/* Are we not allowed to reinject this skb on tp? */
static int mptcp_rr_dont_reinject_skb(const struct tcp_sock *tp, const struct sk_buff *skb)
{
/* If the skb has already been enqueued in this sk, try to find
* another one.
*/
return skb &&
/* Has the skb already been enqueued into this subsocket? */
mptcp_pi_to_flag(tp->mptcp->path_index) & TCP_SKB_CB(skb)->path_mask;
}
/* We just look for any subflow that is available */
static struct sock *rr_get_available_subflow(struct sock *meta_sk,
struct sk_buff *skb,
bool zero_wnd_test)
{
const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
struct sock *sk = NULL, *bestsk = NULL, *backupsk = NULL;
struct mptcp_tcp_sock *mptcp;
/* Answer data_fin on same subflow!!! */
if (meta_sk->sk_shutdown & RCV_SHUTDOWN &&
skb && mptcp_is_data_fin(skb)) {
mptcp_for_each_sub(mpcb, mptcp) {
sk = mptcp_to_sock(mptcp);
if (tcp_sk(sk)->mptcp->path_index == mpcb->dfin_path_index &&
mptcp_rr_is_available(sk, skb, zero_wnd_test, true))
return sk;
}
}
/* First, find the best subflow */
mptcp_for_each_sub(mpcb, mptcp) {
struct tcp_sock *tp;
sk = mptcp_to_sock(mptcp);
tp = tcp_sk(sk);
if (!mptcp_rr_is_available(sk, skb, zero_wnd_test, true))
continue;
if (mptcp_rr_dont_reinject_skb(tp, skb)) {
backupsk = sk;
continue;
}
bestsk = sk;
}
if (bestsk) {
sk = bestsk;
} else if (backupsk) {
/* It has been sent on all subflows once - let's give it a
* chance again by restarting its pathmask.
*/
if (skb)
TCP_SKB_CB(skb)->path_mask = 0;
sk = backupsk;
}
return sk;
}
/* Returns the next segment to be sent from the mptcp meta-queue.
* (chooses the reinject queue if any segment is waiting in it, otherwise,
* chooses the normal write queue).
* Sets *@reinject to 1 if the returned segment comes from the
* reinject queue. Sets it to 0 if it is the regular send-head of the meta-sk,
* and sets it to -1 if it is a meta-level retransmission to optimize the
* receive-buffer.
*/
static struct sk_buff *__mptcp_rr_next_segment(const struct sock *meta_sk, int *reinject)
{
const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
struct sk_buff *skb = NULL;
*reinject = 0;
/* If we are in fallback-mode, just take from the meta-send-queue */
if (mpcb->infinite_mapping_snd || mpcb->send_infinite_mapping)
return tcp_send_head(meta_sk);
skb = skb_peek(&mpcb->reinject_queue);
if (skb)
*reinject = 1;
else
skb = tcp_send_head(meta_sk);
return skb;
}
static struct sk_buff *mptcp_rr_next_segment(struct sock *meta_sk,
int *reinject,
struct sock **subsk,
unsigned int *limit)
{
const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
struct sock *choose_sk = NULL;
struct mptcp_tcp_sock *mptcp;
struct sk_buff *skb = __mptcp_rr_next_segment(meta_sk, reinject);
unsigned char split = num_segments;
unsigned char iter = 0, full_subs = 0;
/* As we set it, we have to reset it as well. */
*limit = 0;
if (!skb)
return NULL;
if (*reinject) {
*subsk = rr_get_available_subflow(meta_sk, skb, false);
if (!*subsk)
return NULL;
return skb;
}
retry:
/* First, we look for a subflow that is currently being used */
mptcp_for_each_sub(mpcb, mptcp) {
struct sock *sk_it = mptcp_to_sock(mptcp);
struct tcp_sock *tp_it = tcp_sk(sk_it);
struct rrsched_priv *rr_p = rrsched_get_priv(tp_it);
if (!mptcp_rr_is_available(sk_it, skb, false, cwnd_limited))
continue;
iter++;
/* Is this subflow currently being used? */
if (rr_p->quota > 0 && rr_p->quota < num_segments) {
split = num_segments - rr_p->quota;
choose_sk = sk_it;
goto found;
}
/* Or, it's totally unused */
if (!rr_p->quota) {
split = num_segments;
choose_sk = sk_it;
}
/* Or, it must then be fully used */
if (rr_p->quota >= num_segments)
full_subs++;
}
/* All considered subflows have a full quota, and we considered at
* least one.
*/
if (iter && iter == full_subs) {
/* So, we restart this round by resetting the quotas to 0 and
* retrying to find a subflow.
*/
mptcp_for_each_sub(mpcb, mptcp) {
struct sock *sk_it = mptcp_to_sock(mptcp);
struct tcp_sock *tp_it = tcp_sk(sk_it);
struct rrsched_priv *rr_p = rrsched_get_priv(tp_it);
if (!mptcp_rr_is_available(sk_it, skb, false, cwnd_limited))
continue;
rr_p->quota = 0;
}
goto retry;
}
found:
if (choose_sk) {
unsigned int mss_now;
struct tcp_sock *choose_tp = tcp_sk(choose_sk);
struct rrsched_priv *rr_p = rrsched_get_priv(choose_tp);
if (!mptcp_rr_is_available(choose_sk, skb, false, true))
return NULL;
*subsk = choose_sk;
mss_now = tcp_current_mss(*subsk);
*limit = split * mss_now;
if (skb->len > mss_now)
rr_p->quota += DIV_ROUND_UP(skb->len, mss_now);
else
rr_p->quota++;
return skb;
}
return NULL;
}
static struct mptcp_sched_ops mptcp_sched_rr = {
.get_subflow = rr_get_available_subflow,
.next_segment = mptcp_rr_next_segment,
.name = "roundrobin",
.owner = THIS_MODULE,
};
static int __init rr_register(void)
{
BUILD_BUG_ON(sizeof(struct rrsched_priv) > MPTCP_SCHED_SIZE);
if (mptcp_register_scheduler(&mptcp_sched_rr))
return -1;
return 0;
}
static void rr_unregister(void)
{
mptcp_unregister_scheduler(&mptcp_sched_rr);
}
module_init(rr_register);
module_exit(rr_unregister);
MODULE_AUTHOR("Christoph Paasch");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("ROUNDROBIN MPTCP");
MODULE_VERSION("0.89");

net/mptcp/mptcp_sched.c Executable file
View File

@ -0,0 +1,634 @@
/* MPTCP Scheduler module selector. Highly inspired by tcp_cong.c */
#include <linux/module.h>
#include <net/mptcp.h>
#include <trace/events/tcp.h>
static DEFINE_SPINLOCK(mptcp_sched_list_lock);
static LIST_HEAD(mptcp_sched_list);
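/* Per-subflow state of the default scheduler: jiffies timestamp of the
* last receive-buffer optimization (cwnd penalization of slower subflows
* in mptcp_rcv_buf_optimization()).
*/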
struct defsched_priv {
u32 last_rbuf_opti;
};
static struct defsched_priv *defsched_get_priv(const struct tcp_sock *tp)
{
return (struct defsched_priv *)&tp->mptcp->mptcp_sched[0];
}
bool mptcp_is_def_unavailable(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
/* Set of states for which we are allowed to send data */
if (!mptcp_sk_can_send(sk))
return true;
/* We do not send data on this subflow unless it is
* fully established, i.e. the 4th ack has been received.
*/
if (tp->mptcp->pre_established)
return true;
if (tp->pf)
return true;
return false;
}
EXPORT_SYMBOL_GPL(mptcp_is_def_unavailable);
static bool mptcp_is_temp_unavailable(struct sock *sk,
const struct sk_buff *skb,
bool zero_wnd_test)
{
const struct tcp_sock *tp = tcp_sk(sk);
unsigned int mss_now, space, in_flight;
if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) {
/* If SACK is disabled, and we got a loss, TCP does not exit
* the loss-state until something above high_seq has been
* acked. (see tcp_try_undo_recovery)
*
* high_seq is the snd_nxt at the moment of the RTO. As soon
* as we have an RTO, we won't push data on the subflow.
* Thus, snd_una can never go beyond high_seq.
*/
if (!tcp_is_reno(tp))
return true;
else if (tp->snd_una != tp->high_seq)
return true;
}
if (!tp->mptcp->fully_established) {
/* Make sure that we send in-order data */
if (skb && tp->mptcp->second_packet &&
tp->mptcp->last_end_data_seq != TCP_SKB_CB(skb)->seq)
return true;
}
in_flight = tcp_packets_in_flight(tp);
/* Not even a single spot in the cwnd */
if (in_flight >= tp->snd_cwnd)
return true;
/* Now, check if what is queued in the subflow's send-queue
* already fills the cwnd.
*/
space = (tp->snd_cwnd - in_flight) * tp->mss_cache;
if (tp->write_seq - tp->snd_nxt > space)
return true;
if (zero_wnd_test && !before(tp->write_seq, tcp_wnd_end(tp)))
return true;
mss_now = tcp_current_mss(sk);
/* Don't send on this subflow if we bypass the allowed send-window at
* the per-subflow level. Similar to tcp_snd_wnd_test, but manually
* calculated end_seq (because here at this point end_seq is still at
* the meta-level).
*/
if (skb && zero_wnd_test &&
after(tp->write_seq + min(skb->len, mss_now), tcp_wnd_end(tp)))
return true;
return false;
}
/* Is the sub-socket sk available to send the skb? */
bool mptcp_is_available(struct sock *sk, const struct sk_buff *skb,
bool zero_wnd_test)
{
return !mptcp_is_def_unavailable(sk) &&
!mptcp_is_temp_unavailable(sk, skb, zero_wnd_test);
}
EXPORT_SYMBOL_GPL(mptcp_is_available);
/* Are we not allowed to reinject this skb on tp? */
static int mptcp_dont_reinject_skb(const struct tcp_sock *tp, const struct sk_buff *skb)
{
/* If the skb has already been enqueued in this sk, try to find
* another one.
*/
return skb &&
/* Has the skb already been enqueued into this subsocket? */
mptcp_pi_to_flag(tp->mptcp->path_index) & TCP_SKB_CB(skb)->path_mask;
}
bool subflow_is_backup(const struct tcp_sock *tp)
{
return tp->mptcp->rcv_low_prio || tp->mptcp->low_prio;
}
EXPORT_SYMBOL_GPL(subflow_is_backup);
bool subflow_is_active(const struct tcp_sock *tp)
{
return !tp->mptcp->rcv_low_prio && !tp->mptcp->low_prio;
}
EXPORT_SYMBOL_GPL(subflow_is_active);
/* Generic function to iterate over used and unused subflows and to select the
* best one
*/
static struct sock
*get_subflow_from_selectors(struct mptcp_cb *mpcb, struct sk_buff *skb,
bool (*selector)(const struct tcp_sock *),
bool zero_wnd_test, bool *force)
{
struct sock *bestsk = NULL;
u32 min_srtt = 0xffffffff;
bool found_unused = false;
bool found_unused_una = false;
struct mptcp_tcp_sock *mptcp;
mptcp_for_each_sub(mpcb, mptcp) {
struct sock *sk = mptcp_to_sock(mptcp);
struct tcp_sock *tp = tcp_sk(sk);
bool unused = false;
/* First, we choose only the wanted sks */
if (!(*selector)(tp))
continue;
if (!mptcp_dont_reinject_skb(tp, skb))
unused = true;
else if (found_unused)
/* If an unused sk was found previously, we continue -
* no need to check used sks anymore.
*/
continue;
if (mptcp_is_def_unavailable(sk))
continue;
if (mptcp_is_temp_unavailable(sk, skb, zero_wnd_test)) {
if (unused)
found_unused_una = true;
continue;
}
if (unused) {
if (!found_unused) {
/* It's the first time we encounter an unused
* sk - thus we reset the bestsk (which might
* have been set to a used sk).
*/
min_srtt = 0xffffffff;
bestsk = NULL;
}
found_unused = true;
}
if (tp->srtt_us < min_srtt) {
min_srtt = tp->srtt_us;
bestsk = sk;
}
}
if (bestsk) {
/* The force variable is used to mark the returned sk as
* previously used or not-used.
*/
if (found_unused)
*force = true;
else
*force = false;
} else {
/* The force variable is used to mark if there are temporarily
* unavailable not-used sks.
*/
if (found_unused_una)
*force = true;
else
*force = false;
}
return bestsk;
}
/* This is the scheduler. This function decides on which flow to send
 * a given MSS. The flow is selected based on the shortest RTT; if all
 * subflows are busy (e.g. their congestion windows are full), NULL is
 * returned.
*
* Additionally, this function is aware of the backup-subflows.
*/
struct sock *get_available_subflow(struct sock *meta_sk, struct sk_buff *skb,
bool zero_wnd_test)
{
struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
struct sock *sk;
bool looping = false, force;
/* Answer data_fin on same subflow!!! */
if (meta_sk->sk_shutdown & RCV_SHUTDOWN &&
skb && mptcp_is_data_fin(skb)) {
struct mptcp_tcp_sock *mptcp;
mptcp_for_each_sub(mpcb, mptcp) {
sk = mptcp_to_sock(mptcp);
if (tcp_sk(sk)->mptcp->path_index == mpcb->dfin_path_index &&
mptcp_is_available(sk, skb, zero_wnd_test))
return sk;
}
}
/* Find the best subflow */
restart:
sk = get_subflow_from_selectors(mpcb, skb, &subflow_is_active,
zero_wnd_test, &force);
if (force)
/* one unused active sk or one NULL sk when there is at least
* one temporarily unavailable unused active sk
*/
return sk;
sk = get_subflow_from_selectors(mpcb, skb, &subflow_is_backup,
zero_wnd_test, &force);
if (!force && skb) {
/* one used backup sk or one NULL sk when there is no
 * temporarily unavailable unused backup sk
 *
 * the skb has passed through all the available active and backup
 * sks, so clear the path mask
 */
TCP_SKB_CB(skb)->path_mask = 0;
if (!looping) {
looping = true;
goto restart;
}
}
return sk;
}
EXPORT_SYMBOL_GPL(get_available_subflow);
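/* Receive-buffer optimization: look at the head of the meta retransmit queue.
 * Subflows that already carry that segment and are slower than this one may
 * be penalized (cwnd halved and, outside slow start, ssthresh halved too, at
 * most once per RTT). The segment is returned for reinjection on this subflow
 * when it has not been sent here yet and the subflows holding it are either
 * nearly stalled (cwnd <= 4) or have an RTT more than four times ours.
 */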
static struct sk_buff *mptcp_rcv_buf_optimization(struct sock *sk, int penal)
{
struct sock *meta_sk;
const struct tcp_sock *tp = tcp_sk(sk);
struct mptcp_tcp_sock *mptcp;
struct sk_buff *skb_head;
struct defsched_priv *def_p = defsched_get_priv(tp);
meta_sk = mptcp_meta_sk(sk);
skb_head = tcp_rtx_queue_head(meta_sk);
if (!skb_head)
return NULL;
/* If penalization is optional (coming from mptcp_next_segment()) and
 * we are not send-buffer-limited, we do not penalize. The retransmission
 * is then just an optimization to avoid the idle time caused by the delay
 * before we wake up the application.
 */
if (!penal && sk_stream_memory_free(meta_sk))
goto retrans;
/* Only penalize again after an RTT has elapsed */
if (tcp_jiffies32 - def_p->last_rbuf_opti < usecs_to_jiffies(tp->srtt_us >> 3))
goto retrans;
/* Half the cwnd of the slow flows */
mptcp_for_each_sub(tp->mpcb, mptcp) {
struct tcp_sock *tp_it = mptcp->tp;
if (tp_it != tp &&
TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp_it->mptcp->path_index)) {
if (tp->srtt_us < tp_it->srtt_us && inet_csk((struct sock *)tp_it)->icsk_ca_state == TCP_CA_Open) {
u32 prior_cwnd = tp_it->snd_cwnd;
tp_it->snd_cwnd = max(tp_it->snd_cwnd >> 1U, 1U);
/* If in slow start, do not reduce the ssthresh */
if (prior_cwnd >= tp_it->snd_ssthresh)
tp_it->snd_ssthresh = max(tp_it->snd_ssthresh >> 1U, 2U);
def_p->last_rbuf_opti = tcp_jiffies32;
}
}
}
retrans:
/* Segment not yet injected into this path? Take it!!! */
if (!(TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp->mptcp->path_index))) {
bool do_retrans = false;
mptcp_for_each_sub(tp->mpcb, mptcp) {
struct tcp_sock *tp_it = mptcp->tp;
if (tp_it != tp &&
TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp_it->mptcp->path_index)) {
if (tp_it->snd_cwnd <= 4) {
do_retrans = true;
break;
}
if (4 * tp->srtt_us >= tp_it->srtt_us) {
do_retrans = false;
break;
} else {
do_retrans = true;
}
}
}
if (do_retrans && mptcp_is_available(sk, skb_head, false)) {
trace_mptcp_retransmit(sk, skb_head);
return skb_head;
}
}
return NULL;
}
/* Returns the next segment to be sent from the mptcp meta-queue.
* (chooses the reinject queue if any segment is waiting in it; otherwise,
* the normal write queue).
* Sets *@reinject to 1 if the returned segment comes from the
* reinject queue. Sets it to 0 if it is the regular send-head of the meta-sk,
* and sets it to -1 if it is a meta-level retransmission to optimize the
* receive-buffer.
*/
static struct sk_buff *__mptcp_next_segment(struct sock *meta_sk, int *reinject)
{
const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
struct sk_buff *skb = NULL;
*reinject = 0;
/* If we are in fallback-mode, just take from the meta-send-queue */
if (mpcb->infinite_mapping_snd || mpcb->send_infinite_mapping)
return tcp_send_head(meta_sk);
skb = skb_peek(&mpcb->reinject_queue);
if (skb) {
*reinject = 1;
} else {
skb = tcp_send_head(meta_sk);
if (!skb && meta_sk->sk_socket &&
test_bit(SOCK_NOSPACE, &meta_sk->sk_socket->flags) &&
sk_stream_wspace(meta_sk) < sk_stream_min_wspace(meta_sk)) {
struct sock *subsk = get_available_subflow(meta_sk, NULL,
false);
if (!subsk)
return NULL;
skb = mptcp_rcv_buf_optimization(subsk, 0);
if (skb)
*reinject = -1;
}
}
return skb;
}
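/* Like __mptcp_next_segment(), but additionally picks the subflow to send on
 * (*subsk) and computes *limit: how many bytes of the returned skb may be
 * sent in one go, bounded by the subflow's cwnd/gso budget (max_segs * mss)
 * and by its remaining send window. *limit stays 0 when the skb fits into a
 * single MSS and needs no splitting.
 */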
static struct sk_buff *mptcp_next_segment(struct sock *meta_sk,
int *reinject,
struct sock **subsk,
unsigned int *limit)
{
struct sk_buff *skb = __mptcp_next_segment(meta_sk, reinject);
unsigned int mss_now;
struct tcp_sock *subtp;
u16 gso_max_segs;
u32 max_len, max_segs, window, needed;
/* As we set it, we have to reset it as well. */
*limit = 0;
if (!skb)
return NULL;
*subsk = get_available_subflow(meta_sk, skb, false);
if (!*subsk)
return NULL;
subtp = tcp_sk(*subsk);
mss_now = tcp_current_mss(*subsk);
if (!*reinject && unlikely(!tcp_snd_wnd_test(tcp_sk(meta_sk), skb, mss_now))) {
skb = mptcp_rcv_buf_optimization(*subsk, 1);
if (skb)
*reinject = -1;
else
return NULL;
}
/* No splitting required, as we will only send a single segment */
if (skb->len <= mss_now)
return skb;
/* The following is similar to tcp_mss_split_point, but
* we do not care about Nagle, because we will anyway
* use TCP_NAGLE_PUSH, which overrides it.
*
* So, we first limit according to the cwnd/gso-size and then according
* to the subflow's window.
*/
gso_max_segs = (*subsk)->sk_gso_max_segs;
if (!gso_max_segs) /* No gso supported on the subflow's NIC */
gso_max_segs = 1;
max_segs = min_t(unsigned int, tcp_cwnd_test(subtp, skb), gso_max_segs);
if (!max_segs)
return NULL;
max_len = mss_now * max_segs;
window = tcp_wnd_end(subtp) - subtp->write_seq;
needed = min(skb->len, window);
if (max_len <= skb->len)
/* Take max_len, which is bounded by the cwnd and gso size */
*limit = max_len;
else
/* Or, take the window */
*limit = needed;
return skb;
}
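/* Per-subflow private data of the default scheduler: last_rbuf_opti records
 * when mptcp_rcv_buf_optimization() last penalized the other subflows.
 */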
static void defsched_init(struct sock *sk)
{
struct defsched_priv *def_p = defsched_get_priv(tcp_sk(sk));
def_p->last_rbuf_opti = tcp_jiffies32;
}
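/* The default scheduler: lowest-SRTT subflow first, using backup subflows
 * only when no active subflow is available.
 */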
struct mptcp_sched_ops mptcp_sched_default = {
.get_subflow = get_available_subflow,
.next_segment = mptcp_next_segment,
.init = defsched_init,
.name = "default",
.owner = THIS_MODULE,
};
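/* Caller must hold mptcp_sched_list_lock or the RCU read lock. */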
static struct mptcp_sched_ops *mptcp_sched_find(const char *name)
{
struct mptcp_sched_ops *e;
list_for_each_entry_rcu(e, &mptcp_sched_list, list) {
if (strcmp(e->name, name) == 0)
return e;
}
return NULL;
}
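/* Register a scheduler. get_subflow and next_segment are mandatory; -EEXIST
 * is returned if a scheduler with the same name is already registered.
 */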
int mptcp_register_scheduler(struct mptcp_sched_ops *sched)
{
int ret = 0;
if (!sched->get_subflow || !sched->next_segment)
return -EINVAL;
spin_lock(&mptcp_sched_list_lock);
if (mptcp_sched_find(sched->name)) {
pr_notice("%s already registered\n", sched->name);
ret = -EEXIST;
} else {
list_add_tail_rcu(&sched->list, &mptcp_sched_list);
pr_info("%s registered\n", sched->name);
}
spin_unlock(&mptcp_sched_list_lock);
return ret;
}
EXPORT_SYMBOL_GPL(mptcp_register_scheduler);
void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched)
{
spin_lock(&mptcp_sched_list_lock);
list_del_rcu(&sched->list);
spin_unlock(&mptcp_sched_list_lock);
/* Wait for outstanding readers to complete before the
* module gets removed entirely.
*
* A try_module_get() should fail by now, as our module is in the
* "going" state: no refs are held anymore and the module_exit()
* handler is being called.
*/
synchronize_rcu();
}
EXPORT_SYMBOL_GPL(mptcp_unregister_scheduler);
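/* The default scheduler is simply the first entry of mptcp_sched_list;
 * mptcp_set_default_scheduler() below moves the selected scheduler to the
 * head of that list.
 */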
void mptcp_get_default_scheduler(char *name)
{
struct mptcp_sched_ops *sched;
BUG_ON(list_empty(&mptcp_sched_list));
rcu_read_lock();
sched = list_entry(mptcp_sched_list.next, struct mptcp_sched_ops, list);
strncpy(name, sched->name, MPTCP_SCHED_NAME_MAX);
rcu_read_unlock();
}
int mptcp_set_default_scheduler(const char *name)
{
struct mptcp_sched_ops *sched;
int ret = -ENOENT;
spin_lock(&mptcp_sched_list_lock);
sched = mptcp_sched_find(name);
#ifdef CONFIG_MODULES
if (!sched && capable(CAP_NET_ADMIN)) {
spin_unlock(&mptcp_sched_list_lock);
request_module("mptcp_%s", name);
spin_lock(&mptcp_sched_list_lock);
sched = mptcp_sched_find(name);
}
#endif
if (sched) {
list_move(&sched->list, &mptcp_sched_list);
ret = 0;
} else {
pr_info("%s is not available\n", name);
}
spin_unlock(&mptcp_sched_list_lock);
return ret;
}
/* Must be called with rcu lock held */
static struct mptcp_sched_ops *__mptcp_sched_find_autoload(const char *name)
{
struct mptcp_sched_ops *sched = mptcp_sched_find(name);
#ifdef CONFIG_MODULES
if (!sched && capable(CAP_NET_ADMIN)) {
rcu_read_unlock();
request_module("mptcp_%s", name);
rcu_read_lock();
sched = mptcp_sched_find(name);
}
#endif
return sched;
}
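/* Attach a scheduler to a freshly created mpcb: prefer the scheduler
 * requested via setsockopt on the meta socket, otherwise fall back to the
 * first registered one (the current default). The module reference taken
 * here is released by mptcp_cleanup_scheduler().
 */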
void mptcp_init_scheduler(struct mptcp_cb *mpcb)
{
struct mptcp_sched_ops *sched;
struct sock *meta_sk = mpcb->meta_sk;
struct tcp_sock *meta_tp = tcp_sk(meta_sk);
rcu_read_lock();
/* if scheduler was set using socket option */
if (meta_tp->mptcp_sched_setsockopt) {
sched = __mptcp_sched_find_autoload(meta_tp->mptcp_sched_name);
if (sched && try_module_get(sched->owner)) {
mpcb->sched_ops = sched;
goto out;
}
}
list_for_each_entry_rcu(sched, &mptcp_sched_list, list) {
if (try_module_get(sched->owner)) {
mpcb->sched_ops = sched;
break;
}
}
out:
rcu_read_unlock();
}
/* Change scheduler for socket */
int mptcp_set_scheduler(struct sock *sk, const char *name)
{
struct mptcp_sched_ops *sched;
int err = 0;
rcu_read_lock();
sched = __mptcp_sched_find_autoload(name);
if (!sched) {
err = -ENOENT;
} else if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
err = -EPERM;
} else {
strcpy(tcp_sk(sk)->mptcp_sched_name, name);
tcp_sk(sk)->mptcp_sched_setsockopt = 1;
}
rcu_read_unlock();
return err;
}
/* Manage refcounts on socket close. */
void mptcp_cleanup_scheduler(struct mptcp_cb *mpcb)
{
module_put(mpcb->sched_ops->owner);
}
/* Set default value from kernel configuration at bootup */
static int __init mptcp_scheduler_default(void)
{
BUILD_BUG_ON(sizeof(struct defsched_priv) > MPTCP_SCHED_SIZE);
return mptcp_set_default_scheduler(CONFIG_DEFAULT_MPTCP_SCHED);
}
late_initcall(mptcp_scheduler_default);

net/mptcp/mptcp_wvegas.c Executable file
View File

@ -0,0 +1,271 @@
/*
* MPTCP implementation - WEIGHTED VEGAS
*
* Algorithm design:
* Yu Cao <cyAnalyst@126.com>
* Mingwei Xu <xmw@csnet1.cs.tsinghua.edu.cn>
* Xiaoming Fu <fu@cs.uni-goettinggen.de>
*
* Implementation:
* Yu Cao <cyAnalyst@126.com>
* Enhuan Dong <deh13@mails.tsinghua.edu.cn>
*
* Ported to the official MPTCP-kernel:
* Christoph Paasch <christoph.paasch@uclouvain.be>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/skbuff.h>
#include <net/tcp.h>
#include <net/mptcp.h>
#include <linux/module.h>
#include <linux/tcp.h>
static int initial_alpha = 2;
static int total_alpha = 10;
static int gamma = 1;
module_param(initial_alpha, int, 0644);
MODULE_PARM_DESC(initial_alpha, "initial alpha for all subflows");
module_param(total_alpha, int, 0644);
MODULE_PARM_DESC(total_alpha, "total alpha for all subflows");
module_param(gamma, int, 0644);
MODULE_PARM_DESC(gamma, "limit on increase (scale by 2)");
#define MPTCP_WVEGAS_SCALE 16 /* fixed-point shift: rates and weights are scaled by 2^16 */
/* wVegas variables */
struct wvegas {
u32 beg_snd_nxt; /* right edge during last RTT */
u8 doing_wvegas_now; /* if true, do wvegas for this RTT */
u16 cnt_rtt; /* # of RTTs measured within last RTT */
u32 sampled_rtt; /* cumulative RTTs measured within last RTT (in usec) */
u32 base_rtt; /* the min of all wVegas RTT measurements seen (in usec) */
u64 instant_rate; /* cwnd / srtt_us, unit: pkts/us * 2^16 */
u64 weight; /* the ratio of subflow's rate to the total rate, * 2^16 */
int alpha; /* alpha for each subflows */
u32 queue_delay; /* queue delay */
};
static inline u64 mptcp_wvegas_scale(u32 val, int scale)
{
return (u64) val << scale;
}
static void wvegas_enable(const struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct wvegas *wvegas = inet_csk_ca(sk);
wvegas->doing_wvegas_now = 1;
wvegas->beg_snd_nxt = tp->snd_nxt;
wvegas->cnt_rtt = 0;
wvegas->sampled_rtt = 0;
wvegas->instant_rate = 0;
wvegas->alpha = initial_alpha;
wvegas->weight = mptcp_wvegas_scale(1, MPTCP_WVEGAS_SCALE);
wvegas->queue_delay = 0;
}
static inline void wvegas_disable(const struct sock *sk)
{
struct wvegas *wvegas = inet_csk_ca(sk);
wvegas->doing_wvegas_now = 0;
}
static void mptcp_wvegas_init(struct sock *sk)
{
struct wvegas *wvegas = inet_csk_ca(sk);
wvegas->base_rtt = 0x7fffffff;
wvegas_enable(sk);
}
static inline u64 mptcp_wvegas_rate(u32 cwnd, u32 rtt_us)
{
return div_u64(mptcp_wvegas_scale(cwnd, MPTCP_WVEGAS_SCALE), rtt_us);
}
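/* Per-ACK RTT bookkeeping: vrtt is the sample plus one so that a valid
 * measurement is never zero; base_rtt tracks the minimum RTT seen so far,
 * while sampled_rtt/cnt_rtt accumulate samples for the per-RTT average used
 * in mptcp_wvegas_cong_avoid().
 */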
static void mptcp_wvegas_pkts_acked(struct sock *sk,
const struct ack_sample *sample)
{
struct wvegas *wvegas = inet_csk_ca(sk);
u32 vrtt;
if (sample->rtt_us < 0)
return;
vrtt = sample->rtt_us + 1;
if (vrtt < wvegas->base_rtt)
wvegas->base_rtt = vrtt;
wvegas->sampled_rtt += vrtt;
wvegas->cnt_rtt++;
}
static void mptcp_wvegas_state(struct sock *sk, u8 ca_state)
{
if (ca_state == TCP_CA_Open)
wvegas_enable(sk);
else
wvegas_disable(sk);
}
static void mptcp_wvegas_cwnd_event(struct sock *sk, enum tcp_ca_event event)
{
if (event == CA_EVENT_CWND_RESTART) {
mptcp_wvegas_init(sk);
} else if (event == CA_EVENT_LOSS) {
struct wvegas *wvegas = inet_csk_ca(sk);
wvegas->instant_rate = 0;
}
}
static inline u32 mptcp_wvegas_ssthresh(const struct tcp_sock *tp)
{
return min(tp->snd_ssthresh, tp->snd_cwnd);
}
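/* Weight of this subflow: its instantaneous rate divided by the sum of the
 * rates of all subflows that can currently send, in 2^16 fixed point. Falls
 * back to the previously computed weight while no rate samples are available.
 */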
static u64 mptcp_wvegas_weight(const struct mptcp_cb *mpcb, const struct sock *sk)
{
u64 total_rate = 0;
const struct wvegas *wvegas = inet_csk_ca(sk);
struct mptcp_tcp_sock *mptcp;
if (!mpcb)
return wvegas->weight;
mptcp_for_each_sub(mpcb, mptcp) {
struct sock *sub_sk = mptcp_to_sock(mptcp);
struct wvegas *sub_wvegas = inet_csk_ca(sub_sk);
/* sampled_rtt is initialized to 0 */
if (mptcp_sk_can_send(sub_sk) && (sub_wvegas->sampled_rtt > 0))
total_rate += sub_wvegas->instant_rate;
}
if (total_rate && wvegas->instant_rate)
return div64_u64(mptcp_wvegas_scale(wvegas->instant_rate, MPTCP_WVEGAS_SCALE), total_rate);
else
return wvegas->weight;
}
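/* wVegas congestion avoidance, evaluated once per RTT (when the ACK passes
 * beg_snd_nxt): diff = cwnd * (rtt - base_rtt) / rtt estimates how many
 * packets this subflow keeps queued in the network, and alpha is the
 * subflow's share of total_alpha, derived from its weight. In slow start,
 * growth stops once diff exceeds gamma. Outside slow start, cwnd grows by
 * one while diff < alpha, shrinks by one while diff > alpha, and is scaled
 * down further when the queueing delay reaches twice the smallest queueing
 * delay observed so far.
 *
 * Worked example (illustrative numbers only): base_rtt = 40 ms, rtt = 50 ms,
 * cwnd = 20 gives diff = 20 * (50 - 40) / 50 = 4; with a weight of 0.5 and
 * the default total_alpha = 10, alpha = 5, so diff < alpha and cwnd is
 * increased by one.
 */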
static void mptcp_wvegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
struct wvegas *wvegas = inet_csk_ca(sk);
if (!wvegas->doing_wvegas_now) {
tcp_reno_cong_avoid(sk, ack, acked);
return;
}
if (after(ack, wvegas->beg_snd_nxt)) {
wvegas->beg_snd_nxt = tp->snd_nxt;
if (wvegas->cnt_rtt <= 2) {
tcp_reno_cong_avoid(sk, ack, acked);
} else {
u32 rtt, diff, q_delay;
u64 target_cwnd;
rtt = wvegas->sampled_rtt / wvegas->cnt_rtt;
target_cwnd = div_u64(((u64)tp->snd_cwnd * wvegas->base_rtt), rtt);
diff = div_u64((u64)tp->snd_cwnd * (rtt - wvegas->base_rtt), rtt);
if (diff > gamma && tcp_in_slow_start(tp)) {
tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1);
tp->snd_ssthresh = mptcp_wvegas_ssthresh(tp);
} else if (tcp_in_slow_start(tp)) {
tcp_slow_start(tp, acked);
} else {
if (diff >= wvegas->alpha) {
wvegas->instant_rate = mptcp_wvegas_rate(tp->snd_cwnd, rtt);
wvegas->weight = mptcp_wvegas_weight(tp->mpcb, sk);
wvegas->alpha = max(2U, (u32)((wvegas->weight * total_alpha) >> MPTCP_WVEGAS_SCALE));
}
if (diff > wvegas->alpha) {
tp->snd_cwnd--;
tp->snd_ssthresh = mptcp_wvegas_ssthresh(tp);
} else if (diff < wvegas->alpha) {
tp->snd_cwnd++;
}
/* Try to drain the link queue if needed */
q_delay = rtt - wvegas->base_rtt;
if ((wvegas->queue_delay == 0) || (wvegas->queue_delay > q_delay))
wvegas->queue_delay = q_delay;
if (q_delay >= 2 * wvegas->queue_delay) {
u32 backoff_factor = div_u64(mptcp_wvegas_scale(wvegas->base_rtt, MPTCP_WVEGAS_SCALE), 2 * rtt);
tp->snd_cwnd = ((u64)tp->snd_cwnd * backoff_factor) >> MPTCP_WVEGAS_SCALE;
wvegas->queue_delay = 0;
}
}
if (tp->snd_cwnd < 2)
tp->snd_cwnd = 2;
else if (tp->snd_cwnd > tp->snd_cwnd_clamp)
tp->snd_cwnd = tp->snd_cwnd_clamp;
tp->snd_ssthresh = tcp_current_ssthresh(sk);
}
wvegas->cnt_rtt = 0;
wvegas->sampled_rtt = 0;
}
/* Use normal slow start */
else if (tcp_in_slow_start(tp))
tcp_slow_start(tp, acked);
}
static struct tcp_congestion_ops mptcp_wvegas __read_mostly = {
.init = mptcp_wvegas_init,
.ssthresh = tcp_reno_ssthresh,
.cong_avoid = mptcp_wvegas_cong_avoid,
.undo_cwnd = tcp_reno_undo_cwnd,
.pkts_acked = mptcp_wvegas_pkts_acked,
.set_state = mptcp_wvegas_state,
.cwnd_event = mptcp_wvegas_cwnd_event,
.owner = THIS_MODULE,
.name = "wvegas",
};
static int __init mptcp_wvegas_register(void)
{
BUILD_BUG_ON(sizeof(struct wvegas) > ICSK_CA_PRIV_SIZE);
tcp_register_congestion_control(&mptcp_wvegas);
return 0;
}
static void __exit mptcp_wvegas_unregister(void)
{
tcp_unregister_congestion_control(&mptcp_wvegas);
}
module_init(mptcp_wvegas_register);
module_exit(mptcp_wvegas_unregister);
MODULE_AUTHOR("Yu Cao, Enhuan Dong");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("MPTCP wVegas");
MODULE_VERSION("0.1");