#include <linux/errqueue.h>
int sysctl_tcp_timestamps __read_mostly = 1;
-int sysctl_tcp_window_scaling __read_mostly = 1;
-int sysctl_tcp_sack __read_mostly = 1;
int sysctl_tcp_fack __read_mostly;
int sysctl_tcp_max_reordering __read_mostly = 300;
int sysctl_tcp_dsack __read_mostly = 1;
#define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */
#define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */
#define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */
+#define FLAG_NO_CHALLENGE_ACK 0x8000 /* do not call tcp_send_challenge_ack() */
#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
tcp_sndbuf_expand(sk);
tp->rcvq_space.space = tp->rcv_wnd;
- skb_mstamp_get(&tp->tcp_mstamp);
+ tcp_mstamp_refresh(tp);
tp->rcvq_space.time = tp->tcp_mstamp;
tp->rcvq_space.seq = tp->copied_seq;
tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
- tp->snd_cwnd_stamp = tcp_time_stamp;
+ tp->snd_cwnd_stamp = tcp_jiffies32;
}
/* 5. Recalculate window clamp after socket hit its memory bounds. */
{
u32 delta_us;
- if (tp->rcv_rtt_est.time.v64 == 0)
+ if (tp->rcv_rtt_est.time == 0)
goto new_measure;
if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
return;
- delta_us = skb_mstamp_us_delta(&tp->tcp_mstamp, &tp->rcv_rtt_est.time);
+ delta_us = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcv_rtt_est.time);
tcp_rcv_rtt_update(tp, delta_us, 1);
new_measure:
const struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
+
if (tp->rx_opt.rcv_tsecr &&
(TCP_SKB_CB(skb)->end_seq -
- TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss))
- tcp_rcv_rtt_update(tp,
- jiffies_to_usecs(tcp_time_stamp -
- tp->rx_opt.rcv_tsecr),
- 0);
+ TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss)) {
+ u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
+ u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+
+ tcp_rcv_rtt_update(tp, delta_us, 0);
+ }
}
/*
int time;
int copied;
- time = skb_mstamp_us_delta(&tp->tcp_mstamp, &tp->rcvq_space.time);
+ time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time);
if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
return;
tcp_rcv_rtt_measure(tp);
- now = tcp_time_stamp;
+ now = tcp_jiffies32;
if (!icsk->icsk_ack.ato) {
/* The _first_ data packet received, initialize
struct tcp_sock *tp = tcp_sk(sk);
int mib_idx;
+ if (WARN_ON_ONCE(metric < 0))
+ return;
+
if (metric > tp->reordering) {
tp->reordering = min(sysctl_tcp_max_reordering, metric);
* that was SACKed. RTO needs the earliest RTT to stay conservative,
* but congestion control should still get an accurate delay signal.
*/
- struct skb_mstamp first_sackt;
- struct skb_mstamp last_sackt;
+ u64 first_sackt;
+ u64 last_sackt;
struct rate_sample *rate;
int flag;
};
struct tcp_sacktag_state *state, u8 sacked,
u32 start_seq, u32 end_seq,
int dup_sack, int pcount,
- const struct skb_mstamp *xmit_time)
+ u64 xmit_time)
{
struct tcp_sock *tp = tcp_sk(sk);
int fack_count = state->fack_count;
state->reord);
if (!after(end_seq, tp->high_seq))
state->flag |= FLAG_ORIG_SACK_ACKED;
- if (state->first_sackt.v64 == 0)
- state->first_sackt = *xmit_time;
- state->last_sackt = *xmit_time;
+ if (state->first_sackt == 0)
+ state->first_sackt = xmit_time;
+ state->last_sackt = xmit_time;
}
if (sacked & TCPCB_LOST) {
*/
tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
start_seq, end_seq, dup_sack, pcount,
- &skb->skb_mstamp);
+ skb->skb_mstamp);
tcp_rate_skb_delivered(sk, skb, state->rate);
if (skb == tp->lost_skb_hint)
tcp_advance_highest_sack(sk, skb);
tcp_skb_collapse_tstamp(prev, skb);
- if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp.v64))
- TCP_SKB_CB(prev)->tx.delivered_mstamp.v64 = 0;
+ if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp))
+ TCP_SKB_CB(prev)->tx.delivered_mstamp = 0;
tcp_unlink_write_queue(skb, sk);
sk_wmem_free_skb(sk, skb);
TCP_SKB_CB(skb)->end_seq,
dup_sack,
tcp_skb_pcount(skb),
- &skb->skb_mstamp);
+ skb->skb_mstamp);
tcp_rate_skb_delivered(sk, skb, state->rate);
if (!before(TCP_SKB_CB(skb)->seq,
}
tp->snd_cwnd = 1;
tp->snd_cwnd_cnt = 0;
- tp->snd_cwnd_stamp = tcp_time_stamp;
+ tp->snd_cwnd_stamp = tcp_jiffies32;
tp->retrans_out = 0;
tp->lost_out = 0;
tcp_ecn_withdraw_cwr(tp);
}
}
- tp->snd_cwnd_stamp = tcp_time_stamp;
+ tp->snd_cwnd_stamp = tcp_jiffies32;
tp->undo_marker = 0;
}
if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR ||
(tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) {
tp->snd_cwnd = tp->snd_ssthresh;
- tp->snd_cwnd_stamp = tcp_time_stamp;
+ tp->snd_cwnd_stamp = tcp_jiffies32;
}
tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
}
tcp_mss_to_mtu(sk, tp->mss_cache) /
icsk->icsk_mtup.probe_size;
tp->snd_cwnd_cnt = 0;
- tp->snd_cwnd_stamp = tcp_time_stamp;
+ tp->snd_cwnd_stamp = tcp_jiffies32;
tp->snd_ssthresh = tcp_current_ssthresh(sk);
icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
struct tcp_sock *tp = tcp_sk(sk);
u32 wlen = sysctl_tcp_min_rtt_wlen * HZ;
- minmax_running_min(&tp->rtt_min, wlen, tcp_time_stamp,
+ minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32,
rtt_us ? : jiffies_to_usecs(1));
}
-static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
- long seq_rtt_us, long sack_rtt_us,
- long ca_rtt_us)
+static bool tcp_ack_update_rtt(struct sock *sk, const int flag,
+ long seq_rtt_us, long sack_rtt_us,
+ long ca_rtt_us, struct rate_sample *rs)
{
const struct tcp_sock *tp = tcp_sk(sk);
* See draft-ietf-tcplw-high-performance-00, section 3.3.
*/
if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
- flag & FLAG_ACKED)
- seq_rtt_us = ca_rtt_us = jiffies_to_usecs(tcp_time_stamp -
- tp->rx_opt.rcv_tsecr);
+ flag & FLAG_ACKED) {
+ u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
+ u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+
+ seq_rtt_us = ca_rtt_us = delta_us;
+ }
+ rs->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet (or -1) */
if (seq_rtt_us < 0)
return false;
/* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */
void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req)
{
+ struct rate_sample rs;
long rtt_us = -1L;
- if (req && !req->num_retrans && tcp_rsk(req)->snt_synack.v64) {
- struct skb_mstamp now;
+ if (req && !req->num_retrans && tcp_rsk(req)->snt_synack)
+ rtt_us = tcp_stamp_us_delta(tcp_clock_us(), tcp_rsk(req)->snt_synack);
- skb_mstamp_get(&now);
- rtt_us = skb_mstamp_us_delta(&now, &tcp_rsk(req)->snt_synack);
- }
-
- tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, rtt_us, -1L, rtt_us);
+ tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, rtt_us, -1L, rtt_us, &rs);
}
const struct inet_connection_sock *icsk = inet_csk(sk);
icsk->icsk_ca_ops->cong_avoid(sk, ack, acked);
- tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
+ tcp_sk(sk)->snd_cwnd_stamp = tcp_jiffies32;
}
/* Restart timer after forward progress on connection.
if (icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
struct sk_buff *skb = tcp_write_queue_head(sk);
- const u32 rto_time_stamp =
- tcp_skb_timestamp(skb) + rto;
- s32 delta = (s32)(rto_time_stamp - tcp_time_stamp);
- /* delta may not be positive if the socket is locked
+ u64 rto_time_stamp = skb->skb_mstamp +
+ jiffies_to_usecs(rto);
+ s64 delta_us = rto_time_stamp - tp->tcp_mstamp;
+ /* delta_us may not be positive if the socket is locked
* when the retrans timer fires and is rescheduled.
*/
- if (delta > 0)
- rto = delta;
+ if (delta_us > 0)
+ rto = usecs_to_jiffies(delta_us);
}
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
TCP_RTO_MAX);
struct tcp_sacktag_state *sack)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
- struct skb_mstamp first_ackt, last_ackt;
+ u64 first_ackt, last_ackt;
struct tcp_sock *tp = tcp_sk(sk);
- struct skb_mstamp *now = &tp->tcp_mstamp;
u32 prior_sacked = tp->sacked_out;
u32 reord = tp->packets_out;
bool fully_acked = true;
bool rtt_update;
int flag = 0;
- first_ackt.v64 = 0;
+ first_ackt = 0;
while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
flag |= FLAG_RETRANS_DATA_ACKED;
} else if (!(sacked & TCPCB_SACKED_ACKED)) {
last_ackt = skb->skb_mstamp;
- WARN_ON_ONCE(last_ackt.v64 == 0);
- if (!first_ackt.v64)
+ WARN_ON_ONCE(last_ackt == 0);
+ if (!first_ackt)
first_ackt = last_ackt;
last_in_flight = TCP_SKB_CB(skb)->tx.in_flight;
tp->delivered += acked_pcount;
if (!tcp_skb_spurious_retrans(tp, skb))
tcp_rack_advance(tp, sacked, scb->end_seq,
- &skb->skb_mstamp);
+ skb->skb_mstamp);
}
if (sacked & TCPCB_LOST)
tp->lost_out -= acked_pcount;
if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
flag |= FLAG_SACK_RENEGING;
- if (likely(first_ackt.v64) && !(flag & FLAG_RETRANS_DATA_ACKED)) {
- seq_rtt_us = skb_mstamp_us_delta(now, &first_ackt);
- ca_rtt_us = skb_mstamp_us_delta(now, &last_ackt);
+ if (likely(first_ackt) && !(flag & FLAG_RETRANS_DATA_ACKED)) {
+ seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt);
+ ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, last_ackt);
}
- if (sack->first_sackt.v64) {
- sack_rtt_us = skb_mstamp_us_delta(now, &sack->first_sackt);
- ca_rtt_us = skb_mstamp_us_delta(now, &sack->last_sackt);
+ if (sack->first_sackt) {
+ sack_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->first_sackt);
+ ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->last_sackt);
}
- sack->rate->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet, or -1 */
rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us,
- ca_rtt_us);
+ ca_rtt_us, sack->rate);
if (flag & FLAG_ACKED) {
tcp_rearm_rto(sk);
tp->fackets_out -= min(pkts_acked, tp->fackets_out);
} else if (skb && rtt_update && sack_rtt_us >= 0 &&
- sack_rtt_us > skb_mstamp_us_delta(now, &skb->skb_mstamp)) {
+ sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp)) {
/* Do not re-arm RTO if the sack RTT is measured from data sent
* after when the head was last (re)transmitted. Otherwise the
* timeout may continue to extend in loss recovery.
if (icsk->icsk_ca_ops->pkts_acked) {
struct ack_sample sample = { .pkts_acked = pkts_acked,
- .rtt_us = ca_rtt_us,
+ .rtt_us = sack->rate->rtt_us,
.in_flight = last_in_flight };
icsk->icsk_ca_ops->pkts_acked(sk, &sample);
u32 *last_oow_ack_time)
{
if (*last_oow_ack_time) {
- s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
+ s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
NET_INC_STATS(net, mib_idx);
}
}
- *last_oow_ack_time = tcp_time_stamp;
+ *last_oow_ack_time = tcp_jiffies32;
return false; /* not rate-limited: go ahead, send dupack now! */
}
int acked = 0; /* Number of packets newly acked */
int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */
- sack_state.first_sackt.v64 = 0;
+ sack_state.first_sackt = 0;
sack_state.rate = &rs;
/* We very likely will need to access write queue head. */
if (before(ack, prior_snd_una)) {
/* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */
if (before(ack, prior_snd_una - tp->max_window)) {
- tcp_send_challenge_ack(sk, skb);
+ if (!(flag & FLAG_NO_CHALLENGE_ACK))
+ tcp_send_challenge_ack(sk, skb);
return -1;
}
goto old_ack;
*/
sk->sk_err_soft = 0;
icsk->icsk_probes_out = 0;
- tp->rcv_tstamp = tcp_time_stamp;
+ tp->rcv_tstamp = tcp_jiffies32;
if (!prior_packets)
goto no_queue;
* But, this can also be called on packets in the established flow when
* the fast version below fails.
*/
-void tcp_parse_options(const struct sk_buff *skb,
+void tcp_parse_options(const struct net *net,
+ const struct sk_buff *skb,
struct tcp_options_received *opt_rx, int estab,
struct tcp_fastopen_cookie *foc)
{
break;
case TCPOPT_WINDOW:
if (opsize == TCPOLEN_WINDOW && th->syn &&
- !estab && sysctl_tcp_window_scaling) {
+ !estab && net->ipv4.sysctl_tcp_window_scaling) {
__u8 snd_wscale = *(__u8 *)ptr;
opt_rx->wscale_ok = 1;
if (snd_wscale > TCP_MAX_WSCALE) {
break;
case TCPOPT_SACK_PERM:
if (opsize == TCPOLEN_SACK_PERM && th->syn &&
- !estab && sysctl_tcp_sack) {
+ !estab && net->ipv4.sysctl_tcp_sack) {
opt_rx->sack_ok = TCP_SACK_SEEN;
tcp_sack_reset(opt_rx);
}
/* Fast parse options. This hopes to only see timestamps.
* If it is wrong it falls back on tcp_parse_options().
*/
-static bool tcp_fast_parse_options(const struct sk_buff *skb,
+static bool tcp_fast_parse_options(const struct net *net,
+ const struct sk_buff *skb,
const struct tcphdr *th, struct tcp_sock *tp)
{
/* In the spirit of fast parsing, compare doff directly to constant
return true;
}
- tcp_parse_options(skb, &tp->rx_opt, 1, NULL);
+ tcp_parse_options(net, skb, &tp->rx_opt, 1, NULL);
if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
tp->rx_opt.rcv_tsecr -= tp->tsoffset;
if (tcp_should_expand_sndbuf(sk)) {
tcp_sndbuf_expand(sk);
- tp->snd_cwnd_stamp = tcp_time_stamp;
+ tp->snd_cwnd_stamp = tcp_jiffies32;
}
sk->sk_write_space(sk);
bool rst_seq_match = false;
/* RFC1323: H1. Apply PAWS check first. */
- if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
+ if (tcp_fast_parse_options(sock_net(sk), skb, th, tp) &&
+ tp->rx_opt.saw_tstamp &&
tcp_paws_discard(sk, skb)) {
if (!th->rst) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
{
struct tcp_sock *tp = tcp_sk(sk);
- skb_mstamp_get(&tp->tcp_mstamp);
+ tcp_mstamp_refresh(tp);
if (unlikely(!sk->sk_rx_dst))
inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
/*
struct inet_connection_sock *icsk = inet_csk(sk);
tcp_set_state(sk, TCP_ESTABLISHED);
- icsk->icsk_ack.lrcvtime = tcp_time_stamp;
+ icsk->icsk_ack.lrcvtime = tcp_jiffies32;
if (skb) {
icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
/* Prevent spurious tcp_cwnd_restart() on first data
* packet.
*/
- tp->lsndtime = tcp_time_stamp;
+ tp->lsndtime = tcp_jiffies32;
tcp_init_buffer_space(sk);
/* Get original SYNACK MSS value if user MSS sets mss_clamp */
tcp_clear_options(&opt);
opt.user_mss = opt.mss_clamp = 0;
- tcp_parse_options(synack, &opt, 0, NULL);
+ tcp_parse_options(sock_net(sk), synack, &opt, 0, NULL);
mss = opt.mss_clamp;
}
int saved_clamp = tp->rx_opt.mss_clamp;
bool fastopen_fail;
- tcp_parse_options(skb, &tp->rx_opt, 0, &foc);
+ tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, 0, &foc);
if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
tp->rx_opt.rcv_tsecr -= tp->tsoffset;
if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
!between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
- tcp_time_stamp)) {
+ tcp_time_stamp(tp))) {
NET_INC_STATS(sock_net(sk),
LINUX_MIB_PAWSACTIVEREJECTED);
goto reset_and_undo;
case TCP_SYN_SENT:
tp->rx_opt.saw_tstamp = 0;
- skb_mstamp_get(&tp->tcp_mstamp);
+ tcp_mstamp_refresh(tp);
queued = tcp_rcv_synsent_state_process(sk, skb, th);
if (queued >= 0)
return queued;
return 0;
}
- skb_mstamp_get(&tp->tcp_mstamp);
+ tcp_mstamp_refresh(tp);
tp->rx_opt.saw_tstamp = 0;
req = tp->fastopen_rsk;
if (req) {
/* step 5: check the ACK field */
acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
- FLAG_UPDATE_TS_RECENT) > 0;
+ FLAG_UPDATE_TS_RECENT |
+ FLAG_NO_CHALLENGE_ACK) > 0;
+ if (!acceptable) {
+ if (sk->sk_state == TCP_SYN_RECV)
+ return 1; /* send one RST */
+ tcp_send_challenge_ack(sk, skb);
+ goto discard;
+ }
switch (sk->sk_state) {
case TCP_SYN_RECV:
- if (!acceptable)
- return 1;
-
if (!tp->srtt_us)
tcp_synack_rtt_meas(sk, req);
tcp_update_pacing_rate(sk);
/* Prevent spurious tcp_cwnd_restart() on first data packet */
- tp->lsndtime = tcp_time_stamp;
+ tp->lsndtime = tcp_jiffies32;
tcp_initialize_rcv_mss(sk);
tcp_fast_path_on(tp);
* our SYNACK so stop the SYNACK timer.
*/
if (req) {
- /* Return RST if ack_seq is invalid.
- * Note that RFC793 only says to generate a
- * DUPACK for it but for TCP Fast Open it seems
- * better to treat this case like TCP_SYN_RECV
- * above.
- */
- if (!acceptable)
- return 1;
/* We no longer need the request sock. */
reqsk_fastopen_remove(sk, req, false);
tcp_rearm_rto(sk);
req->cookie_ts = 0;
tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
- skb_mstamp_get(&tcp_rsk(req)->snt_synack);
+ tcp_rsk(req)->snt_synack = tcp_clock_us();
tcp_rsk(req)->last_oow_ack_time = 0;
req->mss = rx_opt->mss_clamp;
req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
tcp_clear_options(&tmp_opt);
tmp_opt.mss_clamp = af_ops->mss_clamp;
tmp_opt.user_mss = tp->rx_opt.user_mss;
- tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
+ tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0,
+ want_cookie ? NULL : &foc);
if (want_cookie && !tmp_opt.saw_tstamp)
tcp_clear_options(&tmp_opt);