/ duct-tape / xnu / bsd / netinet / tcp_timer.h
tcp_timer.h
  1  /*
  2   * Copyright (c) 2000-2014 Apple Computer, Inc. All rights reserved.
  3   *
  4   * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  5   *
  6   * This file contains Original Code and/or Modifications of Original Code
  7   * as defined in and that are subject to the Apple Public Source License
  8   * Version 2.0 (the 'License'). You may not use this file except in
  9   * compliance with the License. The rights granted to you under the License
 10   * may not be used to create, or enable the creation or redistribution of,
 11   * unlawful or unlicensed copies of an Apple operating system, or to
 12   * circumvent, violate, or enable the circumvention or violation of, any
 13   * terms of an Apple operating system software license agreement.
 14   *
 15   * Please obtain a copy of the License at
 16   * http://www.opensource.apple.com/apsl/ and read it before using this file.
 17   *
 18   * The Original Code and all software distributed under the License are
 19   * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 20   * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 21   * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 22   * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 23   * Please see the License for the specific language governing rights and
 24   * limitations under the License.
 25   *
 26   * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 27   */
 28  /*
 29   * Copyright (c) 1982, 1986, 1993
 30   *	The Regents of the University of California.  All rights reserved.
 31   *
 32   * Redistribution and use in source and binary forms, with or without
 33   * modification, are permitted provided that the following conditions
 34   * are met:
 35   * 1. Redistributions of source code must retain the above copyright
 36   *    notice, this list of conditions and the following disclaimer.
 37   * 2. Redistributions in binary form must reproduce the above copyright
 38   *    notice, this list of conditions and the following disclaimer in the
 39   *    documentation and/or other materials provided with the distribution.
 40   * 3. All advertising materials mentioning features or use of this software
 41   *    must display the following acknowledgement:
 42   *	This product includes software developed by the University of
 43   *	California, Berkeley and its contributors.
 44   * 4. Neither the name of the University nor the names of its contributors
 45   *    may be used to endorse or promote products derived from this software
 46   *    without specific prior written permission.
 47   *
 48   * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 49   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 50   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 51   * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 52   * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 53   * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 54   * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 55   * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 56   * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 57   * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 58   * SUCH DAMAGE.
 59   *
 60   *	@(#)tcp_timer.h	8.1 (Berkeley) 6/10/93
 61   * $FreeBSD: src/sys/netinet/tcp_timer.h,v 1.18 1999/12/29 04:41:03 peter Exp $
 62   */
 63  
 64  #ifndef _NETINET_TCP_TIMER_H_
 65  #define _NETINET_TCP_TIMER_H_
 66  #include <sys/appleapiopts.h>
 67  
 68  #ifdef BSD_KERNEL_PRIVATE
 69  #include <kern/thread_call.h>
 70  #endif /* BSD_KERNEL_PRIVATE */
 71  
 72  /* Keep the external definition the same for binary compatibility */
 73  #define TCPT_NTIMERS_EXT        4
 74  
 75  /*
 76   * Definitions of the TCP timers.
 77   *
 78   * The TCPT_PTO timer is used for probing for a tail loss in a send window.
 79   * If this probe gets acknowledged using SACK, it will allow the connection
 80   * to enter fast-recovery instead of hitting a retransmit timeout. A probe
 81   * timeout will send the last unacknowledged segment to generate more acks
 82   * with SACK information which can be used for fast-retransmitting the lost
 83   * packets. This will fire in the order of 10ms.
 84   *
 85   * The TCPT_REXMT timer is used to force retransmissions.
 86   * The TCP has the TCPT_REXMT timer set whenever segments
 87   * have been sent for which ACKs are expected but not yet
 88   * received.  If an ACK is received which advances tp->snd_una,
 89   * then the retransmit timer is cleared (if there are no more
 90   * outstanding segments) or reset to the base value (if there
 91   * are more ACKs expected).  Whenever the retransmit timer goes off,
 92   * we retransmit one unacknowledged segment, and do a backoff
 93   * on the retransmit timer.
 94   *
 95   * The TCPT_DELACK timer is used for transmitting delayed acknowledgements
 96   * if an acknowledgement was delayed in anticipation of a new segment.
 97   *
 98   * The TCPT_PERSIST timer is used to keep window size information
 99   * flowing even if the window goes shut.  If all previous transmissions
100   * have been acknowledged (so that there are no retransmissions in progress),
101   * and the window is too small to bother sending anything, then we start
102   * the TCPT_PERSIST timer.  When it expires, if the window is nonzero,
103   * we go to transmit state.  Otherwise, at intervals send a single byte
104   * into the peer's window to force him to update our window information.
105   * We do this at most as often as TCPT_PERSMIN time intervals,
106   * but no more frequently than the current estimate of round-trip
107   * packet time.  The TCPT_PERSIST timer is cleared whenever we receive
108   * a window update from the peer.
109   *
110   * The TCPT_KEEP timer is used to keep connections alive.  If a
111   * connection is idle (no segments received) for TCPTV_KEEP_INIT amount
112   * of time, but not yet established, then we drop the connection.
113   * Once the connection is established, if the connection is idle for
114   * TCPTV_KEEP_IDLE time (and keepalives have been enabled on the socket),
115   * we begin to probe the connection.  We force the peer to send us a
116   * segment by sending:
117   *	<SEQ=SND.UNA-1><ACK=RCV.NXT><CTL=ACK>
118   * This segment is (deliberately) outside the window, and should elicit
119   * an ack segment in response from the peer.  If, despite the TCPT_KEEP
120   * initiated segments we cannot elicit a response from a peer in
121   * TCP_CONN_MAXIDLE amount of time probing, then we drop the connection.
122   *
123   * The TCPT_2MSL timer is used for keeping the connection in Time-wait state
124   * before fully closing it so that the connection 4-tuple can be reused.
125   */
#ifdef BSD_KERNEL_PRIVATE

/*
 * Kernel-private timer indices. The numeric order is significant: the
 * IS_TIMER_HZ_*() macros in this header classify a timer by comparing
 * its index against TCPT_REXMT and TCPT_PERSIST.
 */
#define TCPT_PTO                0       /* probe timeout */
#define TCPT_DELAYFR            1       /* delay recovery if there is reordering */
#define TCPT_REXMT              2       /* retransmit */
#define TCPT_DELACK             3       /* delayed ack */
#define TCPT_PERSIST            4       /* retransmit persistence */
#define TCPT_KEEP               5       /* keep alive */
#define TCPT_2MSL               6       /* 2*msl quiet time timer */
#if MPTCP
#define TCPT_JACK_RXMT          7       /* retransmit timer for join ack */
#define TCPT_CELLICON           8       /* timer to check for cell-activity */
#define TCPT_MAX                8
#else /* !MPTCP */
#define TCPT_MAX                6
#endif /* MPTCP */

/* Timer indices as seen through the external definitions */
#define TCPT_REXMT_EXT          0
#define TCPT_PERSIST_EXT        1
#define TCPT_KEEP_EXT           2
#define TCPT_2MSL_EXT           3
#define TCPT_DELACK_EXT         4

#else /* !BSD_KERNEL_PRIVATE */

#define TCPT_REXMT              0       /* retransmit */
#define TCPT_PERSIST            1       /* retransmit persistence */
#define TCPT_KEEP               2       /* keep alive */
#define TCPT_2MSL               3       /* 2*msl quiet time timer */
#define TCPT_DELACK             4       /* delayed ack timer */
#if MPTCP
#define TCPT_JACK_RXMT          5       /* retransmit timer for join ack */
#define TCPT_MAX                5
#else /* !MPTCP */
#define TCPT_MAX                4
#endif /* MPTCP */

#endif /* BSD_KERNEL_PRIVATE */

/*
 * Common to both views: TCPT_MAX is the highest valid timer index, so
 * the timer count and the "no timer" marker are both one past it.
 */
#define TCPT_NONE       (TCPT_MAX + 1)
#define TCPT_NTIMERS    (TCPT_MAX + 1)
169  
170  #ifdef BSD_KERNEL_PRIVATE
171  /*
172   * Time constants.
173   */
/*
 * Every interval below is a multiple or fraction of TCP_RETRANSHZ,
 * which is defined outside this header.
 */
#define TCPTV_MSL       (15 * TCP_RETRANSHZ)    /* max seg lifetime */
#define TCPTV_SRTTBASE  0                       /* base roundtrip time; if 0, no idea yet */
#define TCPTV_RTOBASE   (1 * TCP_RETRANSHZ)     /* assumed RTO if no info */
#define TCPTV_SRTTDFLT  (1 * TCP_RETRANSHZ)     /* assumed RTT if no info */
#define TCPTV_PERSMIN   (5 * TCP_RETRANSHZ)     /* retransmit persistence */
#define TCPTV_PERSMAX   (60 * TCP_RETRANSHZ)    /* maximum persist interval */

extern int tcptv_persmin_val;

/* Keepalive defaults */
#define TCPTV_KEEP_INIT (75 * TCP_RETRANSHZ)            /* connect keep alive */
#define TCPTV_KEEP_IDLE (120 * 60 * TCP_RETRANSHZ)      /* time before probing */
#define TCPTV_KEEPINTVL (75 * TCP_RETRANSHZ)            /* default probe interval */
#define TCPTV_KEEPCNT   8                               /* max probes before drop */

/* Bounds on the retransmit timer */
#define TCPTV_REXMTMAX  (64 * TCP_RETRANSHZ)    /* max REXMT value */
#define TCPTV_REXMTMIN  (TCP_RETRANSHZ / 33)    /* min REXMT for non-local connections */
190  
/*
 * Length of the window over which received bytes are counted to decide
 * whether ack-stretching can start (default 100 ms).
 */
#define TCPTV_UNACKWIN  (TCP_RETRANSHZ / 10)

/* Receiver idle time; ack-stretching is avoided after this idle time */
#define TCPTV_MAXRCVIDLE (TCP_RETRANSHZ / 5)

/*
 * No ack stretching during slow-start, until we see some packets.
 * By the time the receiver gets 512 packets, the sender's cwnd
 * should open by a few hundred packets considering the
 * slow-start progression.
 */
#define TCP_RCV_SS_PKTCOUNT     512

/* RTO factor to truncate TW */
#define TCPTV_TWTRUNC   8

/* linger at most 2 minutes */
#define TCP_LINGERTIME  120

/* maximum retransmits */
#define TCP_MAXRXTSHIFT 12
213  
214  #ifdef  TCPTIMERS
/*
 * Printable timer names, listed in the order of the external timer
 * indices (TCPT_REXMT_EXT through TCPT_DELACK_EXT).
 *
 * The elements point at string literals, so they are const-qualified;
 * the strings must not be written through this table.
 *
 * NOTE(review): under BSD_KERNEL_PRIVATE the TCPT_* indices use a
 * different numbering (TCPT_REXMT is 2 there), so this table appears
 * to be meant for the *_EXT values only -- confirm against callers.
 */
static const char *tcptimers[] =
{ "REXMT", "PERSIST", "KEEP", "2MSL", "DELACK" };
217  #endif /* TCPTIMERS */
218  
/*
 * Timer granularity classes, selected by timer index:
 *
 *  - below TCPT_REXMT: quick timers which run in the order of 10ms
 *    (the probe timeout);
 *  - from TCPT_REXMT up to, but not including, TCPT_PERSIST: fast
 *    timers which run in the order of 100ms (rexmt and delayed ack);
 *  - TCPT_PERSIST and above: slow timers which can be coalesced at a
 *    higher granularity of 500 ms (persist, keep, 2msl and MPTCP's
 *    join-ack timer).
 *
 * Each macro may evaluate its argument more than once.
 */
#define IS_TIMER_HZ_10MS(i)     (TCPT_REXMT > (i))
#define IS_TIMER_HZ_100MS(i)    (TCPT_REXMT <= (i) && TCPT_PERSIST > (i))
#define IS_TIMER_HZ_500MS(i)    (TCPT_PERSIST <= (i))
231  
232  struct tcptimerlist;
233  
234  struct tcptimerentry {
235  	LIST_ENTRY(tcptimerentry) le;   /* links for timer list */
236  	uint32_t timer_start;   /* tcp clock when the timer was started */
237  	uint16_t index;         /* index of lowest timer that needs to run first */
238  	uint16_t mode;          /* Bit-wise OR of timers that are active */
239  	uint32_t runtime;       /* deadline at which the first timer has to fire */
240  };
241  
242  LIST_HEAD(timerlisthead, tcptimerentry);
243  
244  struct tcptimerlist {
245  	struct timerlisthead lhead;     /* head of the list */
246  	lck_mtx_t *mtx;         /* lock to protect the list */
247  	lck_attr_t *mtx_attr;   /* mutex attributes */
248  	lck_grp_t *mtx_grp;     /* mutex group definition */
249  	lck_grp_attr_t *mtx_grp_attr;   /* mutex group attributes */
250  	thread_call_t call;     /* call entry */
251  	uint32_t runtime;       /* time at which this list is going to run */
252  	uint32_t schedtime;     /* time at which this list was scheduled */
253  	uint32_t entries;       /* Number of entries on the list */
254  	uint32_t maxentries;    /* Max number of entries at any time */
255  
256  	/* Set desired mode when timer list running */
257  	boolean_t running;      /* Set when timer list is being processed */
258  	boolean_t scheduled;    /* set when the timer is scheduled */
259  #define TCP_TIMERLIST_10MS_MODE 0x1
260  #define TCP_TIMERLIST_100MS_MODE 0x2
261  #define TCP_TIMERLIST_500MS_MODE 0x4
262  	uint32_t mode;          /* Current mode of the timer */
263  	uint32_t pref_mode;     /* Preferred mode set by a connection */
264  	uint32_t pref_offset;   /* Preferred offset set by a connection */
265  	uint32_t idleruns;      /* Number of times the list has been idle in fast mode */
266  	struct tcptimerentry *next_te;  /* next timer entry pointer to process */
267  	u_int16_t probe_if_index; /* Interface index that needs to send probes */
268  };
269  
/*
 * Upper bound on the number of idle runs allowed for the TCP timer
 * list while it is in fast or quick mode.
 */
#define TCP_FASTMODE_IDLERUN_MAX        10
272  
/*
 * Retransmit slop.
 *
 * The minimum retransmit timeout is set to 30ms. A slop of 200 ms is
 * added to the retransmit value to account for processing variance
 * and delayed ack. The extra 200ms helps to avoid spurious
 * retransmits by taking into consideration receivers that wait for
 * the delayed ack timer instead of generating an ack for every two
 * packets.
 *
 * On a local link the minimum retransmit timeout is 100ms and the
 * variance is set to 0, which makes the sender a little more
 * aggressive there. While the connection is not established yet there
 * is no need to add the extra 200ms, because the initial value is
 * high (1s) and delayed ack is not a problem in that case.
 */
#define TCPTV_REXMTSLOP (TCP_RETRANSHZ / 5)     /* extra 200 ms slop */
289  
/*
 * Decide whether the retransmit slop should be added for a connection.
 * Evaluates to non-zero when the connection state is TCPS_ESTABLISHED
 * or any numerically greater state.
 *
 * tp: pointer to the TCP control block. The argument is parenthesized
 *     before it is dereferenced, so any pointer-valued expression
 *     (for example &cb or base + 1) is accepted.
 */
#define TCP_ADD_REXMTSLOP(tp) ((tp)->t_state >= TCPS_ESTABLISHED)
292  
/*
 * Store (value), plus tcp_rexmt_slop when (addslop) is true, into (tv)
 * and then clamp (tv) to the range [tvmin, tvmax]. The comparisons are
 * done on uint32_t values, and the lower bound is checked first.
 *
 * (tv) must be an lvalue; tv, tvmin and tvmax may be evaluated more
 * than once.
 */
#define TCPT_RANGESET(tv, value, tvmin, tvmax, addslop) do { \
	(tv) = (value) + ((addslop) ? tcp_rexmt_slop : 0); \
	if ((uint32_t)(tv) < (uint32_t)(tvmin)) { \
	        (tv) = (tvmin); \
	} else if ((uint32_t)(tv) > (uint32_t)(tvmax)) { \
	        (tv) = (tvmax); \
	} \
} while (0)
300  
/*
 * Per-connection keepalive parameters. Each macro yields the value
 * stored in the control block when one is set, and the corresponding
 * global default (converted to uint32_t) otherwise.
 */

/*
 * Idle time before probing: the per-connection value is used only when
 * it is non-zero and SO_KEEPALIVE is set on the connection's socket.
 */
#define TCP_CONN_KEEPIDLE(tp) \
	((((tp)->t_keepidle) && \
	(((tp)->t_inpcb->inp_socket->so_options) & SO_KEEPALIVE)) ? \
	        (tp)->t_keepidle : (uint32_t)tcp_keepidle)

/* Time allowed to establish the connection */
#define TCP_CONN_KEEPINIT(tp) \
	((0 < (tp)->t_keepinit) ? (tp)->t_keepinit : (uint32_t)tcp_keepinit)

/* Number of keepalive probes */
#define TCP_CONN_KEEPCNT(tp) \
	((0 < (tp)->t_keepcnt) ? (tp)->t_keepcnt : (uint32_t)tcp_keepcnt)

/* Interval between keepalive probes */
#define TCP_CONN_KEEPINTVL(tp) \
	((0 < (tp)->t_keepintvl) ? (tp)->t_keepintvl : (uint32_t)tcp_keepintvl)

/* Probe count multiplied by the probe interval */
#define TCP_CONN_MAXIDLE(tp) \
	(TCP_CONN_KEEPCNT(tp) * TCP_CONN_KEEPINTVL(tp))
313  
/*
 * Idle timeout for a connection: the current retransmit timeout
 * (t_rxtcur), plus tcp_rexmt_slop when TCP_ADD_REXMTSLOP(tp) is false.
 *
 * NOTE(review): the slop is added only in the case where
 * TCP_ADD_REXMTSLOP() says it is not added -- presumably because
 * t_rxtcur already includes it otherwise; confirm against the code
 * that sets t_rxtcur.
 *
 * tp: pointer to the TCP control block. It is parenthesized before it
 *     is dereferenced, so any pointer-valued expression is accepted.
 */
#define TCP_IDLETIMEOUT(tp) \
	(((TCP_ADD_REXMTSLOP(tp)) ? 0 : tcp_rexmt_slop) + (tp)->t_rxtcur)
316  
317  TAILQ_HEAD(tcptailq, tcpcb);
318  
319  extern int tcp_keepinit;        /* time to establish connection */
320  extern int tcp_keepidle;        /* time before keepalive probes begin */
321  extern int tcp_keepintvl;       /* time between keepalive probes */
322  extern int tcp_keepcnt;         /* number of keepalives */
323  extern int tcp_delack;          /* delayed ack timer */
324  extern int tcp_maxpersistidle;
325  extern int tcp_msl;
326  extern int tcp_ttl;             /* time to live for TCP segs */
327  extern int tcp_backoff[];
328  extern int tcp_rexmt_slop;
329  extern u_int32_t tcp_max_persist_timeout;       /* Maximum persistence for Zero Window Probes */
330  
/*
 * Express a delay of (off) from the current tcp_now as an offset from
 * the start time recorded in the connection's timer entry.
 */
#define OFFSET_FROM_START(tp, off) \
	(((off) + tcp_now) - (tp)->tentry.timer_start)
332  
333  #endif /* BSD_KERNEL_PRIVATE */
334  #endif /* !_NETINET_TCP_TIMER_H_ */