/ duct-tape / xnu / bsd / net / dlil.h
dlil.h
  1  /*
  2   * Copyright (c) 1999-2020 Apple Inc. All rights reserved.
  3   *
  4   * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  5   *
  6   * This file contains Original Code and/or Modifications of Original Code
  7   * as defined in and that are subject to the Apple Public Source License
  8   * Version 2.0 (the 'License'). You may not use this file except in
  9   * compliance with the License. The rights granted to you under the License
 10   * may not be used to create, or enable the creation or redistribution of,
 11   * unlawful or unlicensed copies of an Apple operating system, or to
 12   * circumvent, violate, or enable the circumvention or violation of, any
 13   * terms of an Apple operating system software license agreement.
 14   *
 15   * Please obtain a copy of the License at
 16   * http://www.opensource.apple.com/apsl/ and read it before using this file.
 17   *
 18   * The Original Code and all software distributed under the License are
 19   * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 20   * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 21   * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 22   * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 23   * Please see the License for the specific language governing rights and
 24   * limitations under the License.
 25   *
 26   * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 27   */
 28  #ifndef DLIL_H
 29  #define DLIL_H
 30  #ifdef KERNEL
 31  
 32  #include <sys/kernel_types.h>
 33  #include <net/kpi_interface.h>
 34  
 35  enum {
 36  	BPF_TAP_DISABLE,
 37  	BPF_TAP_INPUT,
 38  	BPF_TAP_OUTPUT,
 39  	BPF_TAP_INPUT_OUTPUT
 40  };
 41  
 42  /*
 43   * DLIL_DESC_ETYPE2 - native_type must point to 2 byte ethernet raw protocol,
 44   *                    variants.native_type_length must be set to 2
 45   * DLIL_DESC_SAP - native_type must point to 3 byte SAP protocol
 46   *                 variants.native_type_length must be set to 3
 47   * DLIL_DESC_SNAP - native_type must point to 5 byte SNAP protocol
 48   *                  variants.native_type_length must be set to 5
 49   *
 50   * All protocols must be in Network byte order.
 51   *
 52   * Future interface families may define more protocol types they know about.
 53   * The type implies the offset and context of the protocol data at native_type.
 54   * The length of the protocol data specified at native_type must be set in
 55   * variants.native_type_length.
 56   */
/* Ethernet specific types */
#define DLIL_DESC_ETYPE2        4       /* native_type: 2-byte ethernet raw protocol */
#define DLIL_DESC_SAP           5       /* native_type: 3-byte SAP protocol */
#define DLIL_DESC_SNAP          6       /* native_type: 5-byte SNAP protocol */
 61  
 62  #ifdef KERNEL_PRIVATE
 63  #include <net/if.h>
 64  #include <net/if_var.h>
 65  #include <net/classq/classq.h>
 66  #include <net/flowadv.h>
 67  #include <sys/kern_event.h>
 68  #include <kern/thread.h>
 69  #include <kern/locks.h>
 70  
 71  #ifdef BSD_KERNEL_PRIVATE
 72  /* Operations on timespecs. */
 73  #define net_timerclear(tvp)     (tvp)->tv_sec = (tvp)->tv_nsec = 0
 74  
 75  #define net_timerisset(tvp)     ((tvp)->tv_sec || (tvp)->tv_nsec)
 76  
 77  #define net_timercmp(tvp, uvp, cmp)                                     \
 78  	(((tvp)->tv_sec == (uvp)->tv_sec) ?                             \
 79  	((tvp)->tv_nsec cmp (uvp)->tv_nsec) :                           \
 80  	((tvp)->tv_sec cmp (uvp)->tv_sec))
 81  
 82  #define net_timeradd(tvp, uvp, vvp) do {                                \
 83  	(vvp)->tv_sec = (tvp)->tv_sec + (uvp)->tv_sec;                  \
 84  	(vvp)->tv_nsec = (tvp)->tv_nsec + (uvp)->tv_nsec;               \
 85  	if ((vvp)->tv_nsec >= (long)NSEC_PER_SEC) {                     \
 86  	        (vvp)->tv_sec++;                                        \
 87  	        (vvp)->tv_nsec -= NSEC_PER_SEC;                         \
 88  	}                                                               \
 89  } while (0)
 90  
 91  #define net_timersub(tvp, uvp, vvp) do {                                \
 92  	(vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec;                  \
 93  	(vvp)->tv_nsec = (tvp)->tv_nsec - (uvp)->tv_nsec;               \
 94  	if ((vvp)->tv_nsec < 0) {                                       \
 95  	        (vvp)->tv_sec--;                                        \
 96  	        (vvp)->tv_nsec += NSEC_PER_SEC;                         \
 97  	}                                                               \
 98  } while (0)
 99  
/*
 * Convert the timespec at tvp to microseconds and store the result in
 * *(usp).  The nanosecond part is divided by NSEC_PER_USEC; the seconds are
 * added only when tv_sec is strictly positive.
 */
#define net_timerusec(tvp, usp) do {                                    \
	*(usp) = (tvp)->tv_nsec / NSEC_PER_USEC;                        \
	if (0 < (tvp)->tv_sec) {                                        \
	        *(usp) += (tvp)->tv_sec * USEC_PER_SEC;                 \
	}                                                               \
} while (0)
105  
/*
 * Convert the timespec at tvp to nanoseconds and store the result in
 * *(nsp).  Starts from tv_nsec; tv_sec * NSEC_PER_SEC is added only when
 * tv_sec is strictly positive.
 */
#define net_timernsec(tvp, nsp) do {                                    \
	*(nsp) = (tvp)->tv_nsec;                                        \
	if (0 < (tvp)->tv_sec) {                                        \
	        *(nsp) += (tvp)->tv_sec * NSEC_PER_SEC;                 \
	}                                                               \
} while (0)
111  
/*
 * net_nsectimer(nsp, tvp): split the 64-bit nanosecond count stored at
 * *(nsp) into whole seconds and leftover nanoseconds, and store them in the
 * tv_sec / tv_nsec fields of the timespec at tvp.
 *
 * *(nsp) is copied into a local first, so it is read exactly once; tvp is
 * expanded several times and must not have side effects.  The macro
 * declares locals with fixed names (__nsp, __sec, ...) inside its own
 * do { } while (0) scope.
 *
 * Two variants are selected at compile time: a plain 64-bit division for
 * x86_64 / arm64, and a multiply-by-reciprocal variant for everything else.
 */
#if defined(__x86_64__) || defined(__arm64__)
#define net_nsectimer(nsp, tvp) do {                                    \
	u_int64_t __nsp = *(nsp);                                       \
	net_timerclear(tvp);                                            \
	uint64_t __sec = __nsp / NSEC_PER_SEC;                          \
	(tvp)->tv_sec = (__darwin_time_t)__sec;                         \
	(tvp)->tv_nsec = (long)(__nsp - __sec * NSEC_PER_SEC);          \
} while (0)
#else /* 32 bit */
/*
 * NSEC needs to be < 2^31*10^9 to be representable in a struct timespec
 * because __darwin_time_t is 32 bit on 32-bit platforms. This bound
 * is < 2^61. We get a first approximation to convert into seconds using
 * the following values.
 * a = floor(NSEC / 2^29)
 * inv = floor(2^61 / 10^9)
 *
 * The approximation of seconds is correct or too low by 1 unit.
 * So we fix it by computing the remainder.
 *
 * In the code below __a is "a" and __inv (0x89705F41 == 2305843009) is
 * "inv"; the approximation is the high 32 bits of their 64-bit product.
 * __rem is what is left of the input after removing the approximated
 * seconds.  When __rem >= NSEC_PER_SEC the approximation was one too low,
 * so one second is added to __sec and NSEC_PER_SEC is taken off the value
 * stored in tv_nsec.
 */
#define net_nsectimer(nsp, tvp) do {                                    \
	u_int64_t __nsp = *(nsp);                                       \
	net_timerclear(tvp);                                            \
	uint32_t __a = (uint32_t)(__nsp >> 29);                         \
	const uint32_t __inv = 0x89705F41;                              \
	uint32_t __sec = (uint32_t)(((uint64_t)__a * __inv) >> 32);     \
	uint32_t __rem = (uint32_t)(__nsp - __sec * NSEC_PER_SEC);      \
	__sec += ((__rem >= NSEC_PER_SEC) ? 1 : 0);                     \
	(tvp)->tv_sec = (__darwin_time_t)__sec;                         \
	(tvp)->tv_nsec =                                                \
	    (long)((__rem >= NSEC_PER_SEC) ? (__rem - NSEC_PER_SEC) : __rem);   \
} while(0)
#endif /* 32 bit */
145  
/*
 * Forward declarations so the prototypes and structures in this header can
 * name these types through pointers.
 * NOTE(review): struct ether_header is not referenced in the part of the
 * header visible here -- confirm it is still needed.
 */
struct ifnet;
struct mbuf;
struct ether_header;
struct sockaddr_dl;
struct iff_filter;
151  
/* Size in bytes of the dlth_name[] buffer in struct dlil_threading_info. */
#define DLIL_THREADNAME_LEN     32

/*
 * DLIL threading info
 *
 * Bundles a packet queue, the interface it belongs to, pending statistics,
 * state flags and the thread pointers associated with them.
 *
 * The structure layout depends on IFNET_INPUT_SANITY_CHK: dlth_pkts_cnt is
 * only present when that macro is non-zero, so every translation unit that
 * uses this structure must see the same setting.
 */
struct dlil_threading_info {
	/* NOTE(review): presumably the mutex guarding this structure -- confirm */
	decl_lck_mtx_data(, dlth_lock);
	class_queue_t   dlth_pkts;      /* queue of pkts */
	struct ifnet    *dlth_ifp;      /* pointer to interface */
	struct ifnet_stat_increment_param dlth_stats; /* incremental stats */
	uint32_t       dlth_flags;      /* thread flags (DLIL_INPUT_* / DLIL_PROTO_* bits) */
	uint32_t       dlth_wtot;       /* # of wakeup requests */

	/*
	 * strategy (sync or async)
	 *
	 * Takes this structure as its first argument, followed by the same
	 * parameter types as dlil_input_handler().
	 */
	errno_t (*dlth_strategy)(struct dlil_threading_info *,
	    struct ifnet *, struct mbuf *, struct mbuf *,
	    const struct ifnet_stat_increment_param *, boolean_t,
	    struct thread *);

	/*
	 * Thread affinity (workloop and DLIL threads).
	 */
	boolean_t       dlth_affinity;          /* affinity set is available */
	uint32_t        dlth_affinity_tag;      /* affinity tag */
	struct thread   *dlth_thread;           /* DLIL worker thread */
	struct thread   *dlth_driver_thread;    /* driver/workloop thread */
	struct thread   *dlth_poller_thread;    /* poll thread */

	lck_grp_t       *dlth_lock_grp; /* lock group (for lock stats) */
	char            dlth_name[DLIL_THREADNAME_LEN]; /* name storage */

#if IFNET_INPUT_SANITY_CHK
	/*
	 * For debugging.
	 */
	uint64_t        dlth_pkts_cnt;          /* total # of packets */
#endif
};
190  
/*
 * DLIL input thread info (for main/loopback input thread)
 *
 * Extends struct dlil_threading_info, embedded as the first member, with a
 * separate queue for lo0 packets.
 * NOTE(review): dlil_main_input_thread is declared as a
 * struct dlil_threading_info *, so it presumably points at the embedded
 * 'inp' member of one of these -- confirm against the definition.
 */
struct dlil_main_threading_info {
	struct dlil_threading_info      inp;    /* common threading info; must stay first if callers cast */
	class_queue_t                   lo_rcvq_pkts; /* queue of lo0 pkts */
};
198  
199  /*
200   * Valid values for dlth_flags.
201   *
202   * The following are shared with kpi_protocol.c so that it may wakeup
203   * the input thread to run through packets queued for protocol input.
204   */
/*
 * Each value is a distinct single bit, so the flags can be combined in the
 * 32-bit dlth_flags word.  Bits are allocated downward from bit 31, except
 * DLIL_INPUT_EMBRYONIC, which is bit 0.
 */
#define DLIL_INPUT_RUNNING              0x80000000
#define DLIL_INPUT_WAITING              0x40000000
#define DLIL_PROTO_REGISTER             0x20000000
#define DLIL_PROTO_WAITING              0x10000000
#define DLIL_INPUT_TERMINATE            0x08000000
#define DLIL_INPUT_TERMINATE_COMPLETE   0x04000000
#define DLIL_INPUT_EMBRYONIC            0x00000001
212  
/*
 * Flags for dlil_attach_filter()
 *
 * Distinct single-bit values; dlil_attach_filter() takes them in its
 * trailing u_int32_t argument.
 */
#define DLIL_IFF_TSO            0x01    /* Interface filter supports TSO */
#define DLIL_IFF_INTERNAL       0x02    /* Apple internal -- do not count towards stats */
218  
/*
 * Input poll interval definitions
 *
 * Both values are unsigned long long constants.  Per the per-line notes
 * (1000 == 1 us, 1000000 == 1 ms) they are expressed in nanoseconds.
 */
#define IF_RXPOLL_INTERVALTIME_MIN      (1ULL * 1000)           /* 1 us */
#define IF_RXPOLL_INTERVALTIME          (1ULL * 1000 * 1000)    /* 1 ms */
222  
/*
 * Globals shared with the rest of the networking stack.  They are only
 * declared here; the definitions live outside this header.
 * NOTE(review): the meaning of each tunable is not visible from this
 * header -- consult the definitions before relying on the names alone.
 */
extern int dlil_verbose;
extern uint32_t hwcksum_dbg;
extern uint32_t hwcksum_tx;
extern uint32_t hwcksum_rx;
extern struct dlil_threading_info *dlil_main_input_thread;
extern unsigned int net_rxpoll;
extern uint32_t if_rxpoll;
extern uint32_t if_rxpoll_decay;
extern uint32_t if_rxpoll_interval_pkts;
extern uint32_t if_rcvq_maxlen;
233  
/* Takes no arguments and returns nothing; defined outside this header. */
extern void dlil_init(void);

/*
 * ifp_if_* routines.
 * NOTE(review): presumably the default ioctl/output/start handlers for an
 * ifnet -- confirm against the implementation.
 */
extern errno_t ifp_if_ioctl(struct ifnet *, unsigned long, void *);
extern errno_t ifp_if_output(struct ifnet *, struct mbuf *);
extern void ifp_if_start(struct ifnet *);

/* Arguments: interface, tap mode, packet callback (per the parameter types). */
extern errno_t dlil_set_bpf_tap(ifnet_t, bpf_tap_mode, bpf_packet_func);

/*
 * Send arp internal bypasses the check for IPv4LL.
 *
 * Takes the same parameters as dlil_send_arp() except for that function's
 * trailing u_int32_t argument.
 */
extern errno_t dlil_send_arp_internal(ifnet_t, u_int16_t,
    const struct sockaddr_dl *, const struct sockaddr *,
    const struct sockaddr_dl *, const struct sockaddr *);
248  
249  /*
250   * The following constants are used with the net_thread_mark_apply and
251   * net_thread_is_unmarked functions to control the bits in the uu_network_marks
252   * field of the uthread structure.
253   */
/* Distinct single-bit values, so several marks can be OR-ed into one u_int32_t mask. */
#define NET_THREAD_HELD_PF      0x1     /* thread is holding PF lock */
#define NET_THREAD_HELD_DOMAIN  0x2     /* thread is holding domain_proto_mtx */
#define NET_THREAD_CKREQ_LLADDR 0x4     /* thread reqs MACF check for LLADDR */
257  
258  /*
259   * net_thread_marks_t is a pointer to a phantom structure type used for
260   * manipulating the uthread:uu_network_marks field.  As an example...
261   *
262   *   static const u_int32_t bits = NET_THREAD_CKREQ_LLADDR;
263   *   struct uthread *uth = get_bsdthread_info(current_thread());
264   *
265   *   net_thread_marks_t marks = net_thread_marks_push(bits);
266   *   VERIFY((uth->uu_network_marks & NET_THREAD_CKREQ_LLADDR) != 0);
267   *   net_thread_marks_pop(marks);
268   *
269   * The net_thread_marks_push() function returns an encoding of the bits
270   * that were changed from zero to one in the uu_network_marks field. When
271   * the net_thread_marks_pop() function later processes that value, it
272   * resets the bits to their previous value.
273   *
274   * The net_thread_unmarks_push() and net_thread_unmarks_pop() functions
275   * are similar to net_thread_marks_push() and net_thread_marks_pop() except
276   * they clear the marks bits in the guarded section rather than set them.
277   *
278   * The net_thread_is_marked() and net_thread_is_unmarked() functions return
279   * the subset of the bits that are currently set or cleared (respectively)
280   * in the uthread:uu_network_marks field.
281   *
282   * Finally, the value of the net_thread_marks_none constant is provided for
283   * comparing for equality with the value returned when no bits in the marks
284   * field are changed by the push.
285   *
286   * It is not significant that a value of type net_thread_marks_t may
287   * compare as equal to the NULL pointer.
288   */
/* Phantom type: declared but never defined, used only through the pointer typedef. */
struct net_thread_marks;
typedef const struct net_thread_marks *net_thread_marks_t;

/* Value returned by a push that changed no bits; compare with == . */
extern const net_thread_marks_t net_thread_marks_none;

/*
 * push: takes the NET_THREAD_* bits to set (marks) or clear (unmarks) and
 * returns a token encoding the bits that were actually changed.
 * pop: takes the token from the matching push and restores those bits.
 */
extern net_thread_marks_t net_thread_marks_push(u_int32_t);
extern net_thread_marks_t net_thread_unmarks_push(u_int32_t);
extern void net_thread_marks_pop(net_thread_marks_t);
extern void net_thread_unmarks_pop(net_thread_marks_t);
/* Return the subset of the given bits that is currently set / cleared. */
extern u_int32_t net_thread_is_marked(u_int32_t);
extern u_int32_t net_thread_is_unmarked(u_int32_t);
300  
/*
 * NOTE(review): the grouping labels in this section are inferred from the
 * function names and parameter types; the implementations are not visible
 * in this header.
 */

/* Output path. */
extern int dlil_output(ifnet_t, protocol_family_t, mbuf_t, void *,
    const struct sockaddr *, int, struct flowadv *);

/* Input path; the _extended form takes two additional arguments. */
extern void dlil_input_packet_list(struct ifnet *, struct mbuf *);
extern void dlil_input_packet_list_extended(struct ifnet *, struct mbuf *,
    u_int32_t, ifnet_model_t);

extern errno_t dlil_resolve_multi(struct ifnet *,
    const struct sockaddr *, struct sockaddr *, size_t);

/*
 * Same parameters as dlil_send_arp_internal() plus a trailing u_int32_t.
 */
extern errno_t dlil_send_arp(ifnet_t, u_int16_t, const struct sockaddr_dl *,
    const struct sockaddr *, const struct sockaddr_dl *,
    const struct sockaddr *, u_int32_t);

/*
 * Interface filters.  The trailing u_int32_t of dlil_attach_filter() takes
 * the DLIL_IFF_* flags; the interface_filter_t * is an out-style pointer
 * whose type matches the argument of dlil_detach_filter().
 */
extern int dlil_attach_filter(ifnet_t, const struct iff_filter *,
    interface_filter_t *, u_int32_t);
extern void dlil_detach_filter(interface_filter_t);
extern boolean_t dlil_has_ip_filter(void);
extern boolean_t dlil_has_if_filter(struct ifnet *);

extern void dlil_proto_unplumb_all(ifnet_t);

/* Message / kernel event posting. */
extern int dlil_post_msg(struct ifnet *, u_int32_t, u_int32_t,
    struct net_event_data *, u_int32_t);

extern void dlil_post_sifflags_msg(struct ifnet *);

extern int dlil_post_complete_msg(struct ifnet *, struct kev_msg *);

extern int dlil_alloc_local_stats(struct ifnet *);

/* TSO filter state and receive polling. */
extern void ifnet_filter_update_tso(boolean_t filter_enable);
extern errno_t dlil_rxpoll_validate_params(struct ifnet_poll_params *);
extern void dlil_rxpoll_update_params(struct ifnet *,
    struct ifnet_poll_params *);
extern void ifnet_poll(struct ifnet *);
extern errno_t ifnet_input_poll(struct ifnet *, struct mbuf *,
    struct mbuf *, const struct ifnet_stat_increment_param *);
339  
340  
/*
 * dlil_if_acquire is obsolete. Use ifnet_allocate.
 *
 * The struct ifnet ** argument is a pointer-to-pointer.
 * NOTE(review): presumably it receives the acquired interface -- confirm.
 */
extern int dlil_if_acquire(u_int32_t, const void *, size_t, const char *, struct ifnet **);
/*
 * dlil_if_release is obsolete. The equivalent is called automatically when
 * an interface is detached.
 */
extern void dlil_if_release(struct ifnet *ifp);

/*
 * NOTE(review): by name these look like a reference take / free pair for an
 * ifnet; the counting rules are not visible here -- confirm before use.
 */
extern errno_t dlil_if_ref(struct ifnet *);
extern errno_t dlil_if_free(struct ifnet *);
353  
/*
 * Node presence notifications.  The u_int8_t[48] parameters are array
 * declarators in a prototype, so they are adjusted to u_int8_t *; the 48
 * documents the expected length but is not enforced by the compiler.
 * dlil_node_present_v2() takes one extra struct sockaddr_dl * argument.
 */
extern int dlil_node_present(struct ifnet *, struct sockaddr *, int32_t, int,
    int, u_int8_t[48]);
extern void dlil_node_absent(struct ifnet *, struct sockaddr *);
extern int dlil_node_present_v2(struct ifnet *, struct sockaddr *, struct sockaddr_dl *, int32_t, int,
    int, u_int8_t[48]);

/*
 * Returns a const pointer; the size_t * argument is an out-style pointer.
 * NOTE(review): presumably it receives the byte count -- confirm.
 */
extern const void *dlil_ifaddr_bytes(const struct sockaddr_dl *, size_t *,
    kauth_cred_t *);

/* Array parameters decay to pointers; the lengths are documentation only. */
extern void dlil_report_issues(struct ifnet *, u_int8_t[DLIL_MODIDLEN],
    u_int8_t[DLIL_MODARGLEN]);

/* NOTE(review): presumably the bucket count that proto_hash_value() maps into -- confirm. */
#define PROTO_HASH_SLOTS        5

extern int proto_hash_value(u_int32_t);

/* Returns a const string for a u_int32_t code. */
extern const char *dlil_kev_dl_code_str(u_int32_t);

/* Set / get counterparts operating on a struct ifnet_poll_params. */
extern errno_t dlil_rxpoll_set_params(struct ifnet *,
    struct ifnet_poll_params *, boolean_t);
extern errno_t dlil_rxpoll_get_params(struct ifnet *,
    struct ifnet_poll_params *);

/*
 * dlil_input_handler() has the same parameter types as the dlth_strategy
 * callback in struct dlil_threading_info, minus that callback's leading
 * struct dlil_threading_info * argument.
 */
extern errno_t dlil_output_handler(struct ifnet *, struct mbuf *);
extern errno_t dlil_input_handler(struct ifnet *, struct mbuf *,
    struct mbuf *, const struct ifnet_stat_increment_param *,
    boolean_t, struct thread *);
381  
382  
383  /*
384   * This is mostly called from the context of the DLIL input thread;
385   * because of that there is no need for atomic operations.
386   */
387  __attribute__((always_inline))
388  static inline void
389  ifp_inc_traffic_class_in(struct ifnet *ifp, struct mbuf *m)
390  {
391  	if (!(m->m_flags & M_PKTHDR)) {
392  		return;
393  	}
394  
395  	switch (m_get_traffic_class(m)) {
396  	case MBUF_TC_BE:
397  		ifp->if_tc.ifi_ibepackets++;
398  		ifp->if_tc.ifi_ibebytes += (u_int64_t)m->m_pkthdr.len;
399  		break;
400  	case MBUF_TC_BK:
401  		ifp->if_tc.ifi_ibkpackets++;
402  		ifp->if_tc.ifi_ibkbytes += (u_int64_t)m->m_pkthdr.len;
403  		break;
404  	case MBUF_TC_VI:
405  		ifp->if_tc.ifi_ivipackets++;
406  		ifp->if_tc.ifi_ivibytes += (u_int64_t)m->m_pkthdr.len;
407  		break;
408  	case MBUF_TC_VO:
409  		ifp->if_tc.ifi_ivopackets++;
410  		ifp->if_tc.ifi_ivobytes += (u_int64_t)m->m_pkthdr.len;
411  		break;
412  	default:
413  		break;
414  	}
415  
416  	if (mbuf_is_traffic_class_privileged(m)) {
417  		ifp->if_tc.ifi_ipvpackets++;
418  		ifp->if_tc.ifi_ipvbytes += (u_int64_t)m->m_pkthdr.len;
419  	}
420  }
421  
422  /*
423   * This is called from DLIL output, hence multiple threads could end
424   * up modifying the statistics.  We trade off acccuracy for performance
425   * by not using atomic operations here.
426   */
427  __attribute__((always_inline))
428  static inline void
429  ifp_inc_traffic_class_out(struct ifnet *ifp, struct mbuf *m)
430  {
431  	if (!(m->m_flags & M_PKTHDR)) {
432  		return;
433  	}
434  
435  	switch (m_get_traffic_class(m)) {
436  	case MBUF_TC_BE:
437  		ifp->if_tc.ifi_obepackets++;
438  		ifp->if_tc.ifi_obebytes += (u_int64_t)m->m_pkthdr.len;
439  		break;
440  	case MBUF_TC_BK:
441  		ifp->if_tc.ifi_obkpackets++;
442  		ifp->if_tc.ifi_obkbytes += (u_int64_t)m->m_pkthdr.len;
443  		break;
444  	case MBUF_TC_VI:
445  		ifp->if_tc.ifi_ovipackets++;
446  		ifp->if_tc.ifi_ovibytes += (u_int64_t)m->m_pkthdr.len;
447  		break;
448  	case MBUF_TC_VO:
449  		ifp->if_tc.ifi_ovopackets++;
450  		ifp->if_tc.ifi_ovobytes += (u_int64_t)m->m_pkthdr.len;
451  		break;
452  	default:
453  		break;
454  	}
455  
456  	if (mbuf_is_traffic_class_privileged(m)) {
457  		ifp->if_tc.ifi_opvpackets++;
458  		ifp->if_tc.ifi_opvbytes += (u_int64_t)m->m_pkthdr.len;
459  	}
460  }
461  #endif /* BSD_KERNEL_PRIVATE */
462  #endif /* KERNEL_PRIVATE */
463  #endif /* KERNEL */
464  #endif /* DLIL_H */