/ duct-tape / pthread / kern_synch.c
kern_synch.c
   1  /*
   2   * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
   3   *
   4   * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5   *
   6   * This file contains Original Code and/or Modifications of Original Code
   7   * as defined in and that are subject to the Apple Public Source License
   8   * Version 2.0 (the 'License'). You may not use this file except in
   9   * compliance with the License. The rights granted to you under the License
  10   * may not be used to create, or enable the creation or redistribution of,
  11   * unlawful or unlicensed copies of an Apple operating system, or to
  12   * circumvent, violate, or enable the circumvention or violation of, any
  13   * terms of an Apple operating system software license agreement.
  14   *
  15   * Please obtain a copy of the License at
  16   * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17   *
  18   * The Original Code and all software distributed under the License are
  19   * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20   * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21   * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22   * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23   * Please see the License for the specific language governing rights and
  24   * limitations under the License.
  25   *
  26   * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27   */
  28  /* Copyright (c) 1995-2005 Apple Computer, Inc. All Rights Reserved */
  29  /*
  30   *	pthread_support.c
  31   */
  32  
  33  #ifdef __DARLING__
  34  #undef _NLINK_T
  35  #include <sys/proc.h>
  36  #include <net/if_var.h>
  37  #include "kern_internal.h"
  38  
  39  #include <darlingserver/duct-tape/log.h>
  40  #endif // __DARLING__
  41  
  42  #include <sys/param.h>
  43  #include <sys/queue.h>
  44  #include <sys/resourcevar.h>
  45  //#include <sys/proc_internal.h>
  46  #include <sys/kauth.h>
  47  #include <sys/systm.h>
  48  #include <sys/timeb.h>
  49  #include <sys/times.h>
  50  #include <sys/time.h>
  51  #include <sys/acct.h>
  52  #include <sys/kernel.h>
  53  #include <sys/wait.h>
  54  #include <sys/signalvar.h>
  55  #include <sys/syslog.h>
  56  #include <sys/stat.h>
  57  #include <sys/lock.h>
  58  #include <sys/kdebug.h>
  59  //#include <sys/sysproto.h>
  60  //#include <sys/pthread_internal.h>
  61  #include <sys/vm.h>
  62  #include <sys/user.h>
  63  
  64  #include <mach/mach_types.h>
  65  #include <mach/vm_prot.h>
  66  #include <mach/semaphore.h>
  67  #include <mach/sync_policy.h>
  68  #include <mach/task.h>
  69  #include <kern/kern_types.h>
  70  #include <kern/task.h>
  71  #include <kern/clock.h>
  72  #include <mach/kern_return.h>
  73  #include <kern/thread.h>
  74  #include <kern/sched_prim.h>
  75  #include <kern/thread_call.h>
  76  #include <kern/kalloc.h>
  77  #include <kern/zalloc.h>
  78  #include <kern/sched_prim.h>
  79  #include <kern/processor.h>
  80  #include <kern/block_hint.h>
  81  #include <kern/turnstile.h>
  82  //#include <kern/mach_param.h>
  83  #include <mach/mach_vm.h>
  84  #include <mach/mach_param.h>
  85  #include <mach/thread_policy.h>
  86  #include <mach/message.h>
  87  #include <mach/port.h>
  88  //#include <vm/vm_protos.h>
  89  #include <vm/vm_map.h>
  90  #include <mach/vm_region.h>
  91  
  92  #include <libkern/OSAtomic.h>
  93  
  94  #include <pexpert/pexpert.h>
  95  
  96  #include "kern_internal.h"
  97  #include "synch_internal.h"
  98  #include "kern_trace.h"
  99  
 100  typedef struct uthread *uthread_t;
 101  
 102  //#define __FAILEDUSERTEST__(s) do { panic(s); } while (0)
 103  
 104  #ifdef __DARLING__
 105  #define __FAILEDUSERTEST__(s) do { dtape_log_error("PSYNCH: pid[%d]: %s\n", proc_pid(current_proc()), s); } while (0)
 106  #define __FAILEDUSERTEST2__(s, x...) do { dtape_log_error("PSYNCH: pid[%d]: " s "\n", proc_pid(current_proc()), x); } while (0)
 107  #else
 108  #define __FAILEDUSERTEST__(s) do { printf("PSYNCH: pid[%d]: %s\n", proc_pid(current_proc()), s); } while (0)
 109  #define __FAILEDUSERTEST2__(s, x...) do { printf("PSYNCH: pid[%d]: " s "\n", proc_pid(current_proc()), x); } while (0)
 110  #endif // __DARLING__
 111  
 112  lck_mtx_t *pthread_list_mlock;
 113  
 114  #define PTH_HASHSIZE 100
 115  
 116  static LIST_HEAD(pthhashhead, ksyn_wait_queue) *pth_glob_hashtbl;
 117  static unsigned long pthhash;
 118  
 119  static LIST_HEAD(, ksyn_wait_queue) pth_free_list;
 120  
 121  static zone_t kwq_zone; /* zone for allocation of ksyn_queue */
 122  static zone_t kwe_zone;	/* zone for allocation of ksyn_waitq_element */
 123  
 124  #define SEQFIT 0
 125  #define FIRSTFIT 1
 126  
/*
 * A single FIFO of blocked waiters (ksyn_waitq_element entries) plus the
 * lowest/highest sequence numbers currently queued.  Each ksyn_wait_queue
 * embeds one of these per queue type (see kw_ksynqueues / KSYN_QUEUE_*).
 */
struct ksyn_queue {
	TAILQ_HEAD(ksynq_kwelist_head, ksyn_waitq_element) ksynq_kwelist;
	uint32_t	ksynq_count;		/* number of entries in queue */
	uint32_t	ksynq_firstnum;		/* lowest seq in queue */
	uint32_t	ksynq_lastnum;		/* highest seq in queue */
};
typedef struct ksyn_queue *ksyn_queue_t;
 134  
/* Index into kw_ksynqueues: which sub-queue a waiter sits on. */
typedef enum {
	KSYN_QUEUE_READ = 0,
	KSYN_QUEUE_WRITE,
	KSYN_QUEUE_MAX,
} kwq_queue_type_t;

/* Which kind of waiter a recorded interrupted-wakeup (kw_intr) applies to. */
typedef enum {
	KWQ_INTR_NONE = 0,
	KWQ_INTR_READ = 0x1,
	KWQ_INTR_WRITE = 0x2,
} kwq_intr_type_t;
 146  
/*
 * Kernel-side wait queue backing one userspace psynch object (mutex, condvar,
 * rwlock or semaphore).  Instances live in a global hash keyed by user
 * address (or by backing object+offset in process-shared mode; see
 * ksyn_wq_hash_lookup).  kw_pflags is protected by the global list lock;
 * the waiter/seq state is protected by kw_lock.
 */
struct ksyn_wait_queue {
	LIST_ENTRY(ksyn_wait_queue) kw_hash;
	LIST_ENTRY(ksyn_wait_queue) kw_list;
	user_addr_t kw_addr;
	thread_t kw_owner;		/* current owner or THREAD_NULL, has a +1 */
	uint64_t kw_object;		/* object backing in shared mode */
	uint64_t kw_offset;		/* offset inside the object in shared mode */
	int	kw_pflags;		/* flags under listlock protection */
	struct timeval kw_ts;		/* timeval need for upkeep before free */
	int	kw_iocount;		/* inuse reference */
	int 	kw_dropcount;		/* current users unlocking... */
	
	int	kw_type;		/* queue type like mutex, cvar, etc */
	uint32_t kw_inqueue;		/* num of waiters held */
	uint32_t kw_fakecount;		/* number of error/prepost fakes */
	uint32_t kw_highseq;		/* highest seq in the queue */
	uint32_t kw_lowseq;		/* lowest seq in the queue */
	uint32_t kw_lword;		/* L value from userland */
	uint32_t kw_uword;		/* U world value from userland */
	uint32_t kw_sword;		/* S word value from userland */
	uint32_t kw_lastunlockseq;	/* the last seq that unlocked */
	/* for CV to be used as the seq kernel has seen so far */
#define kw_cvkernelseq kw_lastunlockseq
	uint32_t kw_lastseqword;		/* the last seq that unlocked */
	/* for mutex and cvar we need to track I bit values */
	uint32_t kw_nextseqword;	/* the last seq that unlocked; with num of waiters */
	struct {
		uint32_t count; /* prepost count */
		uint32_t lseq; /* prepost target seq */
		uint32_t sseq; /* prepost target sword, in cvar used for mutexowned */
	} kw_prepost;
	struct {
		kwq_intr_type_t type; /* type of failed wakeups */
		uint32_t count; /* prepost of missed wakeup due to intrs */
		uint32_t seq; /* prepost of missed wakeup limit seq */
		uint32_t returnbits; /* return bits value for missed wakeup threads */
	} kw_intr;
	
	int 	kw_kflags;
	int		kw_qos_override;	/* QoS of max waiter during contention period */
	struct turnstile *kw_turnstile;
	struct ksyn_queue kw_ksynqueues[KSYN_QUEUE_MAX];	/* queues to hold threads */
	lck_spin_t kw_lock;		/* spinlock protecting this structure */
};
typedef struct ksyn_wait_queue * ksyn_wait_queue_t;
 192  
 193  #define TID_ZERO (uint64_t)0
 194  
 195  /* bits needed in handling the rwlock unlock */
 196  #define PTH_RW_TYPE_READ	0x01
 197  #define PTH_RW_TYPE_WRITE	0x04
 198  #define PTH_RW_TYPE_MASK	0xff
 199  #define PTH_RW_TYPE_SHIFT	8
 200  
 201  #define PTH_RWSHFT_TYPE_READ	0x0100
 202  #define PTH_RWSHFT_TYPE_WRITE	0x0400
 203  #define PTH_RWSHFT_TYPE_MASK	0xff00
 204  
 205  /*
 206   * Mutex pshared attributes
 207   */
 208  #define PTHREAD_PROCESS_SHARED		_PTHREAD_MTX_OPT_PSHARED
 209  #define PTHREAD_PROCESS_PRIVATE		0x20
 210  #define PTHREAD_PSHARED_FLAGS_MASK	0x30
 211  
 212  /*
 213   * Mutex policy attributes
 214   */
 215  #define _PTHREAD_MTX_OPT_POLICY_FAIRSHARE	0x040	/* 1 */
 216  #define _PTHREAD_MTX_OPT_POLICY_FIRSTFIT	0x080	/* 2 */
 217  #define _PTHREAD_MTX_OPT_POLICY_MASK		0x1c0
 218  
 219  /* pflags */
 220  #define KSYN_WQ_INHASH	2
 221  #define KSYN_WQ_SHARED	4
 222  #define KSYN_WQ_WAITING 8	/* threads waiting for this wq to be available */
 223  #define KSYN_WQ_FLIST 	0X10	/* in free list to be freed after a short delay */
 224  
 225  /* kflags */
 226  #define KSYN_KWF_INITCLEARED	0x1	/* the init status found and preposts cleared */
 227  #define KSYN_KWF_ZEROEDOUT	0x2	/* the lword, etc are inited to 0 */
 228  #define KSYN_KWF_QOS_APPLIED	0x4	/* QoS override applied to owner */
 229  #define KSYN_KWF_OVERLAP_GUARD	0x8	/* overlap guard */
 230  
 231  #define KSYN_CLEANUP_DEADLINE 10
 232  static int psynch_cleanupset;
 233  thread_call_t psynch_thcall;
 234  
 235  #define KSYN_WQTYPE_INWAIT	0x1000
 236  #define KSYN_WQTYPE_INDROP	0x2000
 237  #define KSYN_WQTYPE_MTX		0x01
 238  #define KSYN_WQTYPE_CVAR	0x02
 239  #define KSYN_WQTYPE_RWLOCK	0x04
 240  #define KSYN_WQTYPE_SEMA	0x08
 241  #define KSYN_WQTYPE_MASK	0xff
 242  
 243  #define KSYN_WQTYPE_MUTEXDROP	(KSYN_WQTYPE_INDROP | KSYN_WQTYPE_MTX)
 244  
 245  static inline int
 246  _kwq_type(ksyn_wait_queue_t kwq)
 247  {
 248  	return (kwq->kw_type & KSYN_WQTYPE_MASK);
 249  }
 250  
 251  static inline bool
 252  _kwq_use_turnstile(ksyn_wait_queue_t kwq)
 253  {
 254  	// <rdar://problem/15926625> If we had writer-owner information from the
 255  	// rwlock then we could use the turnstile to push on it. For now, only
 256  	// plain mutexes use it.
 257  	return (_kwq_type(kwq) == KSYN_WQTYPE_MTX);
 258  }
 259  
 260  #define KW_UNLOCK_PREPOST 		0x01
 261  #define KW_UNLOCK_PREPOST_READLOCK 	0x08
 262  #define KW_UNLOCK_PREPOST_WRLOCK 	0x20
 263  
 264  static int ksyn_wq_hash_lookup(user_addr_t uaddr, proc_t p, int flags, ksyn_wait_queue_t *kwq, struct pthhashhead **hashptr, uint64_t object, uint64_t offset);
 265  static int ksyn_wqfind(user_addr_t mutex, uint32_t mgen, uint32_t ugen, uint32_t rw_wc, int flags, int wqtype , ksyn_wait_queue_t *wq);
 266  static void ksyn_wqrelease(ksyn_wait_queue_t mkwq, int qfreenow, int wqtype);
 267  static int ksyn_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp);
 268  
 269  static int _wait_result_to_errno(wait_result_t result);
 270  
 271  static int ksyn_wait(ksyn_wait_queue_t, kwq_queue_type_t, uint32_t, int, uint64_t, uint16_t, thread_continue_t, block_hint_t);
 272  static kern_return_t ksyn_signal(ksyn_wait_queue_t, kwq_queue_type_t, ksyn_waitq_element_t, uint32_t);
 273  static void ksyn_freeallkwe(ksyn_queue_t kq);
 274  
 275  static kern_return_t ksyn_mtxsignal(ksyn_wait_queue_t, ksyn_waitq_element_t kwe, uint32_t, thread_t *);
 276  
 277  static int kwq_handle_unlock(ksyn_wait_queue_t, uint32_t mgen, uint32_t rw_wc, uint32_t *updatep, int flags, int *blockp, uint32_t premgen);
 278  
 279  static void ksyn_queue_init(ksyn_queue_t kq);
 280  static int ksyn_queue_insert(ksyn_wait_queue_t kwq, int kqi, ksyn_waitq_element_t kwe, uint32_t mgen, int firstfit);
 281  static void ksyn_queue_remove_item(ksyn_wait_queue_t kwq, ksyn_queue_t kq, ksyn_waitq_element_t kwe);
 282  static void ksyn_queue_free_items(ksyn_wait_queue_t kwq, int kqi, uint32_t upto, int all);
 283  
 284  static void update_low_high(ksyn_wait_queue_t kwq, uint32_t lockseq);
 285  static uint32_t find_nextlowseq(ksyn_wait_queue_t kwq);
 286  static uint32_t find_nexthighseq(ksyn_wait_queue_t kwq);
 287  static int find_seq_till(ksyn_wait_queue_t kwq, uint32_t upto, uint32_t nwaiters, uint32_t *countp);
 288  
 289  static uint32_t ksyn_queue_count_tolowest(ksyn_queue_t kq, uint32_t upto);
 290  
 291  static ksyn_waitq_element_t ksyn_queue_find_cvpreposeq(ksyn_queue_t kq, uint32_t cgen);
 292  static void ksyn_handle_cvbroad(ksyn_wait_queue_t ckwq, uint32_t upto, uint32_t *updatep);
 293  static void ksyn_cvupdate_fixup(ksyn_wait_queue_t ckwq, uint32_t *updatep);
 294  static ksyn_waitq_element_t ksyn_queue_find_signalseq(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t toseq, uint32_t lockseq);
 295  
 296  static void __dead2 psynch_cvcontinue(void *, wait_result_t);
 297  static void __dead2 psynch_mtxcontinue(void *, wait_result_t);
 298  static void __dead2 psynch_rw_rdcontinue(void *, wait_result_t);
 299  static void __dead2 psynch_rw_wrcontinue(void *, wait_result_t);
 300  
 301  static int ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int allreaders, uint32_t updatebits, int *wokenp);
 302  static int kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int *type, uint32_t lowest[]);
 303  static ksyn_waitq_element_t ksyn_queue_find_seq(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t seq);
 304  
/*
 * Refresh a condvar kwq's cached userland words (kw_lword, kw_uword,
 * kw_sword) from the L/U/S values a syscall just passed in, only ever
 * moving each cached word forward in sequence space.
 */
static void
UPDATE_CVKWQ(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen, uint32_t rw_wc)
{
	/* sinit: userspace flagged the S word (rw_wc) as meaningful for this call */
	int sinit = ((rw_wc & PTH_RWS_CV_CBIT) != 0);
	
	// assert((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR);
	
	if ((kwq->kw_kflags & KSYN_KWF_ZEROEDOUT) != 0) {
		/* the values of L,U and S are cleared out due to L==S in previous transition */
		kwq->kw_lword = mgen;
		kwq->kw_uword = ugen;
		kwq->kw_sword = rw_wc;
		kwq->kw_kflags &= ~KSYN_KWF_ZEROEDOUT;
	} else {
		/* advance each cached word, using wrap-aware seq comparisons */
		if (is_seqhigher(mgen, kwq->kw_lword)) {
			kwq->kw_lword = mgen;
		}
		if (is_seqhigher(ugen, kwq->kw_uword)) {
			kwq->kw_uword = ugen;
		}
		if (sinit && is_seqhigher(rw_wc, kwq->kw_sword)) {
			kwq->kw_sword = rw_wc;
		}
	}
	if (sinit && is_seqlower(kwq->kw_cvkernelseq, rw_wc)) {
		/* track the highest S value the kernel has seen for this CV */
		kwq->kw_cvkernelseq = (rw_wc & PTHRW_COUNT_MASK);
	}
}
 333  
 334  static inline void
 335  _kwq_clear_preposted_wakeup(ksyn_wait_queue_t kwq)
 336  {
 337  	kwq->kw_prepost.lseq = 0;
 338  	kwq->kw_prepost.sseq = PTHRW_RWS_INIT;
 339  	kwq->kw_prepost.count = 0;
 340  }
 341  
 342  static inline void
 343  _kwq_mark_preposted_wakeup(ksyn_wait_queue_t kwq, uint32_t count,
 344  		uint32_t lseq, uint32_t sseq)
 345  {
 346  	kwq->kw_prepost.count = count;
 347  	kwq->kw_prepost.lseq = lseq;
 348  	kwq->kw_prepost.sseq = sseq;
 349  }
 350  
 351  static inline void
 352  _kwq_clear_interrupted_wakeup(ksyn_wait_queue_t kwq)
 353  {
 354  	kwq->kw_intr.type = KWQ_INTR_NONE;
 355  	kwq->kw_intr.count = 0;
 356  	kwq->kw_intr.seq = 0;
 357  	kwq->kw_intr.returnbits = 0;
 358  }
 359  
/*
 * Record a wakeup that could not be delivered (target thread had been
 * interrupted), so a later matching waiter at or below `lseq` can consume
 * it via _kwq_handle_interrupted_wakeup().
 * NOTE(review): "interruped" is a long-standing typo in the name; kept as-is
 * because call sites elsewhere in this file use it.
 */
static inline void
_kwq_mark_interruped_wakeup(ksyn_wait_queue_t kwq, kwq_intr_type_t type,
		uint32_t count, uint32_t lseq, uint32_t returnbits)
{
	kwq->kw_intr.count = count;
	kwq->kw_intr.seq = lseq;
	kwq->kw_intr.returnbits = returnbits;
	kwq->kw_intr.type = type;
}
 369  
 370  static void
 371  _kwq_destroy(ksyn_wait_queue_t kwq)
 372  {
 373  	if (kwq->kw_owner) {
 374  		thread_deallocate(kwq->kw_owner);
 375  	}
 376  	lck_spin_destroy(&kwq->kw_lock, pthread_lck_grp);
 377  	zfree(kwq_zone, kwq);
 378  }
 379  
#define KWQ_SET_OWNER_TRANSFER_REF  0x1

/*
 * Point kw_owner (which holds a +1 thread reference) at new_owner.
 *
 * Reference contract:
 *  - Without KWQ_SET_OWNER_TRANSFER_REF, a fresh +1 is taken on new_owner
 *    here; with it, the caller's existing +1 on new_owner is absorbed.
 *  - The return value is a thread whose +1 the CALLER must dispose of:
 *    the previous owner when ownership changed, new_owner itself when the
 *    owner was unchanged but a transferred ref was handed in (it is now
 *    redundant), or THREAD_NULL when there is nothing to drop.
 */
static inline thread_t
_kwq_set_owner(ksyn_wait_queue_t kwq, thread_t new_owner, int flags)
{
	thread_t old_owner = kwq->kw_owner;
	if (old_owner == new_owner) {
		/* no ownership change: hand back the redundant transferred ref */
		if (flags & KWQ_SET_OWNER_TRANSFER_REF) return new_owner;
		return THREAD_NULL;
	}
	if ((flags & KWQ_SET_OWNER_TRANSFER_REF) == 0) {
		thread_reference(new_owner);
	}
	kwq->kw_owner = new_owner;
	return old_owner;
}
 396  
 397  static inline thread_t
 398  _kwq_clear_owner(ksyn_wait_queue_t kwq)
 399  {
 400  	return _kwq_set_owner(kwq, THREAD_NULL, KWQ_SET_OWNER_TRANSFER_REF);
 401  }
 402  
 403  static inline void
 404  _kwq_cleanup_old_owner(thread_t *thread)
 405  {
 406  	if (*thread) {
 407  		thread_deallocate(*thread);
 408  		*thread = THREAD_NULL;
 409  	}
 410  }
 411  
 412  static void
 413  CLEAR_REINIT_BITS(ksyn_wait_queue_t kwq)
 414  {
 415  	if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) {
 416  		if (kwq->kw_inqueue != 0 && kwq->kw_inqueue != kwq->kw_fakecount) {
 417  			panic("CV:entries in queue durinmg reinit %d:%d\n",kwq->kw_inqueue, kwq->kw_fakecount);
 418  		}
 419  	};
 420  	if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_RWLOCK) {
 421  		kwq->kw_nextseqword = PTHRW_RWS_INIT;
 422  		kwq->kw_kflags &= ~KSYN_KWF_OVERLAP_GUARD;
 423  	};
 424  	_kwq_clear_preposted_wakeup(kwq);
 425  	kwq->kw_lastunlockseq = PTHRW_RWL_INIT;
 426  	kwq->kw_lastseqword = PTHRW_RWS_INIT;
 427  	_kwq_clear_interrupted_wakeup(kwq);
 428  	kwq->kw_lword = 0;
 429  	kwq->kw_uword = 0;
 430  	kwq->kw_sword = PTHRW_RWS_INIT;
 431  }
 432  
/*
 * Try to satisfy a newly arrived waiter (at sequence `lseq`) from a pending
 * preposted wakeup.  Returns true with *retval set to the update bits when
 * the waiter should return to userspace immediately; false when it should
 * block (no applicable prepost, prepost not yet fully drained, or unlock
 * processing decided to block).
 */
static bool
_kwq_handle_preposted_wakeup(ksyn_wait_queue_t kwq, uint32_t type,
		uint32_t lseq, uint32_t *retval)
{
	if (kwq->kw_prepost.count == 0 ||
			!is_seqlower_eq(lseq, kwq->kw_prepost.lseq)) {
		return false;
	}

	/* note: one prepost is consumed even on the early false return below */
	kwq->kw_prepost.count--;
	if (kwq->kw_prepost.count > 0) {
		return false;
	}

	int error, should_block = 0;
	uint32_t updatebits = 0;
	/* capture the prepost targets before wiping the record */
	uint32_t pp_lseq = kwq->kw_prepost.lseq;
	uint32_t pp_sseq = kwq->kw_prepost.sseq;
	_kwq_clear_preposted_wakeup(kwq);

	kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED;

	error = kwq_handle_unlock(kwq, pp_lseq, pp_sseq, &updatebits,
			(type | KW_UNLOCK_PREPOST), &should_block, lseq);
	if (error) {
		panic("_kwq_handle_preposted_wakeup: kwq_handle_unlock failed %d",
				error);
	}

	if (should_block) {
		return false;
	}
	*retval = updatebits;
	return true;
}
 468  
 469  static bool
 470  _kwq_handle_overlap(ksyn_wait_queue_t kwq, uint32_t type, uint32_t lgenval, 
 471  		uint32_t rw_wc, uint32_t *retval)
 472  {
 473  	int res = 0;
 474  
 475  	// overlaps only occur on read lockers
 476  	if (type != PTH_RW_TYPE_READ) {
 477  		return false;
 478  	}
 479  
 480  	// check for overlap and no pending W bit (indicates writers)
 481  	if ((kwq->kw_kflags & KSYN_KWF_OVERLAP_GUARD) &&
 482  			!is_rws_savemask_set(rw_wc) && !is_rwl_wbit_set(lgenval)) {
 483  		/* overlap is set, so no need to check for valid state for overlap */
 484  
 485  		if (is_seqlower_eq(rw_wc, kwq->kw_nextseqword) || is_seqhigher_eq(kwq->kw_lastseqword, rw_wc)) {
 486  			/* increase the next expected seq by one */
 487  			kwq->kw_nextseqword += PTHRW_INC;
 488  			/* set count by one & bits from the nextseq and add M bit */
 489  			*retval = PTHRW_INC | ((kwq->kw_nextseqword & PTHRW_BIT_MASK) | PTH_RWL_MBIT);
 490  			res = 1;
 491  		}
 492  	}
 493  	return res;
 494  }
 495  
 496  static inline bool
 497  _kwq_is_used(ksyn_wait_queue_t kwq)
 498  {
 499  	return (kwq->kw_inqueue != 0 || kwq->kw_prepost.count != 0 ||
 500  			kwq->kw_intr.count != 0);
 501  }
 502  
/*
 * consumes a pending interrupted waiter, returns true if the current
 * thread should return back to userspace because it was previously
 * interrupted.
 */
static inline bool
_kwq_handle_interrupted_wakeup(ksyn_wait_queue_t kwq, kwq_intr_type_t type,
		uint32_t lseq, uint32_t *retval)
{
	/* applies only when the record matches this queue type and our seq is
	 * at or below the recorded limit (seq == 0 means "no limit") */
	if (kwq->kw_intr.count != 0 && kwq->kw_intr.type == type &&
			(!kwq->kw_intr.seq || is_seqlower_eq(lseq, kwq->kw_intr.seq))) {
		kwq->kw_intr.count--;
		*retval = kwq->kw_intr.returnbits;
		if (kwq->kw_intr.returnbits == 0) {
			/* nothing further to hand out; wipe the whole record */
			_kwq_clear_interrupted_wakeup(kwq);
		}
		return true;
	}
	return false;
}
 523  
/* Spin-acquire the global pthread list mutex (protects hash/free lists). */
static void
pthread_list_lock(void)
{
	lck_mtx_lock_spin(pthread_list_mlock);
}

/* Release the global pthread list mutex. */
static void
pthread_list_unlock(void)
{
	lck_mtx_unlock(pthread_list_mlock);
}
 535  
/* Acquire an individual wait queue's spinlock (kw_lock). */
static void
ksyn_wqlock(ksyn_wait_queue_t kwq)
{
	lck_spin_lock(&kwq->kw_lock);
}

/* Release an individual wait queue's spinlock. */
static void
ksyn_wqunlock(ksyn_wait_queue_t kwq)
{
	lck_spin_unlock(&kwq->kw_lock);
}
 547  
/* routine to drop the mutex unlocks , used both for mutexunlock system call and drop during cond wait */
/*
 * Hand the mutex to the next waiter, or record a prepost when the expected
 * next locker has not reached the kernel yet.  Releases the kwq reference
 * taken by the caller's ksyn_wqfind().
 *
 * NOTE(review): `returnbits` is never assigned after initialization, so this
 * always returns 0 — confirm callers only rely on the side effects.
 */
static uint32_t
_psynch_mutexdrop_internal(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen,
		int flags)
{
	kern_return_t ret;
	uint32_t returnbits = 0;
	uint32_t updatebits = 0;
	int firstfit = (flags & _PTHREAD_MTX_OPT_POLICY_MASK) ==
			_PTHREAD_MTX_OPT_POLICY_FIRSTFIT;
	uint32_t nextgen = (ugen + PTHRW_INC);
	thread_t old_owner = THREAD_NULL;

	ksyn_wqlock(kwq);
	kwq->kw_lastunlockseq = (ugen & PTHRW_COUNT_MASK);

redrive:
	/* grant bits: highest queued seq count plus the E and K bits */
	updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) |
			(PTH_RWL_EBIT | PTH_RWL_KBIT);

	if (firstfit) {
		if (kwq->kw_inqueue == 0) {
			uint32_t count = kwq->kw_prepost.count + 1;
			// Increment the number of preposters we have waiting
			_kwq_mark_preposted_wakeup(kwq, count, mgen & PTHRW_COUNT_MASK, 0);
			// We don't know the current owner as we've determined this mutex
			// drop should have a preposted locker inbound into the kernel but
			// we have no way of knowing who it is. When it arrives, the lock
			// path will update the turnstile owner and return it to userspace.
			old_owner = _kwq_clear_owner(kwq);
			pthread_kern->psynch_wait_update_owner(kwq, THREAD_NULL,
					&kwq->kw_turnstile);
			PTHREAD_TRACE(psynch_mutex_kwqprepost, kwq->kw_addr,
					kwq->kw_prepost.lseq, count, 0);
		} else {
			// signal first waiter
			ret = ksyn_mtxsignal(kwq, NULL, updatebits, &old_owner);
			if (ret == KERN_NOT_WAITING) {
				// <rdar://problem/39093536> ksyn_mtxsignal attempts to signal
				// the thread but it sets up the turnstile inheritor first.
				// That means we can't redrive the mutex in a loop without
				// dropping the wq lock and cleaning up the turnstile state.
				ksyn_wqunlock(kwq);
				pthread_kern->psynch_wait_cleanup();
				_kwq_cleanup_old_owner(&old_owner);
				ksyn_wqlock(kwq);
				goto redrive;
			}
		}
	} else {
		bool prepost = false;
		if (kwq->kw_inqueue == 0) {
			// No waiters in the queue.
			prepost = true;
		} else {
			uint32_t low_writer = (kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_firstnum & PTHRW_COUNT_MASK);
			if (low_writer == nextgen) {
				/* next seq to be granted found */
				/* since the grant could be cv, make sure mutex wait is set incase the thread interrupted out */
				ret = ksyn_mtxsignal(kwq, NULL,
						updatebits | PTH_RWL_MTX_WAIT, &old_owner);
				if (ret == KERN_NOT_WAITING) {
					/* interrupt post */
					_kwq_mark_interruped_wakeup(kwq, KWQ_INTR_WRITE, 1,
							nextgen, updatebits);
				}
			} else if (is_seqhigher(low_writer, nextgen)) {
				prepost = true;
			} else {
				//__FAILEDUSERTEST__("psynch_mutexdrop_internal: FS mutex unlock sequence higher than the lowest one is queue\n");
				ksyn_waitq_element_t kwe;
				kwe = ksyn_queue_find_seq(kwq,
						&kwq->kw_ksynqueues[KSYN_QUEUE_WRITE], nextgen);
				if (kwe != NULL) {
					/* next seq to be granted found */
					/* since the grant could be cv, make sure mutex wait is set incase the thread interrupted out */
					ret = ksyn_mtxsignal(kwq, kwe,
							updatebits | PTH_RWL_MTX_WAIT, &old_owner);
					if (ret == KERN_NOT_WAITING) {
						goto redrive;
					}
				} else {
					prepost = true;
				}
			}
		}
		if (prepost) {
			if (kwq->kw_prepost.count != 0) {
				__FAILEDUSERTEST__("_psynch_mutexdrop_internal: multiple preposts\n");
			} else {
				_kwq_mark_preposted_wakeup(kwq, 1, nextgen & PTHRW_COUNT_MASK,
						0);
			}
			old_owner = _kwq_clear_owner(kwq);
			pthread_kern->psynch_wait_update_owner(kwq, THREAD_NULL,
					&kwq->kw_turnstile);
		}
	}

	ksyn_wqunlock(kwq);
	pthread_kern->psynch_wait_cleanup();
	_kwq_cleanup_old_owner(&old_owner);
	ksyn_wqrelease(kwq, 1, KSYN_WQTYPE_MUTEXDROP);
	return returnbits;
}
 653  
 654  static int
 655  _ksyn_check_init(ksyn_wait_queue_t kwq, uint32_t lgenval)
 656  {
 657  	int res = (lgenval & PTHRW_RWL_INIT) != 0;
 658  	if (res) {
 659  		if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) == 0) {
 660  			/* first to notice the reset of the lock, clear preposts */
 661  			CLEAR_REINIT_BITS(kwq);
 662  			kwq->kw_kflags |= KSYN_KWF_INITCLEARED;
 663  		}
 664  	}
 665  	return res;
 666  }
 667  
/*
 * psynch_mutexwait: This system call is used for contended psynch mutexes to
 * block.
 *
 * mgen/ugen are the userspace L and U words at contention time; tid is a
 * hint of the owner's thread id (0 or PTHREAD_MTX_TID_SWITCHING when
 * userspace could not tell).  On the fast paths (pending interrupted wakeup
 * or matching prepost) *retval receives the update bits and the call
 * returns without blocking; otherwise the thread blocks and resumes in
 * psynch_mtxcontinue.
 */
int
_psynch_mutexwait(__unused proc_t p, user_addr_t mutex, uint32_t mgen,
		uint32_t ugen, uint64_t tid, uint32_t flags, uint32_t *retval)
{
	ksyn_wait_queue_t kwq;
	int error = 0;
	int firstfit = (flags & _PTHREAD_MTX_OPT_POLICY_MASK)
			== _PTHREAD_MTX_OPT_POLICY_FIRSTFIT;
	int ins_flags = SEQFIT;
	uint32_t lseq = (mgen & PTHRW_COUNT_MASK);
	uint32_t updatebits = 0;
	thread_t tid_th = THREAD_NULL, old_owner = THREAD_NULL;

	if (firstfit) {
		/* first fit */
		ins_flags = FIRSTFIT;
	}

	error = ksyn_wqfind(mutex, mgen, ugen, 0, flags,
			(KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_MTX), &kwq);
	if (error != 0) {
		return error;
	}

again:
	ksyn_wqlock(kwq);

	/* consume a wakeup that previously failed because its target was
	 * interrupted; we become the owner instead of blocking */
	if (_kwq_handle_interrupted_wakeup(kwq, KWQ_INTR_WRITE, lseq, retval)) {
		old_owner = _kwq_set_owner(kwq, current_thread(), 0);
		pthread_kern->psynch_wait_update_owner(kwq, kwq->kw_owner,
				&kwq->kw_turnstile);
		ksyn_wqunlock(kwq);
		goto out;
	}

	if (kwq->kw_prepost.count && (firstfit || (lseq == kwq->kw_prepost.lseq))) {
		/* got preposted lock */
		kwq->kw_prepost.count--;

		if (!firstfit) {
			if (kwq->kw_prepost.count > 0) {
				__FAILEDUSERTEST__("psynch_mutexwait: more than one prepost\n");
				kwq->kw_prepost.lseq += PTHRW_INC; /* look for next one */
				ksyn_wqunlock(kwq);
				error = EINVAL;
				goto out;
			}
			_kwq_clear_preposted_wakeup(kwq);
		}

		if (kwq->kw_inqueue == 0) {
			updatebits = lseq | (PTH_RWL_KBIT | PTH_RWL_EBIT);
		} else {
			updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) |
					(PTH_RWL_KBIT | PTH_RWL_EBIT);
		}
		updatebits &= ~PTH_RWL_MTX_WAIT;

		if (updatebits == 0) {
			__FAILEDUSERTEST__("psynch_mutexwait(prepost): returning 0 lseq in mutexwait with no EBIT \n");
		}

		PTHREAD_TRACE(psynch_mutex_kwqprepost, kwq->kw_addr,
				kwq->kw_prepost.lseq, kwq->kw_prepost.count, 1);

		old_owner = _kwq_set_owner(kwq, current_thread(), 0);
		pthread_kern->psynch_wait_update_owner(kwq, kwq->kw_owner,
				&kwq->kw_turnstile);

		ksyn_wqunlock(kwq);
		*retval = updatebits;
		goto out;
	}

	// mutexwait passes in an owner hint at the time userspace contended for
	// the mutex, however, the owner tid in the userspace data structure may be
	// unset or SWITCHING (-1), or it may correspond to a stale snapshot after
	// the lock has subsequently been unlocked by another thread.
	if (tid == thread_tid(kwq->kw_owner)) {
		// userspace and kernel agree
	} else if (tid == 0) {
		// contender came in before owner could write TID
		// let's assume that what the kernel knows is accurate
		// for all we know this waiter came in late in the kernel
	} else if (kwq->kw_lastunlockseq != PTHRW_RWL_INIT &&
			   is_seqlower(ugen, kwq->kw_lastunlockseq)) {
		// owner is stale, someone has come in and unlocked since this
		// contended read the TID, so assume what is known in the kernel is
		// accurate
	} else if (tid == PTHREAD_MTX_TID_SWITCHING) {
		// userspace didn't know the owner because it was being unlocked, but
		// that unlocker hasn't reached the kernel yet. So assume what is known
		// in the kernel is accurate
	} else {
		// hint is being passed in for a specific thread, and we have no reason
		// not to trust it (like the kernel unlock sequence being higher)
		//
		// So resolve the hint to a thread_t if we haven't done so yet
		// and redrive as we dropped the lock
		if (tid_th == THREAD_NULL) {
			ksyn_wqunlock(kwq);
			tid_th = pthread_kern->task_findtid(current_task(), tid);
			if (tid_th == THREAD_NULL) tid = 0;
			goto again;
		}
		/* transfer our task_findtid ref into kw_owner; any previous owner's
		 * ref comes back in tid_th and is dropped just below */
		tid_th = _kwq_set_owner(kwq, tid_th, KWQ_SET_OWNER_TRANSFER_REF);
	}

	if (tid_th) {
		// We are on our way to block, and can't drop the spinlock anymore
		pthread_kern->thread_deallocate_safe(tid_th);
		tid_th = THREAD_NULL;
	}
	assert(old_owner == THREAD_NULL);
	error = ksyn_wait(kwq, KSYN_QUEUE_WRITE, mgen, ins_flags, 0, 0,
			psynch_mtxcontinue, kThreadWaitPThreadMutex);
	// ksyn_wait drops wait queue lock
out:
	pthread_kern->psynch_wait_cleanup();
	ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_MTX));
	if (tid_th) {
		thread_deallocate(tid_th);
	}
	if (old_owner) {
		thread_deallocate(old_owner);
	}
	return error;
}
 800  
/*
 * Continuation run when a thread blocked in _psynch_mutexwait resumes:
 * translate the wait result into an errno, publish the update bits as the
 * syscall return value on success, drop the kwq reference taken in
 * _psynch_mutexwait, and return to userspace.  Never returns to the caller.
 */
void __dead2
psynch_mtxcontinue(void *parameter, wait_result_t result)
{
	uthread_t uth = current_uthread();
	ksyn_wait_queue_t kwq = parameter;
	ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);

	ksyn_wqlock(kwq);

	int error = _wait_result_to_errno(result);
	if (error != 0) {
		/* interrupted/timed out: dequeue ourselves if still on the queue */
		if (kwe->kwe_kwqqueue) {
			ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITE], kwe);
		}
	} else {
		uint32_t updatebits = kwe->kwe_psynchretval & ~PTH_RWL_MTX_WAIT;
		pthread_kern->uthread_set_returnval(uth, updatebits);

		if (updatebits == 0) {
			__FAILEDUSERTEST__("psynch_mutexwait: returning 0 lseq in mutexwait with no EBIT \n");
		}
	}

	pthread_kern->psynch_wait_complete(kwq, &kwq->kw_turnstile);

	ksyn_wqunlock(kwq);
	pthread_kern->psynch_wait_cleanup();
	ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_MTX));
	pthread_kern->unix_syscall_return(error);
	__builtin_unreachable();
}
 832  
/*
 * Shared continuation for rwlock read/write waiters: translate the wait
 * result, publish the psynch return value on success, drop the kwq
 * reference, and return to userspace.  Never returns to the caller.
 */
static void __dead2
_psynch_rw_continue(ksyn_wait_queue_t kwq, kwq_queue_type_t kqi,
		wait_result_t result)
{
	uthread_t uth = current_uthread();
	ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);

	ksyn_wqlock(kwq);

	int error = _wait_result_to_errno(result);
	if (error != 0) {
		/* interrupted/timed out: dequeue ourselves if still on the queue */
		if (kwe->kwe_kwqqueue) {
			ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[kqi], kwe);
		}
	} else {
		pthread_kern->uthread_set_returnval(uth, kwe->kwe_psynchretval);
	}

	ksyn_wqunlock(kwq);
	ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_RWLOCK));

	pthread_kern->unix_syscall_return(error);
	__builtin_unreachable();
}
 857  
 858  void __dead2
 859  psynch_rw_rdcontinue(void *parameter, wait_result_t result)
 860  {
 861  	_psynch_rw_continue(parameter, KSYN_QUEUE_READ, result);
 862  }
 863  
 864  void __dead2
 865  psynch_rw_wrcontinue(void *parameter, wait_result_t result)
 866  {
 867  	_psynch_rw_continue(parameter, KSYN_QUEUE_WRITE, result);
 868  }
 869  
 870  /*
 871   * psynch_mutexdrop: This system call is used for unlock postings on contended psynch mutexes.
 872   */
 873  int
 874  _psynch_mutexdrop(__unused proc_t p, user_addr_t mutex, uint32_t mgen,
 875  		uint32_t ugen, uint64_t tid __unused, uint32_t flags, uint32_t *retval)
 876  {
 877  	int res;
 878  	ksyn_wait_queue_t kwq;
 879  
 880  	res = ksyn_wqfind(mutex, mgen, ugen, 0, flags, KSYN_WQTYPE_MUTEXDROP, &kwq);
 881  	if (res == 0) {
 882  		uint32_t updateval = _psynch_mutexdrop_internal(kwq, mgen, ugen, flags);
 883  		/* drops the kwq reference */
 884  		if (retval) {
 885  			*retval = updateval;
 886  		}
 887  	}
 888  
 889  	return res;
 890  }
 891  
/*
 * Wake one mutex waiter on kwq, posting updateval to it.  If kwe is NULL the
 * oldest entry on the write queue is chosen (the queue must be non-empty).
 * On a successful signal the woken thread becomes the kwq owner; otherwise
 * ownership is cleared.  The previous owner thread (if any) is handed back
 * through *old_owner so the caller can drop its reference once it is safe
 * to do so.  Called with the kwq lock held.
 */
static kern_return_t
ksyn_mtxsignal(ksyn_wait_queue_t kwq, ksyn_waitq_element_t kwe,
		uint32_t updateval, thread_t *old_owner)
{
	kern_return_t ret;

	if (!kwe) {
		/* No specific waiter requested: take the oldest queued one. */
		kwe = TAILQ_FIRST(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_kwelist);
		if (!kwe) {
			panic("ksyn_mtxsignal: panic signaling empty queue");
		}
	}

	PTHREAD_TRACE(psynch_mutex_kwqsignal | DBG_FUNC_START, kwq->kw_addr, kwe,
			thread_tid(kwe->kwe_thread), kwq->kw_inqueue);

	ret = ksyn_signal(kwq, KSYN_QUEUE_WRITE, kwe, updateval);
	if (ret == KERN_SUCCESS) {
		/* Hand mutex ownership to the thread we just woke. */
		*old_owner = _kwq_set_owner(kwq, kwe->kwe_thread, 0);
	} else {
		/* Signal failed; nobody owns the kwq now. */
		*old_owner = _kwq_clear_owner(kwq);
	}
	PTHREAD_TRACE(psynch_mutex_kwqsignal | DBG_FUNC_END, kwq->kw_addr, kwe,
			ret, 0);
	return ret;
}
 918  
 919  
 920  static void
 921  ksyn_prepost(ksyn_wait_queue_t kwq, ksyn_waitq_element_t kwe, uint32_t state,
 922  	     uint32_t lockseq)
 923  {
 924  	bzero(kwe, sizeof(*kwe));
 925  	kwe->kwe_state = state;
 926  	kwe->kwe_lockseq = lockseq;
 927  	kwe->kwe_count = 1;
 928  
 929  	(void)ksyn_queue_insert(kwq, KSYN_QUEUE_WRITE, kwe, lockseq, SEQFIT);
 930  	kwq->kw_fakecount++;
 931  }
 932  
/*
 * Signal one waiter on condition variable ckwq.  If th is non-NULL, that
 * specific thread is targeted (falling back to broadcast if it is no longer
 * waiting here); otherwise any eligible waiter with sequence <= uptoseq is
 * chosen.  If no waiter is present, a prepost element is recorded so a
 * future waiter at that sequence wakes immediately.
 *
 * Called with the ckwq lock held; may drop and retake it to allocate the
 * prepost element.  Any allocated-but-unused element is returned via *nkwep
 * for the caller to free after unlocking.  *updatebits accumulates the S
 * increments performed; *broadcast is set when the operation must be
 * upgraded to a broadcast.
 */
static void
ksyn_cvsignal(ksyn_wait_queue_t ckwq, thread_t th, uint32_t uptoseq,
		uint32_t signalseq, uint32_t *updatebits, int *broadcast,
		ksyn_waitq_element_t *nkwep)
{
	ksyn_waitq_element_t kwe = NULL;
	ksyn_waitq_element_t nkwe = NULL;
	ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITE];

	uptoseq &= PTHRW_COUNT_MASK;

	// Find the specified thread to wake.
	if (th != THREAD_NULL) {
		uthread_t uth = pthread_kern->get_bsdthread_info(th);
		kwe = pthread_kern->uthread_get_uukwe(uth);
		if (kwe->kwe_kwqqueue != ckwq ||
		    is_seqhigher(kwe->kwe_lockseq, uptoseq)) {
			// Unless it's no longer waiting on this CV...
			kwe = NULL;
			// ...in which case we post a broadcast instead.
			*broadcast = 1;
			return;
		}
	}

	// If no thread was specified, find any thread to wake (with the right
	// sequence number).
	while (th == THREAD_NULL) {
		if (kwe == NULL) {
			kwe = ksyn_queue_find_signalseq(ckwq, kq, uptoseq, signalseq);
		}
		if (kwe == NULL && nkwe == NULL) {
			// No eligible entries; need to allocate a new
			// entry to prepost. Loop to rescan after
			// reacquiring the lock after allocation in
			// case anything new shows up.
			ksyn_wqunlock(ckwq);
			nkwe = (ksyn_waitq_element_t)zalloc(kwe_zone);
			ksyn_wqlock(ckwq);
		} else {
			break;
		}
	}

	if (kwe != NULL) {
		// If we found a thread to wake...
		if (kwe->kwe_state == KWE_THREAD_INWAIT) {
			if (is_seqlower(kwe->kwe_lockseq, signalseq)) {
				/*
				 * A valid thread in our range, but lower than our signal.
				 * Matching it may leave our match with nobody to wake it if/when
				 * it arrives (the signal originally meant for this thread might
				 * not successfully wake it).
				 *
				 * Convert to broadcast - may cause some spurious wakeups
				 * (allowed by spec), but avoids starvation (better choice).
				 */
				*broadcast = 1;
			} else {
				(void)ksyn_signal(ckwq, KSYN_QUEUE_WRITE, kwe, PTH_RWL_MTX_WAIT);
				*updatebits += PTHRW_INC;
			}
		} else if (kwe->kwe_state == KWE_THREAD_PREPOST) {
			// Merge with existing prepost at same uptoseq.
			kwe->kwe_count += 1;
		} else if (kwe->kwe_state == KWE_THREAD_BROADCAST) {
			// Existing broadcasts subsume this signal.
		} else {
			panic("unknown kwe state\n");
		}
		if (nkwe) {
			/*
			 * If we allocated a new kwe above but then found a different kwe to
			 * use then we need to deallocate the spare one.
			 */
			zfree(kwe_zone, nkwe);
			nkwe = NULL;
		}
	} else if (nkwe != NULL) {
		// ... otherwise, insert the newly allocated prepost.
		ksyn_prepost(ckwq, nkwe, KWE_THREAD_PREPOST, uptoseq);
		nkwe = NULL;
	} else {
		panic("failed to allocate kwe\n");
	}

	*nkwep = nkwe;
}
1021  
/*
 * Common implementation for cvsignal and cvbroad.  cgen/cugen/csgen are the
 * condvar's userspace L/U/S words.  When broadcast is set, all waiters up to
 * uptoseq are woken; otherwise a single waiter is signalled, optionally the
 * specific thread named by threadport.  The updated sequence bits are
 * returned through *retval.
 */
static int
__psynch_cvsignal(user_addr_t cv, uint32_t cgen, uint32_t cugen,
		uint32_t csgen, uint32_t flags, int broadcast,
		mach_port_name_t threadport, uint32_t *retval)
{
	int error = 0;
	thread_t th = THREAD_NULL;
	ksyn_wait_queue_t kwq;
	
	uint32_t uptoseq = cgen & PTHRW_COUNT_MASK;
	uint32_t fromseq = (cugen & PTHRW_COUNT_MASK) + PTHRW_INC;
	
	// validate sane L, U, and S values
	if ((threadport == 0 && is_seqhigher(fromseq, uptoseq)) || is_seqhigher(csgen, uptoseq)) {
		__FAILEDUSERTEST__("cvbroad: invalid L, U and S values\n");
		return EINVAL;
	}
	
	// Targeted signal: resolve the port to a thread (takes a reference,
	// dropped at the end of this function).
	if (threadport != 0) {
		th = port_name_to_thread((mach_port_name_t)threadport);
		if (th == THREAD_NULL) {
			return ESRCH;
		}
	}
	
	error = ksyn_wqfind(cv, cgen, cugen, csgen, flags, (KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INDROP), &kwq);
	if (error == 0) {
		uint32_t updatebits = 0;
		ksyn_waitq_element_t nkwe = NULL;
		
		ksyn_wqlock(kwq);
		
		// update L, U and S...
		UPDATE_CVKWQ(kwq, cgen, cugen, csgen);

		PTHREAD_TRACE(psynch_cvar_signal | DBG_FUNC_START, kwq->kw_addr,
				fromseq, uptoseq, broadcast);

		if (!broadcast) {
			// No need to signal if the CV is already balanced.
			if (diff_genseq(kwq->kw_lword, kwq->kw_sword)) {
				// May drop/retake the lock and may upgrade to broadcast;
				// any spare allocation comes back in nkwe.
				ksyn_cvsignal(kwq, th, uptoseq, fromseq, &updatebits,
						&broadcast, &nkwe);
				PTHREAD_TRACE(psynch_cvar_signal, kwq->kw_addr, broadcast, 0,0);
			}
		}
		
		if (broadcast) {
			ksyn_handle_cvbroad(kwq, uptoseq, &updatebits);
		}
		
		kwq->kw_sword += (updatebits & PTHRW_COUNT_MASK);
		// set C or P bits and free if needed
		ksyn_cvupdate_fixup(kwq, &updatebits);
		*retval = updatebits;

		PTHREAD_TRACE(psynch_cvar_signal | DBG_FUNC_END, kwq->kw_addr,
				updatebits, 0, 0);
		
		ksyn_wqunlock(kwq);

		pthread_kern->psynch_wait_cleanup();
		
		// Free the spare prepost element, if ksyn_cvsignal left one over.
		if (nkwe != NULL) {
			zfree(kwe_zone, nkwe);
		}
		
		ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_CVAR));
	}
	
	if (th != NULL) {
		thread_deallocate(th);
	}
	
	return error;
}
1098  
1099  /*
1100   * psynch_cvbroad: This system call is used for broadcast posting on blocked waiters of psynch cvars.
1101   */
1102  int
1103  _psynch_cvbroad(__unused proc_t p, user_addr_t cv, uint64_t cvlsgen,
1104  		uint64_t cvudgen, uint32_t flags, __unused user_addr_t mutex,
1105  		__unused uint64_t mugen, __unused uint64_t tid, uint32_t *retval)
1106  {
1107  	uint32_t diffgen = cvudgen & 0xffffffff;
1108  	uint32_t count = diffgen >> PTHRW_COUNT_SHIFT;
1109  	if (count > pthread_kern->get_task_threadmax()) {
1110  		__FAILEDUSERTEST__("cvbroad: difference greater than maximum possible thread count\n");
1111  		return EBUSY;
1112  	}
1113  	
1114  	uint32_t csgen = (cvlsgen >> 32) & 0xffffffff;
1115  	uint32_t cgen = cvlsgen & 0xffffffff;
1116  	uint32_t cugen = (cvudgen >> 32) & 0xffffffff;
1117  	
1118  	return __psynch_cvsignal(cv, cgen, cugen, csgen, flags, 1, 0, retval);
1119  }
1120  
1121  /*
1122   * psynch_cvsignal: This system call is used for signalling the blocked waiters of psynch cvars.
1123   */
1124  int
1125  _psynch_cvsignal(__unused proc_t p, user_addr_t cv, uint64_t cvlsgen,
1126  		 uint32_t cvugen, int threadport, __unused user_addr_t mutex,
1127  		 __unused uint64_t mugen, __unused uint64_t tid, uint32_t flags,
1128  		 uint32_t *retval)
1129  {
1130  	uint32_t csgen = (cvlsgen >> 32) & 0xffffffff;
1131  	uint32_t cgen = cvlsgen & 0xffffffff;
1132  	
1133  	return __psynch_cvsignal(cv, cgen, cvugen, csgen, flags, 0, threadport, retval);
1134  }
1135  
1136  /*
1137   * psynch_cvwait: This system call is used for psynch cvar waiters to block in kernel.
1138   */
1139  int
1140  _psynch_cvwait(__unused proc_t p, user_addr_t cv, uint64_t cvlsgen,
1141  		uint32_t cvugen, user_addr_t mutex, uint64_t mugen, uint32_t flags,
1142  		int64_t sec, uint32_t nsec, uint32_t *retval)
1143  {
1144  	int error = 0;
1145  	uint32_t updatebits = 0;
1146  	ksyn_wait_queue_t ckwq = NULL;
1147  	ksyn_waitq_element_t kwe, nkwe = NULL;
1148  	
1149  	/* for conformance reasons */
1150  	pthread_kern->__pthread_testcancel(0);
1151  	
1152  	uint32_t csgen = (cvlsgen >> 32) & 0xffffffff;
1153  	uint32_t cgen = cvlsgen & 0xffffffff;
1154  	uint32_t ugen = (mugen >> 32) & 0xffffffff;
1155  	uint32_t mgen = mugen & 0xffffffff;
1156  	
1157  	uint32_t lockseq = (cgen & PTHRW_COUNT_MASK);
1158  	
1159  	/*
1160  	 * In cvwait U word can be out of range as cv could be used only for
1161  	 * timeouts. However S word needs to be within bounds and validated at
1162  	 * user level as well.
1163  	 */
1164  	if (is_seqhigher_eq(csgen, lockseq) != 0) {
1165  		__FAILEDUSERTEST__("psync_cvwait; invalid sequence numbers\n");
1166  		return EINVAL;
1167  	}
1168  
1169  	PTHREAD_TRACE(psynch_cvar_kwait | DBG_FUNC_START, cv, mutex, cgen, 0);
1170  	
1171  	error = ksyn_wqfind(cv, cgen, cvugen, csgen, flags, KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INWAIT, &ckwq);
1172  	if (error != 0) {
1173  		return error;
1174  	}
1175  	
1176  	if (mutex != 0) {
1177  		uint32_t mutexrv = 0;
1178  		error = _psynch_mutexdrop(NULL, mutex, mgen, ugen, 0, flags, &mutexrv);
1179  		if (error != 0) {
1180  			goto out;
1181  		}
1182  	}
1183  	
1184  	ksyn_wqlock(ckwq);
1185  	
1186  	// update L, U and S...
1187  	UPDATE_CVKWQ(ckwq, cgen, cvugen, csgen);
1188  	
1189  	/* Look for the sequence for prepost (or conflicting thread */
1190  	ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITE];
1191  	kwe = ksyn_queue_find_cvpreposeq(kq, lockseq);
1192  	if (kwe != NULL) {
1193  		if (kwe->kwe_state == KWE_THREAD_PREPOST) {
1194  			if ((kwe->kwe_lockseq & PTHRW_COUNT_MASK) == lockseq) {
1195  				/* we can safely consume a reference, so do so */
1196  				if (--kwe->kwe_count == 0) {
1197  					ksyn_queue_remove_item(ckwq, kq, kwe);
1198  					ckwq->kw_fakecount--;
1199  					nkwe = kwe;
1200  				}
1201  			} else {
1202  				/*
1203  				 * consuming a prepost higher than our lock sequence is valid, but
1204  				 * can leave the higher thread without a match. Convert the entry
1205  				 * to a broadcast to compensate for this.
1206  				 */
1207  				ksyn_handle_cvbroad(ckwq, kwe->kwe_lockseq, &updatebits);
1208  #if __TESTPANICS__
1209  				if (updatebits != 0)
1210  					panic("psync_cvwait: convert pre-post to broadcast: woke up %d threads that shouldn't be there\n", updatebits);
1211  #endif /* __TESTPANICS__ */
1212  			}
1213  		} else if (kwe->kwe_state == KWE_THREAD_BROADCAST) {
1214  			// XXX
1215  			// Nothing to do.
1216  		} else if (kwe->kwe_state == KWE_THREAD_INWAIT) {
1217  			__FAILEDUSERTEST__("cvwait: thread entry with same sequence already present\n");
1218  			error = EBUSY;
1219  		} else {
1220  			panic("psync_cvwait: unexpected wait queue element type\n");
1221  		}
1222  		
1223  		if (error == 0) {
1224  			updatebits |= PTHRW_INC;
1225  			ckwq->kw_sword += PTHRW_INC;
1226  			
1227  			/* set C or P bits and free if needed */
1228  			ksyn_cvupdate_fixup(ckwq, &updatebits);
1229  			*retval = updatebits;
1230  		}
1231  	} else {
1232  		uint64_t abstime = 0;
1233  		uint16_t kwe_flags = 0;
1234  
1235  		if (sec != 0 || (nsec & 0x3fffffff) != 0) {
1236  			struct timespec ts;
1237  			ts.tv_sec = (__darwin_time_t)sec;
1238  			ts.tv_nsec = (nsec & 0x3fffffff);
1239  			nanoseconds_to_absolutetime(
1240  					(uint64_t)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec, &abstime);
1241  			clock_absolutetime_interval_to_deadline(abstime, &abstime);
1242  		}
1243  
1244  		PTHREAD_TRACE(psynch_cvar_kwait, cv, mutex, kwe_flags, 1);
1245  		
1246  		error = ksyn_wait(ckwq, KSYN_QUEUE_WRITE, cgen, SEQFIT, abstime,
1247  				kwe_flags, psynch_cvcontinue, kThreadWaitPThreadCondVar);
1248  		// ksyn_wait drops wait queue lock
1249  	}
1250  	
1251  	ksyn_wqunlock(ckwq);
1252  
1253  	if (nkwe != NULL) {
1254  		zfree(kwe_zone, nkwe);
1255  	}
1256  out:
1257  
1258  	PTHREAD_TRACE(psynch_cvar_kwait | DBG_FUNC_END, cv, error, updatebits, 2);
1259  
1260  	ksyn_wqrelease(ckwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_CVAR));
1261  	return error;
1262  }
1263  
1264  
/*
 * Continuation for threads blocked in _psynch_cvwait.  Entered with no locks
 * held; never returns — finishes the syscall via unix_syscall_return().
 * On interrupted/timed-out wakeups it repairs the condvar bookkeeping
 * (advancing S for the missed wakeup and flagging ECVCLEARED/ECVPREPOST in
 * the returned error so userspace can recover).
 */
void __dead2
psynch_cvcontinue(void *parameter, wait_result_t result)
{
	uthread_t uth = current_uthread();
	ksyn_wait_queue_t ckwq = parameter;
	ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);

	int error = _wait_result_to_errno(result);
	if (error != 0) {
		ksyn_wqlock(ckwq);
		/* just in case it got woken up as we were granting */
		int retval = kwe->kwe_psynchretval;
		pthread_kern->uthread_set_returnval(uth, retval);

		if (kwe->kwe_kwqqueue) {
			ksyn_queue_remove_item(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITE], kwe);
		}
		if ((kwe->kwe_psynchretval & PTH_RWL_MTX_WAIT) != 0) {
			/* the condition var granted.
			 * reset the error so that the thread returns back.
			 */
			error = 0;
			/* no need to set any bits just return as cvsig/broad covers this */
		} else {
			/* Genuine timeout/interrupt: account for the wakeup we consumed. */
			ckwq->kw_sword += PTHRW_INC;
			
			/* set C and P bits, in the local error */
			if ((ckwq->kw_lword & PTHRW_COUNT_MASK) == (ckwq->kw_sword & PTHRW_COUNT_MASK)) {
				/* L caught up with S: the condvar cycle is complete — clear
				 * all kernel state so userspace can reinitialize. */
				PTHREAD_TRACE(psynch_cvar_zeroed, ckwq->kw_addr,
						ckwq->kw_lword, ckwq->kw_sword, ckwq->kw_inqueue);
				error |= ECVCLEARED;
				if (ckwq->kw_inqueue != 0) {
					ksyn_queue_free_items(ckwq, KSYN_QUEUE_WRITE, ckwq->kw_lword, 1);
				}
				ckwq->kw_lword = ckwq->kw_uword = ckwq->kw_sword = 0;
				ckwq->kw_kflags |= KSYN_KWF_ZEROEDOUT;
			} else {
				/* everythig in the queue is a fake entry ? */
				if (ckwq->kw_inqueue != 0 && ckwq->kw_fakecount == ckwq->kw_inqueue) {
					error |= ECVPREPOST;
				}
			}
		}
		ksyn_wqunlock(ckwq);

		PTHREAD_TRACE(psynch_cvar_kwait | DBG_FUNC_END, ckwq->kw_addr,
				error, 0, 3);
	} else {
		/* Clean grant: no lock needed, just translate the wakeup value. */
		int val = 0;
		// PTH_RWL_MTX_WAIT is removed
		if ((kwe->kwe_psynchretval & PTH_RWS_CV_MBIT) != 0) {
			val = PTHRW_INC | PTH_RWS_CV_CBIT;
		}
		PTHREAD_TRACE(psynch_cvar_kwait | DBG_FUNC_END, ckwq->kw_addr,
				val, 0, 4);
		pthread_kern->uthread_set_returnval(uth, val);
	}
	
	/* Drop the kwq reference held across the wait. */
	ksyn_wqrelease(ckwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_CVAR));
	pthread_kern->unix_syscall_return(error);
	__builtin_unreachable();
}
1327  
1328  /*
1329   * psynch_cvclrprepost: This system call clears pending prepost if present.
1330   */
1331  int
1332  _psynch_cvclrprepost(__unused proc_t p, user_addr_t cv, uint32_t cvgen,
1333  		uint32_t cvugen, uint32_t cvsgen, __unused uint32_t prepocnt,
1334  		uint32_t preposeq, uint32_t flags, int *retval)
1335  {
1336  	int error = 0;
1337  	int mutex = (flags & _PTHREAD_MTX_OPT_MUTEX);
1338  	int wqtype = (mutex ? KSYN_WQTYPE_MTX : KSYN_WQTYPE_CVAR) | KSYN_WQTYPE_INDROP;
1339  	ksyn_wait_queue_t kwq = NULL;
1340  	
1341  	*retval = 0;
1342  	
1343  	error = ksyn_wqfind(cv, cvgen, cvugen, mutex ? 0 : cvsgen, flags, wqtype,
1344  			&kwq);
1345  	if (error != 0) {
1346  		return error;
1347  	}
1348  	
1349  	ksyn_wqlock(kwq);
1350  	
1351  	if (mutex) {
1352  		int firstfit = (flags & _PTHREAD_MTX_OPT_POLICY_MASK)
1353  				== _PTHREAD_MTX_OPT_POLICY_FIRSTFIT;
1354  		if (firstfit && kwq->kw_prepost.count) {
1355  			if (is_seqlower_eq(kwq->kw_prepost.lseq, cvgen)) {
1356  				PTHREAD_TRACE(psynch_mutex_kwqprepost, kwq->kw_addr,
1357  						kwq->kw_prepost.lseq, 0, 2);
1358  				_kwq_clear_preposted_wakeup(kwq);
1359  			}
1360  		}
1361  	} else {
1362  		PTHREAD_TRACE(psynch_cvar_clrprepost, kwq->kw_addr, wqtype,
1363  				preposeq, 0);
1364  		ksyn_queue_free_items(kwq, KSYN_QUEUE_WRITE, preposeq, 0);
1365  	}
1366  	
1367  	ksyn_wqunlock(kwq);
1368  	ksyn_wqrelease(kwq, 1, wqtype);
1369  	return error;
1370  }
1371  
1372  /* ***************** pthread_rwlock ************************ */
1373  
/*
 * Common blocking path for rwlock readers and writers.  type selects the
 * queue, prepost kind, and continuation.  If an interrupted wakeup, an
 * overlapping read grant, or a preposted unlock already satisfies this
 * request, it returns immediately with *retval set; otherwise the thread
 * blocks in ksyn_wait and resumes in the per-type continuation.
 */
static int
__psynch_rw_lock(int type, user_addr_t rwlock, uint32_t lgenval,
		 uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval)
{
	uint32_t lockseq = lgenval & PTHRW_COUNT_MASK;
	ksyn_wait_queue_t kwq;
	int error, prepost_type, kqi;
	thread_continue_t tc;

	if (type == PTH_RW_TYPE_READ) {
		prepost_type = KW_UNLOCK_PREPOST_READLOCK;
		kqi = KSYN_QUEUE_READ;
		tc = psynch_rw_rdcontinue;
	} else {
		prepost_type = KW_UNLOCK_PREPOST_WRLOCK;
		kqi = KSYN_QUEUE_WRITE;
		tc = psynch_rw_wrcontinue;
	}

	error = ksyn_wqfind(rwlock, lgenval, ugenval, rw_wc, flags,
			(KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_RWLOCK), &kwq);
	if (error != 0) {
		return error;
	}

	ksyn_wqlock(kwq);
	_ksyn_check_init(kwq, lgenval);
	// Fast paths that satisfy the lock without blocking; each sets *retval.
	if (_kwq_handle_interrupted_wakeup(kwq, type, lockseq, retval) ||
			// handle overlap first as they are not counted against pre_rwwc
			// handle_overlap uses the flags in lgenval (vs. lockseq)
			_kwq_handle_overlap(kwq, type, lgenval, rw_wc, retval) ||
			_kwq_handle_preposted_wakeup(kwq, prepost_type, lockseq, retval)) {
		ksyn_wqunlock(kwq);
		goto out;
	}

	block_hint_t block_hint = type == PTH_RW_TYPE_READ ?
		kThreadWaitPThreadRWLockRead : kThreadWaitPThreadRWLockWrite;
	error = ksyn_wait(kwq, kqi, lgenval, SEQFIT, 0, 0, tc, block_hint);
	// ksyn_wait drops wait queue lock
out:
	ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_RWLOCK));
	return error;
}
1418  
1419  /*
1420   * psynch_rw_rdlock: This system call is used for psync rwlock readers to block.
1421   */
1422  int
1423  _psynch_rw_rdlock(__unused proc_t p, user_addr_t rwlock, uint32_t lgenval,
1424  		uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval)
1425  {
1426  	return __psynch_rw_lock(PTH_RW_TYPE_READ, rwlock, lgenval, ugenval, rw_wc,
1427  			flags, retval);
1428  }
1429  
1430  /*
1431   * psynch_rw_longrdlock: This system call is used for psync rwlock long readers to block.
1432   */
1433  int
1434  _psynch_rw_longrdlock(__unused proc_t p, __unused user_addr_t rwlock,
1435  		__unused uint32_t lgenval, __unused uint32_t ugenval,
1436  		__unused uint32_t rw_wc, __unused int flags, __unused uint32_t *retval)
1437  {
1438  	return ESRCH;
1439  }
1440  
1441  
1442  /*
1443   * psynch_rw_wrlock: This system call is used for psync rwlock writers to block.
1444   */
1445  int
1446  _psynch_rw_wrlock(__unused proc_t p, user_addr_t rwlock, uint32_t lgenval,
1447  		uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval)
1448  {
1449  	return __psynch_rw_lock(PTH_RW_TYPE_WRITE, rwlock, lgenval, ugenval,
1450  			rw_wc, flags, retval);
1451  }
1452  
1453  /*
1454   * psynch_rw_yieldwrlock: This system call is used for psync rwlock yielding writers to block.
1455   */
1456  int
1457  _psynch_rw_yieldwrlock(__unused proc_t p, __unused user_addr_t rwlock,
1458  		__unused uint32_t lgenval, __unused uint32_t ugenval,
1459  		__unused uint32_t rw_wc, __unused int flags, __unused uint32_t *retval)
1460  {
1461  	return ESRCH;
1462  }
1463  
1464  /*
1465   * psynch_rw_unlock: This system call is used for unlock state postings. This will grant appropriate
1466   *			reader/writer variety lock.
1467   */
/*
 * Post an rwlock unlock.  If every waiter covered by the L-U window is
 * already queued, the unlock is handled now (waking the appropriate
 * readers/writers); otherwise the unlock is recorded as a prepost for
 * waiters that have not arrived yet.  Returns the update bits for
 * userspace through *retval.
 */
int
_psynch_rw_unlock(__unused proc_t p, user_addr_t rwlock, uint32_t lgenval,
		uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval)
{
	int error = 0;
	ksyn_wait_queue_t kwq;
	uint32_t updatebits = 0;
	int diff;
	uint32_t count = 0;
	uint32_t curgen = lgenval & PTHRW_COUNT_MASK;
	int clearedkflags = 0;

	error = ksyn_wqfind(rwlock, lgenval, ugenval, rw_wc, flags,
			(KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK), &kwq);
	if (error != 0) {
		return(error);
	}
	
	ksyn_wqlock(kwq);
	int isinit = _ksyn_check_init(kwq, lgenval);

	/* if lastunlock seq is set, ensure the current one is not lower than that, as it would be spurious */
	if ((kwq->kw_lastunlockseq != PTHRW_RWL_INIT) &&
			(is_seqlower(ugenval, kwq->kw_lastunlockseq)!= 0)) {
		/* Stale/spurious unlock: ignore it, report success. */
		error = 0;
		goto out;
	}
	
	/* If L-U != num of waiters, then it needs to be preposted or spr */
	diff = find_diff(lgenval, ugenval);
	
	if (find_seq_till(kwq, curgen, diff, &count) == 0) {
		if ((count == 0) || (count < (uint32_t)diff))
			goto prepost;
	}
	
	/* no prepost and all threads are in place, reset the bit */
	if ((isinit != 0) && ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) != 0)){
		kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED;
		clearedkflags = 1;
	}
	
	/* can handle unlock now */
	
	_kwq_clear_preposted_wakeup(kwq);
	
	error = kwq_handle_unlock(kwq, lgenval, rw_wc, &updatebits, 0, NULL, 0);
#if __TESTPANICS__
	if (error != 0)
		panic("psynch_rw_unlock: kwq_handle_unlock failed %d\n",error);
#endif /* __TESTPANICS__ */
out:
	if (error == 0) {
		/* update bits?? */
		*retval = updatebits;
	}

	// <rdar://problem/22244050> If any of the wakeups failed because they
	// already returned to userspace because of a signal then we need to ensure
	// that the reset state is not cleared when that thread returns. Otherwise,
	// _pthread_rwlock_lock will clear the interrupted state before it is read.
	if (clearedkflags != 0 && kwq->kw_intr.count > 0) {
		kwq->kw_kflags |= KSYN_KWF_INITCLEARED;
	}
	
	ksyn_wqunlock(kwq);
	pthread_kern->psynch_wait_cleanup();
	ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK));
	
	return(error);
	
prepost:
	/* Not all covered waiters are queued yet: record the unlock for later. */
	/* update if the new seq is higher than prev prepost, or first set */
	if (is_rws_sbit_set(kwq->kw_prepost.sseq) ||
			is_seqhigher_eq(rw_wc, kwq->kw_prepost.sseq)) {
		_kwq_mark_preposted_wakeup(kwq, diff - count, curgen, rw_wc);
		updatebits = lgenval;	/* let this not do unlock handling */
	}
	error = 0;
	goto out;
}
1549  
1550  
1551  /* ************************************************************************** */
1552  void
1553  pth_global_hashinit(void)
1554  {
1555  	pth_glob_hashtbl = hashinit(PTH_HASHSIZE * 4, M_PROC, &pthhash);
1556  }
1557  
1558  void
1559  _pth_proc_hashinit(proc_t p)
1560  {
1561  	void *ptr = hashinit(PTH_HASHSIZE, M_PCB, &pthhash);
1562  	if (ptr == NULL) {
1563  		panic("pth_proc_hashinit: hash init returned 0\n");
1564  	}
1565  	
1566  	pthread_kern->proc_set_pthhash(p, ptr);
1567  }
1568  
1569  
1570  static int
1571  ksyn_wq_hash_lookup(user_addr_t uaddr, proc_t p, int flags,
1572  		ksyn_wait_queue_t *out_kwq, struct pthhashhead **out_hashptr,
1573  		uint64_t object, uint64_t offset)
1574  {
1575  	int res = 0;
1576  	ksyn_wait_queue_t kwq;
1577  	struct pthhashhead *hashptr;
1578  	if ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED) {
1579  		hashptr = pth_glob_hashtbl;
1580  		LIST_FOREACH(kwq, &hashptr[object & pthhash], kw_hash) {
1581  			if (kwq->kw_object == object && kwq->kw_offset == offset) {
1582  				break;
1583  			}
1584  		}
1585  	} else {
1586  		hashptr = pthread_kern->proc_get_pthhash(p);
1587  		LIST_FOREACH(kwq, &hashptr[uaddr & pthhash], kw_hash) {
1588  			if (kwq->kw_addr == uaddr) {
1589  				break;
1590  			}
1591  		}
1592  	}
1593  	*out_kwq = kwq;
1594  	*out_hashptr = hashptr;
1595  	return res;
1596  }
1597  
/*
 * Tear down proc p's psynch hash table at process exit: unhash and destroy
 * every remaining wait queue, then free the table itself.  The list lock is
 * dropped around each _kwq_destroy and retaken, so the per-bucket loop
 * restarts from LIST_FIRST each iteration.
 */
void
_pth_proc_hashdelete(proc_t p)
{
	struct pthhashhead * hashptr;
	ksyn_wait_queue_t kwq;
	unsigned long hashsize = pthhash + 1;
	unsigned long i;
	
	/* Detach the table first so no new entries can be added via p. */
	hashptr = pthread_kern->proc_get_pthhash(p);
	pthread_kern->proc_set_pthhash(p, NULL);
	if (hashptr == NULL) {
		return;
	}
	
	pthread_list_lock();
	for(i= 0; i < hashsize; i++) {
		while ((kwq = LIST_FIRST(&hashptr[i])) != NULL) {
			if ((kwq->kw_pflags & KSYN_WQ_INHASH) != 0) {
				kwq->kw_pflags &= ~KSYN_WQ_INHASH;
				LIST_REMOVE(kwq, kw_hash);
			}
			if ((kwq->kw_pflags & KSYN_WQ_FLIST) != 0) {
				kwq->kw_pflags &= ~KSYN_WQ_FLIST;
				LIST_REMOVE(kwq, kw_list);
			}
			/* Drop the list lock while destroying this kwq. */
			pthread_list_unlock();
			/* release fake entries if present for cvars */
			if (((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) && (kwq->kw_inqueue != 0))
				ksyn_freeallkwe(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITE]);
			_kwq_destroy(kwq);
			pthread_list_lock();
		}
	}
	pthread_list_unlock();
	FREE(hashptr, M_PROC);
}
1634  
1635  /* no lock held for this as the waitqueue is getting freed */
1636  void
1637  ksyn_freeallkwe(ksyn_queue_t kq)
1638  {
1639  	ksyn_waitq_element_t kwe;
1640  	while ((kwe = TAILQ_FIRST(&kq->ksynq_kwelist)) != NULL) {
1641  		TAILQ_REMOVE(&kq->ksynq_kwelist, kwe, kwe_list);
1642  		if (kwe->kwe_state != KWE_THREAD_INWAIT) {
1643  			zfree(kwe_zone, kwe);
1644  		}
1645  	}
1646  }
1647  
/*
 * Emit diagnostics for each way kwq is still busy (preposts, interrupted
 * wakeups, outstanding references, queued waiters).  Used when a user
 * address is being reused for a different synchroniser type while the old
 * wait queue is still in use; the caller reports EINVAL.
 */
static inline void
_kwq_report_inuse(ksyn_wait_queue_t kwq)
{
	if (kwq->kw_prepost.count != 0) {
		__FAILEDUSERTEST2__("uaddr 0x%llx busy for synch type 0x%x [pre %d:0x%x:0x%x]",
				(uint64_t)kwq->kw_addr, kwq->kw_type, kwq->kw_prepost.count,
				kwq->kw_prepost.lseq, kwq->kw_prepost.sseq);
		PTHREAD_TRACE(psynch_mutex_kwqcollision, kwq->kw_addr,
				kwq->kw_type, 1, 0);
	}
	if (kwq->kw_intr.count != 0) {
		__FAILEDUSERTEST2__("uaddr 0x%llx busy for synch type 0x%x [intr %d:0x%x:0x%x:0x%x]",
				(uint64_t)kwq->kw_addr, kwq->kw_type, kwq->kw_intr.count,
				kwq->kw_intr.type, kwq->kw_intr.seq,
				kwq->kw_intr.returnbits);
		PTHREAD_TRACE(psynch_mutex_kwqcollision, kwq->kw_addr,
				kwq->kw_type, 2, 0);
	}
	if (kwq->kw_iocount) {
		__FAILEDUSERTEST2__("uaddr 0x%llx busy for synch type 0x%x [ioc %d:%d]",
				(uint64_t)kwq->kw_addr, kwq->kw_type, kwq->kw_iocount,
				kwq->kw_dropcount);
		PTHREAD_TRACE(psynch_mutex_kwqcollision, kwq->kw_addr,
				kwq->kw_type, 3, 0);
	}
	if (kwq->kw_inqueue) {
		__FAILEDUSERTEST2__("uaddr 0x%llx busy for synch type 0x%x [inq %d:%d]",
				(uint64_t)kwq->kw_addr, kwq->kw_type, kwq->kw_inqueue,
				kwq->kw_fakecount);
		PTHREAD_TRACE(psynch_mutex_kwqcollision, kwq->kw_addr, kwq->kw_type,
				4, 0);
	}
}
1681  
/* find kernel waitqueue, if not present create one. Grants a reference  */
/*
 * Loop structure: each pass takes the list lock and looks up uaddr.
 *   - Not found, no spare: drop the lock, allocate a fresh kwq, retry.
 *   - Not found, spare in hand: insert the spare into the hash.
 *   - Found: reuse it; if it was typed differently and is idle it may be
 *     reinitialized, or we sleep until pending droppers finish, or the
 *     collision is reported and EINVAL returned.
 * On success kw_iocount is bumped (the reference granted to the caller;
 * dropped by ksyn_wqrelease).  An unused spare allocation is destroyed at
 * the end.
 */
int
ksyn_wqfind(user_addr_t uaddr, uint32_t mgen, uint32_t ugen, uint32_t sgen,
		int flags, int wqtype, ksyn_wait_queue_t *kwqp)
{
	int res = 0;
	ksyn_wait_queue_t kwq = NULL;
	ksyn_wait_queue_t nkwq = NULL;
	struct pthhashhead *hashptr;
	proc_t p = current_proc();
	
	/* Shared objects are keyed by VM (object, offset), not user address. */
	uint64_t object = 0, offset = 0;
	if ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED) {
		res = ksyn_findobj(uaddr, &object, &offset);
		hashptr = pth_glob_hashtbl;
	} else {
		hashptr = pthread_kern->proc_get_pthhash(p);
	}

	while (res == 0) {
		pthread_list_lock();
		res = ksyn_wq_hash_lookup(uaddr, current_proc(), flags, &kwq, &hashptr,
				object, offset);
		if (res != 0) {
			pthread_list_unlock();
			break;
		}
		if (kwq == NULL && nkwq == NULL) {
			// Drop the lock to allocate a new kwq and retry.
			pthread_list_unlock();

			nkwq = (ksyn_wait_queue_t)zalloc(kwq_zone);
			bzero(nkwq, sizeof(struct ksyn_wait_queue));
			int i;
			for (i = 0; i < KSYN_QUEUE_MAX; i++) {
				ksyn_queue_init(&nkwq->kw_ksynqueues[i]);
			}
			lck_spin_init(&nkwq->kw_lock, pthread_lck_grp, pthread_lck_attr);
			continue;
		} else if (kwq == NULL && nkwq != NULL) {
			// Still not found, add the new kwq to the hash.
			kwq = nkwq;
			nkwq = NULL; // Don't free.
			if ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED) {
				kwq->kw_pflags |= KSYN_WQ_SHARED;
				LIST_INSERT_HEAD(&hashptr[object & pthhash], kwq, kw_hash);
			} else {
				LIST_INSERT_HEAD(&hashptr[uaddr & pthhash], kwq, kw_hash);
			}
			kwq->kw_pflags |= KSYN_WQ_INHASH;
		} else if (kwq != NULL) {
			// Found an existing kwq, use it.
			if ((kwq->kw_pflags & KSYN_WQ_FLIST) != 0) {
				// Take it back off the free list.
				LIST_REMOVE(kwq, kw_list);
				kwq->kw_pflags &= ~KSYN_WQ_FLIST;
			}
			if ((kwq->kw_type & KSYN_WQTYPE_MASK) != (wqtype & KSYN_WQTYPE_MASK)) {
				// The address is being reused for a different synchroniser
				// type; only safe if the old queue is completely idle.
				if (!_kwq_is_used(kwq)) {
					if (kwq->kw_iocount == 0) {
						kwq->kw_type = 0; // mark for reinitialization
					} else if (kwq->kw_iocount == 1 &&
							kwq->kw_dropcount == kwq->kw_iocount) {
						/* if all users are unlockers then wait for it to finish */
						kwq->kw_pflags |= KSYN_WQ_WAITING;
						// Drop the lock and wait for the kwq to be free.
						(void)msleep(&kwq->kw_pflags, pthread_list_mlock,
								PDROP, "ksyn_wqfind", 0);
						continue;
					} else {
						_kwq_report_inuse(kwq);
						res = EINVAL;
					}
				} else {
					_kwq_report_inuse(kwq);
					res = EINVAL;
				}
			}
		}
		if (res == 0) {
			if (kwq->kw_type == 0) {
				// Fresh or reinitialized queue: seed it from the caller's
				// generation words.
				kwq->kw_addr = uaddr;
				kwq->kw_object = object;
				kwq->kw_offset = offset;
				kwq->kw_type = (wqtype & KSYN_WQTYPE_MASK);
				CLEAR_REINIT_BITS(kwq);
				kwq->kw_lword = mgen;
				kwq->kw_uword = ugen;
				kwq->kw_sword = sgen;
				kwq->kw_owner = THREAD_NULL;
				kwq->kw_kflags = 0;
				kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED;
				PTHREAD_TRACE(psynch_mutex_kwqallocate | DBG_FUNC_START, uaddr,
						kwq->kw_type, kwq, 0);
				PTHREAD_TRACE(psynch_mutex_kwqallocate | DBG_FUNC_END, uaddr,
						mgen, ugen, sgen);
			}
			kwq->kw_iocount++;
			if (wqtype == KSYN_WQTYPE_MUTEXDROP) {
				kwq->kw_dropcount++;
			}
		}
		pthread_list_unlock();
		break;
	}
	if (kwqp != NULL) {
		*kwqp = kwq;
	}
	// Destroy the spare allocation if the race was lost.
	if (nkwq) {
		_kwq_destroy(nkwq);
	}
	return res;
}
1794  
/*
 * Drop the iocount reference taken by the find path. When the last
 * reference goes away and the queue is otherwise idle, either park the
 * kwq on the delayed free list (qfreenow == 0) or unhash it and destroy
 * it immediately.
 */
void
ksyn_wqrelease(ksyn_wait_queue_t kwq, int qfreenow, int wqtype)
{
	uint64_t deadline;
	ksyn_wait_queue_t free_elem = NULL;
	
	pthread_list_lock();
	if (wqtype == KSYN_WQTYPE_MUTEXDROP) {
		kwq->kw_dropcount--;
	}
	if (--kwq->kw_iocount == 0) {
		if ((kwq->kw_pflags & KSYN_WQ_WAITING) != 0) {
			/* some one is waiting for the waitqueue, wake them up */
			kwq->kw_pflags &= ~KSYN_WQ_WAITING;
			wakeup(&kwq->kw_pflags);
		}
		
		if (!_kwq_is_used(kwq)) {
			if (kwq->kw_turnstile) {
				panic("kw_turnstile still non-null upon release");
			}

			PTHREAD_TRACE(psynch_mutex_kwqdeallocate | DBG_FUNC_START,
					kwq->kw_addr, kwq->kw_type, qfreenow, 0);
			PTHREAD_TRACE(psynch_mutex_kwqdeallocate | DBG_FUNC_END,
					kwq->kw_addr, kwq->kw_lword, kwq->kw_uword, kwq->kw_sword);

			if (qfreenow == 0) {
				/* Deferred free: timestamp the entry, park it on the
				 * free list and arm the cleanup callout if idle. */
				microuptime(&kwq->kw_ts);
				LIST_INSERT_HEAD(&pth_free_list, kwq, kw_list);
				kwq->kw_pflags |= KSYN_WQ_FLIST;
				if (psynch_cleanupset == 0) {
					struct timeval t;
					microuptime(&t);
					t.tv_sec += KSYN_CLEANUP_DEADLINE;
					deadline = tvtoabstime(&t);
					thread_call_enter_delayed(psynch_thcall, deadline);
					psynch_cleanupset = 1;
				}
			} else {
				/* Immediate free: unhash now; destroy only after the
				 * list lock is dropped below. */
				kwq->kw_pflags &= ~KSYN_WQ_INHASH;
				LIST_REMOVE(kwq, kw_hash);
				free_elem = kwq;
			}
		}
	}
	pthread_list_unlock();
	if (free_elem != NULL) {
		_kwq_destroy(free_elem);
	}
}
1847  
1848  /* responsible to free the waitqueues */
1849  void
1850  psynch_wq_cleanup(__unused void *param, __unused void * param1)
1851  {
1852  	ksyn_wait_queue_t kwq, tmp;
1853  	struct timeval t;
1854  	int reschedule = 0;
1855  	uint64_t deadline = 0;
1856  	LIST_HEAD(, ksyn_wait_queue) freelist;
1857  	LIST_INIT(&freelist);
1858  
1859  	pthread_list_lock();
1860  	
1861  	microuptime(&t);
1862  	
1863  	LIST_FOREACH(kwq, &pth_free_list, kw_list) {
1864  		if (_kwq_is_used(kwq) || kwq->kw_iocount != 0) {
1865  			// still in use
1866  			continue;
1867  		}
1868  		__darwin_time_t diff = t.tv_sec - kwq->kw_ts.tv_sec;
1869  		if (diff < 0)
1870  			diff *= -1;
1871  		if (diff >= KSYN_CLEANUP_DEADLINE) {
1872  			kwq->kw_pflags &= ~(KSYN_WQ_FLIST | KSYN_WQ_INHASH);
1873  			LIST_REMOVE(kwq, kw_hash);
1874  			LIST_REMOVE(kwq, kw_list);
1875  			LIST_INSERT_HEAD(&freelist, kwq, kw_list);
1876  		} else {
1877  			reschedule = 1;
1878  		}
1879  		
1880  	}
1881  	if (reschedule != 0) {
1882  		t.tv_sec += KSYN_CLEANUP_DEADLINE;
1883  		deadline = tvtoabstime(&t);
1884  		thread_call_enter_delayed(psynch_thcall, deadline);
1885  		psynch_cleanupset = 1;
1886  	} else {
1887  		psynch_cleanupset = 0;
1888  	}
1889  	pthread_list_unlock();
1890  
1891  	LIST_FOREACH_SAFE(kwq, &freelist, kw_list, tmp) {
1892  		_kwq_destroy(kwq);
1893  	}
1894  }
1895  
1896  static int
1897  _wait_result_to_errno(wait_result_t result)
1898  {
1899  	int res = 0;
1900  	switch (result) {
1901  		case THREAD_TIMED_OUT:
1902  			res = ETIMEDOUT;
1903  			break;
1904  		case THREAD_INTERRUPTED:
1905  			res = EINTR;
1906  			break;
1907  	}
1908  	return res;
1909  }
1910  
/*
 * Enqueue the calling thread on one of kwq's sub-queues and block.
 * Entered with the kwq lock held; the lock is dropped before blocking.
 * On enqueue failure the error is returned to the caller (lock dropped);
 * otherwise this function does not return — control resumes in
 * `continuation` after wakeup.
 */
int
ksyn_wait(ksyn_wait_queue_t kwq, kwq_queue_type_t kqi, uint32_t lockseq,
		int fit, uint64_t abstime, uint16_t kwe_flags,
		thread_continue_t continuation, block_hint_t block_hint)
{
	thread_t th = current_thread();
	uthread_t uth = pthread_kern->get_bsdthread_info(th);
	struct turnstile **tstore = NULL;
	int res;

	assert(continuation != THREAD_CONTINUE_NULL);

	/* Set up this thread's per-uthread wait element. */
	ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);
	bzero(kwe, sizeof(*kwe));
	kwe->kwe_count = 1;
	kwe->kwe_lockseq = lockseq & PTHRW_COUNT_MASK;
	kwe->kwe_state = KWE_THREAD_INWAIT;
	kwe->kwe_uth = uth;
	kwe->kwe_thread = th;
	kwe->kwe_flags = kwe_flags;

	res = ksyn_queue_insert(kwq, kqi, kwe, lockseq, fit);
	if (res != 0) {
		//panic("psynch_rw_wrlock: failed to enqueue\n"); // XXX
		ksyn_wqunlock(kwq);
		return res;
	}

	PTHREAD_TRACE(psynch_mutex_kwqwait, kwq->kw_addr, kwq->kw_inqueue,
			kwq->kw_prepost.count, kwq->kw_intr.count);

	if (_kwq_use_turnstile(kwq)) {
		// pthread mutexes and rwlocks both (at least sometimes) know their
		// owner and can use turnstiles. Otherwise, we pass NULL as the
		// tstore to the shims so they wait on the global waitq.
		tstore = &kwq->kw_turnstile;
	}

	pthread_kern->psynch_wait_prepare((uintptr_t)kwq, tstore, kwq->kw_owner,
			block_hint, abstime);

	ksyn_wqunlock(kwq);

	/* Finish turnstile setup only after dropping the kwq lock. */
	if (tstore) {
		pthread_kern->psynch_wait_update_complete(kwq->kw_turnstile);
	}
	
	thread_block_parameter(continuation, kwq);

	// NOT REACHED
	panic("ksyn_wait continuation returned");
	__builtin_unreachable();
}
1964  
/*
 * Wake a single waiter on the given sub-queue, handing it `updateval`
 * as its psynch return value. If kwe is NULL the first queued element is
 * chosen. Called with the kwq lock held; panics if the queue is empty or
 * the chosen element is not a waiting thread.
 */
kern_return_t
ksyn_signal(ksyn_wait_queue_t kwq, kwq_queue_type_t kqi,
		ksyn_waitq_element_t kwe, uint32_t updateval)
{
	kern_return_t ret;
	struct turnstile **tstore = NULL;

	// If no wait element was specified, wake the first.
	if (!kwe) {
		kwe = TAILQ_FIRST(&kwq->kw_ksynqueues[kqi].ksynq_kwelist);
		if (!kwe) {
			panic("ksyn_signal: panic signaling empty queue");
		}
	}

	if (kwe->kwe_state != KWE_THREAD_INWAIT) {
		panic("ksyn_signal: panic signaling non-waiting element");
	}

	/* Dequeue first, then publish the wakeup value for the waiter. */
	ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[kqi], kwe);
	kwe->kwe_psynchretval = updateval;

	if (_kwq_use_turnstile(kwq)) {
		tstore = &kwq->kw_turnstile;
	}

	ret = pthread_kern->psynch_wait_wakeup(kwq, kwe, tstore);

	/* KERN_NOT_WAITING is an expected race (waiter already gone). */
	if (ret != KERN_SUCCESS && ret != KERN_NOT_WAITING) {
		panic("ksyn_signal: panic waking up thread %x\n", ret);
	}
	return ret;
}
1998  
1999  int
2000  ksyn_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp)
2001  {
2002  	kern_return_t ret;
2003  	vm_page_info_basic_data_t info;
2004  	mach_msg_type_number_t count = VM_PAGE_INFO_BASIC_COUNT;
2005  	ret = pthread_kern->vm_map_page_info((pthread_kern->current_map)(), uaddr,
2006  			VM_PAGE_INFO_BASIC, (vm_page_info_t)&info, &count);
2007  	if (ret != KERN_SUCCESS) {
2008  		return EINVAL;
2009  	}
2010  	
2011  	if (objectp != NULL) {
2012  		*objectp = (uint64_t)info.object_id;
2013  	}
2014  	if (offsetp != NULL) {
2015  		*offsetp = (uint64_t)info.offset;
2016  	}
2017  	
2018  	return(0);
2019  }
2020  
2021  
/*
 * Find the lowest pending sequence number among the reader and writer
 * queues (considering a preposted sequence `premgen` when the
 * KW_UNLOCK_PREPOST_* flags say so). Fills `lowest[]` per queue (0 when
 * that queue is empty and not preposted) and reports via *typep which
 * queue types are populated plus the type holding the overall lowest.
 */
int
kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen,
		int *typep, uint32_t lowest[])
{
	uint32_t kw_fr, kw_fwr, low;
	int type = 0, lowtype, typenum[2] = { 0 };
	uint32_t numbers[2] = { 0 };
	int count = 0, i;
	
	if ((kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count != 0) ||
			((flags & KW_UNLOCK_PREPOST_READLOCK) != 0)) {
		type |= PTH_RWSHFT_TYPE_READ;
		/* read entries are present */
		if (kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count != 0) {
			kw_fr = kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_firstnum;
			/* a preposted read below the queued first wins */
			if (((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) &&
					(is_seqlower(premgen, kw_fr) != 0))
				kw_fr = premgen;
		} else
			kw_fr = premgen;
		
		lowest[KSYN_QUEUE_READ] = kw_fr;
		numbers[count]= kw_fr;
		typenum[count] = PTH_RW_TYPE_READ;
		count++;
	} else
		lowest[KSYN_QUEUE_READ] = 0;
	
	if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_count != 0) ||
			((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0)) {
		type |= PTH_RWSHFT_TYPE_WRITE;
		/* write entries are present */
		if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_count != 0) {
			kw_fwr = kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_firstnum;
			/* a preposted write below the queued first wins */
			if (((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) &&
					(is_seqlower(premgen, kw_fwr) != 0))
				kw_fwr = premgen;
		} else
			kw_fwr = premgen;
		
		lowest[KSYN_QUEUE_WRITE] = kw_fwr;
		numbers[count]= kw_fwr;
		typenum[count] = PTH_RW_TYPE_WRITE;
		count++;
	} else
		lowest[KSYN_QUEUE_WRITE] = 0;
	
#if __TESTPANICS__
	if (count == 0)
		panic("nothing in the queue???\n");
#endif /* __TESTPANICS__ */
	
	/* pick the overall lowest of the candidates gathered above */
	low = numbers[0];
	lowtype = typenum[0];
	if (count > 1) {
		for (i = 1; i< count; i++) {
			if (is_seqlower(numbers[i] , low) != 0) {
				low = numbers[i];
				lowtype = typenum[i];
			}
		}
	}
	type |= lowtype;
	
	if (typep != 0)
		*typep = type;
	return(0);
}
2091  
2092  /* wakeup readers to upto the writer limits */
2093  int
2094  ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int allreaders,
2095  		uint32_t updatebits, int *wokenp)
2096  {
2097  	ksyn_queue_t kq;
2098  	int failedwakeup = 0;
2099  	int numwoken = 0;
2100  	kern_return_t kret = KERN_SUCCESS;
2101  	uint32_t lbits = 0;
2102  	
2103  	lbits = updatebits;
2104  	
2105  	kq = &kwq->kw_ksynqueues[KSYN_QUEUE_READ];
2106  	while ((kq->ksynq_count != 0) &&
2107  			(allreaders || (is_seqlower(kq->ksynq_firstnum, limitread) != 0))) {
2108  		kret = ksyn_signal(kwq, KSYN_QUEUE_READ, NULL, lbits);
2109  		if (kret == KERN_NOT_WAITING) {
2110  			failedwakeup++;
2111  		}
2112  		numwoken++;
2113  	}
2114  	
2115  	if (wokenp != NULL)
2116  		*wokenp = numwoken;
2117  	return(failedwakeup);
2118  }
2119  
2120  
/*
 * This handles the unlock grants for next set on rw_unlock() or on arrival
 * of all preposted waiters. Decides, based on the lowest pending reader
 * and writer sequences, whether to grant the lock to readers or to a
 * single writer, wakes the winners, and reports the computed update bits
 * (*updatep) and whether the preposting caller should still block
 * (*blockp).
 */
int
kwq_handle_unlock(ksyn_wait_queue_t kwq, __unused uint32_t mgen, uint32_t rw_wc,
		uint32_t *updatep, int flags, int *blockp, uint32_t premgen)
{
	uint32_t low_writer, limitrdnum;
	int rwtype, error=0;
	int allreaders, nfailed;
	uint32_t updatebits=0, numneeded = 0;;
	int prepost = flags & KW_UNLOCK_PREPOST;
	thread_t preth = THREAD_NULL;
	ksyn_waitq_element_t kwe;
	uthread_t uth;
	thread_t th;
	int woken = 0;
	int block = 1;
	uint32_t lowest[KSYN_QUEUE_MAX]; /* np need for upgrade as it is handled separately */
	kern_return_t kret = KERN_SUCCESS;
	ksyn_queue_t kq;
	int curthreturns = 0;
	
	if (prepost != 0) {
		preth = current_thread();
	}
	
	/* record the unlock sequence and clear the overlap guard */
	kq = &kwq->kw_ksynqueues[KSYN_QUEUE_READ];
	kwq->kw_lastseqword = rw_wc;
	kwq->kw_lastunlockseq = (rw_wc & PTHRW_COUNT_MASK);
	kwq->kw_kflags &= ~KSYN_KWF_OVERLAP_GUARD;
	
	error = kwq_find_rw_lowest(kwq, flags, premgen, &rwtype, lowest);
#if __TESTPANICS__
	if (error != 0)
		panic("rwunlock: cannot fails to slot next round of threads");
#endif /* __TESTPANICS__ */
	
	low_writer = lowest[KSYN_QUEUE_WRITE];
	
	allreaders = 0;
	updatebits = 0;
	
	switch (rwtype & PTH_RW_TYPE_MASK) {
		case PTH_RW_TYPE_READ: {
			/* readers win: wake every reader below the first writer
			 * (or all readers when no writer is pending) */
			// XXX
			/* what about the preflight which is LREAD or READ ?? */
			if ((rwtype & PTH_RWSHFT_TYPE_MASK) != 0) {
				if (rwtype & PTH_RWSHFT_TYPE_WRITE) {
					/* a writer is still pending; advertise it */
					updatebits |= (PTH_RWL_WBIT | PTH_RWL_KBIT);
				}
			}
			limitrdnum = 0;
			if ((rwtype & PTH_RWSHFT_TYPE_WRITE) != 0) {
				limitrdnum = low_writer;
			} else {
				allreaders = 1;
			}
			
			numneeded = 0;
			
			/* count how many readers will be granted (including the
			 * preposting caller when it qualifies) */
			if ((rwtype & PTH_RWSHFT_TYPE_WRITE) != 0) {
				limitrdnum = low_writer;
				numneeded = ksyn_queue_count_tolowest(kq, limitrdnum);
				if (((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) && (is_seqlower(premgen, limitrdnum) != 0)) {
					curthreturns = 1;
					numneeded += 1;
				}
			} else {
				// no writers at all
				// no other waiters only readers
				kwq->kw_kflags |= KSYN_KWF_OVERLAP_GUARD;
				numneeded += kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count;
				if ((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) {
					curthreturns = 1;
					numneeded += 1;
				}
			}
			
			updatebits += (numneeded << PTHRW_COUNT_SHIFT);
			
			kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits;
			
			if (curthreturns != 0) {
				/* the preposting caller gets the lock too; don't block */
				block = 0;
				uth = current_uthread();
				kwe = pthread_kern->uthread_get_uukwe(uth);
				kwe->kwe_psynchretval = updatebits;
			}
			
			
			nfailed = ksyn_wakeupreaders(kwq, limitrdnum, allreaders,
					updatebits, &woken);
			if (nfailed != 0) {
				/* some wakeups raced with aborts; remember for later */
				_kwq_mark_interruped_wakeup(kwq, KWQ_INTR_READ, nfailed,
						limitrdnum, updatebits);
			}
			
			error = 0;
			
			if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_count != 0) && 
					((updatebits & PTH_RWL_WBIT) == 0)) {
				panic("kwq_handle_unlock: writer pending but no writebit set %x\n", updatebits);
			}
		}
			break;
			
		case PTH_RW_TYPE_WRITE: {
			
			/* only one thread is goin to be granted */
			updatebits |= (PTHRW_INC);
			updatebits |= PTH_RWL_KBIT| PTH_RWL_EBIT;
			
			if (((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) && (low_writer == premgen)) {
				/* the preposting caller is itself the lowest writer */
				block = 0;
				if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_count != 0) {
					updatebits |= PTH_RWL_WBIT;
				}
				th = preth;
				uth = pthread_kern->get_bsdthread_info(th);
				kwe = pthread_kern->uthread_get_uukwe(uth);
				kwe->kwe_psynchretval = updatebits;
			} else {
				/* we are not granting writelock to the preposting thread */
				/* if there are writers present or the preposting write thread then W bit is to be set */
				if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_count > 1 ||
				    (flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) {
					updatebits |= PTH_RWL_WBIT;
				}
				/* setup next in the queue */
				kret = ksyn_signal(kwq, KSYN_QUEUE_WRITE, NULL, updatebits);
				if (kret == KERN_NOT_WAITING) {
					_kwq_mark_interruped_wakeup(kwq, KWQ_INTR_WRITE, 1,
							low_writer, updatebits);
				}
				error = 0;
			}
			kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits;
			if ((updatebits & (PTH_RWL_KBIT | PTH_RWL_EBIT)) != 
					(PTH_RWL_KBIT | PTH_RWL_EBIT)) {
				panic("kwq_handle_unlock: writer lock granted but no ke set %x\n", updatebits);
			}
		}
			break;
			
		default:
			panic("rwunlock: invalid type for lock grants");
			
	};
	
	if (updatep != NULL)
		*updatep = updatebits;
	if (blockp != NULL)
		*blockp = block;
	return(error);
}
2278  
2279  /************* Indiv queue support routines ************************/
2280  void
2281  ksyn_queue_init(ksyn_queue_t kq)
2282  {
2283  	TAILQ_INIT(&kq->ksynq_kwelist);
2284  	kq->ksynq_count = 0;
2285  	kq->ksynq_firstnum = 0;
2286  	kq->ksynq_lastnum = 0;
2287  }
2288  
/*
 * Insert a wait element into the sub-queue `kqi`, keeping the queue
 * ordered by sequence number unless FIRSTFIT (arrival order) is
 * requested. Maintains the cached first/last sequence numbers and the
 * kwq-wide low/high range. Returns 0 on success, EBUSY on a duplicate
 * sequence (unless it replaces a cancelled waiter's prepost), ESRCH if
 * no insertion point is found.
 */
int
ksyn_queue_insert(ksyn_wait_queue_t kwq, int kqi, ksyn_waitq_element_t kwe,
		uint32_t mgen, int fit)
{
	ksyn_queue_t kq = &kwq->kw_ksynqueues[kqi];
	uint32_t lockseq = mgen & PTHRW_COUNT_MASK;
	int res = 0;

	if (kwe->kwe_kwqqueue != NULL) {
		panic("adding enqueued item to another queue");
	}

	if (kq->ksynq_count == 0) {
		/* empty queue: trivially becomes first and last */
		TAILQ_INSERT_HEAD(&kq->ksynq_kwelist, kwe, kwe_list);
		kq->ksynq_firstnum = lockseq;
		kq->ksynq_lastnum = lockseq;
	} else if (fit == FIRSTFIT) {
		/* TBD: if retry bit is set for mutex, add it to the head */
		/* firstfit, arriving order */
		TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list);
		if (is_seqlower(lockseq, kq->ksynq_firstnum)) {
			kq->ksynq_firstnum = lockseq;
		}
		if (is_seqhigher(lockseq, kq->ksynq_lastnum)) {
			kq->ksynq_lastnum = lockseq;
		}
	} else if (lockseq == kq->ksynq_firstnum || lockseq == kq->ksynq_lastnum) {
		/* During prepost when a thread is getting cancelled, we could have
		 * two with same seq */
		res = EBUSY;
		if (kwe->kwe_state == KWE_THREAD_PREPOST) {
			ksyn_waitq_element_t tmp = ksyn_queue_find_seq(kwq, kq, lockseq);
			if (tmp != NULL && tmp->kwe_uth != NULL &&
					pthread_kern->uthread_is_cancelled(tmp->kwe_uth)) {
				/* duplicate belongs to a cancelled waiter; allow it */
				TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list);
				res = 0;
			}
		}
	} else if (is_seqlower(kq->ksynq_lastnum, lockseq)) { // XXX is_seqhigher
		/* higher than everything queued: append */
		TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list);
		kq->ksynq_lastnum = lockseq;
	} else if (is_seqlower(lockseq, kq->ksynq_firstnum)) {
		/* lower than everything queued: prepend */
		TAILQ_INSERT_HEAD(&kq->ksynq_kwelist, kwe, kwe_list);
		kq->ksynq_firstnum = lockseq;
	} else {
		/* somewhere in the middle: scan for the first higher entry */
		ksyn_waitq_element_t q_kwe, r_kwe;
		
		res = ESRCH;
		TAILQ_FOREACH_SAFE(q_kwe, &kq->ksynq_kwelist, kwe_list, r_kwe) {
			if (is_seqhigher(q_kwe->kwe_lockseq, lockseq)) {
				TAILQ_INSERT_BEFORE(q_kwe, kwe, kwe_list);
				res = 0;
				break;
			}
		}
	}
	
	if (res == 0) {
		/* successful insertion: update bookkeeping */
		kwe->kwe_kwqqueue = kwq;
		kq->ksynq_count++;
		kwq->kw_inqueue++;
		update_low_high(kwq, lockseq);
	}
	return res;
}
2354  
/*
 * Unlink a wait element from its sub-queue and refresh the cached
 * first/last sequence numbers of the queue as well as the kwq-wide
 * low/high sequence range. Panics on an empty or mismatched queue.
 */
void
ksyn_queue_remove_item(ksyn_wait_queue_t kwq, ksyn_queue_t kq,
		ksyn_waitq_element_t kwe)
{
	if (kq->ksynq_count == 0) {
		panic("removing item from empty queue");
	}

	if (kwe->kwe_kwqqueue != kwq) {
		panic("removing item from wrong queue");
	}

	TAILQ_REMOVE(&kq->ksynq_kwelist, kwe, kwe_list);
	/* scrub the linkage so stale pointers can't be followed */
	kwe->kwe_list.tqe_next = NULL;
	kwe->kwe_list.tqe_prev = NULL;
	kwe->kwe_kwqqueue = NULL;
	
	if (--kq->ksynq_count > 0) {
		/* recompute the queue's first/last from its new endpoints */
		ksyn_waitq_element_t tmp;
		tmp = TAILQ_FIRST(&kq->ksynq_kwelist);
		kq->ksynq_firstnum = tmp->kwe_lockseq & PTHRW_COUNT_MASK;
		tmp = TAILQ_LAST(&kq->ksynq_kwelist, ksynq_kwelist_head);
		kq->ksynq_lastnum = tmp->kwe_lockseq & PTHRW_COUNT_MASK;
	} else {
		kq->ksynq_firstnum = 0;
		kq->ksynq_lastnum = 0;
	}
	
	if (--kwq->kw_inqueue > 0) {
		/* recompute kwq-wide extremes only if this element held them */
		uint32_t curseq = kwe->kwe_lockseq & PTHRW_COUNT_MASK;
		if (kwq->kw_lowseq == curseq) {
			kwq->kw_lowseq = find_nextlowseq(kwq);
		}
		if (kwq->kw_highseq == curseq) {
			kwq->kw_highseq = find_nexthighseq(kwq);
		}
	} else {
		kwq->kw_lowseq = 0;
		kwq->kw_highseq = 0;
	}
}
2396  
2397  ksyn_waitq_element_t
2398  ksyn_queue_find_seq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq,
2399  		uint32_t seq)
2400  {
2401  	ksyn_waitq_element_t kwe;
2402  	
2403  	// XXX: should stop searching when higher sequence number is seen
2404  	TAILQ_FOREACH(kwe, &kq->ksynq_kwelist, kwe_list) {
2405  		if ((kwe->kwe_lockseq & PTHRW_COUNT_MASK) == seq) {
2406  			return kwe;
2407  		}
2408  	}
2409  	return NULL;
2410  }
2411  
2412  /* find the thread at the target sequence (or a broadcast/prepost at or above) */
2413  ksyn_waitq_element_t
2414  ksyn_queue_find_cvpreposeq(ksyn_queue_t kq, uint32_t cgen)
2415  {
2416  	ksyn_waitq_element_t result = NULL;
2417  	ksyn_waitq_element_t kwe;
2418  	uint32_t lgen = (cgen & PTHRW_COUNT_MASK);
2419  	
2420  	TAILQ_FOREACH(kwe, &kq->ksynq_kwelist, kwe_list) {
2421  		if (is_seqhigher_eq(kwe->kwe_lockseq, cgen)) {
2422  			result = kwe;
2423  			
2424  			// KWE_THREAD_INWAIT must be strictly equal
2425  			if (kwe->kwe_state == KWE_THREAD_INWAIT &&
2426  					(kwe->kwe_lockseq & PTHRW_COUNT_MASK) != lgen) {
2427  				result = NULL;
2428  			}
2429  			break;
2430  		}
2431  	}
2432  	return result;
2433  }
2434  
2435  /* look for a thread at lockseq, a */
2436  ksyn_waitq_element_t
2437  ksyn_queue_find_signalseq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq,
2438  		uint32_t uptoseq, uint32_t signalseq)
2439  {
2440  	ksyn_waitq_element_t result = NULL;
2441  	ksyn_waitq_element_t q_kwe, r_kwe;
2442  	
2443  	// XXX
2444  	/* case where wrap in the tail of the queue exists */
2445  	TAILQ_FOREACH_SAFE(q_kwe, &kq->ksynq_kwelist, kwe_list, r_kwe) {
2446  		if (q_kwe->kwe_state == KWE_THREAD_PREPOST) {
2447  			if (is_seqhigher(q_kwe->kwe_lockseq, uptoseq)) {
2448  				return result;
2449  			}
2450  		}
2451  		if (q_kwe->kwe_state == KWE_THREAD_PREPOST |
2452  				q_kwe->kwe_state == KWE_THREAD_BROADCAST) {
2453  			/* match any prepost at our same uptoseq or any broadcast above */
2454  			if (is_seqlower(q_kwe->kwe_lockseq, uptoseq)) {
2455  				continue;
2456  			}
2457  			return q_kwe;
2458  		} else if (q_kwe->kwe_state == KWE_THREAD_INWAIT) {
2459  			/*
2460  			 * Match any (non-cancelled) thread at or below our upto sequence -
2461  			 * but prefer an exact match to our signal sequence (if present) to
2462  			 * keep exact matches happening.
2463  			 */
2464  			if (is_seqhigher(q_kwe->kwe_lockseq, uptoseq)) {
2465  				return result;
2466  			}
2467  			if (q_kwe->kwe_kwqqueue == kwq) {
2468  				if (!pthread_kern->uthread_is_cancelled(q_kwe->kwe_uth)) {
2469  					/* if equal or higher than our signal sequence, return this one */
2470  					if (is_seqhigher_eq(q_kwe->kwe_lockseq, signalseq)) {
2471  						return q_kwe;
2472  					}
2473  					
2474  					/* otherwise, just remember this eligible thread and move on */
2475  					if (result == NULL) {
2476  						result = q_kwe;
2477  					}
2478  				}
2479  			}
2480  		} else {
2481  			panic("ksyn_queue_find_signalseq(): unknown wait queue element type (%d)\n", q_kwe->kwe_state);
2482  		}
2483  	}
2484  	return result;
2485  }
2486  
/*
 * Drain sub-queue `kqi` up to sequence `upto` (or entirely when `all` is
 * set): waiting threads are signaled as spurious wakeups, while fake
 * (prepost/broadcast) entries are removed and freed.
 */
void
ksyn_queue_free_items(ksyn_wait_queue_t kwq, int kqi, uint32_t upto, int all)
{
	ksyn_waitq_element_t kwe;
	uint32_t tseq = upto & PTHRW_COUNT_MASK;
	ksyn_queue_t kq = &kwq->kw_ksynqueues[kqi];
	uint32_t freed = 0, signaled = 0;

	PTHREAD_TRACE(psynch_cvar_freeitems | DBG_FUNC_START, kwq->kw_addr,
			kqi, upto, all);
	
	while ((kwe = TAILQ_FIRST(&kq->ksynq_kwelist)) != NULL) {
		if (all == 0 && is_seqhigher(kwe->kwe_lockseq, tseq)) {
			break;
		}
		if (kwe->kwe_state == KWE_THREAD_INWAIT) {
			/*
			 * This scenario is typically noticed when the cvar is
			 * reinited and the new waiters are waiting. We can
			 * return them as spurious wait so the cvar state gets
			 * reset correctly.
			 */

			PTHREAD_TRACE(psynch_cvar_freeitems, kwq->kw_addr, kwe,
					kwq->kw_inqueue, 1);
			
			/* skip canceled ones */
			/* wake the rest */
			/* set M bit to indicate to waking CV to retun Inc val */
			(void)ksyn_signal(kwq, kqi, kwe,
					PTHRW_INC | PTH_RWS_CV_MBIT | PTH_RWL_MTX_WAIT);
			signaled++;
		} else {
			/* fake entry (prepost/broadcast): unlink and free it */
			PTHREAD_TRACE(psynch_cvar_freeitems, kwq->kw_addr, kwe,
					kwq->kw_inqueue, 2);
			ksyn_queue_remove_item(kwq, kq, kwe);
			zfree(kwe_zone, kwe);
			kwq->kw_fakecount--;
			freed++;
		}
	}

	PTHREAD_TRACE(psynch_cvar_freeitems | DBG_FUNC_END, kwq->kw_addr, freed,
			signaled, kwq->kw_inqueue);
}
2532  
2533  /*************************************************************************/
2534  
2535  void
2536  update_low_high(ksyn_wait_queue_t kwq, uint32_t lockseq)
2537  {
2538  	if (kwq->kw_inqueue == 1) {
2539  		kwq->kw_lowseq = lockseq;
2540  		kwq->kw_highseq = lockseq;
2541  	} else {
2542  		if (is_seqlower(lockseq, kwq->kw_lowseq)) {
2543  			kwq->kw_lowseq = lockseq;
2544  		}
2545  		if (is_seqhigher(lockseq, kwq->kw_highseq)) {
2546  			kwq->kw_highseq = lockseq;
2547  		}
2548  	}
2549  }
2550  
2551  uint32_t
2552  find_nextlowseq(ksyn_wait_queue_t kwq)
2553  {
2554  	uint32_t lowest = 0;
2555  	int first = 1;
2556  	int i;
2557  	
2558  	for (i = 0; i < KSYN_QUEUE_MAX; i++) {
2559  		if (kwq->kw_ksynqueues[i].ksynq_count > 0) {
2560  			uint32_t current = kwq->kw_ksynqueues[i].ksynq_firstnum;
2561  			if (first || is_seqlower(current, lowest)) {
2562  				lowest = current;
2563  				first = 0;
2564  			}
2565  		}
2566  	}
2567  	
2568  	return lowest;
2569  }
2570  
2571  uint32_t
2572  find_nexthighseq(ksyn_wait_queue_t kwq)
2573  {
2574  	uint32_t highest = 0;
2575  	int first = 1;
2576  	int i;
2577  	
2578  	for (i = 0; i < KSYN_QUEUE_MAX; i++) {
2579  		if (kwq->kw_ksynqueues[i].ksynq_count > 0) {
2580  			uint32_t current = kwq->kw_ksynqueues[i].ksynq_lastnum;
2581  			if (first || is_seqhigher(current, highest)) {
2582  				highest = current;
2583  				first = 0;
2584  			}
2585  		}
2586  	}
2587  	
2588  	return highest;
2589  }
2590  
2591  int
2592  find_seq_till(ksyn_wait_queue_t kwq, uint32_t upto, uint32_t nwaiters,
2593  		uint32_t *countp)
2594  {
2595  	int i;
2596  	uint32_t count = 0;
2597  	
2598  	for (i = 0; i< KSYN_QUEUE_MAX; i++) {
2599  		count += ksyn_queue_count_tolowest(&kwq->kw_ksynqueues[i], upto);
2600  		if (count >= nwaiters) {
2601  			break;
2602  		}
2603  	}
2604  	
2605  	if (countp != NULL) {
2606  		*countp = count;
2607  	}
2608  	
2609  	if (count == 0) {
2610  		return 0;
2611  	} else if (count >= nwaiters) {
2612  		return 1;
2613  	} else {
2614  		return 0;
2615  	}
2616  }
2617  
2618  
2619  uint32_t
2620  ksyn_queue_count_tolowest(ksyn_queue_t kq, uint32_t upto)
2621  {
2622  	uint32_t i = 0;
2623  	ksyn_waitq_element_t kwe, newkwe;
2624  	
2625  	if (kq->ksynq_count == 0 || is_seqhigher(kq->ksynq_firstnum, upto)) {
2626  		return 0;
2627  	}
2628  	if (upto == kq->ksynq_firstnum) {
2629  		return 1;
2630  	}
2631  	TAILQ_FOREACH_SAFE(kwe, &kq->ksynq_kwelist, kwe_list, newkwe) {
2632  		uint32_t curval = (kwe->kwe_lockseq & PTHRW_COUNT_MASK);
2633  		if (is_seqhigher(curval, upto)) {
2634  			break;
2635  		}
2636  		++i;
2637  		if (upto == curval) {
2638  			break;
2639  		}
2640  	}
2641  	return i;
2642  }
2643  
/*
 * Handle a condvar broadcast: wake every non-cancelled waiter at or
 * below `upto`, discard fake (broadcast/prepost) entries in that range,
 * and, if L != S, record a broadcast prepost entry so late arrivals are
 * handled. Called with the ckwq lock held; the lock may be dropped and
 * re-acquired to allocate a wait element (hence the retry loop).
 * Accumulates the wakeup count into *updatep.
 */
void
ksyn_handle_cvbroad(ksyn_wait_queue_t ckwq, uint32_t upto, uint32_t *updatep)
{
	ksyn_waitq_element_t kwe, newkwe;
	uint32_t updatebits = 0;
	ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITE];
	
	/* local scratch list of elements to free / reuse */
	struct ksyn_queue kfreeq;
	ksyn_queue_init(&kfreeq);

	PTHREAD_TRACE(psynch_cvar_broadcast | DBG_FUNC_START, ckwq->kw_addr, upto,
			ckwq->kw_inqueue, 0);
	
retry:
	TAILQ_FOREACH_SAFE(kwe, &kq->ksynq_kwelist, kwe_list, newkwe) {
		if (is_seqhigher(kwe->kwe_lockseq, upto)) {
			// outside our range
			break;
		}

		if (kwe->kwe_state == KWE_THREAD_INWAIT) {
			// Wake only non-canceled threads waiting on this CV.
			if (!pthread_kern->uthread_is_cancelled(kwe->kwe_uth)) {
				PTHREAD_TRACE(psynch_cvar_broadcast, ckwq->kw_addr, kwe, 0, 1);
				(void)ksyn_signal(ckwq, KSYN_QUEUE_WRITE, kwe, PTH_RWL_MTX_WAIT);
				updatebits += PTHRW_INC;
			}
		} else if (kwe->kwe_state == KWE_THREAD_BROADCAST ||
			   kwe->kwe_state == KWE_THREAD_PREPOST) {
			/* fake entry: move it to the scratch list for reuse/free */
			PTHREAD_TRACE(psynch_cvar_broadcast, ckwq->kw_addr, kwe,
					kwe->kwe_state, 2);
			ksyn_queue_remove_item(ckwq, kq, kwe);
			TAILQ_INSERT_TAIL(&kfreeq.ksynq_kwelist, kwe, kwe_list);
			ckwq->kw_fakecount--;
		} else {
			panic("unknown kwe state\n");
		}
	}
	
	/* Need to enter a broadcast in the queue (if not already at L == S) */
	
	if (diff_genseq(ckwq->kw_lword, ckwq->kw_sword)) {
		PTHREAD_TRACE(psynch_cvar_broadcast, ckwq->kw_addr, ckwq->kw_lword,
				ckwq->kw_sword, 3);

		newkwe = TAILQ_FIRST(&kfreeq.ksynq_kwelist);
		if (newkwe == NULL) {
			/* no reusable element: drop the lock to allocate one, then
			 * rescan since the queue may have changed meanwhile */
			ksyn_wqunlock(ckwq);
			newkwe = (ksyn_waitq_element_t)zalloc(kwe_zone);
			TAILQ_INSERT_TAIL(&kfreeq.ksynq_kwelist, newkwe, kwe_list);
			ksyn_wqlock(ckwq);
			goto retry;
		} else {
			TAILQ_REMOVE(&kfreeq.ksynq_kwelist, newkwe, kwe_list);
			ksyn_prepost(ckwq, newkwe, KWE_THREAD_BROADCAST, upto);
			PTHREAD_TRACE(psynch_cvar_broadcast, ckwq->kw_addr, newkwe, 0, 4);
		}
	}
	
	// free up any remaining things stumbled across above
	while ((kwe = TAILQ_FIRST(&kfreeq.ksynq_kwelist)) != NULL) {
		TAILQ_REMOVE(&kfreeq.ksynq_kwelist, kwe, kwe_list);
		zfree(kwe_zone, kwe);
	}

	PTHREAD_TRACE(psynch_cvar_broadcast | DBG_FUNC_END, ckwq->kw_addr,
			updatebits, 0, 0);
	
	if (updatep != NULL) {
		*updatep |= updatebits;
	}
}
2717  
/*
 * Post-update fixup for a condvar kwq: when the lock and signal
 * generations have caught up (L == S) the queue is drained and the
 * generation words are zeroed (C bit reported); when only fake
 * (prepost/broadcast) entries remain, the P bit is reported instead.
 */
void
ksyn_cvupdate_fixup(ksyn_wait_queue_t ckwq, uint32_t *updatebits)
{
	if ((ckwq->kw_lword & PTHRW_COUNT_MASK) == (ckwq->kw_sword & PTHRW_COUNT_MASK)) {
		if (ckwq->kw_inqueue != 0) {
			/* FREE THE QUEUE */
			ksyn_queue_free_items(ckwq, KSYN_QUEUE_WRITE, ckwq->kw_lword, 0);
#if __TESTPANICS__
			if (ckwq->kw_inqueue != 0)
				panic("ksyn_cvupdate_fixup: L == S, but entries in queue beyond S");
#endif /* __TESTPANICS__ */
		}
		/* reset the condvar's generation state */
		ckwq->kw_lword = ckwq->kw_uword = ckwq->kw_sword = 0;
		ckwq->kw_kflags |= KSYN_KWF_ZEROEDOUT;
		*updatebits |= PTH_RWS_CV_CBIT;
	} else if (ckwq->kw_inqueue != 0 && ckwq->kw_fakecount == ckwq->kw_inqueue) {
		// only fake entries are present in the queue
		*updatebits |= PTH_RWS_CV_PBIT;
	}
}
2738  
2739  void
2740  psynch_zoneinit(void)
2741  {
2742  	kwq_zone = zinit(sizeof(struct ksyn_wait_queue),
2743  			8192 * sizeof(struct ksyn_wait_queue), 4096, "ksyn_wait_queue");
2744  	kwe_zone = zinit(sizeof(struct ksyn_waitq_element),
2745  			8192 * sizeof(struct ksyn_waitq_element), 4096, "ksyn_waitq_element");
2746  }
2747  
2748  void *
2749  _pthread_get_thread_kwq(thread_t thread)
2750  {
2751  	assert(thread);
2752  	struct uthread * uthread = pthread_kern->get_bsdthread_info(thread);
2753  	assert(uthread);
2754  	ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uthread);
2755  	assert(kwe);
2756  	ksyn_wait_queue_t kwq = kwe->kwe_kwqqueue;
2757  	return kwq;
2758  }
2759  
2760  /* This function is used by stackshot to determine why a thread is blocked, and report
2761   * who owns the object that the thread is blocked on. It should *only* be called if the
2762   * `block_hint' field in the relevant thread's struct is populated with something related
2763   * to pthread sync objects.
2764   */
2765  void
2766  _pthread_find_owner(thread_t thread,
2767  		struct stackshot_thread_waitinfo * waitinfo)
2768  {
2769  	ksyn_wait_queue_t kwq = _pthread_get_thread_kwq(thread);
2770  	switch (waitinfo->wait_type) {
2771  		case kThreadWaitPThreadMutex:
2772  			assert((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_MTX);
2773  			waitinfo->owner   = thread_tid(kwq->kw_owner);
2774  			waitinfo->context = kwq->kw_addr;
2775  			break;
2776  		/* Owner of rwlock not stored in kernel space due to races. Punt
2777  		 * and hope that the userspace address is helpful enough. */
2778  		case kThreadWaitPThreadRWLockRead:
2779  		case kThreadWaitPThreadRWLockWrite:
2780  			assert((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_RWLOCK);
2781  			waitinfo->owner   = 0;
2782  			waitinfo->context = kwq->kw_addr;
2783  			break;
2784  		/* Condvars don't have owners, so just give the userspace address. */
2785  		case kThreadWaitPThreadCondVar:
2786  			assert((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR);
2787  			waitinfo->owner   = 0;
2788  			waitinfo->context = kwq->kw_addr;
2789  			break;
2790  		case kThreadWaitNone:
2791  		default:
2792  			waitinfo->owner = 0;
2793  			waitinfo->context = 0;
2794  			break;
2795  	}
2796  }
2797  
#ifdef __DARLING__
/*
 * Darling-specific teardown hook: if the dying thread's waitq element is
 * still linked onto a ksyn wait queue, unlink it so the queue never keeps
 * a reference to a dead thread.
 */
void dtape_psynch_thread_dying(thread_t thread, struct ksyn_waitq_element* kwe) {
	if (kwe->kwe_kwqqueue) {
		ksyn_wait_queue_t kwq = kwe->kwe_kwqqueue;
		/* Readers park on the read queue; everything else (rwlock
		 * writers, mutexes, condvars) uses the write queue — mirrors
		 * the queue selection made when the thread blocked. */
		kq_index_t kqi = (thread->block_hint == kThreadWaitPThreadRWLockRead) ? KSYN_QUEUE_READ : KSYN_QUEUE_WRITE;
		ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[kqi], kwe);
	}
}
#endif