// src/server.cpp
  1  /**
  2   * This file is part of Darling.
  3   *
  4   * Copyright (C) 2021 Darling developers
  5   *
  6   * Darling is free software: you can redistribute it and/or modify
  7   * it under the terms of the GNU General Public License as published by
  8   * the Free Software Foundation, either version 3 of the License, or
  9   * (at your option) any later version.
 10   *
 11   * Darling is distributed in the hope that it will be useful,
 12   * but WITHOUT ANY WARRANTY; without even the implied warranty of
 13   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 14   * GNU General Public License for more details.
 15   *
 16   * You should have received a copy of the GNU General Public License
 17   * along with Darling.  If not, see <http://www.gnu.org/licenses/>.
 18   */
 19  
 20  #include <cstdint>
 21  #include <darlingserver/server.hpp>
 22  #include <sys/socket.h>
 23  #include <stdexcept>
 24  #include <errno.h>
 25  #include <cstring>
 26  #include <unistd.h>
 27  #include <sys/un.h>
 28  #include <sys/epoll.h>
 29  #include <fcntl.h>
 30  #include <system_error>
 31  #include <thread>
 32  #include <array>
 33  #include <darlingserver/registry.hpp>
 34  #include <sys/eventfd.h>
 35  #include <darlingserver/duct-tape.h>
 36  #include <sys/timerfd.h>
 37  #include <sys/mman.h>
 38  #include <sys/stat.h>
 39  #include <sys/wait.h>
 40  
 41  #include <darlingserver/logging.hpp>
 42  
 43  static DarlingServer::Server* sharedInstancePointer = nullptr;
 44  
 45  struct DTapeHooks {
 46  	static void dtape_hook_thread_suspend(void* thread_context, dtape_thread_continuation_callback_f continuationCallback, void* continuationContext, libsimple_lock_t* unlockMe) {
 47  		if (auto thread = DarlingServer::Thread::currentThread()) {
 48  			if (auto fakeThread = thread->impersonatingThread()) {
 49  				if (thread_context == fakeThread.get()) {
 50  					return dtape_hook_thread_suspend(thread.get(), continuationCallback, continuationContext, unlockMe);
 51  				}
 52  			}
 53  		}
 54  		if (continuationCallback) {
 55  			static_cast<DarlingServer::Thread*>(thread_context)->suspend([=]() {
 56  				continuationCallback(continuationContext);
 57  			}, unlockMe);
 58  		} else {
 59  			static_cast<DarlingServer::Thread*>(thread_context)->suspend(nullptr, unlockMe);
 60  		}
 61  	};
 62  
 63  	static void dtape_hook_thread_resume(void* thread_context) {
 64  		static_cast<DarlingServer::Thread*>(thread_context)->resume();
 65  	};
 66  
 67  	static dtape_task_t* dtape_hook_current_task(void) {
 68  		auto thread = DarlingServer::Thread::currentThread();
 69  		if (!thread) {
 70  			return NULL;
 71  		}
 72  		if (auto fakeThread = thread->impersonatingThread()) {
 73  			thread = fakeThread;
 74  		}
 75  		auto process = thread->process();
 76  		if (!process) {
 77  			return NULL;
 78  		}
 79  		return process->_dtapeTask;
 80  	};
 81  
 82  	static dtape_thread_t* dtape_hook_current_thread(void) {
 83  		auto thread = DarlingServer::Thread::currentThread();
 84  		if (!thread) {
 85  			return NULL;
 86  		}
 87  		if (auto fakeThread = thread->impersonatingThread()) {
 88  			thread = fakeThread;
 89  		}
 90  		return thread->_dtapeThread;
 91  	};
 92  
 93  	static void dtape_hook_timer_arm(uint64_t deadline_ns, bool override) {
 94  		auto& server = DarlingServer::Server::sharedInstance();
 95  
 96  		if (deadline_ns == UINT64_MAX) {
 97  			deadline_ns = 0;
 98  		}
 99  
100  		struct itimerspec newSpec;
101  		memset(&newSpec.it_interval, 0, sizeof(newSpec.it_interval));
102  		newSpec.it_value.tv_sec = deadline_ns / 1000000000ull;
103  		newSpec.it_value.tv_nsec = deadline_ns % 1000000000ull;
104  
105  		std::unique_lock lock(server._timerLock);
106  
107  		if (!override && server._currentTimerDeadline != 0 && deadline_ns >= server._currentTimerDeadline) {
108  			return;
109  		}
110  
111  		server._currentTimerDeadline = deadline_ns;
112  
113  		if (timerfd_settime(server._timerFD, TFD_TIMER_ABSTIME, &newSpec, NULL) < 0) {
114  			throw std::system_error(errno, std::generic_category(), "Failed to set timerfd expiration deadline");
115  		}
116  	};
117  
118  	static void dtape_hook_log(dtape_log_level_t level, const char* message) {
119  		static const auto log = DarlingServer::Log("dtape");
120  		auto process = DarlingServer::Process::currentProcess();
121  		auto thread = DarlingServer::Thread::currentThread();
122  		pid_t pid = process ? process->id() : -1;
123  		pid_t nspid = process ? process->nsid() : -1;
124  		pid_t tid = thread ? thread->id() : -1;
125  		pid_t nstid = thread ? thread->nsid() : -1;
126  		switch (level) {
127  			case dtape_log_level_debug:
128  				log.debug() << pid << "(" << nspid << "):" << tid << "(" << nstid << "): " << message << log.endLog;
129  				break;
130  			case dtape_log_level_info:
131  				log.info() << pid << "(" << nspid << "):" << tid << "(" << nstid << "): " << message << log.endLog;
132  				break;
133  			case dtape_log_level_warning:
134  				log.warning() << pid << "(" << nspid << "):" << tid << "(" << nstid << "): " << message << log.endLog;
135  				break;
136  			case dtape_log_level_error:
137  			default:
138  				log.error() << pid << "(" << nspid << "):" << tid << "(" << nstid << "): " << message << log.endLog;
139  				break;
140  		}
141  	};
142  
143  	static void dtape_hook_get_load_info(dtape_load_info_t* load_info) {
144  		load_info->task_count = DarlingServer::processRegistry().size();
145  		load_info->thread_count = DarlingServer::threadRegistry().size();
146  	};
147  
148  	static void dtape_hook_thread_terminate(void* thread_context) {
149  		static_cast<DarlingServer::Thread*>(thread_context)->terminate();
150  	};
151  
152  	static dtape_thread_t* dtape_hook_thread_create_kernel(void) {
153  		auto thread = std::make_shared<DarlingServer::Thread>(DarlingServer::Thread::KernelThreadConstructorTag());
154  		thread->registerWithProcess();
155  		DarlingServer::threadRegistry().registerEntry(thread, true);
156  		return thread->_dtapeThread;
157  	};
158  
159  	static void dtape_hook_thread_setup(void* thread_context, dtape_thread_continuation_callback_f startupCallback, void* startupCallbackContext) {
160  		static_cast<DarlingServer::Thread*>(thread_context)->setupKernelThread([=]() {
161  			startupCallback(startupCallbackContext);
162  		});
163  	};
164  
165  	static void dtape_hook_thread_set_pending_signal(void* thread_context, int pending_signal) {
166  		static_cast<DarlingServer::Thread*>(thread_context)->setPendingSignal(pending_signal);
167  	};
168  
169  	static void dtape_hook_thread_set_pending_call_override(void* thread_context, bool pending_call_override) {
170  		static_cast<DarlingServer::Thread*>(thread_context)->setPendingCallOverride(pending_call_override);
171  	};
172  
173  	static dtape_thread_t* dtape_hook_thread_lookup(int id, bool id_is_nsid, bool retain) {
174  		auto& registry = DarlingServer::threadRegistry();
175  		auto maybeThread = (id_is_nsid) ? registry.lookupEntryByNSID(id) : registry.lookupEntryByID(id);
176  		if (!maybeThread) {
177  			return nullptr;
178  		}
179  		auto thread = *maybeThread;
180  		if (retain) {
181  			dtape_thread_retain(thread->_dtapeThread);
182  		}
183  		return thread->_dtapeThread;
184  	};
185  
186  	static dtape_thread_t* dtape_hook_thread_lookup_eternal(dtape_eternal_id_t eid, bool retain) {
187  		auto& registry = DarlingServer::threadRegistry();
188  		auto maybeThread = registry.lookupEntryByEternalID(eid);
189  		if (!maybeThread) {
190  			return nullptr;
191  		}
192  		auto thread = *maybeThread;
193  		if (retain) {
194  			dtape_thread_retain(thread->_dtapeThread);
195  		}
196  		return thread->_dtapeThread;
197  	};
198  
199  	static dtape_thread_state_t dtape_hook_thread_get_state(void* thread_context) {
200  		return static_cast<dtape_thread_state_t>(static_cast<DarlingServer::Thread*>(thread_context)->getRunState());
201  	};
202  
203  	static int dtape_hook_thread_send_signal(void* thread_context, int signal) {
204  		try {
205  			static_cast<DarlingServer::Thread*>(thread_context)->sendSignal(signal);
206  			return 0;
207  		} catch (std::system_error e) {
208  			return -e.code().value();
209  		}
210  	};
211  
212  	static void dtape_hook_thread_context_dispose(void* thread_context) {
213  		static_cast<DarlingServer::Thread*>(thread_context)->_dispose();
214  	};
215  
216  	static dtape_eternal_id_t dtape_hook_thread_eternal_id(void* thread_context) {
217  		if (!thread_context) {
218  			return DarlingServer::EternalIDInvalid;
219  		}
220  		return static_cast<DarlingServer::Thread*>(thread_context)->eternalID();
221  	};
222  
223  	static void dtape_hook_current_thread_interrupt_disable(void) {
224  		DarlingServer::Thread::interruptDisable();
225  	};
226  
227  	static void dtape_hook_current_thread_interrupt_enable(void) {
228  		DarlingServer::Thread::interruptEnable();
229  	};
230  
231  	static void dtape_hook_current_thread_syscall_return(int result_code) {
232  		DarlingServer::Thread::syscallReturn(result_code);
233  	};
234  
235  	static void dtape_hook_current_thread_set_bsd_retval(uint32_t retval) {
236  		DarlingServer::Thread::currentThread()->_bsdReturnValue = retval;
237  	};
238  
239  	static bool dtape_hook_task_read_memory(void* task_context, uintptr_t remote_address, void* local_buffer, size_t length) {
240  		return static_cast<DarlingServer::Process*>(task_context)->readMemory(remote_address, local_buffer, length);
241  	};
242  
243  	static bool dtape_hook_task_write_memory(void* task_context, uintptr_t remote_address, const void* local_buffer, size_t length) {
244  		return static_cast<DarlingServer::Process*>(task_context)->writeMemory(remote_address, local_buffer, length);
245  	};
246  
247  	static dtape_task_t* dtape_hook_task_lookup(int id, bool id_is_nsid, bool retain) {
248  		auto& registry = DarlingServer::processRegistry();
249  		auto maybeProcess = (id_is_nsid) ? registry.lookupEntryByNSID(id) : registry.lookupEntryByID(id);
250  		if (!maybeProcess) {
251  			return nullptr;
252  		}
253  		auto process = *maybeProcess;
254  		if (retain) {
255  			dtape_task_retain(process->_dtapeTask);
256  		}
257  		return process->_dtapeTask;
258  	};
259  
260  	static dtape_task_t* dtape_hook_task_lookup_eternal(dtape_eternal_id_t eid, bool retain) {
261  		auto& registry = DarlingServer::processRegistry();
262  		auto maybeProcess = registry.lookupEntryByEternalID(eid);
263  		if (!maybeProcess) {
264  			return nullptr;
265  		}
266  		auto process = *maybeProcess;
267  		if (retain) {
268  			dtape_task_retain(process->_dtapeTask);
269  		}
270  		return process->_dtapeTask;
271  	};
272  
273  	static void dtape_hook_task_get_memory_info(void* task_context, dtape_memory_info_t* memory_info) {
274  		auto info = static_cast<DarlingServer::Process*>(task_context)->memoryInfo();
275  		memory_info->virtual_size = info.virtualSize;
276  		memory_info->resident_size = info.residentSize;
277  		memory_info->page_size = info.pageSize;
278  		memory_info->region_count = info.regionCount;
279  	};
280  
281  	static bool dtape_hook_task_get_memory_region_info(void* task_context, uintptr_t address, dtape_memory_region_info_t* memory_region_info) {
282  		int protection;
283  		try {
284  			auto info = static_cast<DarlingServer::Process*>(task_context)->memoryRegionInfo(address);
285  			memory_region_info->start_address = info.startAddress;
286  			memory_region_info->page_count = info.pageCount;
287  			memory_region_info->map_offset = info.mapOffset;
288  			protection = info.protection;
289  			memory_region_info->shared = info.shared;
290  		} catch (...) {
291  			return false;
292  		}
293  		memory_region_info->protection = dtape_memory_protection_none;
294  		if (protection & PROT_READ) {
295  			// for some reason, we can't just do `|=`;
296  			// the compiler complains about "can't assign `int` to `dtape_memory_protection`" or something like that
297  			memory_region_info->protection = (dtape_memory_protection_t)(memory_region_info->protection | dtape_memory_protection_read);
298  		}
299  		if (protection & PROT_WRITE) {
300  			memory_region_info->protection = (dtape_memory_protection_t)(memory_region_info->protection | dtape_memory_protection_write);
301  		}
302  		if (protection & PROT_EXEC) {
303  			memory_region_info->protection = (dtape_memory_protection_t)(memory_region_info->protection | dtape_memory_protection_execute);
304  		}
305  		return true;
306  	};
307  
308  	static uintptr_t dtape_hook_task_allocate_pages(void* task_context, size_t page_count, int protection, uintptr_t address_hint, dtape_memory_flags_t flags) {
309  		try {
310  			return static_cast<DarlingServer::Process*>(task_context)->allocatePages(page_count, protection, address_hint, flags & dtape_memory_flag_fixed, flags & dtape_memory_flag_overwrite);
311  		} catch (std::system_error e) {
312  			return 0;
313  		}
314  	};
315  
316  	static int dtape_hook_task_free_pages(void* task_context, uintptr_t address, size_t page_count) {
317  		try {
318  			static_cast<DarlingServer::Process*>(task_context)->freePages(address, page_count);
319  			return 0;
320  		} catch (std::system_error e) {
321  			return -1;
322  		}
323  	};
324  
325  	static uintptr_t dtape_hook_task_map_file(void* task_context, int fd, size_t page_count, int protection, uintptr_t address_hint, size_t page_offset, dtape_memory_flags_t flags) {
326  		try {
327  			return static_cast<DarlingServer::Process*>(task_context)->mapFile(fd, page_count, protection, address_hint, page_offset, flags & dtape_memory_flag_fixed, flags & dtape_memory_flag_overwrite);
328  		} catch (std::system_error e) {
329  			return 0;
330  		}
331  	};
332  
333  	static uintptr_t dtape_hook_task_get_next_region(void* task_context, uintptr_t address) {
334  		return static_cast<DarlingServer::Process*>(task_context)->getNextRegion(address);
335  	};
336  
337  	static bool dtape_hook_task_change_protection(void* task_context, uintptr_t address, size_t page_count, int protection) {
338  		try {
339  			static_cast<DarlingServer::Process*>(task_context)->changeProtection(address, page_count, protection);
340  			return true;
341  		} catch (std::system_error e) {
342  			return false;
343  		}
344  	};
345  
346  	static bool dtape_hook_task_sync_memory(void* task_context, uintptr_t address, size_t size, int sync_flags) {
347  		try {
348  			static_cast<DarlingServer::Process*>(task_context)->syncMemory(address, size, sync_flags);
349  			return true;
350  		} catch (std::system_error e) {
351  			return false;
352  		}
353  	};
354  
355  	static void dtape_hook_task_context_dispose(void* task_context) {
356  		static_cast<DarlingServer::Process*>(task_context)->_dispose();
357  	};
358  
359  	static dtape_eternal_id_t dtape_hook_task_eternal_id(void* task_context) {
360  		if (!task_context) {
361  			return DarlingServer::EternalIDInvalid;
362  		}
363  		return static_cast<DarlingServer::Process*>(task_context)->eternalID();
364  	};
365  
366  #if DSERVER_EXTENDED_DEBUG
367  	static void dtape_hook_task_register_name(void* task_context, uint32_t name, uintptr_t pointer) {
368  		static_cast<DarlingServer::Process*>(task_context)->_registerName(name, pointer);
369  	};
370  
371  	static void dtape_hook_task_unregister_name(void* task_context, uint32_t name) {
372  		static_cast<DarlingServer::Process*>(task_context)->_unregisterName(name);
373  	};
374  
375  	static void dtape_hook_task_add_port_set_member(void* task_context, dtape_port_set_id_t port_set, dtape_port_id_t member) {
376  		static_cast<DarlingServer::Process*>(task_context)->_addPortSetMember(port_set, member);
377  	};
378  
379  	static void dtape_hook_task_remove_port_set_member(void* task_context, dtape_port_set_id_t port_set, dtape_port_id_t member) {
380  		static_cast<DarlingServer::Process*>(task_context)->_removePortSetMember(port_set, member);
381  	};
382  
383  	static void dtape_hook_task_clear_port_set(void* task_context, dtape_port_set_id_t port_set) {
384  		static_cast<DarlingServer::Process*>(task_context)->_clearPortSet(port_set);
385  	};
386  #endif
387  
388  	static constexpr dtape_hooks_t dtape_hooks = {
389  		.current_task = dtape_hook_current_task,
390  		.current_thread = dtape_hook_current_thread,
391  
392  		.timer_arm = dtape_hook_timer_arm,
393  
394  		.log = dtape_hook_log,
395  		.get_load_info = dtape_hook_get_load_info,
396  
397  		.thread_suspend = dtape_hook_thread_suspend,
398  		.thread_resume = dtape_hook_thread_resume,
399  		.thread_terminate = dtape_hook_thread_terminate,
400  		.thread_create_kernel = dtape_hook_thread_create_kernel,
401  		.thread_setup = dtape_hook_thread_setup,
402  		.thread_set_pending_signal = dtape_hook_thread_set_pending_signal,
403  		.thread_set_pending_call_override = dtape_hook_thread_set_pending_call_override,
404  		.thread_lookup = dtape_hook_thread_lookup,
405  		.thread_lookup_eternal = dtape_hook_thread_lookup_eternal,
406  		.thread_get_state = dtape_hook_thread_get_state,
407  		.thread_send_signal = dtape_hook_thread_send_signal,
408  		.thread_context_dispose = dtape_hook_thread_context_dispose,
409  		.thread_eternal_id = dtape_hook_thread_eternal_id,
410  
411  		.current_thread_interrupt_disable = dtape_hook_current_thread_interrupt_disable,
412  		.current_thread_interrupt_enable = dtape_hook_current_thread_interrupt_enable,
413  		.current_thread_syscall_return = dtape_hook_current_thread_syscall_return,
414  		.current_thread_set_bsd_retval = dtape_hook_current_thread_set_bsd_retval,
415  
416  		.task_read_memory = dtape_hook_task_read_memory,
417  		.task_write_memory = dtape_hook_task_write_memory,
418  		.task_lookup = dtape_hook_task_lookup,
419  		.task_lookup_eternal = dtape_hook_task_lookup_eternal,
420  		.task_get_memory_info = dtape_hook_task_get_memory_info,
421  		.task_get_memory_region_info = dtape_hook_task_get_memory_region_info,
422  		.task_allocate_pages = dtape_hook_task_allocate_pages,
423  		.task_free_pages = dtape_hook_task_free_pages,
424  		.task_map_file = dtape_hook_task_map_file,
425  		.task_get_next_region = dtape_hook_task_get_next_region,
426  		.task_change_protection = dtape_hook_task_change_protection,
427  		.task_sync_memory = dtape_hook_task_sync_memory,
428  		.task_context_dispose = dtape_hook_task_context_dispose,
429  		.task_eternal_id = dtape_hook_task_eternal_id,
430  
431  #if DSERVER_EXTENDED_DEBUG
432  		.task_register_name = dtape_hook_task_register_name,
433  		.task_unregister_name = dtape_hook_task_unregister_name,
434  		.task_add_port_set_member = dtape_hook_task_add_port_set_member,
435  		.task_remove_port_set_member = dtape_hook_task_remove_port_set_member,
436  		.task_clear_port_set = dtape_hook_task_clear_port_set,
437  #endif
438  	};
439  };
440  
441  DarlingServer::Server::Server(std::string prefix):
442  	_prefix(prefix),
443  	_socketPath(_prefix + "/.darlingserver.sock"),
444  	_workQueue(std::bind(&Server::_worker, this, std::placeholders::_1))
445  {
446  	sharedInstancePointer = this;
447  
448  	// remove the old socket (if it exists)
449  	unlink(_socketPath.c_str());
450  
451  	// create the socket
452  	_listenerSocket = socket(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK | SOCK_CLOEXEC, 0);
453  	if (_listenerSocket < 0) {
454  		throw std::system_error(errno, std::generic_category(), "Failed to create socket");
455  	}
456  
457  	int passCred = 1;
458  	if (setsockopt(_listenerSocket, SOL_SOCKET, SO_PASSCRED, &passCred, sizeof(passCred)) < 0) {
459  		throw std::system_error(errno, std::generic_category(), "Failed to set SO_PASSCRED on socket");
460  	}
461  
462  	struct sockaddr_un addr;
463  	addr.sun_family = AF_UNIX;
464  	addr.sun_path[sizeof(addr.sun_path) - 1] = '\0';
465  	strncpy(addr.sun_path, _socketPath.c_str(), sizeof(addr.sun_path) - 1);
466  
467  	if (bind(_listenerSocket, (struct sockaddr*)&addr, sizeof(addr)) != 0) {
468  		throw std::system_error(errno, std::generic_category(), "Failed to bind socket");
469  	}
470  
471  	_wakeupFD = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
472  	if (_wakeupFD < 0) {
473  		throw std::system_error(errno, std::generic_category(), "Failed to create eventfd for on-demand epoll wakeups");
474  	}
475  
476  	_epollFD = epoll_create1(EPOLL_CLOEXEC);
477  	if (_epollFD < 0) {
478  		throw std::system_error(errno, std::generic_category(), "Failed to create epoll context");
479  	}
480  
481  	struct epoll_event settings;
482  	settings.data.ptr = this;
483  	settings.events = EPOLLIN | EPOLLOUT | EPOLLET;
484  
485  	if (epoll_ctl(_epollFD, EPOLL_CTL_ADD, _listenerSocket, &settings) < 0) {
486  		throw std::system_error(errno, std::generic_category(), "Failed to add listener socket to epoll context");
487  	}
488  
489  	settings.data.ptr = &_wakeupFD;
490  	settings.events = EPOLLIN | EPOLLONESHOT;
491  
492  	if (epoll_ctl(_epollFD, EPOLL_CTL_ADD, _wakeupFD, &settings) < 0) {
493  		throw std::system_error(errno, std::generic_category(), "Failed to add eventfd to epoll context");
494  	}
495  
496  	_outbox.setMessageArrivalNotificationCallback([this]() {
497  		// we don't really have to worry about the eventfd overflowing;
498  		// if it does, that means the main loop has been waiting a LONG time for the listener socket to become writable again.
499  		// in that case, we don't really care if the eventfd is being incremented; we can't send anything anyways.
500  		// once the socket becomes writable again, the eventfd will be monitored again.
501  		eventfd_write(_wakeupFD, 1);
502  	});
503  
504  	_timerFD = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC | TFD_NONBLOCK);
505  	if (_timerFD < 0) {
506  		throw std::system_error(errno, std::generic_category(), "Failed to create timer descriptor");
507  	}
508  
509  	settings.data.ptr = &_timerFD;
510  	settings.events = EPOLLIN;
511  
512  	if (epoll_ctl(_epollFD, EPOLL_CTL_ADD, _timerFD, &settings) < 0) {
513  		throw std::system_error(errno, std::generic_category(), "Failed to add timer descriptor to epoll context");
514  	}
515  };
516  
517  DarlingServer::Server::~Server() {
518  	close(_epollFD);
519  	close(_wakeupFD);
520  	close(_listenerSocket);
521  	unlink(_socketPath.c_str());
522  };
523  
524  void DarlingServer::Server::start() {
525  	Thread::interruptDisable();
526  	dtape_init(&DTapeHooks::dtape_hooks);
527  	Thread::interruptEnable();
528  
529  	// force the kernel process to be created now
530  	Process::kernelProcess();
531  
532  	// perform dtape initialization that requires a microthread context
533  	Thread::kernelSync(dtape_init_in_thread);
534  
535  	while (true) {
536  		if (_canRead) {
537  			_canRead = _inbox.receiveMany(_listenerSocket);
538  
539  			while (auto msg = _inbox.pop()) {
540  				// TODO: this could be done concurrently
541  				auto call = DarlingServer::Call::callFromMessage(std::move(*msg));
542  				if (call) {
543  					_workQueue.push(call->thread());
544  				}
545  			}
546  		}
547  
548  		// reset the eventfd by reading from it
549  		eventfd_t value;
550  		eventfd_read(_wakeupFD, &value);
551  
552  		if (_canWrite) {
553  			_canWrite = _outbox.sendMany(_listenerSocket);
554  		}
555  
556  		struct epoll_event settings;
557  		settings.data.ptr = &_wakeupFD;
558  		settings.events = (_canWrite) ? (EPOLLIN | EPOLLONESHOT) : 0;
559  
560  		if (epoll_ctl(_epollFD, EPOLL_CTL_MOD, _wakeupFD, &settings) < 0) {
561  			throw std::system_error(errno, std::generic_category(), "Failed to modify eventfd in epoll context");
562  		}
563  
564  		struct epoll_event events[16];
565  		int ret = epoll_wait(_epollFD, events, 16, -1);
566  
567  		if (ret < 0) {
568  			if (errno == EINTR) {
569  				continue;
570  			}
571  
572  			throw std::system_error(errno, std::generic_category(), "Failed to wait on epoll context");
573  		}
574  
575  		for (size_t i = 0; i < ret; ++i) {
576  			struct epoll_event* event = &events[i];
577  
578  			if (event->data.ptr == this) {
579  				if (event->events & EPOLLIN) {
580  					_canRead = true;
581  				}
582  
583  				if (event->events & EPOLLOUT) {
584  					_canWrite = true;
585  				}
586  			} else if (event->data.ptr == &_wakeupFD) {
587  				// we allow the loop to go back to the top and try to send some messages
588  				// (if _canWrite is true, the eventfd will be reset; otherwise, there's no point in resetting it)
589  			} else if (event->data.ptr == &_timerFD) {
590  				std::unique_lock lock(_timerLock);
591  				uint64_t expirations = 0;
592  
593  				if (read(_timerFD, &expirations, sizeof(expirations)) < 0) {
594  					if (errno == EAGAIN) {
595  						// spurious event?
596  						continue;
597  					}
598  
599  					throw std::system_error(errno, std::generic_category(), "Failed to read from timerfd");
600  				}
601  
602  				if (expirations < 1) {
603  					// spurious expiration?
604  					continue;
605  				}
606  
607  				// we're done handling the timerfd;
608  				// we don't need to lock anymore (and the following call might need to arm the timer again)
609  				lock.unlock();
610  
611  				// dtape_timer_fired() calls duct-taped functions that may need to wait (briefly), so it needs to be called in a microthread
612  				Thread::kernelAsync(dtape_timer_fired);
613  			} else {
614  				Monitor* monitor = static_cast<Monitor*>(event->data.ptr);
615  				std::shared_ptr<Monitor> aliveMonitor = nullptr;
616  
617  				// check whether the monitor is still valid
618  				_monitorsLock.lock();
619  				for (const auto& mon: _monitors) {
620  					if (mon.get() == monitor) {
621  						aliveMonitor = mon;
622  						break;
623  					}
624  				}
625  				_monitorsLock.unlock();
626  
627  				// if the monitor died/was removed, ignore the event
628  				if (!aliveMonitor) {
629  					continue;
630  				}
631  
632  				aliveMonitor->_callback(aliveMonitor, static_cast<Monitor::Event>(event->events & (EPOLLIN | EPOLLOUT | EPOLLERR | EPOLLHUP | EPOLLRDHUP)));
633  			}
634  		}
635  
636  		// as our final job on this wakeup, clear the list of monitors waiting to be removed.
637  		// this will destroy those references, possibly causing the monitors to be deallocated.
638  		//
639  		// it's necessary to do this instead of just removing them in removeMonitor in order to
640  		// avoid a potential race between an existing monitor being removed in removeMonitor,
641  		// another being subsequently created for the same address and added to the server,
642  		// and an event being received for the original monitor.
643  		//
644  		// since we keep a reference to the shared_ptrs until the end of this event loop iteration,
645  		// there's no chance that a new monitor will be created with the same address as a monitor
646  		// for which an event was returned in this event loop iteration.
647  		_monitorsLock.lock();
648  		_monitorsWaitingToDie.clear();
649  		_monitorsLock.unlock();
650  	}
651  
652  	// shouldn't ever be reached (exiting the main loop would be an error), but just in case
653  	dtape_deinit();
654  };
655  
656  void DarlingServer::Server::monitorProcess(std::shared_ptr<Process> process) {
657  	// the this-capture here is safe because the Server will always out-live everything else
658  	std::weak_ptr<Process> weakProcess = process;
659  	auto monitor = std::make_shared<Monitor>(process->_pidfd, Monitor::Event::Readable | Monitor::Event::HangUp, false, false, [this, weakProcess](std::shared_ptr<Monitor> thisMonitor, Monitor::Event events) {
660  		removeMonitor(thisMonitor);
661  
662  		auto process = weakProcess.lock();
663  
664  		if (!process) {
665  			// the process already died...
666  			return;
667  		}
668  
669  		process->notifyDead();
670  	});
671  
672  	addMonitor(monitor);
673  };
674  
675  DarlingServer::Server& DarlingServer::Server::sharedInstance() {
676  	return *sharedInstancePointer;
677  };
678  
679  std::string DarlingServer::Server::prefix() const {
680  	return _prefix;
681  };
682  
683  void DarlingServer::Server::_worker(std::shared_ptr<Thread> thread) {
684  	thread->doWork();
685  };
686  
687  void DarlingServer::Server::scheduleThread(std::shared_ptr<Thread> thread) {
688  	_workQueue.push(thread);
689  };
690  
691  void DarlingServer::Server::addMonitor(std::shared_ptr<Monitor> monitor) {
692  	bool valid = true;
693  
694  	_monitorsLock.lock();
695  	for (size_t i = 0; i < _monitors.size(); ++i) {
696  		if (_monitors[i].get() == monitor.get()) {
697  			valid = false;
698  			break;
699  		}
700  	}
701  
702  	if (!valid) {
703  		_monitorsLock.unlock();
704  		return;
705  	}
706  
707  	monitor->_lock.lock();
708  	struct epoll_event settings;
709  	settings.data.ptr = monitor.get();
710  	settings.events = monitor->_events;
711  
712  	if (epoll_ctl(_epollFD, EPOLL_CTL_ADD, monitor->_fd->fd(), &settings) < 0) {
713  		monitor->_lock.unlock();
714  		_monitorsLock.unlock();
715  		throw std::system_error(errno, std::generic_category(), "Failed to add descriptor to epoll context");
716  	}
717  
718  	monitor->_server = this;
719  
720  	monitor->_lock.unlock();
721  
722  	_monitors.push_back(monitor);
723  
724  	_monitorsLock.unlock();
725  };
726  
727  void DarlingServer::Server::removeMonitor(std::shared_ptr<Monitor> monitor) {
728  	bool valid = false;
729  
730  	_monitorsLock.lock();
731  	for (size_t i = 0; i < _monitors.size(); ++i) {
732  		if (_monitors[i].get() == monitor.get()) {
733  			valid = true;
734  			_monitorsWaitingToDie.push_back(monitor);
735  			_monitors.erase(_monitors.begin() + i);
736  			break;
737  		}
738  	}
739  
740  	if (!valid) {
741  		_monitorsLock.unlock();
742  		return;
743  	}
744  
745  	if (epoll_ctl(_epollFD, EPOLL_CTL_DEL, monitor->_fd->fd(), NULL) < 0) {
746  		throw std::system_error(errno, std::generic_category(), "Failed to remove descriptor from epoll context");
747  	}
748  
749  	monitor->_server = nullptr;
750  
751  	_monitorsLock.unlock();
752  
753  	// force an event loop wakeup (so the removal can be finalized as soon as possible)
754  	eventfd_write(_wakeupFD, 1);
755  };
756  
// Monitor: wraps a descriptor to be watched by the Server's epoll context.
// `events` is the user-requested event mask; `edgeTriggered`/`oneshot` add
// EPOLLET/EPOLLONESHOT to the raw epoll mask. `callback` runs on the server's
// event loop when any of the events fire. The monitor starts detached
// (_server == nullptr); Server::addMonitor() attaches it.
DarlingServer::Monitor::Monitor(std::shared_ptr<FD> descriptor, Event events, bool edgeTriggered, bool oneshot, std::function<void(std::shared_ptr<Monitor>, Event)> callback):
	_fd(descriptor),
	_userEvents(events),
	// raw epoll mask = user events plus the requested trigger-mode flags
	_events((uint32_t)events | (oneshot ? EPOLLONESHOT : 0) | (edgeTriggered ? EPOLLET : 0)),
	_callback(callback),
	_server(nullptr)
	{};
764  
765  void DarlingServer::Monitor::enable(bool edgeTriggered, bool oneshot) {
766  	std::unique_lock lock(_lock);
767  
768  	if (!_server) {
769  		return;
770  	}
771  
772  	_events = (uint32_t)_userEvents;
773  
774  	if (edgeTriggered) {
775  		_events |= EPOLLET;
776  	} else {
777  		_events &= ~EPOLLET;
778  	}
779  
780  	if (oneshot) {
781  		_events |= EPOLLONESHOT;
782  	} else {
783  		_events &= ~EPOLLONESHOT;
784  	}
785  
786  	struct epoll_event settings;
787  	settings.data.ptr = this;
788  	settings.events = _events;
789  
790  	if (epoll_ctl(_server->_epollFD, EPOLL_CTL_MOD, _fd->fd(), &settings) < 0) {
791  		throw std::system_error(errno, std::generic_category(), "Failed to modify descriptor in epoll context");
792  	}
793  };
794  
795  void DarlingServer::Monitor::disable() {
796  	std::unique_lock lock(_lock);
797  
798  	if (!_server) {
799  		return;
800  	}
801  
802  	_events = 0;
803  
804  	struct epoll_event settings;
805  	settings.data.ptr = this;
806  	settings.events = _events;
807  
808  	if (epoll_ctl(_server->_epollFD, EPOLL_CTL_MOD, _fd->fd(), &settings) < 0) {
809  		throw std::system_error(errno, std::generic_category(), "Failed to modify descriptor in epoll context");
810  	}
811  };
812  
813  std::shared_ptr<DarlingServer::FD> DarlingServer::Monitor::fd() const {
814  	return _fd;
815  };
816  
817  void DarlingServer::Server::sendMessage(Message&& message) {
818  	_outbox.push(std::move(message));
819  };