env_posix.cc
1 // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 5 #include <dirent.h> 6 #include <fcntl.h> 7 #include <pthread.h> 8 #include <sys/mman.h> 9 #include <sys/resource.h> 10 #include <sys/stat.h> 11 #include <sys/time.h> 12 #include <sys/types.h> 13 #include <unistd.h> 14 15 #include <atomic> 16 #include <cerrno> 17 #include <cstddef> 18 #include <cstdint> 19 #include <cstdio> 20 #include <cstdlib> 21 #include <cstring> 22 #include <limits> 23 #include <queue> 24 #include <set> 25 #include <string> 26 #include <thread> 27 #include <type_traits> 28 #include <utility> 29 30 #include "leveldb/env.h" 31 #include "leveldb/slice.h" 32 #include "leveldb/status.h" 33 #include "port/port.h" 34 #include "port/thread_annotations.h" 35 #include "util/env_posix_test_helper.h" 36 #include "util/posix_logger.h" 37 38 namespace leveldb { 39 40 namespace { 41 42 // Set by EnvPosixTestHelper::SetReadOnlyMMapLimit() and MaxOpenFiles(). 43 int g_open_read_only_file_limit = -1; 44 45 // Up to 4096 mmap regions for 64-bit binaries; none for 32-bit. 46 constexpr const int kDefaultMmapLimit = (sizeof(void*) >= 8) ? 4096 : 0; 47 48 // Can be set using EnvPosixTestHelper::SetReadOnlyMMapLimit(). 49 int g_mmap_limit = kDefaultMmapLimit; 50 51 // Common flags defined for all posix open operations 52 #if HAVE_O_CLOEXEC 53 constexpr const int kOpenBaseFlags = O_CLOEXEC; 54 #else 55 constexpr const int kOpenBaseFlags = 0; 56 #endif // defined(HAVE_O_CLOEXEC) 57 58 constexpr const size_t kWritableFileBufferSize = 65536; 59 60 Status PosixError(const std::string& context, int error_number) { 61 if (error_number == ENOENT) { 62 return Status::NotFound(context, std::strerror(error_number)); 63 } else { 64 return Status::IOError(context, std::strerror(error_number)); 65 } 66 } 67 68 // Helper class to limit resource usage to avoid exhaustion. 69 // Currently used to limit read-only file descriptors and mmap file usage 70 // so that we do not run out of file descriptors or virtual memory, or run into 71 // kernel performance problems for very large databases. 72 class Limiter { 73 public: 74 // Limit maximum number of resources to |max_acquires|. 75 Limiter(int max_acquires) : acquires_allowed_(max_acquires) {} 76 77 Limiter(const Limiter&) = delete; 78 Limiter operator=(const Limiter&) = delete; 79 80 // If another resource is available, acquire it and return true. 81 // Else return false. 82 bool Acquire() { 83 int old_acquires_allowed = 84 acquires_allowed_.fetch_sub(1, std::memory_order_relaxed); 85 86 if (old_acquires_allowed > 0) return true; 87 88 acquires_allowed_.fetch_add(1, std::memory_order_relaxed); 89 return false; 90 } 91 92 // Release a resource acquired by a previous call to Acquire() that returned 93 // true. 94 void Release() { acquires_allowed_.fetch_add(1, std::memory_order_relaxed); } 95 96 private: 97 // The number of available resources. 98 // 99 // This is a counter and is not tied to the invariants of any other class, so 100 // it can be operated on safely using std::memory_order_relaxed. 101 std::atomic<int> acquires_allowed_; 102 }; 103 104 // Implements sequential read access in a file using read(). 105 // 106 // Instances of this class are thread-friendly but not thread-safe, as required 107 // by the SequentialFile API. 108 class PosixSequentialFile final : public SequentialFile { 109 public: 110 PosixSequentialFile(std::string filename, int fd) 111 : fd_(fd), filename_(filename) {} 112 ~PosixSequentialFile() override { close(fd_); } 113 114 Status Read(size_t n, Slice* result, char* scratch) override { 115 Status status; 116 while (true) { 117 ::ssize_t read_size = ::read(fd_, scratch, n); 118 if (read_size < 0) { // Read error. 119 if (errno == EINTR) { 120 continue; // Retry 121 } 122 status = PosixError(filename_, errno); 123 break; 124 } 125 *result = Slice(scratch, read_size); 126 break; 127 } 128 return status; 129 } 130 131 Status Skip(uint64_t n) override { 132 if (::lseek(fd_, n, SEEK_CUR) == static_cast<off_t>(-1)) { 133 return PosixError(filename_, errno); 134 } 135 return Status::OK(); 136 } 137 138 virtual std::string GetName() const override { return filename_; } 139 140 private: 141 const int fd_; 142 const std::string filename_; 143 }; 144 145 // Implements random read access in a file using pread(). 146 // 147 // Instances of this class are thread-safe, as required by the RandomAccessFile 148 // API. Instances are immutable and Read() only calls thread-safe library 149 // functions. 150 class PosixRandomAccessFile final : public RandomAccessFile { 151 public: 152 // The new instance takes ownership of |fd|. |fd_limiter| must outlive this 153 // instance, and will be used to determine if . 154 PosixRandomAccessFile(std::string filename, int fd, Limiter* fd_limiter) 155 : has_permanent_fd_(fd_limiter->Acquire()), 156 fd_(has_permanent_fd_ ? fd : -1), 157 fd_limiter_(fd_limiter), 158 filename_(std::move(filename)) { 159 if (!has_permanent_fd_) { 160 assert(fd_ == -1); 161 ::close(fd); // The file will be opened on every read. 162 } 163 } 164 165 ~PosixRandomAccessFile() override { 166 if (has_permanent_fd_) { 167 assert(fd_ != -1); 168 ::close(fd_); 169 fd_limiter_->Release(); 170 } 171 } 172 173 Status Read(uint64_t offset, size_t n, Slice* result, 174 char* scratch) const override { 175 int fd = fd_; 176 if (!has_permanent_fd_) { 177 fd = ::open(filename_.c_str(), O_RDONLY | kOpenBaseFlags); 178 if (fd < 0) { 179 return PosixError(filename_, errno); 180 } 181 } 182 183 assert(fd != -1); 184 185 Status status; 186 ssize_t read_size = ::pread(fd, scratch, n, static_cast<off_t>(offset)); 187 *result = Slice(scratch, (read_size < 0) ? 0 : read_size); 188 if (read_size < 0) { 189 // An error: return a non-ok status. 190 status = PosixError(filename_, errno); 191 } 192 if (!has_permanent_fd_) { 193 // Close the temporary file descriptor opened earlier. 194 assert(fd != fd_); 195 ::close(fd); 196 } 197 return status; 198 } 199 200 virtual std::string GetName() const override { return filename_; } 201 202 private: 203 const bool has_permanent_fd_; // If false, the file is opened on every read. 204 const int fd_; // -1 if has_permanent_fd_ is false. 205 Limiter* const fd_limiter_; 206 const std::string filename_; 207 }; 208 209 // Implements random read access in a file using mmap(). 210 // 211 // Instances of this class are thread-safe, as required by the RandomAccessFile 212 // API. Instances are immutable and Read() only calls thread-safe library 213 // functions. 214 class PosixMmapReadableFile final : public RandomAccessFile { 215 public: 216 // mmap_base[0, length-1] points to the memory-mapped contents of the file. It 217 // must be the result of a successful call to mmap(). This instances takes 218 // over the ownership of the region. 219 // 220 // |mmap_limiter| must outlive this instance. The caller must have already 221 // acquired the right to use one mmap region, which will be released when this 222 // instance is destroyed. 223 PosixMmapReadableFile(std::string filename, char* mmap_base, size_t length, 224 Limiter* mmap_limiter) 225 : mmap_base_(mmap_base), 226 length_(length), 227 mmap_limiter_(mmap_limiter), 228 filename_(std::move(filename)) {} 229 230 ~PosixMmapReadableFile() override { 231 ::munmap(static_cast<void*>(mmap_base_), length_); 232 mmap_limiter_->Release(); 233 } 234 235 Status Read(uint64_t offset, size_t n, Slice* result, 236 char* scratch) const override { 237 if (offset + n > length_) { 238 *result = Slice(); 239 return PosixError(filename_, EINVAL); 240 } 241 242 *result = Slice(mmap_base_ + offset, n); 243 return Status::OK(); 244 } 245 246 virtual std::string GetName() const override { return filename_; } 247 248 private: 249 char* const mmap_base_; 250 const size_t length_; 251 Limiter* const mmap_limiter_; 252 const std::string filename_; 253 }; 254 255 class PosixWritableFile final : public WritableFile { 256 public: 257 PosixWritableFile(std::string filename, int fd) 258 : pos_(0), 259 fd_(fd), 260 is_manifest_(IsManifest(filename)), 261 filename_(std::move(filename)), 262 dirname_(Dirname(filename_)) {} 263 264 ~PosixWritableFile() override { 265 if (fd_ >= 0) { 266 // Ignoring any potential errors 267 Close(); 268 } 269 } 270 271 Status Append(const Slice& data) override { 272 size_t write_size = data.size(); 273 const char* write_data = data.data(); 274 275 // Fit as much as possible into buffer. 276 size_t copy_size = std::min(write_size, kWritableFileBufferSize - pos_); 277 std::memcpy(buf_ + pos_, write_data, copy_size); 278 write_data += copy_size; 279 write_size -= copy_size; 280 pos_ += copy_size; 281 if (write_size == 0) { 282 return Status::OK(); 283 } 284 285 // Can't fit in buffer, so need to do at least one write. 286 Status status = FlushBuffer(); 287 if (!status.ok()) { 288 return status; 289 } 290 291 // Small writes go to buffer, large writes are written directly. 292 if (write_size < kWritableFileBufferSize) { 293 std::memcpy(buf_, write_data, write_size); 294 pos_ = write_size; 295 return Status::OK(); 296 } 297 return WriteUnbuffered(write_data, write_size); 298 } 299 300 Status Close() override { 301 Status status = FlushBuffer(); 302 const int close_result = ::close(fd_); 303 if (close_result < 0 && status.ok()) { 304 status = PosixError(filename_, errno); 305 } 306 fd_ = -1; 307 return status; 308 } 309 310 Status Flush() override { return FlushBuffer(); } 311 312 Status Sync() override { 313 // Ensure new files referred to by the manifest are in the filesystem. 314 // 315 // This needs to happen before the manifest file is flushed to disk, to 316 // avoid crashing in a state where the manifest refers to files that are not 317 // yet on disk. 318 Status status = SyncDirIfManifest(); 319 if (!status.ok()) { 320 return status; 321 } 322 323 status = FlushBuffer(); 324 if (!status.ok()) { 325 return status; 326 } 327 328 return SyncFd(fd_, filename_, false); 329 } 330 331 private: 332 Status FlushBuffer() { 333 Status status = WriteUnbuffered(buf_, pos_); 334 pos_ = 0; 335 return status; 336 } 337 338 Status WriteUnbuffered(const char* data, size_t size) { 339 while (size > 0) { 340 ssize_t write_result = ::write(fd_, data, size); 341 if (write_result < 0) { 342 if (errno == EINTR) { 343 continue; // Retry 344 } 345 return PosixError(filename_, errno); 346 } 347 data += write_result; 348 size -= write_result; 349 } 350 return Status::OK(); 351 } 352 353 Status SyncDirIfManifest() { 354 Status status; 355 if (!is_manifest_) { 356 return status; 357 } 358 359 int fd = ::open(dirname_.c_str(), O_RDONLY | kOpenBaseFlags); 360 if (fd < 0) { 361 status = PosixError(dirname_, errno); 362 } else { 363 status = SyncFd(fd, dirname_, true); 364 ::close(fd); 365 } 366 return status; 367 } 368 369 // Ensures that all the caches associated with the given file descriptor's 370 // data are flushed all the way to durable media, and can withstand power 371 // failures. 372 // 373 // The path argument is only used to populate the description string in the 374 // returned Status if an error occurs. 375 static Status SyncFd(int fd, const std::string& fd_path, bool syncing_dir) { 376 #if HAVE_FULLFSYNC 377 // On macOS and iOS, fsync() doesn't guarantee durability past power 378 // failures. fcntl(F_FULLFSYNC) is required for that purpose. Some 379 // filesystems don't support fcntl(F_FULLFSYNC), and require a fallback to 380 // fsync(). 381 if (::fcntl(fd, F_FULLFSYNC) == 0) { 382 return Status::OK(); 383 } 384 #endif // HAVE_FULLFSYNC 385 386 #if HAVE_FDATASYNC 387 bool sync_success = ::fdatasync(fd) == 0; 388 #else 389 bool sync_success = ::fsync(fd) == 0; 390 #endif // HAVE_FDATASYNC 391 392 if (sync_success) { 393 return Status::OK(); 394 } 395 // Do not crash if filesystem can't fsync directories 396 // (see https://github.com/bitcoin/bitcoin/pull/10000) 397 if (syncing_dir && errno == EINVAL) { 398 return Status::OK(); 399 } 400 return PosixError(fd_path, errno); 401 } 402 403 // Returns the directory name in a path pointing to a file. 404 // 405 // Returns "." if the path does not contain any directory separator. 406 static std::string Dirname(const std::string& filename) { 407 std::string::size_type separator_pos = filename.rfind('/'); 408 if (separator_pos == std::string::npos) { 409 return std::string("."); 410 } 411 // The filename component should not contain a path separator. If it does, 412 // the splitting was done incorrectly. 413 assert(filename.find('/', separator_pos + 1) == std::string::npos); 414 415 return filename.substr(0, separator_pos); 416 } 417 418 // Extracts the file name from a path pointing to a file. 419 // 420 // The returned Slice points to |filename|'s data buffer, so it is only valid 421 // while |filename| is alive and unchanged. 422 static Slice Basename(const std::string& filename) { 423 std::string::size_type separator_pos = filename.rfind('/'); 424 if (separator_pos == std::string::npos) { 425 return Slice(filename); 426 } 427 // The filename component should not contain a path separator. If it does, 428 // the splitting was done incorrectly. 429 assert(filename.find('/', separator_pos + 1) == std::string::npos); 430 431 return Slice(filename.data() + separator_pos + 1, 432 filename.length() - separator_pos - 1); 433 } 434 435 // True if the given file is a manifest file. 436 static bool IsManifest(const std::string& filename) { 437 return Basename(filename).starts_with("MANIFEST"); 438 } 439 440 virtual std::string GetName() const override { return filename_; } 441 442 // buf_[0, pos_ - 1] contains data to be written to fd_. 443 char buf_[kWritableFileBufferSize]; 444 size_t pos_; 445 int fd_; 446 447 const bool is_manifest_; // True if the file's name starts with MANIFEST. 448 const std::string filename_; 449 const std::string dirname_; // The directory of filename_. 450 }; 451 452 int LockOrUnlock(int fd, bool lock) { 453 errno = 0; 454 struct ::flock file_lock_info; 455 std::memset(&file_lock_info, 0, sizeof(file_lock_info)); 456 file_lock_info.l_type = (lock ? F_WRLCK : F_UNLCK); 457 file_lock_info.l_whence = SEEK_SET; 458 file_lock_info.l_start = 0; 459 file_lock_info.l_len = 0; // Lock/unlock entire file. 460 return ::fcntl(fd, F_SETLK, &file_lock_info); 461 } 462 463 // Instances are thread-safe because they are immutable. 464 class PosixFileLock : public FileLock { 465 public: 466 PosixFileLock(int fd, std::string filename) 467 : fd_(fd), filename_(std::move(filename)) {} 468 469 int fd() const { return fd_; } 470 const std::string& filename() const { return filename_; } 471 472 private: 473 const int fd_; 474 const std::string filename_; 475 }; 476 477 // Tracks the files locked by PosixEnv::LockFile(). 478 // 479 // We maintain a separate set instead of relying on fcntl(F_SETLK) because 480 // fcntl(F_SETLK) does not provide any protection against multiple uses from the 481 // same process. 482 // 483 // Instances are thread-safe because all member data is guarded by a mutex. 484 class PosixLockTable { 485 public: 486 bool Insert(const std::string& fname) LOCKS_EXCLUDED(mu_) { 487 mu_.Lock(); 488 bool succeeded = locked_files_.insert(fname).second; 489 mu_.Unlock(); 490 return succeeded; 491 } 492 void Remove(const std::string& fname) LOCKS_EXCLUDED(mu_) { 493 mu_.Lock(); 494 locked_files_.erase(fname); 495 mu_.Unlock(); 496 } 497 498 private: 499 port::Mutex mu_; 500 std::set<std::string> locked_files_ GUARDED_BY(mu_); 501 }; 502 503 class PosixEnv : public Env { 504 public: 505 PosixEnv(); 506 ~PosixEnv() override { 507 static const char msg[] = 508 "PosixEnv singleton destroyed. Unsupported behavior!\n"; 509 std::fwrite(msg, 1, sizeof(msg), stderr); 510 std::abort(); 511 } 512 513 Status NewSequentialFile(const std::string& filename, 514 SequentialFile** result) override { 515 int fd = ::open(filename.c_str(), O_RDONLY | kOpenBaseFlags); 516 if (fd < 0) { 517 *result = nullptr; 518 return PosixError(filename, errno); 519 } 520 521 *result = new PosixSequentialFile(filename, fd); 522 return Status::OK(); 523 } 524 525 Status NewRandomAccessFile(const std::string& filename, 526 RandomAccessFile** result) override { 527 *result = nullptr; 528 int fd = ::open(filename.c_str(), O_RDONLY | kOpenBaseFlags); 529 if (fd < 0) { 530 return PosixError(filename, errno); 531 } 532 533 if (!mmap_limiter_.Acquire()) { 534 *result = new PosixRandomAccessFile(filename, fd, &fd_limiter_); 535 return Status::OK(); 536 } 537 538 uint64_t file_size; 539 Status status = GetFileSize(filename, &file_size); 540 if (status.ok()) { 541 void* mmap_base = 542 ::mmap(/*addr=*/nullptr, file_size, PROT_READ, MAP_SHARED, fd, 0); 543 if (mmap_base != MAP_FAILED) { 544 *result = new PosixMmapReadableFile(filename, 545 reinterpret_cast<char*>(mmap_base), 546 file_size, &mmap_limiter_); 547 } else { 548 status = PosixError(filename, errno); 549 } 550 } 551 ::close(fd); 552 if (!status.ok()) { 553 mmap_limiter_.Release(); 554 } 555 return status; 556 } 557 558 Status NewWritableFile(const std::string& filename, 559 WritableFile** result) override { 560 int fd = ::open(filename.c_str(), 561 O_TRUNC | O_WRONLY | O_CREAT | kOpenBaseFlags, 0644); 562 if (fd < 0) { 563 *result = nullptr; 564 return PosixError(filename, errno); 565 } 566 567 *result = new PosixWritableFile(filename, fd); 568 return Status::OK(); 569 } 570 571 Status NewAppendableFile(const std::string& filename, 572 WritableFile** result) override { 573 int fd = ::open(filename.c_str(), 574 O_APPEND | O_WRONLY | O_CREAT | kOpenBaseFlags, 0644); 575 if (fd < 0) { 576 *result = nullptr; 577 return PosixError(filename, errno); 578 } 579 580 *result = new PosixWritableFile(filename, fd); 581 return Status::OK(); 582 } 583 584 bool FileExists(const std::string& filename) override { 585 return ::access(filename.c_str(), F_OK) == 0; 586 } 587 588 Status GetChildren(const std::string& directory_path, 589 std::vector<std::string>* result) override { 590 result->clear(); 591 ::DIR* dir = ::opendir(directory_path.c_str()); 592 if (dir == nullptr) { 593 return PosixError(directory_path, errno); 594 } 595 struct ::dirent* entry; 596 while ((entry = ::readdir(dir)) != nullptr) { 597 result->emplace_back(entry->d_name); 598 } 599 ::closedir(dir); 600 return Status::OK(); 601 } 602 603 Status DeleteFile(const std::string& filename) override { 604 if (::unlink(filename.c_str()) != 0) { 605 return PosixError(filename, errno); 606 } 607 return Status::OK(); 608 } 609 610 Status CreateDir(const std::string& dirname) override { 611 if (::mkdir(dirname.c_str(), 0755) != 0) { 612 return PosixError(dirname, errno); 613 } 614 return Status::OK(); 615 } 616 617 Status DeleteDir(const std::string& dirname) override { 618 if (::rmdir(dirname.c_str()) != 0) { 619 return PosixError(dirname, errno); 620 } 621 return Status::OK(); 622 } 623 624 Status GetFileSize(const std::string& filename, uint64_t* size) override { 625 struct ::stat file_stat; 626 if (::stat(filename.c_str(), &file_stat) != 0) { 627 *size = 0; 628 return PosixError(filename, errno); 629 } 630 *size = file_stat.st_size; 631 return Status::OK(); 632 } 633 634 Status RenameFile(const std::string& from, const std::string& to) override { 635 if (std::rename(from.c_str(), to.c_str()) != 0) { 636 return PosixError(from, errno); 637 } 638 return Status::OK(); 639 } 640 641 Status LockFile(const std::string& filename, FileLock** lock) override { 642 *lock = nullptr; 643 644 int fd = ::open(filename.c_str(), O_RDWR | O_CREAT | kOpenBaseFlags, 0644); 645 if (fd < 0) { 646 return PosixError(filename, errno); 647 } 648 649 if (!locks_.Insert(filename)) { 650 ::close(fd); 651 return Status::IOError("lock " + filename, "already held by process"); 652 } 653 654 if (LockOrUnlock(fd, true) == -1) { 655 int lock_errno = errno; 656 ::close(fd); 657 locks_.Remove(filename); 658 return PosixError("lock " + filename, lock_errno); 659 } 660 661 *lock = new PosixFileLock(fd, filename); 662 return Status::OK(); 663 } 664 665 Status UnlockFile(FileLock* lock) override { 666 PosixFileLock* posix_file_lock = static_cast<PosixFileLock*>(lock); 667 if (LockOrUnlock(posix_file_lock->fd(), false) == -1) { 668 return PosixError("unlock " + posix_file_lock->filename(), errno); 669 } 670 locks_.Remove(posix_file_lock->filename()); 671 ::close(posix_file_lock->fd()); 672 delete posix_file_lock; 673 return Status::OK(); 674 } 675 676 void Schedule(void (*background_work_function)(void* background_work_arg), 677 void* background_work_arg) override; 678 679 void StartThread(void (*thread_main)(void* thread_main_arg), 680 void* thread_main_arg) override { 681 std::thread new_thread(thread_main, thread_main_arg); 682 new_thread.detach(); 683 } 684 685 Status GetTestDirectory(std::string* result) override { 686 const char* env = std::getenv("TEST_TMPDIR"); 687 if (env && env[0] != '\0') { 688 *result = env; 689 } else { 690 char buf[100]; 691 std::snprintf(buf, sizeof(buf), "/tmp/leveldbtest-%d", 692 static_cast<int>(::geteuid())); 693 *result = buf; 694 } 695 696 // The CreateDir status is ignored because the directory may already exist. 697 CreateDir(*result); 698 699 return Status::OK(); 700 } 701 702 Status NewLogger(const std::string& filename, Logger** result) override { 703 int fd = ::open(filename.c_str(), 704 O_APPEND | O_WRONLY | O_CREAT | kOpenBaseFlags, 0644); 705 if (fd < 0) { 706 *result = nullptr; 707 return PosixError(filename, errno); 708 } 709 710 std::FILE* fp = ::fdopen(fd, "w"); 711 if (fp == nullptr) { 712 ::close(fd); 713 *result = nullptr; 714 return PosixError(filename, errno); 715 } else { 716 *result = new PosixLogger(fp); 717 return Status::OK(); 718 } 719 } 720 721 uint64_t NowMicros() override { 722 static constexpr uint64_t kUsecondsPerSecond = 1000000; 723 struct ::timeval tv; 724 ::gettimeofday(&tv, nullptr); 725 return static_cast<uint64_t>(tv.tv_sec) * kUsecondsPerSecond + tv.tv_usec; 726 } 727 728 void SleepForMicroseconds(int micros) override { 729 std::this_thread::sleep_for(std::chrono::microseconds(micros)); 730 } 731 732 private: 733 void BackgroundThreadMain(); 734 735 static void BackgroundThreadEntryPoint(PosixEnv* env) { 736 env->BackgroundThreadMain(); 737 } 738 739 // Stores the work item data in a Schedule() call. 740 // 741 // Instances are constructed on the thread calling Schedule() and used on the 742 // background thread. 743 // 744 // This structure is thread-safe because it is immutable. 745 struct BackgroundWorkItem { 746 explicit BackgroundWorkItem(void (*function)(void* arg), void* arg) 747 : function(function), arg(arg) {} 748 749 void (*const function)(void*); 750 void* const arg; 751 }; 752 753 port::Mutex background_work_mutex_; 754 port::CondVar background_work_cv_ GUARDED_BY(background_work_mutex_); 755 bool started_background_thread_ GUARDED_BY(background_work_mutex_); 756 757 std::queue<BackgroundWorkItem> background_work_queue_ 758 GUARDED_BY(background_work_mutex_); 759 760 PosixLockTable locks_; // Thread-safe. 761 Limiter mmap_limiter_; // Thread-safe. 762 Limiter fd_limiter_; // Thread-safe. 763 }; 764 765 // Return the maximum number of concurrent mmaps. 766 int MaxMmaps() { return g_mmap_limit; } 767 768 // Return the maximum number of read-only files to keep open. 769 int MaxOpenFiles() { 770 if (g_open_read_only_file_limit >= 0) { 771 return g_open_read_only_file_limit; 772 } 773 struct ::rlimit rlim; 774 if (::getrlimit(RLIMIT_NOFILE, &rlim)) { 775 // getrlimit failed, fallback to hard-coded default. 776 g_open_read_only_file_limit = 50; 777 } else if (rlim.rlim_cur == RLIM_INFINITY) { 778 g_open_read_only_file_limit = std::numeric_limits<int>::max(); 779 } else { 780 // Allow use of 20% of available file descriptors for read-only files. 781 g_open_read_only_file_limit = rlim.rlim_cur / 5; 782 } 783 return g_open_read_only_file_limit; 784 } 785 786 } // namespace 787 788 PosixEnv::PosixEnv() 789 : background_work_cv_(&background_work_mutex_), 790 started_background_thread_(false), 791 mmap_limiter_(MaxMmaps()), 792 fd_limiter_(MaxOpenFiles()) {} 793 794 void PosixEnv::Schedule( 795 void (*background_work_function)(void* background_work_arg), 796 void* background_work_arg) { 797 background_work_mutex_.Lock(); 798 799 // Start the background thread, if we haven't done so already. 800 if (!started_background_thread_) { 801 started_background_thread_ = true; 802 std::thread background_thread(PosixEnv::BackgroundThreadEntryPoint, this); 803 background_thread.detach(); 804 } 805 806 // If the queue is empty, the background thread may be waiting for work. 807 if (background_work_queue_.empty()) { 808 background_work_cv_.Signal(); 809 } 810 811 background_work_queue_.emplace(background_work_function, background_work_arg); 812 background_work_mutex_.Unlock(); 813 } 814 815 void PosixEnv::BackgroundThreadMain() { 816 while (true) { 817 background_work_mutex_.Lock(); 818 819 // Wait until there is work to be done. 820 while (background_work_queue_.empty()) { 821 background_work_cv_.Wait(); 822 } 823 824 assert(!background_work_queue_.empty()); 825 auto background_work_function = background_work_queue_.front().function; 826 void* background_work_arg = background_work_queue_.front().arg; 827 background_work_queue_.pop(); 828 829 background_work_mutex_.Unlock(); 830 background_work_function(background_work_arg); 831 } 832 } 833 834 namespace { 835 836 // Wraps an Env instance whose destructor is never created. 837 // 838 // Intended usage: 839 // using PlatformSingletonEnv = SingletonEnv<PlatformEnv>; 840 // void ConfigurePosixEnv(int param) { 841 // PlatformSingletonEnv::AssertEnvNotInitialized(); 842 // // set global configuration flags. 843 // } 844 // Env* Env::Default() { 845 // static PlatformSingletonEnv default_env; 846 // return default_env.env(); 847 // } 848 template <typename EnvType> 849 class SingletonEnv { 850 public: 851 SingletonEnv() { 852 #if !defined(NDEBUG) 853 env_initialized_.store(true, std::memory_order_relaxed); 854 #endif // !defined(NDEBUG) 855 static_assert(sizeof(env_storage_) >= sizeof(EnvType), 856 "env_storage_ will not fit the Env"); 857 static_assert(std::is_standard_layout_v<SingletonEnv<EnvType>>); 858 static_assert( 859 offsetof(SingletonEnv<EnvType>, env_storage_) % alignof(EnvType) == 0, 860 "env_storage_ does not meet the Env's alignment needs"); 861 static_assert(alignof(SingletonEnv<EnvType>) % alignof(EnvType) == 0, 862 "env_storage_ does not meet the Env's alignment needs"); 863 new (env_storage_) EnvType(); 864 } 865 ~SingletonEnv() = default; 866 867 SingletonEnv(const SingletonEnv&) = delete; 868 SingletonEnv& operator=(const SingletonEnv&) = delete; 869 870 Env* env() { return reinterpret_cast<Env*>(&env_storage_); } 871 872 static void AssertEnvNotInitialized() { 873 #if !defined(NDEBUG) 874 assert(!env_initialized_.load(std::memory_order_relaxed)); 875 #endif // !defined(NDEBUG) 876 } 877 878 private: 879 alignas(EnvType) char env_storage_[sizeof(EnvType)]; 880 #if !defined(NDEBUG) 881 static std::atomic<bool> env_initialized_; 882 #endif // !defined(NDEBUG) 883 }; 884 885 #if !defined(NDEBUG) 886 template <typename EnvType> 887 std::atomic<bool> SingletonEnv<EnvType>::env_initialized_; 888 #endif // !defined(NDEBUG) 889 890 using PosixDefaultEnv = SingletonEnv<PosixEnv>; 891 892 } // namespace 893 894 void EnvPosixTestHelper::SetReadOnlyFDLimit(int limit) { 895 PosixDefaultEnv::AssertEnvNotInitialized(); 896 g_open_read_only_file_limit = limit; 897 } 898 899 void EnvPosixTestHelper::SetReadOnlyMMapLimit(int limit) { 900 PosixDefaultEnv::AssertEnvNotInitialized(); 901 g_mmap_limit = limit; 902 } 903 904 Env* Env::Default() { 905 static PosixDefaultEnv env_container; 906 return env_container.env(); 907 } 908 909 } // namespace leveldb