/ src / leveldb / util / env_posix.cc
env_posix.cc
  1  // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  2  // Use of this source code is governed by a BSD-style license that can be
  3  // found in the LICENSE file. See the AUTHORS file for names of contributors.
  4  
  5  #include <dirent.h>
  6  #include <fcntl.h>
  7  #include <pthread.h>
  8  #include <sys/mman.h>
  9  #include <sys/resource.h>
 10  #include <sys/stat.h>
 11  #include <sys/time.h>
 12  #include <sys/types.h>
 13  #include <unistd.h>
 14  
 15  #include <atomic>
 16  #include <cerrno>
 17  #include <cstddef>
 18  #include <cstdint>
 19  #include <cstdio>
 20  #include <cstdlib>
 21  #include <cstring>
 22  #include <limits>
 23  #include <queue>
 24  #include <set>
 25  #include <string>
 26  #include <thread>
 27  #include <type_traits>
 28  #include <utility>
 29  
 30  #include "leveldb/env.h"
 31  #include "leveldb/slice.h"
 32  #include "leveldb/status.h"
 33  #include "port/port.h"
 34  #include "port/thread_annotations.h"
 35  #include "util/env_posix_test_helper.h"
 36  #include "util/posix_logger.h"
 37  
 38  namespace leveldb {
 39  
 40  namespace {
 41  
 42  // Set by EnvPosixTestHelper::SetReadOnlyMMapLimit() and MaxOpenFiles().
 43  int g_open_read_only_file_limit = -1;
 44  
 45  // Up to 4096 mmap regions for 64-bit binaries; none for 32-bit.
 46  constexpr const int kDefaultMmapLimit = (sizeof(void*) >= 8) ? 4096 : 0;
 47  
 48  // Can be set using EnvPosixTestHelper::SetReadOnlyMMapLimit().
 49  int g_mmap_limit = kDefaultMmapLimit;
 50  
 51  // Common flags defined for all posix open operations
 52  #if HAVE_O_CLOEXEC
 53  constexpr const int kOpenBaseFlags = O_CLOEXEC;
 54  #else
 55  constexpr const int kOpenBaseFlags = 0;
 56  #endif  // defined(HAVE_O_CLOEXEC)
 57  
 58  constexpr const size_t kWritableFileBufferSize = 65536;
 59  
 60  Status PosixError(const std::string& context, int error_number) {
 61    if (error_number == ENOENT) {
 62      return Status::NotFound(context, std::strerror(error_number));
 63    } else {
 64      return Status::IOError(context, std::strerror(error_number));
 65    }
 66  }
 67  
 68  // Helper class to limit resource usage to avoid exhaustion.
 69  // Currently used to limit read-only file descriptors and mmap file usage
 70  // so that we do not run out of file descriptors or virtual memory, or run into
 71  // kernel performance problems for very large databases.
 72  class Limiter {
 73   public:
 74    // Limit maximum number of resources to |max_acquires|.
 75    Limiter(int max_acquires) : acquires_allowed_(max_acquires) {}
 76  
 77    Limiter(const Limiter&) = delete;
 78    Limiter operator=(const Limiter&) = delete;
 79  
 80    // If another resource is available, acquire it and return true.
 81    // Else return false.
 82    bool Acquire() {
 83      int old_acquires_allowed =
 84          acquires_allowed_.fetch_sub(1, std::memory_order_relaxed);
 85  
 86      if (old_acquires_allowed > 0) return true;
 87  
 88      acquires_allowed_.fetch_add(1, std::memory_order_relaxed);
 89      return false;
 90    }
 91  
 92    // Release a resource acquired by a previous call to Acquire() that returned
 93    // true.
 94    void Release() { acquires_allowed_.fetch_add(1, std::memory_order_relaxed); }
 95  
 96   private:
 97    // The number of available resources.
 98    //
 99    // This is a counter and is not tied to the invariants of any other class, so
100    // it can be operated on safely using std::memory_order_relaxed.
101    std::atomic<int> acquires_allowed_;
102  };
103  
104  // Implements sequential read access in a file using read().
105  //
106  // Instances of this class are thread-friendly but not thread-safe, as required
107  // by the SequentialFile API.
108  class PosixSequentialFile final : public SequentialFile {
109   public:
110    PosixSequentialFile(std::string filename, int fd)
111        : fd_(fd), filename_(filename) {}
112    ~PosixSequentialFile() override { close(fd_); }
113  
114    Status Read(size_t n, Slice* result, char* scratch) override {
115      Status status;
116      while (true) {
117        ::ssize_t read_size = ::read(fd_, scratch, n);
118        if (read_size < 0) {  // Read error.
119          if (errno == EINTR) {
120            continue;  // Retry
121          }
122          status = PosixError(filename_, errno);
123          break;
124        }
125        *result = Slice(scratch, read_size);
126        break;
127      }
128      return status;
129    }
130  
131    Status Skip(uint64_t n) override {
132      if (::lseek(fd_, n, SEEK_CUR) == static_cast<off_t>(-1)) {
133        return PosixError(filename_, errno);
134      }
135      return Status::OK();
136    }
137  
138    virtual std::string GetName() const override { return filename_; }
139  
140   private:
141    const int fd_;
142    const std::string filename_;
143  };
144  
145  // Implements random read access in a file using pread().
146  //
147  // Instances of this class are thread-safe, as required by the RandomAccessFile
148  // API. Instances are immutable and Read() only calls thread-safe library
149  // functions.
150  class PosixRandomAccessFile final : public RandomAccessFile {
151   public:
152    // The new instance takes ownership of |fd|. |fd_limiter| must outlive this
153    // instance, and will be used to determine if .
154    PosixRandomAccessFile(std::string filename, int fd, Limiter* fd_limiter)
155        : has_permanent_fd_(fd_limiter->Acquire()),
156          fd_(has_permanent_fd_ ? fd : -1),
157          fd_limiter_(fd_limiter),
158          filename_(std::move(filename)) {
159      if (!has_permanent_fd_) {
160        assert(fd_ == -1);
161        ::close(fd);  // The file will be opened on every read.
162      }
163    }
164  
165    ~PosixRandomAccessFile() override {
166      if (has_permanent_fd_) {
167        assert(fd_ != -1);
168        ::close(fd_);
169        fd_limiter_->Release();
170      }
171    }
172  
173    Status Read(uint64_t offset, size_t n, Slice* result,
174                char* scratch) const override {
175      int fd = fd_;
176      if (!has_permanent_fd_) {
177        fd = ::open(filename_.c_str(), O_RDONLY | kOpenBaseFlags);
178        if (fd < 0) {
179          return PosixError(filename_, errno);
180        }
181      }
182  
183      assert(fd != -1);
184  
185      Status status;
186      ssize_t read_size = ::pread(fd, scratch, n, static_cast<off_t>(offset));
187      *result = Slice(scratch, (read_size < 0) ? 0 : read_size);
188      if (read_size < 0) {
189        // An error: return a non-ok status.
190        status = PosixError(filename_, errno);
191      }
192      if (!has_permanent_fd_) {
193        // Close the temporary file descriptor opened earlier.
194        assert(fd != fd_);
195        ::close(fd);
196      }
197      return status;
198    }
199  
200    virtual std::string GetName() const override { return filename_; }
201  
202   private:
203    const bool has_permanent_fd_;  // If false, the file is opened on every read.
204    const int fd_;                 // -1 if has_permanent_fd_ is false.
205    Limiter* const fd_limiter_;
206    const std::string filename_;
207  };
208  
209  // Implements random read access in a file using mmap().
210  //
211  // Instances of this class are thread-safe, as required by the RandomAccessFile
212  // API. Instances are immutable and Read() only calls thread-safe library
213  // functions.
214  class PosixMmapReadableFile final : public RandomAccessFile {
215   public:
216    // mmap_base[0, length-1] points to the memory-mapped contents of the file. It
217    // must be the result of a successful call to mmap(). This instances takes
218    // over the ownership of the region.
219    //
220    // |mmap_limiter| must outlive this instance. The caller must have already
221    // acquired the right to use one mmap region, which will be released when this
222    // instance is destroyed.
223    PosixMmapReadableFile(std::string filename, char* mmap_base, size_t length,
224                          Limiter* mmap_limiter)
225        : mmap_base_(mmap_base),
226          length_(length),
227          mmap_limiter_(mmap_limiter),
228          filename_(std::move(filename)) {}
229  
230    ~PosixMmapReadableFile() override {
231      ::munmap(static_cast<void*>(mmap_base_), length_);
232      mmap_limiter_->Release();
233    }
234  
235    Status Read(uint64_t offset, size_t n, Slice* result,
236                char* scratch) const override {
237      if (offset + n > length_) {
238        *result = Slice();
239        return PosixError(filename_, EINVAL);
240      }
241  
242      *result = Slice(mmap_base_ + offset, n);
243      return Status::OK();
244    }
245  
246    virtual std::string GetName() const override { return filename_; }
247  
248   private:
249    char* const mmap_base_;
250    const size_t length_;
251    Limiter* const mmap_limiter_;
252    const std::string filename_;
253  };
254  
255  class PosixWritableFile final : public WritableFile {
256   public:
257    PosixWritableFile(std::string filename, int fd)
258        : pos_(0),
259          fd_(fd),
260          is_manifest_(IsManifest(filename)),
261          filename_(std::move(filename)),
262          dirname_(Dirname(filename_)) {}
263  
264    ~PosixWritableFile() override {
265      if (fd_ >= 0) {
266        // Ignoring any potential errors
267        Close();
268      }
269    }
270  
271    Status Append(const Slice& data) override {
272      size_t write_size = data.size();
273      const char* write_data = data.data();
274  
275      // Fit as much as possible into buffer.
276      size_t copy_size = std::min(write_size, kWritableFileBufferSize - pos_);
277      std::memcpy(buf_ + pos_, write_data, copy_size);
278      write_data += copy_size;
279      write_size -= copy_size;
280      pos_ += copy_size;
281      if (write_size == 0) {
282        return Status::OK();
283      }
284  
285      // Can't fit in buffer, so need to do at least one write.
286      Status status = FlushBuffer();
287      if (!status.ok()) {
288        return status;
289      }
290  
291      // Small writes go to buffer, large writes are written directly.
292      if (write_size < kWritableFileBufferSize) {
293        std::memcpy(buf_, write_data, write_size);
294        pos_ = write_size;
295        return Status::OK();
296      }
297      return WriteUnbuffered(write_data, write_size);
298    }
299  
300    Status Close() override {
301      Status status = FlushBuffer();
302      const int close_result = ::close(fd_);
303      if (close_result < 0 && status.ok()) {
304        status = PosixError(filename_, errno);
305      }
306      fd_ = -1;
307      return status;
308    }
309  
310    Status Flush() override { return FlushBuffer(); }
311  
312    Status Sync() override {
313      // Ensure new files referred to by the manifest are in the filesystem.
314      //
315      // This needs to happen before the manifest file is flushed to disk, to
316      // avoid crashing in a state where the manifest refers to files that are not
317      // yet on disk.
318      Status status = SyncDirIfManifest();
319      if (!status.ok()) {
320        return status;
321      }
322  
323      status = FlushBuffer();
324      if (!status.ok()) {
325        return status;
326      }
327  
328      return SyncFd(fd_, filename_, false);
329    }
330  
331   private:
332    Status FlushBuffer() {
333      Status status = WriteUnbuffered(buf_, pos_);
334      pos_ = 0;
335      return status;
336    }
337  
338    Status WriteUnbuffered(const char* data, size_t size) {
339      while (size > 0) {
340        ssize_t write_result = ::write(fd_, data, size);
341        if (write_result < 0) {
342          if (errno == EINTR) {
343            continue;  // Retry
344          }
345          return PosixError(filename_, errno);
346        }
347        data += write_result;
348        size -= write_result;
349      }
350      return Status::OK();
351    }
352  
353    Status SyncDirIfManifest() {
354      Status status;
355      if (!is_manifest_) {
356        return status;
357      }
358  
359      int fd = ::open(dirname_.c_str(), O_RDONLY | kOpenBaseFlags);
360      if (fd < 0) {
361        status = PosixError(dirname_, errno);
362      } else {
363        status = SyncFd(fd, dirname_, true);
364        ::close(fd);
365      }
366      return status;
367    }
368  
369    // Ensures that all the caches associated with the given file descriptor's
370    // data are flushed all the way to durable media, and can withstand power
371    // failures.
372    //
373    // The path argument is only used to populate the description string in the
374    // returned Status if an error occurs.
375    static Status SyncFd(int fd, const std::string& fd_path, bool syncing_dir) {
376  #if HAVE_FULLFSYNC
377      // On macOS and iOS, fsync() doesn't guarantee durability past power
378      // failures. fcntl(F_FULLFSYNC) is required for that purpose. Some
379      // filesystems don't support fcntl(F_FULLFSYNC), and require a fallback to
380      // fsync().
381      if (::fcntl(fd, F_FULLFSYNC) == 0) {
382        return Status::OK();
383      }
384  #endif  // HAVE_FULLFSYNC
385  
386  #if HAVE_FDATASYNC
387      bool sync_success = ::fdatasync(fd) == 0;
388  #else
389      bool sync_success = ::fsync(fd) == 0;
390  #endif  // HAVE_FDATASYNC
391  
392      if (sync_success) {
393        return Status::OK();
394      }
395      // Do not crash if filesystem can't fsync directories
396      // (see https://github.com/bitcoin/bitcoin/pull/10000)
397      if (syncing_dir && errno == EINVAL) {
398        return Status::OK();
399      }
400      return PosixError(fd_path, errno);
401    }
402  
403    // Returns the directory name in a path pointing to a file.
404    //
405    // Returns "." if the path does not contain any directory separator.
406    static std::string Dirname(const std::string& filename) {
407      std::string::size_type separator_pos = filename.rfind('/');
408      if (separator_pos == std::string::npos) {
409        return std::string(".");
410      }
411      // The filename component should not contain a path separator. If it does,
412      // the splitting was done incorrectly.
413      assert(filename.find('/', separator_pos + 1) == std::string::npos);
414  
415      return filename.substr(0, separator_pos);
416    }
417  
418    // Extracts the file name from a path pointing to a file.
419    //
420    // The returned Slice points to |filename|'s data buffer, so it is only valid
421    // while |filename| is alive and unchanged.
422    static Slice Basename(const std::string& filename) {
423      std::string::size_type separator_pos = filename.rfind('/');
424      if (separator_pos == std::string::npos) {
425        return Slice(filename);
426      }
427      // The filename component should not contain a path separator. If it does,
428      // the splitting was done incorrectly.
429      assert(filename.find('/', separator_pos + 1) == std::string::npos);
430  
431      return Slice(filename.data() + separator_pos + 1,
432                   filename.length() - separator_pos - 1);
433    }
434  
435    // True if the given file is a manifest file.
436    static bool IsManifest(const std::string& filename) {
437      return Basename(filename).starts_with("MANIFEST");
438    }
439  
440    virtual std::string GetName() const override { return filename_; }
441  
442    // buf_[0, pos_ - 1] contains data to be written to fd_.
443    char buf_[kWritableFileBufferSize];
444    size_t pos_;
445    int fd_;
446  
447    const bool is_manifest_;  // True if the file's name starts with MANIFEST.
448    const std::string filename_;
449    const std::string dirname_;  // The directory of filename_.
450  };
451  
// Places (|lock| == true) or removes (|lock| == false) an fcntl() write lock
// covering the whole file referred to by |fd|.
//
// Uses the non-blocking F_SETLK command. errno is reset to 0 before the call;
// the return value is that of fcntl().
int LockOrUnlock(int fd, bool lock) {
  errno = 0;

  struct ::flock request;
  std::memset(&request, 0, sizeof(request));
  // A zero start and zero length, measured from the beginning of the file,
  // select the entire file.
  request.l_whence = SEEK_SET;
  request.l_start = 0;
  request.l_len = 0;
  if (lock) {
    request.l_type = F_WRLCK;
  } else {
    request.l_type = F_UNLCK;
  }

  return ::fcntl(fd, F_SETLK, &request);
}
462  
463  // Instances are thread-safe because they are immutable.
464  class PosixFileLock : public FileLock {
465   public:
466    PosixFileLock(int fd, std::string filename)
467        : fd_(fd), filename_(std::move(filename)) {}
468  
469    int fd() const { return fd_; }
470    const std::string& filename() const { return filename_; }
471  
472   private:
473    const int fd_;
474    const std::string filename_;
475  };
476  
477  // Tracks the files locked by PosixEnv::LockFile().
478  //
479  // We maintain a separate set instead of relying on fcntl(F_SETLK) because
480  // fcntl(F_SETLK) does not provide any protection against multiple uses from the
481  // same process.
482  //
483  // Instances are thread-safe because all member data is guarded by a mutex.
484  class PosixLockTable {
485   public:
486    bool Insert(const std::string& fname) LOCKS_EXCLUDED(mu_) {
487      mu_.Lock();
488      bool succeeded = locked_files_.insert(fname).second;
489      mu_.Unlock();
490      return succeeded;
491    }
492    void Remove(const std::string& fname) LOCKS_EXCLUDED(mu_) {
493      mu_.Lock();
494      locked_files_.erase(fname);
495      mu_.Unlock();
496    }
497  
498   private:
499    port::Mutex mu_;
500    std::set<std::string> locked_files_ GUARDED_BY(mu_);
501  };
502  
503  class PosixEnv : public Env {
504   public:
505    PosixEnv();
506    ~PosixEnv() override {
507      static const char msg[] =
508          "PosixEnv singleton destroyed. Unsupported behavior!\n";
509      std::fwrite(msg, 1, sizeof(msg), stderr);
510      std::abort();
511    }
512  
513    Status NewSequentialFile(const std::string& filename,
514                             SequentialFile** result) override {
515      int fd = ::open(filename.c_str(), O_RDONLY | kOpenBaseFlags);
516      if (fd < 0) {
517        *result = nullptr;
518        return PosixError(filename, errno);
519      }
520  
521      *result = new PosixSequentialFile(filename, fd);
522      return Status::OK();
523    }
524  
525    Status NewRandomAccessFile(const std::string& filename,
526                               RandomAccessFile** result) override {
527      *result = nullptr;
528      int fd = ::open(filename.c_str(), O_RDONLY | kOpenBaseFlags);
529      if (fd < 0) {
530        return PosixError(filename, errno);
531      }
532  
533      if (!mmap_limiter_.Acquire()) {
534        *result = new PosixRandomAccessFile(filename, fd, &fd_limiter_);
535        return Status::OK();
536      }
537  
538      uint64_t file_size;
539      Status status = GetFileSize(filename, &file_size);
540      if (status.ok()) {
541        void* mmap_base =
542            ::mmap(/*addr=*/nullptr, file_size, PROT_READ, MAP_SHARED, fd, 0);
543        if (mmap_base != MAP_FAILED) {
544          *result = new PosixMmapReadableFile(filename,
545                                              reinterpret_cast<char*>(mmap_base),
546                                              file_size, &mmap_limiter_);
547        } else {
548          status = PosixError(filename, errno);
549        }
550      }
551      ::close(fd);
552      if (!status.ok()) {
553        mmap_limiter_.Release();
554      }
555      return status;
556    }
557  
558    Status NewWritableFile(const std::string& filename,
559                           WritableFile** result) override {
560      int fd = ::open(filename.c_str(),
561                      O_TRUNC | O_WRONLY | O_CREAT | kOpenBaseFlags, 0644);
562      if (fd < 0) {
563        *result = nullptr;
564        return PosixError(filename, errno);
565      }
566  
567      *result = new PosixWritableFile(filename, fd);
568      return Status::OK();
569    }
570  
571    Status NewAppendableFile(const std::string& filename,
572                             WritableFile** result) override {
573      int fd = ::open(filename.c_str(),
574                      O_APPEND | O_WRONLY | O_CREAT | kOpenBaseFlags, 0644);
575      if (fd < 0) {
576        *result = nullptr;
577        return PosixError(filename, errno);
578      }
579  
580      *result = new PosixWritableFile(filename, fd);
581      return Status::OK();
582    }
583  
584    bool FileExists(const std::string& filename) override {
585      return ::access(filename.c_str(), F_OK) == 0;
586    }
587  
588    Status GetChildren(const std::string& directory_path,
589                       std::vector<std::string>* result) override {
590      result->clear();
591      ::DIR* dir = ::opendir(directory_path.c_str());
592      if (dir == nullptr) {
593        return PosixError(directory_path, errno);
594      }
595      struct ::dirent* entry;
596      while ((entry = ::readdir(dir)) != nullptr) {
597        result->emplace_back(entry->d_name);
598      }
599      ::closedir(dir);
600      return Status::OK();
601    }
602  
603    Status DeleteFile(const std::string& filename) override {
604      if (::unlink(filename.c_str()) != 0) {
605        return PosixError(filename, errno);
606      }
607      return Status::OK();
608    }
609  
610    Status CreateDir(const std::string& dirname) override {
611      if (::mkdir(dirname.c_str(), 0755) != 0) {
612        return PosixError(dirname, errno);
613      }
614      return Status::OK();
615    }
616  
617    Status DeleteDir(const std::string& dirname) override {
618      if (::rmdir(dirname.c_str()) != 0) {
619        return PosixError(dirname, errno);
620      }
621      return Status::OK();
622    }
623  
624    Status GetFileSize(const std::string& filename, uint64_t* size) override {
625      struct ::stat file_stat;
626      if (::stat(filename.c_str(), &file_stat) != 0) {
627        *size = 0;
628        return PosixError(filename, errno);
629      }
630      *size = file_stat.st_size;
631      return Status::OK();
632    }
633  
634    Status RenameFile(const std::string& from, const std::string& to) override {
635      if (std::rename(from.c_str(), to.c_str()) != 0) {
636        return PosixError(from, errno);
637      }
638      return Status::OK();
639    }
640  
641    Status LockFile(const std::string& filename, FileLock** lock) override {
642      *lock = nullptr;
643  
644      int fd = ::open(filename.c_str(), O_RDWR | O_CREAT | kOpenBaseFlags, 0644);
645      if (fd < 0) {
646        return PosixError(filename, errno);
647      }
648  
649      if (!locks_.Insert(filename)) {
650        ::close(fd);
651        return Status::IOError("lock " + filename, "already held by process");
652      }
653  
654      if (LockOrUnlock(fd, true) == -1) {
655        int lock_errno = errno;
656        ::close(fd);
657        locks_.Remove(filename);
658        return PosixError("lock " + filename, lock_errno);
659      }
660  
661      *lock = new PosixFileLock(fd, filename);
662      return Status::OK();
663    }
664  
665    Status UnlockFile(FileLock* lock) override {
666      PosixFileLock* posix_file_lock = static_cast<PosixFileLock*>(lock);
667      if (LockOrUnlock(posix_file_lock->fd(), false) == -1) {
668        return PosixError("unlock " + posix_file_lock->filename(), errno);
669      }
670      locks_.Remove(posix_file_lock->filename());
671      ::close(posix_file_lock->fd());
672      delete posix_file_lock;
673      return Status::OK();
674    }
675  
676    void Schedule(void (*background_work_function)(void* background_work_arg),
677                  void* background_work_arg) override;
678  
679    void StartThread(void (*thread_main)(void* thread_main_arg),
680                     void* thread_main_arg) override {
681      std::thread new_thread(thread_main, thread_main_arg);
682      new_thread.detach();
683    }
684  
685    Status GetTestDirectory(std::string* result) override {
686      const char* env = std::getenv("TEST_TMPDIR");
687      if (env && env[0] != '\0') {
688        *result = env;
689      } else {
690        char buf[100];
691        std::snprintf(buf, sizeof(buf), "/tmp/leveldbtest-%d",
692                      static_cast<int>(::geteuid()));
693        *result = buf;
694      }
695  
696      // The CreateDir status is ignored because the directory may already exist.
697      CreateDir(*result);
698  
699      return Status::OK();
700    }
701  
702    Status NewLogger(const std::string& filename, Logger** result) override {
703      int fd = ::open(filename.c_str(),
704                      O_APPEND | O_WRONLY | O_CREAT | kOpenBaseFlags, 0644);
705      if (fd < 0) {
706        *result = nullptr;
707        return PosixError(filename, errno);
708      }
709  
710      std::FILE* fp = ::fdopen(fd, "w");
711      if (fp == nullptr) {
712        ::close(fd);
713        *result = nullptr;
714        return PosixError(filename, errno);
715      } else {
716        *result = new PosixLogger(fp);
717        return Status::OK();
718      }
719    }
720  
721    uint64_t NowMicros() override {
722      static constexpr uint64_t kUsecondsPerSecond = 1000000;
723      struct ::timeval tv;
724      ::gettimeofday(&tv, nullptr);
725      return static_cast<uint64_t>(tv.tv_sec) * kUsecondsPerSecond + tv.tv_usec;
726    }
727  
728    void SleepForMicroseconds(int micros) override {
729      std::this_thread::sleep_for(std::chrono::microseconds(micros));
730    }
731  
732   private:
733    void BackgroundThreadMain();
734  
735    static void BackgroundThreadEntryPoint(PosixEnv* env) {
736      env->BackgroundThreadMain();
737    }
738  
739    // Stores the work item data in a Schedule() call.
740    //
741    // Instances are constructed on the thread calling Schedule() and used on the
742    // background thread.
743    //
744    // This structure is thread-safe because it is immutable.
745    struct BackgroundWorkItem {
746      explicit BackgroundWorkItem(void (*function)(void* arg), void* arg)
747          : function(function), arg(arg) {}
748  
749      void (*const function)(void*);
750      void* const arg;
751    };
752  
753    port::Mutex background_work_mutex_;
754    port::CondVar background_work_cv_ GUARDED_BY(background_work_mutex_);
755    bool started_background_thread_ GUARDED_BY(background_work_mutex_);
756  
757    std::queue<BackgroundWorkItem> background_work_queue_
758        GUARDED_BY(background_work_mutex_);
759  
760    PosixLockTable locks_;  // Thread-safe.
761    Limiter mmap_limiter_;  // Thread-safe.
762    Limiter fd_limiter_;    // Thread-safe.
763  };
764  
765  // Return the maximum number of concurrent mmaps.
766  int MaxMmaps() { return g_mmap_limit; }
767  
768  // Return the maximum number of read-only files to keep open.
769  int MaxOpenFiles() {
770    if (g_open_read_only_file_limit >= 0) {
771      return g_open_read_only_file_limit;
772    }
773    struct ::rlimit rlim;
774    if (::getrlimit(RLIMIT_NOFILE, &rlim)) {
775      // getrlimit failed, fallback to hard-coded default.
776      g_open_read_only_file_limit = 50;
777    } else if (rlim.rlim_cur == RLIM_INFINITY) {
778      g_open_read_only_file_limit = std::numeric_limits<int>::max();
779    } else {
780      // Allow use of 20% of available file descriptors for read-only files.
781      g_open_read_only_file_limit = rlim.rlim_cur / 5;
782    }
783    return g_open_read_only_file_limit;
784  }
785  
786  }  // namespace
787  
788  PosixEnv::PosixEnv()
789      : background_work_cv_(&background_work_mutex_),
790        started_background_thread_(false),
791        mmap_limiter_(MaxMmaps()),
792        fd_limiter_(MaxOpenFiles()) {}
793  
794  void PosixEnv::Schedule(
795      void (*background_work_function)(void* background_work_arg),
796      void* background_work_arg) {
797    background_work_mutex_.Lock();
798  
799    // Start the background thread, if we haven't done so already.
800    if (!started_background_thread_) {
801      started_background_thread_ = true;
802      std::thread background_thread(PosixEnv::BackgroundThreadEntryPoint, this);
803      background_thread.detach();
804    }
805  
806    // If the queue is empty, the background thread may be waiting for work.
807    if (background_work_queue_.empty()) {
808      background_work_cv_.Signal();
809    }
810  
811    background_work_queue_.emplace(background_work_function, background_work_arg);
812    background_work_mutex_.Unlock();
813  }
814  
815  void PosixEnv::BackgroundThreadMain() {
816    while (true) {
817      background_work_mutex_.Lock();
818  
819      // Wait until there is work to be done.
820      while (background_work_queue_.empty()) {
821        background_work_cv_.Wait();
822      }
823  
824      assert(!background_work_queue_.empty());
825      auto background_work_function = background_work_queue_.front().function;
826      void* background_work_arg = background_work_queue_.front().arg;
827      background_work_queue_.pop();
828  
829      background_work_mutex_.Unlock();
830      background_work_function(background_work_arg);
831    }
832  }
833  
834  namespace {
835  
836  // Wraps an Env instance whose destructor is never created.
837  //
838  // Intended usage:
839  //   using PlatformSingletonEnv = SingletonEnv<PlatformEnv>;
840  //   void ConfigurePosixEnv(int param) {
841  //     PlatformSingletonEnv::AssertEnvNotInitialized();
842  //     // set global configuration flags.
843  //   }
844  //   Env* Env::Default() {
845  //     static PlatformSingletonEnv default_env;
846  //     return default_env.env();
847  //   }
848  template <typename EnvType>
849  class SingletonEnv {
850   public:
851    SingletonEnv() {
852  #if !defined(NDEBUG)
853      env_initialized_.store(true, std::memory_order_relaxed);
854  #endif  // !defined(NDEBUG)
855      static_assert(sizeof(env_storage_) >= sizeof(EnvType),
856                    "env_storage_ will not fit the Env");
857      static_assert(std::is_standard_layout_v<SingletonEnv<EnvType>>);
858      static_assert(
859          offsetof(SingletonEnv<EnvType>, env_storage_) % alignof(EnvType) == 0,
860          "env_storage_ does not meet the Env's alignment needs");
861      static_assert(alignof(SingletonEnv<EnvType>) % alignof(EnvType) == 0,
862                    "env_storage_ does not meet the Env's alignment needs");
863      new (env_storage_) EnvType();
864    }
865    ~SingletonEnv() = default;
866  
867    SingletonEnv(const SingletonEnv&) = delete;
868    SingletonEnv& operator=(const SingletonEnv&) = delete;
869  
870    Env* env() { return reinterpret_cast<Env*>(&env_storage_); }
871  
872    static void AssertEnvNotInitialized() {
873  #if !defined(NDEBUG)
874      assert(!env_initialized_.load(std::memory_order_relaxed));
875  #endif  // !defined(NDEBUG)
876    }
877  
878   private:
879    alignas(EnvType) char env_storage_[sizeof(EnvType)];
880  #if !defined(NDEBUG)
881    static std::atomic<bool> env_initialized_;
882  #endif  // !defined(NDEBUG)
883  };
884  
885  #if !defined(NDEBUG)
886  template <typename EnvType>
887  std::atomic<bool> SingletonEnv<EnvType>::env_initialized_;
888  #endif  // !defined(NDEBUG)
889  
890  using PosixDefaultEnv = SingletonEnv<PosixEnv>;
891  
892  }  // namespace
893  
894  void EnvPosixTestHelper::SetReadOnlyFDLimit(int limit) {
895    PosixDefaultEnv::AssertEnvNotInitialized();
896    g_open_read_only_file_limit = limit;
897  }
898  
899  void EnvPosixTestHelper::SetReadOnlyMMapLimit(int limit) {
900    PosixDefaultEnv::AssertEnvNotInitialized();
901    g_mmap_limit = limit;
902  }
903  
904  Env* Env::Default() {
905    static PosixDefaultEnv env_container;
906    return env_container.env();
907  }
908  
909  }  // namespace leveldb