/ src / leveldb / db / version_set.h
version_set.h
  1  // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  2  // Use of this source code is governed by a BSD-style license that can be
  3  // found in the LICENSE file. See the AUTHORS file for names of contributors.
  4  //
  5  // The representation of a DBImpl consists of a set of Versions.  The
  6  // newest version is called "current".  Older versions may be kept
  7  // around to provide a consistent view to live iterators.
  8  //
  9  // Each Version keeps track of a set of Table files per level.  The
 10  // entire set of versions is maintained in a VersionSet.
 11  //
 12  // Version,VersionSet are thread-compatible, but require external
 13  // synchronization on all accesses.
 14  
 15  #ifndef STORAGE_LEVELDB_DB_VERSION_SET_H_
 16  #define STORAGE_LEVELDB_DB_VERSION_SET_H_
 17  
 18  #include <map>
 19  #include <set>
 20  #include <vector>
 21  
 22  #include "db/dbformat.h"
 23  #include "db/version_edit.h"
 24  #include "port/port.h"
 25  #include "port/thread_annotations.h"
 26  
 27  namespace leveldb {
 28  
 29  namespace log {
 30  class Writer;
 31  }
 32  
 33  class Compaction;
 34  class Iterator;
 35  class MemTable;
 36  class TableBuilder;
 37  class TableCache;
 38  class Version;
 39  class VersionSet;
 40  class WritableFile;
 41  
 42  // Return the smallest index i such that files[i]->largest >= key.
 43  // Return files.size() if there is no such file.
 44  // REQUIRES: "files" contains a sorted list of non-overlapping files.
 45  int FindFile(const InternalKeyComparator& icmp,
 46               const std::vector<FileMetaData*>& files, const Slice& key);
 47  
 48  // Returns true iff some file in "files" overlaps the user key range
 49  // [*smallest,*largest].
 50  // smallest==nullptr represents a key smaller than all keys in the DB.
 51  // largest==nullptr represents a key largest than all keys in the DB.
 52  // REQUIRES: If disjoint_sorted_files, files[] contains disjoint ranges
 53  //           in sorted order.
 54  bool SomeFileOverlapsRange(const InternalKeyComparator& icmp,
 55                             bool disjoint_sorted_files,
 56                             const std::vector<FileMetaData*>& files,
 57                             const Slice* smallest_user_key,
 58                             const Slice* largest_user_key);
 59  
 60  class Version {
 61   public:
 62    // Lookup the value for key.  If found, store it in *val and
 63    // return OK.  Else return a non-OK status.  Fills *stats.
 64    // REQUIRES: lock is not held
 65    struct GetStats {
 66      FileMetaData* seek_file;
 67      int seek_file_level;
 68    };
 69  
 70    // Append to *iters a sequence of iterators that will
 71    // yield the contents of this Version when merged together.
 72    // REQUIRES: This version has been saved (see VersionSet::SaveTo)
 73    void AddIterators(const ReadOptions&, std::vector<Iterator*>* iters);
 74  
 75    Status Get(const ReadOptions&, const LookupKey& key, std::string* val,
 76               GetStats* stats);
 77  
 78    // Adds "stats" into the current state.  Returns true if a new
 79    // compaction may need to be triggered, false otherwise.
 80    // REQUIRES: lock is held
 81    bool UpdateStats(const GetStats& stats);
 82  
 83    // Record a sample of bytes read at the specified internal key.
 84    // Samples are taken approximately once every config::kReadBytesPeriod
 85    // bytes.  Returns true if a new compaction may need to be triggered.
 86    // REQUIRES: lock is held
 87    bool RecordReadSample(Slice key);
 88  
 89    // Reference count management (so Versions do not disappear out from
 90    // under live iterators)
 91    void Ref();
 92    void Unref();
 93  
 94    void GetOverlappingInputs(
 95        int level,
 96        const InternalKey* begin,  // nullptr means before all keys
 97        const InternalKey* end,    // nullptr means after all keys
 98        std::vector<FileMetaData*>* inputs);
 99  
100    // Returns true iff some file in the specified level overlaps
101    // some part of [*smallest_user_key,*largest_user_key].
102    // smallest_user_key==nullptr represents a key smaller than all the DB's keys.
103    // largest_user_key==nullptr represents a key largest than all the DB's keys.
104    bool OverlapInLevel(int level, const Slice* smallest_user_key,
105                        const Slice* largest_user_key);
106  
107    // Return the level at which we should place a new memtable compaction
108    // result that covers the range [smallest_user_key,largest_user_key].
109    int PickLevelForMemTableOutput(const Slice& smallest_user_key,
110                                   const Slice& largest_user_key);
111  
112    int NumFiles(int level) const { return files_[level].size(); }
113  
114    // Return a human readable string that describes this version's contents.
115    std::string DebugString() const;
116  
117   private:
118    friend class Compaction;
119    friend class VersionSet;
120  
121    class LevelFileNumIterator;
122  
123    explicit Version(VersionSet* vset)
124        : vset_(vset),
125          next_(this),
126          prev_(this),
127          refs_(0),
128          file_to_compact_(nullptr),
129          file_to_compact_level_(-1),
130          compaction_score_(-1),
131          compaction_level_(-1) {}
132  
133    Version(const Version&) = delete;
134    Version& operator=(const Version&) = delete;
135  
136    ~Version();
137  
138    Iterator* NewConcatenatingIterator(const ReadOptions&, int level) const;
139  
140    // Call func(arg, level, f) for every file that overlaps user_key in
141    // order from newest to oldest.  If an invocation of func returns
142    // false, makes no more calls.
143    //
144    // REQUIRES: user portion of internal_key == user_key.
145    void ForEachOverlapping(Slice user_key, Slice internal_key, void* arg,
146                            bool (*func)(void*, int, FileMetaData*));
147  
148    VersionSet* vset_;  // VersionSet to which this Version belongs
149    Version* next_;     // Next version in linked list
150    Version* prev_;     // Previous version in linked list
151    int refs_;          // Number of live refs to this version
152  
153    // List of files per level
154    std::vector<FileMetaData*> files_[config::kNumLevels];
155  
156    // Next file to compact based on seek stats.
157    FileMetaData* file_to_compact_;
158    int file_to_compact_level_;
159  
160    // Level that should be compacted next and its compaction score.
161    // Score < 1 means compaction is not strictly needed.  These fields
162    // are initialized by Finalize().
163    double compaction_score_;
164    int compaction_level_;
165  };
166  
167  class VersionSet {
168   public:
169    VersionSet(const std::string& dbname, const Options* options,
170               TableCache* table_cache, const InternalKeyComparator*);
171    VersionSet(const VersionSet&) = delete;
172    VersionSet& operator=(const VersionSet&) = delete;
173  
174    ~VersionSet();
175  
176    // Apply *edit to the current version to form a new descriptor that
177    // is both saved to persistent state and installed as the new
178    // current version.  Will release *mu while actually writing to the file.
179    // REQUIRES: *mu is held on entry.
180    // REQUIRES: no other thread concurrently calls LogAndApply()
181    Status LogAndApply(VersionEdit* edit, port::Mutex* mu)
182        EXCLUSIVE_LOCKS_REQUIRED(mu);
183  
184    // Recover the last saved descriptor from persistent storage.
185    Status Recover(bool* save_manifest);
186  
187    // Return the current version.
188    Version* current() const { return current_; }
189  
190    // Return the current manifest file number
191    uint64_t ManifestFileNumber() const { return manifest_file_number_; }
192  
193    // Allocate and return a new file number
194    uint64_t NewFileNumber() { return next_file_number_++; }
195  
196    // Arrange to reuse "file_number" unless a newer file number has
197    // already been allocated.
198    // REQUIRES: "file_number" was returned by a call to NewFileNumber().
199    void ReuseFileNumber(uint64_t file_number) {
200      if (next_file_number_ == file_number + 1) {
201        next_file_number_ = file_number;
202      }
203    }
204  
205    // Return the number of Table files at the specified level.
206    int NumLevelFiles(int level) const;
207  
208    // Return the combined file size of all files at the specified level.
209    int64_t NumLevelBytes(int level) const;
210  
211    // Return the last sequence number.
212    uint64_t LastSequence() const { return last_sequence_; }
213  
214    // Set the last sequence number to s.
215    void SetLastSequence(uint64_t s) {
216      assert(s >= last_sequence_);
217      last_sequence_ = s;
218    }
219  
220    // Mark the specified file number as used.
221    void MarkFileNumberUsed(uint64_t number);
222  
223    // Return the current log file number.
224    uint64_t LogNumber() const { return log_number_; }
225  
226    // Return the log file number for the log file that is currently
227    // being compacted, or zero if there is no such log file.
228    uint64_t PrevLogNumber() const { return prev_log_number_; }
229  
230    // Pick level and inputs for a new compaction.
231    // Returns nullptr if there is no compaction to be done.
232    // Otherwise returns a pointer to a heap-allocated object that
233    // describes the compaction.  Caller should delete the result.
234    Compaction* PickCompaction();
235  
236    // Return a compaction object for compacting the range [begin,end] in
237    // the specified level.  Returns nullptr if there is nothing in that
238    // level that overlaps the specified range.  Caller should delete
239    // the result.
240    Compaction* CompactRange(int level, const InternalKey* begin,
241                             const InternalKey* end);
242  
243    // Return the maximum overlapping data (in bytes) at next level for any
244    // file at a level >= 1.
245    int64_t MaxNextLevelOverlappingBytes();
246  
247    // Create an iterator that reads over the compaction inputs for "*c".
248    // The caller should delete the iterator when no longer needed.
249    Iterator* MakeInputIterator(Compaction* c);
250  
251    // Returns true iff some level needs a compaction.
252    bool NeedsCompaction() const {
253      Version* v = current_;
254      return (v->compaction_score_ >= 1) || (v->file_to_compact_ != nullptr);
255    }
256  
257    // Add all files listed in any live version to *live.
258    // May also mutate some internal state.
259    void AddLiveFiles(std::set<uint64_t>* live);
260  
261    // Return the approximate offset in the database of the data for
262    // "key" as of version "v".
263    uint64_t ApproximateOffsetOf(Version* v, const InternalKey& key);
264  
265    // Return a human-readable short (single-line) summary of the number
266    // of files per level.  Uses *scratch as backing store.
267    struct LevelSummaryStorage {
268      char buffer[100];
269    };
270    const char* LevelSummary(LevelSummaryStorage* scratch) const;
271  
272   private:
273    class Builder;
274  
275    friend class Compaction;
276    friend class Version;
277  
278    bool ReuseManifest(const std::string& dscname, const std::string& dscbase);
279  
280    void Finalize(Version* v);
281  
282    void GetRange(const std::vector<FileMetaData*>& inputs, InternalKey* smallest,
283                  InternalKey* largest);
284  
285    void GetRange2(const std::vector<FileMetaData*>& inputs1,
286                   const std::vector<FileMetaData*>& inputs2,
287                   InternalKey* smallest, InternalKey* largest);
288  
289    void SetupOtherInputs(Compaction* c);
290  
291    // Save current contents to *log
292    Status WriteSnapshot(log::Writer* log);
293  
294    void AppendVersion(Version* v);
295  
296    Env* const env_;
297    const std::string dbname_;
298    const Options* const options_;
299    TableCache* const table_cache_;
300    const InternalKeyComparator icmp_;
301    uint64_t next_file_number_;
302    uint64_t manifest_file_number_;
303    uint64_t last_sequence_;
304    uint64_t log_number_;
305    uint64_t prev_log_number_;  // 0 or backing store for memtable being compacted
306  
307    // Opened lazily
308    WritableFile* descriptor_file_;
309    log::Writer* descriptor_log_;
310    Version dummy_versions_;  // Head of circular doubly-linked list of versions.
311    Version* current_;        // == dummy_versions_.prev_
312  
313    // Per-level key at which the next compaction at that level should start.
314    // Either an empty string, or a valid InternalKey.
315    std::string compact_pointer_[config::kNumLevels];
316  };
317  
318  // A Compaction encapsulates information about a compaction.
319  class Compaction {
320   public:
321    ~Compaction();
322  
323    // Return the level that is being compacted.  Inputs from "level"
324    // and "level+1" will be merged to produce a set of "level+1" files.
325    int level() const { return level_; }
326  
327    // Return the object that holds the edits to the descriptor done
328    // by this compaction.
329    VersionEdit* edit() { return &edit_; }
330  
331    // "which" must be either 0 or 1
332    int num_input_files(int which) const { return inputs_[which].size(); }
333  
334    // Return the ith input file at "level()+which" ("which" must be 0 or 1).
335    FileMetaData* input(int which, int i) const { return inputs_[which][i]; }
336  
337    // Maximum size of files to build during this compaction.
338    uint64_t MaxOutputFileSize() const { return max_output_file_size_; }
339  
340    // Is this a trivial compaction that can be implemented by just
341    // moving a single input file to the next level (no merging or splitting)
342    bool IsTrivialMove() const;
343  
344    // Add all inputs to this compaction as delete operations to *edit.
345    void AddInputDeletions(VersionEdit* edit);
346  
347    // Returns true if the information we have available guarantees that
348    // the compaction is producing data in "level+1" for which no data exists
349    // in levels greater than "level+1".
350    bool IsBaseLevelForKey(const Slice& user_key);
351  
352    // Returns true iff we should stop building the current output
353    // before processing "internal_key".
354    bool ShouldStopBefore(const Slice& internal_key);
355  
356    // Release the input version for the compaction, once the compaction
357    // is successful.
358    void ReleaseInputs();
359  
360   private:
361    friend class Version;
362    friend class VersionSet;
363  
364    Compaction(const Options* options, int level);
365  
366    int level_;
367    uint64_t max_output_file_size_;
368    Version* input_version_;
369    VersionEdit edit_;
370  
371    // Each compaction reads inputs from "level_" and "level_+1"
372    std::vector<FileMetaData*> inputs_[2];  // The two sets of inputs
373  
374    // State used to check for number of overlapping grandparent files
375    // (parent == level_ + 1, grandparent == level_ + 2)
376    std::vector<FileMetaData*> grandparents_;
377    size_t grandparent_index_;  // Index in grandparent_starts_
378    bool seen_key_;             // Some output key has been seen
379    int64_t overlapped_bytes_;  // Bytes of overlap between current output
380                                // and grandparent files
381  
382    // State for implementing IsBaseLevelForKey
383  
384    // level_ptrs_ holds indices into input_version_->levels_: our state
385    // is that we are positioned at one of the file ranges for each
386    // higher level than the ones involved in this compaction (i.e. for
387    // all L >= level_ + 2).
388    size_t level_ptrs_[config::kNumLevels];
389  };
390  
391  }  // namespace leveldb
392  
393  #endif  // STORAGE_LEVELDB_DB_VERSION_SET_H_