理清leveldb的recover流程对于了解leveldb如何保证数据正确性和一致性(即便在节点宕机的情况下)是十分有帮助的。首先从Open函数开始,构造一个DBImpl实例,然后调用了其Recover方法。
Status DB::Open(const Options& options, const std::string& dbname, DB** dbptr) { *dbptr = NULL; DBImpl* impl = new DBImpl(options, dbname); impl->mutex_.Lock(); VersionEdit edit; // Recover handles create_if_missing, error_if_exists bool save_manifest = false; Status s = impl->Recover(&edit, &save_manifest); if (s.ok() && impl->mem_ == NULL) { // Create new log and a corresponding memtable. uint64_t new_log_number = impl->versions_->NewFileNumber(); WritableFile* lfile; s = options.env->NewWritableFile(LogFileName(dbname, new_log_number), &lfile); if (s.ok()) { edit.SetLogNumber(new_log_number); impl->logfile_ = lfile; impl->logfile_number_ = new_log_number; impl->log_ = new log::Writer(lfile); impl->mem_ = new MemTable(impl->internal_comparator_); impl->mem_->Ref(); } } if (s.ok() && save_manifest) { edit.SetPrevLogNumber(0); // No older logs needed after recovery. edit.SetLogNumber(impl->logfile_number_); s = impl->versions_->LogAndApply(&edit, &impl->mutex_); } if (s.ok()) { impl->DeleteObsoleteFiles(); impl->MaybeScheduleCompaction(); } impl->mutex_.Unlock(); if (s.ok()) { assert(impl->mem_ != NULL); *dbptr = impl; } else { delete impl; } return s;}Status DBImpl::Recover(VersionEdit* edit, bool *save_manifest) { mutex_.AssertHeld(); // Ignore error from CreateDir since the creation of the DB is // committed only when the descriptor is created, and this directory // may already exist from a previous failed creation attempt. 
env_->CreateDir(dbname_); assert(db_lock_ == NULL); Status s = env_->LockFile(LockFileName(dbname_), &db_lock_); if (!s.ok()) { return s; } if (!env_->FileExists(CurrentFileName(dbname_))) { if (options_.create_if_missing) { s = NewDB(); if (!s.ok()) { return s; } } else { return Status::InvalidArgument( dbname_, "does not exist (create_if_missing is false)"); } } else { if (options_.error_if_exists) { return Status::InvalidArgument( dbname_, "exists (error_if_exists is true)"); } } // 这里调用了versions_->Recover函数 s = versions_->Recover(save_manifest); if (!s.ok()) { return s; } SequenceNumber max_sequence(0); // Recover from all newer log files than the ones named in the // descriptor (new log files may have been added by the previous // incarnation without registering them in the descriptor). // // Note that PrevLogNumber() is no longer used, but we pay // attention to it in case we are recovering a database // produced by an older version of leveldb. // 以下是复原在descriptor中记录的最初一个log file之后的所有日志文件 const uint64_t min_log = versions_->LogNumber(); const uint64_t prev_log = versions_->PrevLogNumber(); std::vector<std::string> filenames; s = env_->GetChildren(dbname_, &filenames); if (!s.ok()) { return s; } std::set<uint64_t> expected; versions_->AddLiveFiles(&expected); uint64_t number; FileType type; std::vector<uint64_t> logs; for (size_t i = 0; i < filenames.size(); i++) { if (ParseFileName(filenames[i], &number, &type)) { expected.erase(number); // 对于日志文件而言,所有文件编号大于等于versions_->LogNumber()的日志文件都没有来得及被写入以后版本,此时须要回放。 if (type == kLogFile && ((number >= min_log) || (number == prev_log))) logs.push_back(number); } } if (!expected.empty()) { char buf[50]; snprintf(buf, sizeof(buf), "%d missing files; e.g.", static_cast<int>(expected.size())); return Status::Corruption(buf, TableFileName(dbname_, *(expected.begin()))); } // Recover in the order in which the logs were generated std::sort(logs.begin(), logs.end()); for (size_t i = 0; i < logs.size(); i++) { s = 
RecoverLogFile(logs[i], (i == logs.size() - 1), save_manifest, edit, &max_sequence); if (!s.ok()) { return s; } // The previous incarnation may not have written any MANIFEST // records after allocating this log number. So we manually // update the file number allocation counter in VersionSet. versions_->MarkFileNumberUsed(logs[i]); } if (versions_->LastSequence() < max_sequence) { versions_->SetLastSequence(max_sequence); } return Status::OK();}上面关注下versions_->Recover()函数
...