From d2d8a41c47b408df35d1379255fc009f50a0b770 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Sun, 17 Sep 2017 00:12:42 +0100 Subject: Use actual batch size for resize estimates And optimize import startup: Remember start_height position during initial count_blocks pass to avoid having to reread entire file again to arrive at start_height --- src/blockchain_utilities/blockchain_import.cpp | 63 +++++++++++----- src/blockchain_utilities/bootstrap_file.cpp | 100 ++++++++++++++++--------- src/blockchain_utilities/bootstrap_file.h | 2 + 3 files changed, 109 insertions(+), 56 deletions(-) (limited to 'src/blockchain_utilities') diff --git a/src/blockchain_utilities/blockchain_import.cpp b/src/blockchain_utilities/blockchain_import.cpp index 635a70b10..d6302ea1d 100644 --- a/src/blockchain_utilities/blockchain_import.cpp +++ b/src/blockchain_utilities/blockchain_import.cpp @@ -230,11 +230,22 @@ int import_from_file(cryptonote::core& core, const std::string& import_file_path return false; } + uint64_t start_height = 1, seek_height; + if (opt_resume) + start_height = core.get_blockchain_storage().get_current_blockchain_height(); + + seek_height = start_height; BootstrapFile bootstrap; + streampos pos; // BootstrapFile bootstrap(import_file_path); - uint64_t total_source_blocks = bootstrap.count_blocks(import_file_path); + uint64_t total_source_blocks = bootstrap.count_blocks(import_file_path, pos, seek_height); MINFO("bootstrap file last block number: " << total_source_blocks-1 << " (zero-based height) total blocks: " << total_source_blocks); + if (total_source_blocks-1 <= start_height) + { + return false; + } + std::cout << ENDL; std::cout << "Preparing to read blocks..." << ENDL; std::cout << ENDL; @@ -259,11 +270,7 @@ int import_from_file(cryptonote::core& core, const std::string& import_file_path block b; transaction tx; int quit = 0; - uint64_t bytes_read = 0; - - uint64_t start_height = 1; - if (opt_resume) - start_height = core.get_blockchain_storage().get_current_blockchain_height(); + uint64_t bytes_read; // Note that a new blockchain will start with block number 0 (total blocks: 1) // due to genesis block being added at initialization. @@ -280,18 +287,35 @@ int import_from_file(cryptonote::core& core, const std::string& import_file_path bool use_batch = opt_batch && !opt_verify; - if (use_batch) - core.get_blockchain_storage().get_db().batch_start(db_batch_size); - MINFO("Reading blockchain from bootstrap file..."); std::cout << ENDL; std::list blocks; - // Within the loop, we skip to start_height before we start adding. - // TODO: Not a bottleneck, but we can use what's done in count_blocks() and - // only do the chunk size reads, skipping the chunk content reads until we're - // at start_height. + // Skip to start_height before we start adding. + { + bool q2 = false; + import_file.seekg(pos); + bytes_read = bootstrap.count_bytes(import_file, start_height-seek_height, h, q2); + if (q2) + { + quit = 2; + goto quitting; + } + h = start_height; + } + + if (use_batch) + { + uint64_t bytes, h2; + bool q2; + pos = import_file.tellg(); + bytes = bootstrap.count_bytes(import_file, db_batch_size, h2, q2); + if (import_file.eof()) + import_file.clear(); + import_file.seekg(pos); + core.get_blockchain_storage().get_db().batch_start(db_batch_size, bytes); + } while (! quit) { uint32_t chunk_size; @@ -344,11 +368,6 @@ int import_from_file(cryptonote::core& core, const std::string& import_file_path bytes_read += chunk_size; MDEBUG("Total bytes read: " << bytes_read); - if (h + NUM_BLOCKS_PER_CHUNK < start_height + 1) - { - h += NUM_BLOCKS_PER_CHUNK; - continue; - } if (h > block_stop) { std::cout << refresh_string << "block " << h-1 @@ -456,11 +475,16 @@ int import_from_file(cryptonote::core& core, const std::string& import_file_path { if ((h-1) % db_batch_size == 0) { + uint64_t bytes, h2; + bool q2; std::cout << refresh_string; // zero-based height std::cout << ENDL << "[- batch commit at height " << h-1 << " -]" << ENDL; core.get_blockchain_storage().get_db().batch_stop(); - core.get_blockchain_storage().get_db().batch_start(db_batch_size); + pos = import_file.tellg(); + bytes = bootstrap.count_bytes(import_file, db_batch_size, h2, q2); + import_file.seekg(pos); + core.get_blockchain_storage().get_db().batch_start(db_batch_size, bytes); std::cout << ENDL; core.get_blockchain_storage().get_db().show_stats(); } @@ -477,6 +501,7 @@ int import_from_file(cryptonote::core& core, const std::string& import_file_path } } // while +quitting: import_file.close(); if (opt_verify) diff --git a/src/blockchain_utilities/bootstrap_file.cpp b/src/blockchain_utilities/bootstrap_file.cpp index 2b1a5d6c7..a004d3547 100644 --- a/src/blockchain_utilities/bootstrap_file.cpp +++ b/src/blockchain_utilities/bootstrap_file.cpp @@ -375,39 +375,15 @@ uint64_t BootstrapFile::seek_to_first_chunk(std::ifstream& import_file) return full_header_size; } -uint64_t BootstrapFile::count_blocks(const std::string& import_file_path) +uint64_t BootstrapFile::count_bytes(std::ifstream& import_file, uint64_t blocks, uint64_t& h, bool& quit) { - boost::filesystem::path raw_file_path(import_file_path); - boost::system::error_code ec; - if (!boost::filesystem::exists(raw_file_path, ec)) - { - MFATAL("bootstrap file not found: " << raw_file_path); - throw std::runtime_error("Aborting"); - } - std::ifstream import_file; - import_file.open(import_file_path, std::ios_base::binary | std::ifstream::in); - - uint64_t h = 0; - if (import_file.fail()) - { - MFATAL("import_file.open() fail"); - throw std::runtime_error("Aborting"); - } - - uint64_t full_header_size; // 4 byte magic + length of header structures - full_header_size = seek_to_first_chunk(import_file); - - MINFO("Scanning blockchain from bootstrap file..."); - block b; - bool quit = false; uint64_t bytes_read = 0; - int progress_interval = 10; - + uint32_t chunk_size; + char buf1[sizeof(chunk_size)]; std::string str1; - char buf1[2048]; - while (! quit) + h = 0; + while (1) { - uint32_t chunk_size; import_file.read(buf1, sizeof(chunk_size)); if (!import_file) { std::cout << refresh_string; @@ -415,15 +391,7 @@ uint64_t BootstrapFile::count_blocks(const std::string& import_file_path) quit = true; break; } - h += NUM_BLOCKS_PER_CHUNK; - if ((h-1) % progress_interval == 0) - { - std::cout << "\r" << "block height: " << h-1 << - " " << - std::flush; - } bytes_read += sizeof(chunk_size); - str1.assign(buf1, sizeof(chunk_size)); if (! ::serialization::parse_binary(str1, chunk_size)) throw std::runtime_error("Error in deserialization of chunk_size"); @@ -456,6 +424,64 @@ uint64_t BootstrapFile::count_blocks(const std::string& import_file_path) throw std::runtime_error("Aborting"); } bytes_read += chunk_size; + h += NUM_BLOCKS_PER_CHUNK; + if (h >= blocks) + break; + } + return bytes_read; +} + +uint64_t BootstrapFile::count_blocks(const std::string& import_file_path) +{ + streampos dummy_pos; + uint64_t dummy_height = 0; + return count_blocks(import_file_path, dummy_pos, dummy_height); +} + +// If seek_height is non-zero on entry, return a stream position <= this height when finished. +// And return the actual height corresponding to this position. Allows the caller to locate its +// starting position without having to reread the entire file again. +uint64_t BootstrapFile::count_blocks(const std::string& import_file_path, streampos &start_pos, uint64_t& seek_height) +{ + boost::filesystem::path raw_file_path(import_file_path); + boost::system::error_code ec; + if (!boost::filesystem::exists(raw_file_path, ec)) + { + MFATAL("bootstrap file not found: " << raw_file_path); + throw std::runtime_error("Aborting"); + } + std::ifstream import_file; + import_file.open(import_file_path, std::ios_base::binary | std::ifstream::in); + + uint64_t start_height = seek_height; + uint64_t h = 0; + if (import_file.fail()) + { + MFATAL("import_file.open() fail"); + throw std::runtime_error("Aborting"); + } + + uint64_t full_header_size; // 4 byte magic + length of header structures + full_header_size = seek_to_first_chunk(import_file); + + MINFO("Scanning blockchain from bootstrap file..."); + bool quit = false; + uint64_t bytes_read = 0, blocks; + int progress_interval = 10; + + while (! quit) + { + if (start_height && h + progress_interval >= start_height - 1) + { + start_height = 0; + start_pos = import_file.tellg(); + seek_height = h; + } + bytes_read += count_bytes(import_file, progress_interval, blocks, quit); + h += blocks; + std::cout << "\r" << "block height: " << h-1 << + " " << + std::flush; // std::cout << refresh_string; MDEBUG("Number bytes scanned: " << bytes_read); diff --git a/src/blockchain_utilities/bootstrap_file.h b/src/blockchain_utilities/bootstrap_file.h index 1a646b54b..c3969a357 100644 --- a/src/blockchain_utilities/bootstrap_file.h +++ b/src/blockchain_utilities/bootstrap_file.h @@ -56,6 +56,8 @@ class BootstrapFile { public: + uint64_t count_bytes(std::ifstream& import_file, uint64_t blocks, uint64_t& h, bool& quit); + uint64_t count_blocks(const std::string& dir_path, streampos& start_pos, uint64_t& seek_height); uint64_t count_blocks(const std::string& dir_path); uint64_t seek_to_first_chunk(std::ifstream& import_file); -- cgit v1.2.3