diff options
author | warptangent <warptangent@inbox.com> | 2015-05-08 13:46:26 -0700 |
---|---|---|
committer | warptangent <warptangent@inbox.com> | 2015-05-08 14:12:20 -0700 |
commit | 1eb4c66ad8647c8f73c31dd32645a689426346bd (patch) | |
tree | 095e5fd0143a7e2366217e58078bc3a0c215ec69 | |
parent | Add MDB_NORDAHEAD as a supported LMDB flag for blockchain_import (diff) | |
download | monero-1eb4c66ad8647c8f73c31dd32645a689426346bd.tar.xz |
Update blockchain utilities with portable bootstrap file format
Remove repeated coinbase tx in each exported block's data.
Add resume from last exported height to blockchain_export, making it the
default behavior when the file already exists.
Start reorganizing the utilities.
Various cleanup.
Update output, including reporting both height and block numbers as
zero-based instead of one-based. Zero-based numbering matches the block
data itself, whereas one-based numbering only matched some parts of the
existing codebase.
Use smaller default batch sizes for importer when verifying, so progress
is saved more frequently.
Use a small default batch size (1000) for the importer on Windows, due to
a current issue with large transaction sizes in LMDB.
file format
-----------
[4-byte magic | variable-length header | block data]
header
------
4-byte file_info length
file_info struct
  file format major version
  file format minor version
  header length (includes file_info struct)
[rest of header, padded with 0 bytes up to header length]
block data
----------
4-byte chunk/block_package length
block_package struct
  block
  txs (coinbase/miner tx included already in block)
  block_size
  cumulative_difficulty
  coins_generated
4-byte chunk/block_package length
block_package struct
[...]
-rw-r--r-- | src/blockchain_converter/CMakeLists.txt | 9 | ||||
-rw-r--r-- | src/blockchain_converter/README.md | 2 | ||||
-rw-r--r-- | src/blockchain_converter/blockchain_converter.cpp | 11 | ||||
-rw-r--r-- | src/blockchain_converter/blockchain_export.cpp | 258 | ||||
-rw-r--r-- | src/blockchain_converter/blockchain_import.cpp | 326 | ||||
-rw-r--r-- | src/blockchain_converter/bootstrap_file.cpp | 501 | ||||
-rw-r--r-- | src/blockchain_converter/bootstrap_file.h (renamed from src/blockchain_converter/blockchain_export.h) | 43 | ||||
-rw-r--r-- | src/blockchain_converter/bootstrap_serialization.h (renamed from src/blockchain_converter/import.h) | 65 |
8 files changed, 755 insertions, 460 deletions
diff --git a/src/blockchain_converter/CMakeLists.txt b/src/blockchain_converter/CMakeLists.txt index 660650980..5be37c450 100644 --- a/src/blockchain_converter/CMakeLists.txt +++ b/src/blockchain_converter/CMakeLists.txt @@ -37,11 +37,13 @@ bitmonero_private_headers(blockchain_converter set(blockchain_import_sources blockchain_import.cpp + bootstrap_file.cpp ) set(blockchain_import_private_headers - import.h fake_core.h + bootstrap_file.h + bootstrap_serialization.h ) bitmonero_private_headers(blockchain_import @@ -49,11 +51,12 @@ bitmonero_private_headers(blockchain_import set(blockchain_export_sources blockchain_export.cpp + bootstrap_file.cpp ) set(blockchain_export_private_headers - import.h - blockchain_export.h + bootstrap_file.h + bootstrap_serialization.h ) bitmonero_private_headers(blockchain_export diff --git a/src/blockchain_converter/README.md b/src/blockchain_converter/README.md index 00160c6b9..ea9aec24a 100644 --- a/src/blockchain_converter/README.md +++ b/src/blockchain_converter/README.md @@ -9,7 +9,7 @@ This is also the default compile setting on the blockchain branch. By default, the exporter will use the original in-memory database (blockchain.bin) as its source. This default is to make migrating to an LMDB database easy, without having to recompile anything. -To change the source, adjust `SOURCE_DB` in `src/blockchain_converter/blockchain_export.h` according to the comments. +To change the source, adjust `SOURCE_DB` in `src/blockchain_converter/bootstrap_file.h` according to the comments. 
# Usage: diff --git a/src/blockchain_converter/blockchain_converter.cpp b/src/blockchain_converter/blockchain_converter.cpp index c3c6d0918..855dde644 100644 --- a/src/blockchain_converter/blockchain_converter.cpp +++ b/src/blockchain_converter/blockchain_converter.cpp @@ -57,7 +57,16 @@ bool opt_testnet = false; // number of blocks per batch transaction // adjustable through command-line argument according to available RAM -uint64_t db_batch_size = 20000; +#if !defined(WIN32) +uint64_t db_batch_size_verify = 5000; +#else +// set a lower default batch size for Windows, pending possible LMDB issue with +// large batch size. +uint64_t db_batch_size_verify = 1000; +#endif + +// converter only uses verify mode +uint64_t db_batch_size = db_batch_size_verify; } diff --git a/src/blockchain_converter/blockchain_export.cpp b/src/blockchain_converter/blockchain_export.cpp index 8d7c78be3..aa34ea1dc 100644 --- a/src/blockchain_converter/blockchain_export.cpp +++ b/src/blockchain_converter/blockchain_export.cpp @@ -26,264 +26,14 @@ // STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF // THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-#include <algorithm> -#include <cstdio> -#include <fstream> -#include <boost/iostreams/copy.hpp> -#include <atomic> - -#include <boost/archive/binary_oarchive.hpp> -#include <boost/archive/binary_iarchive.hpp> -#include <boost/iostreams/stream_buffer.hpp> -#include <boost/iostreams/stream.hpp> -#include <boost/iostreams/device/back_inserter.hpp> -#include <boost/iostreams/filtering_streambuf.hpp> -#include <boost/iostreams/filter/bzip2.hpp> +#include "bootstrap_file.h" #include "common/command_line.h" #include "version.h" -#include "blockchain_export.h" -#include "cryptonote_core/cryptonote_boost_serialization.h" - -#include "import.h" unsigned int epee::g_test_dbg_lock_sleep = 0; -static int max_chunk = 0; -static size_t height; - namespace po = boost::program_options; - -using namespace cryptonote; -using namespace epee; - -bool BlockchainExport::open(const boost::filesystem::path& dir_path) -{ - if (boost::filesystem::exists(dir_path)) - { - if (!boost::filesystem::is_directory(dir_path)) - { - LOG_PRINT_RED_L0("export directory path is a file: " << dir_path); - return false; - } - } - else - { - if (!boost::filesystem::create_directory(dir_path)) - { - LOG_PRINT_RED_L0("Failed to create directory " << dir_path); - return false; - } - } - - std::string file_path = (dir_path / BLOCKCHAIN_RAW).string(); - m_raw_data_file = new std::ofstream(); - m_raw_data_file->open(file_path , std::ios_base::binary | std::ios_base::out| std::ios::trunc); - if (m_raw_data_file->fail()) - return false; - - m_output_stream = new boost::iostreams::stream<boost::iostreams::back_insert_device<buffer_type>>(m_buffer); - m_raw_archive = new boost::archive::binary_oarchive(*m_output_stream); - if (m_raw_archive == NULL) - return false; - - return true; -} - -void BlockchainExport::flush_chunk() -{ - m_output_stream->flush(); - char buffer[STR_LENGTH_OF_INT + 1]; - int chunk_size = (int) m_buffer.size(); - if (chunk_size > BUFFER_SIZE) - { - LOG_PRINT_L0("WARNING: chunk_size " << 
chunk_size << " > BUFFER_SIZE " << BUFFER_SIZE); - } - sprintf(buffer, STR_FORMAT_OF_INT, chunk_size); - m_raw_data_file->write(buffer, STR_LENGTH_OF_INT); - if (max_chunk < chunk_size) - { - max_chunk = chunk_size; - } - long pos_before = m_raw_data_file->tellp(); - std::copy(m_buffer.begin(), m_buffer.end(), std::ostreambuf_iterator<char>(*m_raw_data_file)); - m_raw_data_file->flush(); - long pos_after = m_raw_data_file->tellp(); - long num_chars_written = pos_after - pos_before; - if ((int) num_chars_written != chunk_size) - { - LOG_PRINT_RED_L0("INTERNAL ERROR: num chars wrote NEQ buffer size. height = " << height); - } - - m_buffer.clear(); - delete m_raw_archive; - delete m_output_stream; - m_output_stream = new boost::iostreams::stream<boost::iostreams::back_insert_device<buffer_type>>(m_buffer); - m_raw_archive = new boost::archive::binary_oarchive(*m_output_stream); -} - -void BlockchainExport::serialize_block_to_text_buffer(const block& block) -{ - *m_raw_archive << block; -} - -void BlockchainExport::buffer_serialize_tx(const transaction& tx) -{ - *m_raw_archive << tx; -} - -void BlockchainExport::buffer_write_num_txs(const std::list<transaction> txs) -{ - int n = txs.size(); - *m_raw_archive << n; -} - -void BlockchainExport::write_block(block& block) -{ - serialize_block_to_text_buffer(block); - std::list<transaction> txs; - - uint64_t block_height = boost::get<txin_gen>(block.miner_tx.vin.front()).height; - - // put coinbase transaction first - transaction coinbase_tx = block.miner_tx; - crypto::hash coinbase_tx_hash = get_transaction_hash(coinbase_tx); -#if SOURCE_DB == DB_MEMORY - const transaction* cb_tx_full = m_blockchain_storage->get_tx(coinbase_tx_hash); -#else - transaction cb_tx_full = m_blockchain_storage->get_db().get_tx(coinbase_tx_hash); -#endif - -#if SOURCE_DB == DB_MEMORY - if (cb_tx_full != NULL) - { - txs.push_back(*cb_tx_full); - } -#else - // TODO: should check and abort if cb_tx_full equals null_hash? 
- txs.push_back(cb_tx_full); -#endif - - // now add all regular transactions - BOOST_FOREACH(const auto& tx_id, block.tx_hashes) - { -#if SOURCE_DB == DB_MEMORY - const transaction* tx = m_blockchain_storage->get_tx(tx_id); -#else - transaction tx = m_blockchain_storage->get_db().get_tx(tx_id); -#endif - -#if SOURCE_DB == DB_MEMORY - if(tx == NULL) - { - if (! m_tx_pool) - throw std::runtime_error("Aborting: tx == NULL, so memory pool required to get tx, but memory pool isn't enabled"); - else - { - transaction tx; - if(m_tx_pool->get_transaction(tx_id, tx)) - txs.push_back(tx); - else - throw std::runtime_error("Aborting: tx not found in pool"); - } - } - else - txs.push_back(*tx); -#else - txs.push_back(tx); -#endif - } - - // serialize all txs to the persistant storage - buffer_write_num_txs(txs); - BOOST_FOREACH(const auto& tx, txs) - { - buffer_serialize_tx(tx); - } - - // These three attributes are currently necessary for a fast import that adds blocks without verification. - bool include_extra_block_data = true; - if (include_extra_block_data) - { -#if SOURCE_DB == DB_MEMORY - size_t block_size = m_blockchain_storage->get_block_size(block_height); - difficulty_type cumulative_difficulty = m_blockchain_storage->get_block_cumulative_difficulty(block_height); - uint64_t coins_generated = m_blockchain_storage->get_block_coins_generated(block_height); -#else - size_t block_size = m_blockchain_storage->get_db().get_block_size(block_height); - difficulty_type cumulative_difficulty = m_blockchain_storage->get_db().get_block_cumulative_difficulty(block_height); - uint64_t coins_generated = m_blockchain_storage->get_db().get_block_already_generated_coins(block_height); -#endif - - *m_raw_archive << block_size; - *m_raw_archive << cumulative_difficulty; - *m_raw_archive << coins_generated; - } -} - -bool BlockchainExport::BlockchainExport::close() -{ - if (m_raw_data_file->fail()) - return false; - - m_raw_data_file->flush(); - delete m_raw_archive; - delete 
m_output_stream; - delete m_raw_data_file; - return true; -} - - -#if SOURCE_DB == DB_MEMORY -bool BlockchainExport::store_blockchain_raw(blockchain_storage* _blockchain_storage, tx_memory_pool* _tx_pool, boost::filesystem::path& output_dir, uint64_t requested_block_height) -#else -bool BlockchainExport::store_blockchain_raw(Blockchain* _blockchain_storage, tx_memory_pool* _tx_pool, boost::filesystem::path& output_dir, uint64_t requested_block_height) -#endif -{ - uint64_t block_height = 0; - m_blockchain_storage = _blockchain_storage; - m_tx_pool = _tx_pool; - uint64_t progress_interval = 100; - std::string refresh_string = "\r \r"; - LOG_PRINT_L0("Storing blocks raw data..."); - if (!BlockchainExport::open(output_dir)) - { - LOG_PRINT_RED_L0("failed to open raw file for write"); - return false; - } - block b; - LOG_PRINT_L0("source blockchain height: " << m_blockchain_storage->get_current_blockchain_height()); - LOG_PRINT_L0("requested block height: " << requested_block_height); - if ((requested_block_height > 0) && (requested_block_height < m_blockchain_storage->get_current_blockchain_height())) - block_height = requested_block_height; - else - { - block_height = m_blockchain_storage->get_current_blockchain_height(); - LOG_PRINT_L0("Using block height of source blockchain: " << block_height); - } - for (height=0; height < block_height; ++height) - { - crypto::hash hash = m_blockchain_storage->get_block_id_by_height(height); - m_blockchain_storage->get_block_by_hash(hash, b); - write_block(b); - if (height % NUM_BLOCKS_PER_CHUNK == 0) { - flush_chunk(); - } - if (height % progress_interval == 0) { - std::cout << refresh_string; - std::cout << "height " << height << "/" << block_height << std::flush; - } - } - if (height % NUM_BLOCKS_PER_CHUNK != 0) - { - flush_chunk(); - } - std::cout << refresh_string; - std::cout << "height " << height << "/" << block_height << ENDL; - - LOG_PRINT_L0("longest chunk was " << max_chunk << " bytes"); - return 
BlockchainExport::close(); -} - +using namespace epee; // log_space int main(int argc, char* argv[]) { @@ -397,8 +147,8 @@ int main(int argc, char* argv[]) LOG_PRINT_L0("Source blockchain storage initialized OK"); LOG_PRINT_L0("Exporting blockchain raw data..."); - BlockchainExport be; - r = be.store_blockchain_raw(core_storage, NULL, output_dir, block_height); + BootstrapFile bootstrap; + r = bootstrap.store_blockchain_raw(core_storage, NULL, output_dir, block_height); CHECK_AND_ASSERT_MES(r, false, "Failed to export blockchain raw data"); LOG_PRINT_L0("Blockchain raw data exported OK"); } diff --git a/src/blockchain_converter/blockchain_import.cpp b/src/blockchain_converter/blockchain_import.cpp index 6f952375f..41e929577 100644 --- a/src/blockchain_converter/blockchain_import.cpp +++ b/src/blockchain_converter/blockchain_import.cpp @@ -32,34 +32,43 @@ #include <fstream> #include <boost/filesystem.hpp> -#include <boost/iostreams/stream.hpp> -#include <boost/archive/binary_iarchive.hpp> -#include "cryptonote_core/cryptonote_basic.h" +#include "bootstrap_file.h" +#include "bootstrap_serialization.h" #include "cryptonote_core/cryptonote_format_utils.h" -#include "cryptonote_core/cryptonote_boost_serialization.h" +#include "serialization/binary_utils.h" // dump_binary(), parse_binary() #include "serialization/json_utils.h" // dump_json() #include "include_base_utils.h" -#include "common/command_line.h" -#include "version.h" #include <lmdb.h> // for db flag arguments -#include "import.h" #include "fake_core.h" unsigned int epee::g_test_dbg_lock_sleep = 0; +namespace +{ // CONFIG -static bool opt_batch = true; -static bool opt_verify = true; // use add_new_block, which does verification before calling add_block -static bool opt_resume = true; -static bool opt_testnet = true; +bool opt_batch = true; +bool opt_verify = true; // use add_new_block, which does verification before calling add_block +bool opt_resume = true; +bool opt_testnet = true; // number of blocks per 
batch transaction // adjustable through command-line argument according to available RAM -static uint64_t db_batch_size = 20000; +#if !defined(WIN32) +uint64_t db_batch_size = 20000; +#else +// set a lower default batch size, pending possible LMDB issue with large transaction size +uint64_t db_batch_size = 1000; +#endif + +// when verifying, use a smaller default batch size so progress is more +// frequently saved +uint64_t db_batch_size_verify = 5000; + +std::string refresh_string = "\r \r"; +} -static std::string refresh_string = "\r \r"; namespace po = boost::program_options; @@ -159,93 +168,6 @@ int pop_blocks(FakeCore& simple_core, int num_blocks) return num_blocks; } -int count_blocks(std::string& import_file_path) -{ - boost::filesystem::path raw_file_path(import_file_path); - boost::system::error_code ec; - if (!boost::filesystem::exists(raw_file_path, ec)) - { - LOG_PRINT_L0("import file not found: " << raw_file_path); - throw std::runtime_error("Aborting"); - } - std::ifstream import_file; - import_file.open(import_file_path, std::ios_base::binary | std::ifstream::in); - - uint64_t h = 0; - if (import_file.fail()) - { - LOG_PRINT_L0("import_file.open() fail"); - throw std::runtime_error("Aborting"); - } - LOG_PRINT_L0("Scanning blockchain from import file..."); - char buffer1[STR_LENGTH_OF_INT + 1]; - block b; - transaction tx; - bool quit = false; - uint64_t bytes_read = 0; - int progress_interval = 10; - - while (! 
quit) - { - int chunk_size; - import_file.read(buffer1, STR_LENGTH_OF_INT); - if (!import_file) { - std::cout << refresh_string; - LOG_PRINT_L1("End of import file reached"); - quit = true; - break; - } - h += NUM_BLOCKS_PER_CHUNK; - if (h % progress_interval == 0) - { - std::cout << refresh_string << "block height: " << h << - std::flush; - } - bytes_read += STR_LENGTH_OF_INT; - buffer1[STR_LENGTH_OF_INT] = '\0'; - chunk_size = atoi(buffer1); - if (chunk_size > BUFFER_SIZE) - { - std::cout << refresh_string; - LOG_PRINT_L0("WARNING: chunk_size " << chunk_size << " > BUFFER_SIZE " << BUFFER_SIZE - << " height: " << h); - throw std::runtime_error("Aborting: chunk size exceeds buffer size"); - } - if (chunk_size > 100000) - { - std::cout << refresh_string; - LOG_PRINT_L0("WARNING: chunk_size " << chunk_size << " > 100000" << " height: " - << h); - } - else if (chunk_size <= 0) { - std::cout << refresh_string; - LOG_PRINT_L0("ERROR: chunk_size " << chunk_size << " <= 0" << " height: " << h); - throw std::runtime_error("Aborting"); - } - // skip to next expected block size value - import_file.seekg(chunk_size, std::ios_base::cur); - if (! 
import_file) { - std::cout << refresh_string; - LOG_PRINT_L0("ERROR: unexpected end of import file: bytes read before error: " - << import_file.gcount() << " of chunk_size " << chunk_size); - throw std::runtime_error("Aborting"); - } - bytes_read += chunk_size; - std::cout << refresh_string; - - LOG_PRINT_L3("Total bytes scanned: " << bytes_read); - } - - import_file.close(); - - std::cout << ENDL; - std::cout << "Done scanning import file" << ENDL; - std::cout << "Total bytes scanned: " << bytes_read << ENDL; - std::cout << "Height: " << h << ENDL; - - return h; -} - template <typename FakeCore> int import_from_file(FakeCore& simple_core, std::string& import_file_path) { @@ -266,23 +188,35 @@ int import_from_file(FakeCore& simple_core, std::string& import_file_path) boost::system::error_code ec; if (!boost::filesystem::exists(raw_file_path, ec)) { - LOG_PRINT_L0("import file not found: " << raw_file_path); + LOG_PRINT_L0("bootstrap file not found: " << raw_file_path); return false; } - uint64_t source_height = count_blocks(import_file_path); - LOG_PRINT_L0("import file blockchain height: " << source_height); + BootstrapFile bootstrap; + // BootstrapFile bootstrap(import_file_path); + uint64_t total_source_blocks = bootstrap.count_blocks(import_file_path); + LOG_PRINT_L0("bootstrap file last block number: " << total_source_blocks-1 << " (zero-based height) total blocks: " << total_source_blocks); + + std::cout << ENDL; + std::cout << "Preparing to read blocks..." 
<< ENDL; + std::cout << ENDL; std::ifstream import_file; import_file.open(import_file_path, std::ios_base::binary | std::ifstream::in); uint64_t h = 0; + uint64_t num_imported = 0; if (import_file.fail()) { LOG_PRINT_L0("import_file.open() fail"); return false; } - char buffer1[STR_LENGTH_OF_INT + 1]; + + // 4 byte magic + (currently) 1024 byte header structures + bootstrap.seek_to_first_chunk(import_file); + + std::string str1; + char buffer1[1024]; char buffer_block[BUFFER_SIZE]; block b; transaction tx; @@ -293,17 +227,16 @@ int import_from_file(FakeCore& simple_core, std::string& import_file_path) if (opt_resume) start_height = simple_core.m_storage.get_current_blockchain_height(); - // Note that a new blockchain will start with a height of 1 (block number 0) + // Note that a new blockchain will start with block number 0 (total blocks: 1) // due to genesis block being added at initialization. // CONFIG // TODO: can expand on this, e.g. with --block-number option - uint64_t stop_height = source_height; + uint64_t stop_height = total_source_blocks - 1; // These are what we'll try to use, and they don't have to be a determination - // from source and destination blockchains, but those are the current - // defaults. - LOG_PRINT_L0("start height: " << start_height << " stop height: " << + // from source and destination blockchains, but those are the defaults. + LOG_PRINT_L0("start block: " << start_height << " stop block: " << stop_height); bool use_batch = false; @@ -318,7 +251,7 @@ int import_from_file(FakeCore& simple_core, std::string& import_file_path) if (use_batch) simple_core.batch_start(); - LOG_PRINT_L0("Reading blockchain from import file..."); + LOG_PRINT_L0("Reading blockchain from bootstrap file..."); std::cout << ENDL; // Within the loop, we skip to start_height before we start adding. @@ -327,17 +260,24 @@ int import_from_file(FakeCore& simple_core, std::string& import_file_path) // at start_height. while (! 
quit) { - int chunk_size; - import_file.read(buffer1, STR_LENGTH_OF_INT); + uint32_t chunk_size; + import_file.read(buffer1, sizeof(chunk_size)); + // TODO: bootstrap.read_chunk(); if (! import_file) { std::cout << refresh_string; - LOG_PRINT_L0("End of import file reached"); + LOG_PRINT_L0("End of file reached"); quit = 1; break; } - bytes_read += STR_LENGTH_OF_INT; - buffer1[STR_LENGTH_OF_INT] = '\0'; - chunk_size = atoi(buffer1); + bytes_read += sizeof(chunk_size); + + str1.assign(buffer1, sizeof(chunk_size)); + if (! ::serialization::parse_binary(str1, chunk_size)) + { + throw std::runtime_error("Error in deserialization of chunk size"); + } + LOG_PRINT_L1("chunk_size: " << chunk_size); + if (chunk_size > BUFFER_SIZE) { LOG_PRINT_L0("WARNING: chunk_size " << chunk_size << " > BUFFER_SIZE " << BUFFER_SIZE); @@ -345,7 +285,7 @@ int import_from_file(FakeCore& simple_core, std::string& import_file_path) } if (chunk_size > 100000) { - LOG_PRINT_L0("WARNING: chunk_size " << chunk_size << " > 100000"); + LOG_PRINT_L0("NOTE: chunk_size " << chunk_size << " > 100000"); } else if (chunk_size < 0) { LOG_PRINT_L0("ERROR: chunk_size " << chunk_size << " < 0"); @@ -353,7 +293,7 @@ int import_from_file(FakeCore& simple_core, std::string& import_file_path) } import_file.read(buffer_block, chunk_size); if (! import_file) { - LOG_PRINT_L0("ERROR: unexpected end of import file: bytes read before error: " + LOG_PRINT_L0("ERROR: unexpected end of file: bytes read before error: " << import_file.gcount() << " of chunk_size " << chunk_size); return 2; } @@ -367,77 +307,79 @@ int import_from_file(FakeCore& simple_core, std::string& import_file_path) } if (h > stop_height) { - LOG_PRINT_L0("Specified height reached - stopping. height: " << h << " block: " << h-1); + std::cout << refresh_string << "block " << h-1 + << " / " << stop_height + << std::flush; + std::cout << ENDL << ENDL; + LOG_PRINT_L0("Specified block number reached - stopping. 
block: " << h-1 << " total blocks: " << h); quit = 1; break; } try { - boost::iostreams::basic_array_source<char> device(buffer_block, chunk_size); - boost::iostreams::stream<boost::iostreams::basic_array_source<char>> s(device); - boost::archive::binary_iarchive a(s); + str1.assign(buffer_block, chunk_size); + bootstrap::block_package bp; + if (! ::serialization::parse_binary(str1, bp)) + throw std::runtime_error("Error in deserialization of chunk"); int display_interval = 1000; int progress_interval = 10; - for (int chunk_ind = 0; chunk_ind < NUM_BLOCKS_PER_CHUNK; chunk_ind++) + // NOTE: use of NUM_BLOCKS_PER_CHUNK is a placeholder in case multi-block chunks are later supported. + for (int chunk_ind = 0; chunk_ind < NUM_BLOCKS_PER_CHUNK; ++chunk_ind) { - h++; - if (h % display_interval == 0) + ++h; + if ((h-1) % display_interval == 0) { std::cout << refresh_string; - LOG_PRINT_L0("loading block height " << h); + LOG_PRINT_L0("loading block number " << h-1); } else { - LOG_PRINT_L3("loading block height " << h); - } - try { - a >> b; - } - catch (const std::exception& e) - { - std::cout << refresh_string; - LOG_PRINT_RED_L0("exception while de-archiving block, height=" << h); - quit = 1; - break; + LOG_PRINT_L3("loading block number " << h-1); } + b = bp.block; LOG_PRINT_L2("block prev_id: " << b.prev_id << ENDL); - if (h % progress_interval == 0) + if ((h-1) % progress_interval == 0) { std::cout << refresh_string << "block " << h-1 + << " / " << stop_height << std::flush; } std::vector<transaction> txs; + std::vector<transaction> archived_txs; - int num_txs; - try - { - a >> num_txs; - } - catch (const std::exception& e) - { - std::cout << refresh_string; - LOG_PRINT_RED_L0("exception while de-archiving tx-num, height=" << h); - quit = 1; - break; - } - for(int tx_num = 1; tx_num <= num_txs; tx_num++) + archived_txs = bp.txs; + + // std::cout << refresh_string; + // LOG_PRINT_L1("txs: " << archived_txs.size()); + + // if archived_txs is invalid + // { + // 
std::cout << refresh_string; + // LOG_PRINT_RED_L0("exception while de-archiving txs, height=" << h); + // quit = 1; + // break; + // } + + // tx number 1: coinbase tx + // tx number 2 onwards: archived_txs + unsigned int tx_num = 1; + for (const transaction& tx : archived_txs) { - try { - a >> tx; - } - catch (const std::exception& e) - { - LOG_PRINT_RED_L0("exception while de-archiving tx, height=" << h <<", tx_num=" << tx_num); - quit = 1; - break; - } - // if (tx_num == 1) { - // std::cout << "coinbase transaction" << ENDL; + ++tx_num; + // if tx is invalid + // { + // LOG_PRINT_RED_L0("exception while indexing tx from txs, height=" << h <<", tx_num=" << tx_num); + // quit = 1; + // break; // } + + // std::cout << refresh_string; + // LOG_PRINT_L1("tx hash: " << get_transaction_hash(tx)); + // crypto::hash hsh = null_hash; // size_t blob_size = 0; // NOTE: all tx hashes except for coinbase tx are available in the block data @@ -449,9 +391,6 @@ int import_from_file(FakeCore& simple_core, std::string& import_file_path) // for Blockchain and blockchain_storage add_new_block(). if (opt_verify) { - if (tx_num == 1) { - continue; // coinbase transaction. no need to insert to tx_pool. - } // crypto::hash hsh = null_hash; // size_t blob_size = 0; // get_transaction_hash(tx, hsh, blob_size); @@ -473,10 +412,7 @@ int import_from_file(FakeCore& simple_core, std::string& import_file_path) // because add_block() calls // add_transaction(blk_hash, blk.miner_tx) first, and // then a for loop for the transactions in txs. 
- if (tx_num > 1) - { - txs.push_back(tx); - } + txs.push_back(tx); } } @@ -488,7 +424,7 @@ int import_from_file(FakeCore& simple_core, std::string& import_file_path) if (bvc.m_verifivation_failed) { LOG_PRINT_L0("Failed to add block to blockchain, verification failed, height = " << h); - LOG_PRINT_L0("skipping rest of import file"); + LOG_PRINT_L0("skipping rest of file"); // ok to commit previously batched data because it failed only in // verification of potential new block with nothing added to batch // yet @@ -498,7 +434,7 @@ int import_from_file(FakeCore& simple_core, std::string& import_file_path) if (! bvc.m_added_to_main_chain) { LOG_PRINT_L0("Failed to add block to blockchain, height = " << h); - LOG_PRINT_L0("skipping rest of import file"); + LOG_PRINT_L0("skipping rest of file"); // make sure we don't commit partial block data quit = 2; break; @@ -510,14 +446,14 @@ int import_from_file(FakeCore& simple_core, std::string& import_file_path) difficulty_type cumulative_difficulty; uint64_t coins_generated; - a >> block_size; - a >> cumulative_difficulty; - a >> coins_generated; + block_size = bp.block_size; + cumulative_difficulty = bp.cumulative_difficulty; + coins_generated = bp.coins_generated; - std::cout << refresh_string; - LOG_PRINT_L2("block_size: " << block_size); - LOG_PRINT_L2("cumulative_difficulty: " << cumulative_difficulty); - LOG_PRINT_L2("coins_generated: " << coins_generated); + // std::cout << refresh_string; + // LOG_PRINT_L2("block_size: " << block_size); + // LOG_PRINT_L2("cumulative_difficulty: " << cumulative_difficulty); + // LOG_PRINT_L2("coins_generated: " << coins_generated); try { @@ -531,13 +467,15 @@ int import_from_file(FakeCore& simple_core, std::string& import_file_path) break; } } + ++num_imported; if (use_batch) { - if (h % db_batch_size == 0) + if ((h-1) % db_batch_size == 0) { std::cout << refresh_string; - std::cout << ENDL << "[- batch commit at height " << h << " -]" << ENDL; + // zero-based height + std::cout << 
ENDL << "[- batch commit at height " << h-1 << " -]" << ENDL; simple_core.batch_stop(); simple_core.batch_start(); std::cout << ENDL; @@ -551,7 +489,7 @@ int import_from_file(FakeCore& simple_core, std::string& import_file_path) catch (const std::exception& e) { std::cout << refresh_string; - LOG_PRINT_RED_L0("exception while reading from import file, height=" << h); + LOG_PRINT_RED_L0("exception while reading from file, height=" << h); return 2; } } // while @@ -572,8 +510,10 @@ int import_from_file(FakeCore& simple_core, std::string& import_file_path) #if !defined(BLOCKCHAIN_DB) || (BLOCKCHAIN_DB == DB_LMDB) simple_core.m_storage.get_db().show_stats(); #endif + LOG_PRINT_L0("Number of blocks imported: " << num_imported) if (h > 0) - LOG_PRINT_L0("Finished at height: " << h << " block: " << h-1); + // TODO: if there was an error, the last added block is probably at zero-based height h-2 + LOG_PRINT_L0("Finished at block: " << h-1 << " total blocks: " << h); } std::cout << ENDL; return 0; @@ -608,7 +548,7 @@ int main(int argc, char* argv[]) }; const command_line::arg_descriptor<bool> arg_count_blocks = { "count-blocks" - , "Count blocks in import file and exit" + , "Count blocks in bootstrap file and exit" , false }; const command_line::arg_descriptor<std::string> arg_database = { @@ -677,6 +617,18 @@ int main(int argc, char* argv[]) std::cerr << "Error: batch-size must be > 0" << ENDL; exit(1); } + if (opt_verify && vm["batch-size"].defaulted()) + { + // usually want batch size default lower if verify on, so progress can be + // frequently saved. 
+ // + // currently, with Windows, default batch size is low, so ignore + // default db_batch_size_verify unless it's even lower + if (db_batch_size > db_batch_size_verify) + { + db_batch_size = db_batch_size_verify; + } + } std::vector<std::string> db_engines {"memory", "lmdb"}; @@ -694,10 +646,10 @@ int main(int argc, char* argv[]) std::string import_file_path; import_file_path = (file_path / "export" / import_filename).string(); - if (command_line::has_arg(vm, arg_count_blocks)) { - count_blocks(import_file_path); + BootstrapFile bootstrap; + bootstrap.count_blocks(import_file_path); exit(0); } @@ -732,8 +684,8 @@ int main(int argc, char* argv[]) LOG_PRINT_L0("resume: " << std::boolalpha << opt_resume << std::noboolalpha); LOG_PRINT_L0("testnet: " << std::boolalpha << opt_testnet << std::noboolalpha); - std::cout << "import file path: " << import_file_path << ENDL; - std::cout << "database path: " << file_path.string() << ENDL; + LOG_PRINT_L0("bootstrap file path: " << import_file_path); + LOG_PRINT_L0("database path: " << file_path.string()); try { diff --git a/src/blockchain_converter/bootstrap_file.cpp b/src/blockchain_converter/bootstrap_file.cpp new file mode 100644 index 000000000..fb67e12bc --- /dev/null +++ b/src/blockchain_converter/bootstrap_file.cpp @@ -0,0 +1,501 @@ +// Copyright (c) 2014-2015, The Monero Project +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without modification, are +// permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of +// conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list +// of conditions and the following disclaimer in the documentation and/or other +// materials provided with the distribution. +// +// 3. 
Neither the name of the copyright holder nor the names of its contributors may be +// used to endorse or promote products derived from this software without specific +// prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF +// THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "bootstrap_serialization.h" +#include "serialization/binary_utils.h" // dump_binary(), parse_binary() +#include "serialization/json_utils.h" // dump_json() + +#include "bootstrap_file.h" + + +namespace po = boost::program_options; + +using namespace cryptonote; +using namespace epee; + +namespace +{ + // This number was picked by taking the leading 4 bytes from this output: + // echo Monero bootstrap file | sha1sum + const uint32_t blockchain_raw_magic = 0x28721586; + const uint32_t header_size = 1024; + + std::string refresh_string = "\r \r"; +} + + + +bool BootstrapFile::open_writer(const boost::filesystem::path& dir_path) +{ + if (boost::filesystem::exists(dir_path)) + { + if (!boost::filesystem::is_directory(dir_path)) + { + LOG_PRINT_RED_L0("export directory path is a file: " << dir_path); + return false; + } + } + else + { + if (!boost::filesystem::create_directory(dir_path)) + { + LOG_PRINT_RED_L0("Failed to create directory " << dir_path); + return false; + } + } + + 
std::string file_path = (dir_path / BLOCKCHAIN_RAW).string(); + m_raw_data_file = new std::ofstream(); + + bool do_initialize_file = false; + uint64_t num_blocks = 0; + + if (! boost::filesystem::exists(file_path)) + { + LOG_PRINT_L0("creating file"); + do_initialize_file = true; + num_blocks = 0; + } + else + { + num_blocks = count_blocks(file_path); + LOG_PRINT_L0("appending to existing file with height: " << num_blocks-1 << " total blocks: " << num_blocks); + } + m_height = num_blocks; + + if (do_initialize_file) + m_raw_data_file->open(file_path, std::ios_base::binary | std::ios_base::out | std::ios::trunc); + else + m_raw_data_file->open(file_path, std::ios_base::binary | std::ios_base::out | std::ios::app | std::ios::ate); + + if (m_raw_data_file->fail()) + return false; + + m_output_stream = new boost::iostreams::stream<boost::iostreams::back_insert_device<buffer_type>>(m_buffer); + if (m_output_stream == nullptr) + return false; + + if (do_initialize_file) + initialize_file(); + + return true; +} + + +bool BootstrapFile::initialize_file() +{ + const uint32_t file_magic = blockchain_raw_magic; + + std::string blob; + if (! ::serialization::dump_binary(file_magic, blob)) + { + throw std::runtime_error("Error in serialization of file magic"); + } + *m_raw_data_file << blob; + + bootstrap::file_info bfi; + bfi.major_version = 0; + bfi.minor_version = 1; + bfi.header_size = header_size; + + bootstrap::blocks_info bbi; + bbi.block_first = 0; + bbi.block_last = 0; + bbi.block_last_pos = 0; + + buffer_type buffer2; + boost::iostreams::stream<boost::iostreams::back_insert_device<buffer_type>>* output_stream_header; + output_stream_header = new boost::iostreams::stream<boost::iostreams::back_insert_device<buffer_type>>(buffer2); + + uint32_t bd_size = 0; + + blobdata bd = t_serializable_object_to_blob(bfi); + LOG_PRINT_L1("bootstrap::file_info size: " << bd.size()); + bd_size = bd.size(); + + if (! 
::serialization::dump_binary(bd_size, blob)) + { + throw std::runtime_error("Error in serialization of bootstrap::file_info size"); + } + *output_stream_header << blob; + *output_stream_header << bd; + + bd = t_serializable_object_to_blob(bbi); + LOG_PRINT_L1("bootstrap::blocks_info size: " << bd.size()); + bd_size = bd.size(); + + if (! ::serialization::dump_binary(bd_size, blob)) + { + throw std::runtime_error("Error in serialization of bootstrap::blocks_info size"); + } + *output_stream_header << blob; + *output_stream_header << bd; + + output_stream_header->flush(); + *output_stream_header << std::string(header_size-buffer2.size(), 0); // fill in rest with null bytes + output_stream_header->flush(); + std::copy(buffer2.begin(), buffer2.end(), std::ostreambuf_iterator<char>(*m_raw_data_file)); + + return true; +} + +void BootstrapFile::flush_chunk() +{ + m_output_stream->flush(); + + uint32_t chunk_size = m_buffer.size(); + // LOG_PRINT_L0("chunk_size " << chunk_size); + if (chunk_size > BUFFER_SIZE) + { + LOG_PRINT_L0("WARNING: chunk_size " << chunk_size << " > BUFFER_SIZE " << BUFFER_SIZE); + } + + std::string blob; + if (! 
::serialization::dump_binary(chunk_size, blob)) + { + throw std::runtime_error("Error in serialization of chunk size"); + } + *m_raw_data_file << blob; + + if (m_max_chunk < chunk_size) + { + m_max_chunk = chunk_size; + } + long pos_before = m_raw_data_file->tellp(); + std::copy(m_buffer.begin(), m_buffer.end(), std::ostreambuf_iterator<char>(*m_raw_data_file)); + m_raw_data_file->flush(); + long pos_after = m_raw_data_file->tellp(); + long num_chars_written = pos_after - pos_before; + if (static_cast<unsigned long>(num_chars_written) != chunk_size) + { + LOG_PRINT_RED_L0("Error writing chunk: height: " << m_cur_height << " chunk_size: " << chunk_size << " num chars written: " << num_chars_written); + throw std::runtime_error("Error writing chunk"); + } + + m_buffer.clear(); + delete m_output_stream; + m_output_stream = new boost::iostreams::stream<boost::iostreams::back_insert_device<buffer_type>>(m_buffer); + LOG_PRINT_L1("flushed chunk: chunk_size: " << chunk_size); +} + +void BootstrapFile::write_block(block& block) +{ + bootstrap::block_package bp; + bp.block = block; + + std::vector<transaction> txs; + + uint64_t block_height = boost::get<txin_gen>(block.miner_tx.vin.front()).height; + + + // now add all regular transactions + for (const auto& tx_id : block.tx_hashes) + { + if (tx_id == null_hash) + { + throw std::runtime_error("Aborting: tx == null_hash"); + } +#if SOURCE_DB == DB_MEMORY + const transaction* tx = m_blockchain_storage->get_tx(tx_id); +#else + transaction tx = m_blockchain_storage->get_db().get_tx(tx_id); +#endif + +#if SOURCE_DB == DB_MEMORY + if(tx == NULL) + { + if (! 
m_tx_pool) + throw std::runtime_error("Aborting: tx == NULL, so memory pool required to get tx, but memory pool isn't enabled"); + else + { + transaction tx; + if(m_tx_pool->get_transaction(tx_id, tx)) + txs.push_back(tx); + else + throw std::runtime_error("Aborting: tx not found in pool"); + } + } + else + txs.push_back(*tx); +#else + txs.push_back(tx); +#endif + } + + // these non-coinbase txs will be serialized using this structure + bp.txs = txs; + + // These three attributes are currently necessary for a fast import that adds blocks without verification. + bool include_extra_block_data = true; + if (include_extra_block_data) + { +#if SOURCE_DB == DB_MEMORY + size_t block_size = m_blockchain_storage->get_block_size(block_height); + difficulty_type cumulative_difficulty = m_blockchain_storage->get_block_cumulative_difficulty(block_height); + uint64_t coins_generated = m_blockchain_storage->get_block_coins_generated(block_height); +#else + size_t block_size = m_blockchain_storage->get_db().get_block_size(block_height); + difficulty_type cumulative_difficulty = m_blockchain_storage->get_db().get_block_cumulative_difficulty(block_height); + uint64_t coins_generated = m_blockchain_storage->get_db().get_block_already_generated_coins(block_height); +#endif + + bp.block_size = block_size; + bp.cumulative_difficulty = cumulative_difficulty; + bp.coins_generated = coins_generated; + } + + blobdata bd = t_serializable_object_to_blob(bp); + m_output_stream->write((const char*)bd.data(), bd.size()); +} + +bool BootstrapFile::close() +{ + if (m_raw_data_file->fail()) + return false; + + m_raw_data_file->flush(); + delete m_output_stream; + delete m_raw_data_file; + return true; +} + + +#if SOURCE_DB == DB_MEMORY +bool BootstrapFile::store_blockchain_raw(blockchain_storage* _blockchain_storage, tx_memory_pool* _tx_pool, boost::filesystem::path& output_dir, uint64_t requested_block_height) +#else +bool BootstrapFile::store_blockchain_raw(Blockchain* _blockchain_storage, 
tx_memory_pool* _tx_pool, boost::filesystem::path& output_dir, uint64_t requested_block_height) +#endif +{ + uint64_t num_blocks_written = 0; + m_max_chunk = 0; + m_blockchain_storage = _blockchain_storage; + m_tx_pool = _tx_pool; + uint64_t progress_interval = 100; + LOG_PRINT_L0("Storing blocks raw data..."); + if (!BootstrapFile::open_writer(output_dir)) + { + LOG_PRINT_RED_L0("failed to open raw file for write"); + return false; + } + block b; + uint64_t height_start = m_height; // height_start uses 0-based height, m_height uses 1-based height. so height_start doesn't need to add 1 here, as it's already at the next height + uint64_t height_stop = 0; + LOG_PRINT_L0("source blockchain height: " << m_blockchain_storage->get_current_blockchain_height()-1); + if ((requested_block_height > 0) && (requested_block_height < m_blockchain_storage->get_current_blockchain_height())) + { + LOG_PRINT_L0("Using requested block height: " << requested_block_height); + height_stop = requested_block_height; + } + else + { + height_stop = m_blockchain_storage->get_current_blockchain_height() - 1; + LOG_PRINT_L0("Using block height of source blockchain: " << height_stop); + } + for (m_cur_height = height_start; m_cur_height <= height_stop; ++m_cur_height) + { + // this method's height refers to 0-based height (genesis block = height 0) + crypto::hash hash = m_blockchain_storage->get_block_id_by_height(m_cur_height); + m_blockchain_storage->get_block_by_hash(hash, b); + write_block(b); + if (m_cur_height % NUM_BLOCKS_PER_CHUNK == 0) { + flush_chunk(); + num_blocks_written += NUM_BLOCKS_PER_CHUNK; + } + if (m_cur_height % progress_interval == 0) { + std::cout << refresh_string; + std::cout << "block " << m_cur_height << "/" << height_stop << std::flush; + } + } + // NOTE: use of NUM_BLOCKS_PER_CHUNK is a placeholder in case multi-block chunks are later supported. 
+ if (m_cur_height % NUM_BLOCKS_PER_CHUNK != 0) + { + flush_chunk(); + } + // print message for last block, which may not have been printed yet due to progress_interval + std::cout << refresh_string; + std::cout << "block " << m_cur_height-1 << "/" << height_stop << ENDL; + + LOG_PRINT_L0("Number of blocks exported: " << num_blocks_written); + if (num_blocks_written > 0) + LOG_PRINT_L0("Largest chunk: " << m_max_chunk << " bytes"); + + return BootstrapFile::close(); +} + +uint64_t BootstrapFile::seek_to_first_chunk(std::ifstream& import_file) +{ + uint32_t file_magic; + + std::string str1; + char buf1[2048]; + import_file.read(buf1, sizeof(file_magic)); + if (! import_file) + throw std::runtime_error("Error reading expected number of bytes"); + str1.assign(buf1, sizeof(file_magic)); + + if (! ::serialization::parse_binary(str1, file_magic)) + throw std::runtime_error("Error in deserialization of file_magic"); + + if (file_magic != blockchain_raw_magic) + { + LOG_PRINT_RED_L0("bootstrap file not recognized"); + throw std::runtime_error("Aborting"); + } + else + LOG_PRINT_L0("bootstrap file recognized"); + + uint32_t buflen_file_info; + + import_file.read(buf1, sizeof(buflen_file_info)); + str1.assign(buf1, sizeof(buflen_file_info)); + if (! import_file) + throw std::runtime_error("Error reading expected number of bytes"); + if (! ::serialization::parse_binary(str1, buflen_file_info)) + throw std::runtime_error("Error in deserialization of buflen_file_info"); + LOG_PRINT_L1("bootstrap::file_info size: " << buflen_file_info); + + if (buflen_file_info > sizeof(buf1)) + throw std::runtime_error("Error: bootstrap::file_info size exceeds buffer size"); + import_file.read(buf1, buflen_file_info); + if (! import_file) + throw std::runtime_error("Error reading expected number of bytes"); + str1.assign(buf1, buflen_file_info); + bootstrap::file_info bfi; + if (! 
::serialization::parse_binary(str1, bfi)) + throw std::runtime_error("Error in deserialization of bootstrap::file_info"); + LOG_PRINT_L0("bootstrap file v" << unsigned(bfi.major_version) << "." << unsigned(bfi.minor_version)); + LOG_PRINT_L0("bootstrap magic size: " << sizeof(file_magic)); + LOG_PRINT_L0("bootstrap header size: " << bfi.header_size) + + uint64_t full_header_size = sizeof(file_magic) + bfi.header_size; + import_file.seekg(full_header_size); + + return full_header_size; +} + +uint64_t BootstrapFile::count_blocks(const std::string& import_file_path) +{ + boost::filesystem::path raw_file_path(import_file_path); + boost::system::error_code ec; + if (!boost::filesystem::exists(raw_file_path, ec)) + { + LOG_PRINT_L0("bootstrap file not found: " << raw_file_path); + throw std::runtime_error("Aborting"); + } + std::ifstream import_file; + import_file.open(import_file_path, std::ios_base::binary | std::ifstream::in); + + uint64_t h = 0; + if (import_file.fail()) + { + LOG_PRINT_L0("import_file.open() fail"); + throw std::runtime_error("Aborting"); + } + + uint64_t full_header_size; // 4 byte magic + length of header structures + full_header_size = seek_to_first_chunk(import_file); + + LOG_PRINT_L0("Scanning blockchain from bootstrap file..."); + block b; + bool quit = false; + uint64_t bytes_read = 0; + int progress_interval = 10; + + std::string str1; + char buf1[2048]; + while (! quit) + { + uint32_t chunk_size; + import_file.read(buf1, sizeof(chunk_size)); + if (!import_file) { + std::cout << refresh_string; + LOG_PRINT_L1("End of file reached"); + quit = true; + break; + } + h += NUM_BLOCKS_PER_CHUNK; + if ((h-1) % progress_interval == 0) + { + std::cout << "\r" << "block height: " << h-1 << + " " << + std::flush; + } + bytes_read += sizeof(chunk_size); + + str1.assign(buf1, sizeof(chunk_size)); + if (! 
::serialization::parse_binary(str1, chunk_size)) + throw std::runtime_error("Error in deserialization of chunk_size"); + LOG_PRINT_L1("chunk_size: " << chunk_size); + + if (chunk_size > BUFFER_SIZE) + { + std::cout << refresh_string; + LOG_PRINT_L0("WARNING: chunk_size " << chunk_size << " > BUFFER_SIZE " << BUFFER_SIZE + << " height: " << h-1); + throw std::runtime_error("Aborting: chunk size exceeds buffer size"); + } + if (chunk_size > 100000) + { + std::cout << refresh_string; + LOG_PRINT_L0("NOTE: chunk_size " << chunk_size << " > 100000" << " height: " + << h-1); + } + else if (chunk_size <= 0) { + std::cout << refresh_string; + LOG_PRINT_L0("ERROR: chunk_size " << chunk_size << " <= 0" << " height: " << h-1); + throw std::runtime_error("Aborting"); + } + // skip to next expected block size value + import_file.seekg(chunk_size, std::ios_base::cur); + if (! import_file) { + std::cout << refresh_string; + LOG_PRINT_L0("ERROR: unexpected end of file: bytes read before error: " + << import_file.gcount() << " of chunk_size " << chunk_size); + throw std::runtime_error("Aborting"); + } + bytes_read += chunk_size; + + // std::cout << refresh_string; + LOG_PRINT_L3("Number bytes scanned: " << bytes_read); + } + + import_file.close(); + + std::cout << ENDL; + std::cout << "Done scanning bootstrap file" << ENDL; + std::cout << "Full header length: " << full_header_size << " bytes" << ENDL; + std::cout << "Scanned for blocks: " << bytes_read << " bytes" << ENDL; + std::cout << "Total: " << full_header_size + bytes_read << " bytes" << ENDL; + std::cout << "Number of blocks: " << h << ENDL; + std::cout << ENDL; + + // NOTE: h is the number of blocks. + // Note that a block's stored height is zero-based, but parts of the code use + // one-based height. 
+ return h; +} diff --git a/src/blockchain_converter/blockchain_export.h b/src/blockchain_converter/bootstrap_file.h index 43e25c039..5fb8a1d4a 100644 --- a/src/blockchain_converter/blockchain_export.h +++ b/src/blockchain_converter/bootstrap_file.h @@ -28,16 +28,28 @@ #pragma once -#include <boost/archive/binary_oarchive.hpp> #include <boost/iostreams/stream_buffer.hpp> #include <boost/iostreams/stream.hpp> #include <boost/iostreams/device/back_inserter.hpp> + +#include <boost/iostreams/filtering_streambuf.hpp> + #include "cryptonote_core/cryptonote_basic.h" #include "cryptonote_core/blockchain_storage.h" #include "cryptonote_core/blockchain.h" #include "blockchain_db/blockchain_db.h" #include "blockchain_db/lmdb/db_lmdb.h" +#include <algorithm> +#include <cstdio> +#include <fstream> +#include <boost/iostreams/copy.hpp> +#include <atomic> + +#include "common/command_line.h" +#include "version.h" + + // CONFIG: choose one of the three #define's // // DB_MEMORY is a sensible default for users migrating to LMDB, as it allows @@ -49,11 +61,24 @@ // to use global compile-time setting (DB_MEMORY or DB_LMDB): // #define SOURCE_DB BLOCKCHAIN_DB + +// bounds checking is done before writing to buffer, but buffer size +// should be a sensible maximum +#define BUFFER_SIZE 1000000 +#define NUM_BLOCKS_PER_CHUNK 1 +#define BLOCKCHAIN_RAW "blockchain.raw" + + using namespace cryptonote; -class BlockchainExport + +class BootstrapFile { public: + + uint64_t count_blocks(const std::string& dir_path); + uint64_t seek_to_first_chunk(std::ifstream& import_file); + #if SOURCE_DB == DB_MEMORY bool store_blockchain_raw(cryptonote::blockchain_storage* cs, cryptonote::tx_memory_pool* txp, boost::filesystem::path& output_dir, uint64_t use_block_height=0); @@ -63,6 +88,7 @@ public: #endif protected: + #if SOURCE_DB == DB_MEMORY blockchain_storage* m_blockchain_storage; #else @@ -72,16 +98,19 @@ protected: tx_memory_pool* m_tx_pool; typedef std::vector<char> buffer_type; std::ofstream * 
m_raw_data_file; - boost::archive::binary_oarchive * m_raw_archive; buffer_type m_buffer; boost::iostreams::stream<boost::iostreams::back_insert_device<buffer_type>>* m_output_stream; // open export file for write - bool open(const boost::filesystem::path& dir_path); + bool open_writer(const boost::filesystem::path& dir_path); + bool initialize_file(); bool close(); void write_block(block& block); - void serialize_block_to_text_buffer(const block& block); - void buffer_serialize_tx(const transaction& tx); - void buffer_write_num_txs(const std::list<transaction> txs); void flush_chunk(); + +private: + + uint64_t m_height; + uint64_t m_cur_height; // tracks current height during export + uint32_t m_max_chunk; }; diff --git a/src/blockchain_converter/import.h b/src/blockchain_converter/bootstrap_serialization.h index 632b4c0d9..6fa949353 100644 --- a/src/blockchain_converter/import.h +++ b/src/blockchain_converter/bootstrap_serialization.h @@ -28,10 +28,61 @@ #pragma once -// TODO: bounds checking is done before writing to buffer, but buffer size -// should be a sensible maximum -#define BUFFER_SIZE 1000000 -#define NUM_BLOCKS_PER_CHUNK 1 -#define STR_LENGTH_OF_INT 9 -#define STR_FORMAT_OF_INT "%09d" -#define BLOCKCHAIN_RAW "blockchain.raw" +#include "cryptonote_core/cryptonote_boost_serialization.h" +#include "cryptonote_core/difficulty.h" + + +namespace cryptonote +{ + namespace bootstrap + { + + struct file_info + { + uint8_t major_version; + uint8_t minor_version; + uint32_t header_size; + + BEGIN_SERIALIZE_OBJECT() + FIELD(major_version); + FIELD(minor_version); + VARINT_FIELD(header_size); + END_SERIALIZE() + }; + + struct blocks_info + { + // block heights of file's first and last blocks, zero-based indexes + uint64_t block_first; + uint64_t block_last; + + // file position, for directly reading last block + uint64_t block_last_pos; + + BEGIN_SERIALIZE_OBJECT() + VARINT_FIELD(block_first); + VARINT_FIELD(block_last); + VARINT_FIELD(block_last_pos); + 
END_SERIALIZE() + }; + + struct block_package + { + cryptonote::block block; + std::vector<transaction> txs; + size_t block_size; + difficulty_type cumulative_difficulty; + uint64_t coins_generated; + + BEGIN_SERIALIZE() + FIELD(block) + FIELD(txs) + VARINT_FIELD(block_size) + VARINT_FIELD(cumulative_difficulty) + VARINT_FIELD(coins_generated) + END_SERIALIZE() + }; + + } + +} |