From b0bf49a65a38ceb1acfbc8e17f40e63383ac140d Mon Sep 17 00:00:00 2001 From: jeffro256 Date: Sun, 16 Jul 2023 11:56:36 -0500 Subject: blockchain_db: add k-anonymity to txid fetching Read more about k-anonymity [here](https://en.wikipedia.org/wiki/K-anonymity). We implement this feature in the monero daemon for transactions by providing a "Txid Template", which is simply a txid with all but `num_matching_bits` bits zeroed out, and the number `num_matching_bits`. We add an operation to `BlockchainLMDB` called `get_txids_loose` which takes a txid template and returns all txids in the database (chain and mempool) that satisfy that template. Thus, a client can ask about a specific transaction from a daemon without revealing the exact transaction they are inquiring about. The client can control the statistical chance that other TXIDs (besides the one in question) match the txid template sent to the daemon up to a power of 2. For example, if a client sets their `num_matching_bits` to 5, then statistically any txid has a 1/(2^5) chance to match. With `num_matching_bits`=10, there is a 1/(2^10) chance, so on and so forth. Co-authored-by: ACK-J <60232273+ACK-J@users.noreply.github.com> --- src/blockchain_db/blockchain_db.h | 15 +++++ src/blockchain_db/lmdb/db_lmdb.cpp | 52 ++++++++++++++++++ src/blockchain_db/lmdb/db_lmdb.h | 2 + src/blockchain_db/testdb.h | 1 + src/cryptonote_basic/cryptonote_basic_impl.cpp | 47 ++++++++++++++++ src/cryptonote_basic/cryptonote_basic_impl.h | 35 ++++++++++++ src/rpc/core_rpc_server.cpp | 76 ++++++++++++++++++++++++++ src/rpc/core_rpc_server.h | 2 + src/rpc/core_rpc_server_commands_defs.h | 27 +++++++++ 9 files changed, 257 insertions(+) (limited to 'src') diff --git a/src/blockchain_db/blockchain_db.h b/src/blockchain_db/blockchain_db.h index 835d6dcad..f3e962c81 100644 --- a/src/blockchain_db/blockchain_db.h +++ b/src/blockchain_db/blockchain_db.h @@ -1305,6 +1305,21 @@ public: */ virtual bool get_pruned_tx_blobs_from(const crypto::hash& h, size_t count, std::vector &bd) const = 0; + /** + * @brief Get all txids in the database (chain and pool) that match a certain nbits txid template + * + * To be more specific, for all `dbtxid` txids in the database, return `dbtxid` if + * `0 == cryptonote::compare_hash32_reversed_nbits(txid_template, dbtxid, nbits)`. + * + * @param txid_template the transaction id template + * @param nbits number of bits to compare against in the template + * @param max_num_txs The maximum number of txids to match, if we hit this limit, throw early + * @return std::vector the list of all matching txids + * + * @throw TX_EXISTS if the number of txids that match exceed `max_num_txs` + */ + virtual std::vector get_txids_loose(const crypto::hash& txid_template, std::uint32_t nbits, uint64_t max_num_txs = 0) = 0; + /** * @brief fetches a variable number of blocks and transactions from the given height, in canonical blockchain order * diff --git a/src/blockchain_db/lmdb/db_lmdb.cpp b/src/blockchain_db/lmdb/db_lmdb.cpp index 4178c862b..2c015faee 100644 --- a/src/blockchain_db/lmdb/db_lmdb.cpp +++ b/src/blockchain_db/lmdb/db_lmdb.cpp @@ -3144,6 +3144,58 @@ bool BlockchainLMDB::get_pruned_tx_blobs_from(const crypto::hash& h, size_t coun return true; } +std::vector BlockchainLMDB::get_txids_loose(const crypto::hash& txid_template, std::uint32_t bits, uint64_t max_num_txs) +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + + std::vector matching_hashes; + + TXN_PREFIX_RDONLY(); // Start a read-only transaction + RCURSOR(tx_indices); // Open cursors to the tx_indices and txpool_meta databases + RCURSOR(txpool_meta); + + // Search on-chain and pool transactions together, starting with on-chain txs + MDB_cursor* cursor = m_cur_tx_indices; + MDB_val k = zerokval; // tx_indicies DB uses a dummy key + MDB_val_set(v, txid_template); // tx_indicies DB indexes data values by crypto::hash value on front + MDB_cursor_op op = MDB_GET_BOTH_RANGE; // Set the cursor to the first key/value pair >= the given key + bool doing_chain = true; // this variable tells us whether we are processing chain or pool txs + while (1) + { + const int get_result = mdb_cursor_get(cursor, &k, &v, op); + op = doing_chain ? MDB_NEXT_DUP : MDB_NEXT; // Set the cursor to the next key/value pair + if (get_result && get_result != MDB_NOTFOUND) + throw0(DB_ERROR(lmdb_error("DB error attempting to fetch txid range", get_result).c_str())); + + // In tx_indicies, the hash is stored at the data, in txpool_meta at the key + const crypto::hash* const p_dbtxid = (const crypto::hash*)(doing_chain ? v.mv_data : k.mv_data); + + // Check if we reached the end of a DB or the hashes no longer match the template + if (get_result == MDB_NOTFOUND || compare_hash32_reversed_nbits(txid_template, *p_dbtxid, bits)) + { + if (doing_chain) // done with chain processing, switch to pool processing + { + k.mv_size = sizeof(crypto::hash); // txpool_meta DB is indexed using crypto::hash as keys + k.mv_data = (void*) txid_template.data; + cursor = m_cur_txpool_meta; // switch databases + op = MDB_SET_RANGE; // Set the cursor to the first key >= the given key + doing_chain = false; + continue; + } + break; // if we get to this point, then we finished pool processing and we are done + } + else if (matching_hashes.size() >= max_num_txs && max_num_txs != 0) + throw0(TX_EXISTS("number of tx hashes in template range exceeds maximum")); + + matching_hashes.push_back(*p_dbtxid); + } + + TXN_POSTFIX_RDONLY(); // End the read-only transaction + + return matching_hashes; +} + bool BlockchainLMDB::get_blocks_from(uint64_t start_height, size_t min_block_count, size_t max_block_count, size_t max_tx_count, size_t max_size, std::vector, std::vector>>>& blocks, bool pruned, bool skip_coinbase, bool get_miner_tx_hash) const { LOG_PRINT_L3("BlockchainLMDB::" << __func__); diff --git a/src/blockchain_db/lmdb/db_lmdb.h b/src/blockchain_db/lmdb/db_lmdb.h index c352458b4..95e7b2aa4 100644 --- a/src/blockchain_db/lmdb/db_lmdb.h +++ b/src/blockchain_db/lmdb/db_lmdb.h @@ -262,6 +262,8 @@ public: virtual bool get_prunable_tx_blob(const crypto::hash& h, cryptonote::blobdata &tx) const; virtual bool get_prunable_tx_hash(const crypto::hash& tx_hash, crypto::hash &prunable_hash) const; + virtual std::vector get_txids_loose(const crypto::hash& h, std::uint32_t bits, uint64_t max_num_txs = 0); + virtual uint64_t get_tx_count() const; virtual std::vector get_tx_list(const std::vector& hlist) const; diff --git a/src/blockchain_db/testdb.h b/src/blockchain_db/testdb.h index 946f26270..a27183b2c 100644 --- a/src/blockchain_db/testdb.h +++ b/src/blockchain_db/testdb.h @@ -71,6 +71,7 @@ public: virtual bool get_pruned_tx_blob(const crypto::hash& h, cryptonote::blobdata &tx) const override { return false; } virtual bool get_pruned_tx_blobs_from(const crypto::hash& h, size_t count, std::vector &bd) const override { return false; } virtual bool get_blocks_from(uint64_t start_height, size_t min_block_count, size_t max_block_count, size_t max_tx_count, size_t max_size, std::vector, std::vector>>>& blocks, bool pruned, bool skip_coinbase, bool get_miner_tx_hash) const override { return false; } + virtual std::vector get_txids_loose(const crypto::hash& h, std::uint32_t bits, uint64_t max_num_txs = 0) override { return {}; } virtual bool get_prunable_tx_blob(const crypto::hash& h, cryptonote::blobdata &tx) const override { return false; } virtual bool get_prunable_tx_hash(const crypto::hash& tx_hash, crypto::hash &prunable_hash) const override { return false; } virtual uint64_t get_block_height(const crypto::hash& h) const override { return 0; } diff --git a/src/cryptonote_basic/cryptonote_basic_impl.cpp b/src/cryptonote_basic/cryptonote_basic_impl.cpp index 9bde20609..7fe398283 100644 --- a/src/cryptonote_basic/cryptonote_basic_impl.cpp +++ b/src/cryptonote_basic/cryptonote_basic_impl.cpp @@ -310,6 +310,53 @@ namespace cryptonote { bool operator ==(const cryptonote::block& a, const cryptonote::block& b) { return cryptonote::get_block_hash(a) == cryptonote::get_block_hash(b); } + //-------------------------------------------------------------------------------- + int compare_hash32_reversed_nbits(const crypto::hash& ha, const crypto::hash& hb, unsigned int nbits) + { + static_assert(sizeof(uint64_t) * 4 == sizeof(crypto::hash), "hash is wrong size"); + + // We have to copy these buffers b/c of the strict aliasing rule + uint64_t va[4]; + memcpy(va, &ha, sizeof(crypto::hash)); + uint64_t vb[4]; + memcpy(vb, &hb, sizeof(crypto::hash)); + + for (int n = 3; n >= 0 && nbits; --n) + { + const unsigned int msb_nbits = std::min(64, nbits); + const uint64_t lsb_nbits_dropped = static_cast(64 - msb_nbits); + const uint64_t van = SWAP64LE(va[n]) >> lsb_nbits_dropped; + const uint64_t vbn = SWAP64LE(vb[n]) >> lsb_nbits_dropped; + nbits -= msb_nbits; + + if (van < vbn) return -1; else if (van > vbn) return 1; + } + + return 0; + } + + crypto::hash make_hash32_loose_template(unsigned int nbits, const crypto::hash& h) + { + static_assert(sizeof(uint64_t) * 4 == sizeof(crypto::hash), "hash is wrong size"); + + // We have to copy this buffer b/c of the strict aliasing rule + uint64_t vh[4]; + memcpy(vh, &h, sizeof(crypto::hash)); + + for (int n = 3; n >= 0; --n) + { + const unsigned int msb_nbits = std::min(64, nbits); + const uint64_t mask = msb_nbits ? (~((std::uint64_t(1) << (64 - msb_nbits)) - 1)) : 0; + nbits -= msb_nbits; + + vh[n] &= SWAP64LE(mask); + } + + crypto::hash res; + memcpy(&res, vh, sizeof(crypto::hash)); + return res; + } + //-------------------------------------------------------------------------------- } //-------------------------------------------------------------------------------- diff --git a/src/cryptonote_basic/cryptonote_basic_impl.h b/src/cryptonote_basic/cryptonote_basic_impl.h index 984bee19f..53dbc47d7 100644 --- a/src/cryptonote_basic/cryptonote_basic_impl.h +++ b/src/cryptonote_basic/cryptonote_basic_impl.h @@ -112,6 +112,41 @@ namespace cryptonote { bool operator ==(const cryptonote::transaction& a, const cryptonote::transaction& b); bool operator ==(const cryptonote::block& a, const cryptonote::block& b); + + /************************************************************************/ + /* K-anonymity helper functions */ + /************************************************************************/ + + /** + * @brief Compares two hashes up to `nbits` bits in reverse byte order ("LMDB key order") + * + * The comparison essentially goes from the 31th, 30th, 29th, ..., 0th byte and compares the MSBs + * to the LSBs in each byte, up to `nbits` bits. If we use up `nbits` bits before finding a + * difference in the bits between the two hashes, we return 0. If we encounter a zero bit in `ha` + * where `hb` has a one in that bit place, then we reutrn -1. If the converse scenario happens, + * we return a 1. When `nbits` == 256 (there are 256 bits in `crypto::hash`), calling this is + * functionally identical to `BlockchainLMDB::compare_hash32`. + * + * @param ha left hash + * @param hb right hash + * @param nbits the number of bits to consider, a higher value means a finer comparison + * @return int 0 if ha == hb, -1 if ha < hb, 1 if ha > hb + */ + int compare_hash32_reversed_nbits(const crypto::hash& ha, const crypto::hash& hb, unsigned int nbits); + + /** + * @brief Make a template which matches `h` in LMDB order up to `nbits` bits, safe for k-anonymous fetching + * + * To be more technical, this function creates a hash which satifies the following property: + * For all `H_prime` s.t. `0 == compare_hash32_reversed_nbits(real_hash, H_prime, nbits)`, + * `1 > compare_hash32_reversed_nbits(real_hash, H_prime, 256)`. + * In other words, we return the "least" hash nbit-equal to `real_hash`. + * + * @param nbits The number of "MSB" bits to include in the template + * @param real_hash The original hash which contains more information than we want to disclose + * @return crypto::hash hash template that contains `nbits` bits matching real_hash and no more + */ + crypto::hash make_hash32_loose_template(unsigned int nbits, const crypto::hash& real_hash); } bool parse_hash256(const std::string &str_hash, crypto::hash& hash); diff --git a/src/rpc/core_rpc_server.cpp b/src/rpc/core_rpc_server.cpp index 0adf0b65e..97fd5fe13 100644 --- a/src/rpc/core_rpc_server.cpp +++ b/src/rpc/core_rpc_server.cpp @@ -3533,6 +3533,82 @@ namespace cryptonote return true; } //------------------------------------------------------------------------------------------------------------------------------ + bool core_rpc_server::on_get_txids_loose(const COMMAND_RPC_GET_TXIDS_LOOSE::request& req, COMMAND_RPC_GET_TXIDS_LOOSE::response& res, epee::json_rpc::error& error_resp, const connection_context *ctx) + { + RPC_TRACKER(get_txids_loose); + + // Maybe don't use bootstrap since this endpoint is meant to retreive TXIDs w/ k-anonymity, + // so shunting this request to a random node seems counterproductive. + +#if BYTE_ORDER == LITTLE_ENDIAN + const uint64_t max_num_txids = RESTRICTED_SPENT_KEY_IMAGES_COUNT * (m_restricted ? 1 : 10); + + // Sanity check parameters + if (req.num_matching_bits > 256) + { + error_resp.code = CORE_RPC_ERROR_CODE_WRONG_PARAM; + error_resp.message = "There are only 256 bits in a hash, you gave too many"; + return false; + } + + // Attempt to guess when bit count is too low before fetching, within a certain margin of error + const uint64_t num_txs_ever = m_core.get_blockchain_storage().get_db().get_tx_count(); + const uint64_t num_expected_fetch = (num_txs_ever >> std::min((int) req.num_matching_bits, 63)); + const uint64_t max_num_txids_with_margin = 2 * max_num_txids; + if (num_expected_fetch > max_num_txids_with_margin) + { + error_resp.code = CORE_RPC_ERROR_CODE_WRONG_PARAM; + error_resp.message = "Trying to search with too few matching bits, detected before fetching"; + return false; + } + + // Convert txid template to a crypto::hash + crypto::hash search_hash; + if (!epee::string_tools::hex_to_pod(req.txid_template, search_hash)) + { + error_resp.code = CORE_RPC_ERROR_CODE_WRONG_PARAM; + error_resp.message = "Could not decode hex txid"; + return false; + } + // Check that txid template is zeroed correctly for number of given matchign bits + else if (search_hash != make_hash32_loose_template(req.num_matching_bits, search_hash)) + { + error_resp.code = CORE_RPC_ERROR_CODE_WRONG_PARAM; + error_resp.message = "Txid template is not zeroed correctly for number of bits. You could be leaking true txid!"; + return false; + } + + try + { + // Do the DB fetch + const auto txids = m_core.get_blockchain_storage().get_db().get_txids_loose(search_hash, req.num_matching_bits, max_num_txids); + // Fill out response form + for (const auto& txid : txids) + res.txids.emplace_back(epee::string_tools::pod_to_hex(txid)); + } + catch (const TX_EXISTS&) + { + error_resp.code = CORE_RPC_ERROR_CODE_WRONG_PARAM; + error_resp.message = "Trying to search with too few matching bits"; + return false; + } + catch (const std::exception& e) + { + error_resp.code = CORE_RPC_ERROR_CODE_INTERNAL_ERROR; + error_resp.message = std::string("Error during get_txids_loose: ") + e.what(); + return false; + } + + res.status = CORE_RPC_STATUS_OK; + return true; +#else // BYTE_ORDER == BIG_ENDIAN + // BlockchainLMDB::compare_hash32 has different key ordering (thus different txid templates) on BE systems + error_resp.code = CORE_RPC_ERROR_CODE_INTERNAL_ERROR; + error_resp.message = "Due to implementation details, this feature is not available on big-endian daemons"; + return false; +#endif + } + //------------------------------------------------------------------------------------------------------------------------------ bool core_rpc_server::on_rpc_access_submit_nonce(const COMMAND_RPC_ACCESS_SUBMIT_NONCE::request& req, COMMAND_RPC_ACCESS_SUBMIT_NONCE::response& res, epee::json_rpc::error& error_resp, const connection_context *ctx) { RPC_TRACKER(rpc_access_submit_nonce); diff --git a/src/rpc/core_rpc_server.h b/src/rpc/core_rpc_server.h index 790d5eb23..7c31d2539 100644 --- a/src/rpc/core_rpc_server.h +++ b/src/rpc/core_rpc_server.h @@ -178,6 +178,7 @@ namespace cryptonote MAP_JON_RPC_WE("get_output_distribution", on_get_output_distribution, COMMAND_RPC_GET_OUTPUT_DISTRIBUTION) MAP_JON_RPC_WE_IF("prune_blockchain", on_prune_blockchain, COMMAND_RPC_PRUNE_BLOCKCHAIN, !m_restricted) MAP_JON_RPC_WE_IF("flush_cache", on_flush_cache, COMMAND_RPC_FLUSH_CACHE, !m_restricted) + MAP_JON_RPC_WE("get_txids_loose", on_get_txids_loose, COMMAND_RPC_GET_TXIDS_LOOSE) MAP_JON_RPC_WE("rpc_access_info", on_rpc_access_info, COMMAND_RPC_ACCESS_INFO) MAP_JON_RPC_WE("rpc_access_submit_nonce",on_rpc_access_submit_nonce, COMMAND_RPC_ACCESS_SUBMIT_NONCE) MAP_JON_RPC_WE("rpc_access_pay", on_rpc_access_pay, COMMAND_RPC_ACCESS_PAY) @@ -255,6 +256,7 @@ namespace cryptonote bool on_get_output_distribution(const COMMAND_RPC_GET_OUTPUT_DISTRIBUTION::request& req, COMMAND_RPC_GET_OUTPUT_DISTRIBUTION::response& res, epee::json_rpc::error& error_resp, const connection_context *ctx = NULL); bool on_prune_blockchain(const COMMAND_RPC_PRUNE_BLOCKCHAIN::request& req, COMMAND_RPC_PRUNE_BLOCKCHAIN::response& res, epee::json_rpc::error& error_resp, const connection_context *ctx = NULL); bool on_flush_cache(const COMMAND_RPC_FLUSH_CACHE::request& req, COMMAND_RPC_FLUSH_CACHE::response& res, epee::json_rpc::error& error_resp, const connection_context *ctx = NULL); + bool on_get_txids_loose(const COMMAND_RPC_GET_TXIDS_LOOSE::request& req, COMMAND_RPC_GET_TXIDS_LOOSE::response& res, epee::json_rpc::error& error_resp, const connection_context *ctx = NULL); bool on_rpc_access_info(const COMMAND_RPC_ACCESS_INFO::request& req, COMMAND_RPC_ACCESS_INFO::response& res, epee::json_rpc::error& error_resp, const connection_context *ctx = NULL); bool on_rpc_access_submit_nonce(const COMMAND_RPC_ACCESS_SUBMIT_NONCE::request& req, COMMAND_RPC_ACCESS_SUBMIT_NONCE::response& res, epee::json_rpc::error& error_resp, const connection_context *ctx = NULL); bool on_rpc_access_pay(const COMMAND_RPC_ACCESS_PAY::request& req, COMMAND_RPC_ACCESS_PAY::response& res, epee::json_rpc::error& error_resp, const connection_context *ctx = NULL); diff --git a/src/rpc/core_rpc_server_commands_defs.h b/src/rpc/core_rpc_server_commands_defs.h index 7f1581d0c..c7a669a9f 100644 --- a/src/rpc/core_rpc_server_commands_defs.h +++ b/src/rpc/core_rpc_server_commands_defs.h @@ -2790,4 +2790,31 @@ namespace cryptonote typedef epee::misc_utils::struct_init response; }; + struct COMMAND_RPC_GET_TXIDS_LOOSE + { + struct request_t: public rpc_request_base + { + std::string txid_template; + std::uint32_t num_matching_bits; + + BEGIN_KV_SERIALIZE_MAP() + KV_SERIALIZE_PARENT(rpc_request_base) + KV_SERIALIZE(txid_template) + KV_SERIALIZE(num_matching_bits) + END_KV_SERIALIZE_MAP() + }; + typedef epee::misc_utils::struct_init request; + + struct response_t: public rpc_response_base + { + std::vector txids; + + BEGIN_KV_SERIALIZE_MAP() + KV_SERIALIZE_PARENT(rpc_response_base) + KV_SERIALIZE(txids) + END_KV_SERIALIZE_MAP() + }; + typedef epee::misc_utils::struct_init response; + }; + } -- cgit v1.2.3