diff options
89 files changed, 40173 insertions, 151 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 8af4ef69e..f2b553d92 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -65,6 +65,21 @@ option(BOOST_IGNORE_SYSTEM_PATHS "Ignore boost system paths for local boost inst set_property(GLOBAL PROPERTY USE_FOLDERS ON) enable_testing() +# Check whether we're on a 32-bit or 64-bit system +if(CMAKE_SIZEOF_VOID_P EQUAL "8") + set(DEFAULT_BUILD_64 ON) +else() + set(DEFAULT_BUILD_64 OFF) +endif() +option(BUILD_64 "Build for 64-bit? 'OFF' builds for 32-bit." ${DEFAULT_BUILD_64}) + +if(BUILD_64) + set(ARCH_WIDTH "64") +else() + set(ARCH_WIDTH "32") +endif() +message(STATUS "Building for a ${ARCH_WIDTH}-bit system") + # Check if we're on FreeBSD so we can exclude the local miniupnpc (it should be installed from ports instead) # CMAKE_SYSTEM_NAME checks are commonly known, but specifically taken from libsdl's CMakeLists if(CMAKE_SYSTEM_NAME MATCHES "kFreeBSD.*") @@ -96,16 +111,16 @@ endif() option(STATIC "Link libraries statically" ${DEFAULT_STATIC}) if(MINGW) - get_filename_component(msys2_install_path "[HKEY_CURRENT_USER\\Software\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\MSYS2 64bit;InstallLocation]" ABSOLUTE) - set(CMAKE_INCLUDE_PATH "${msys2_install_path}/mingw64/include") + get_filename_component(msys2_install_path "[HKEY_CURRENT_USER\\Software\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\MSYS2 ${ARCH_WIDTH}bit;InstallLocation]" ABSOLUTE) + set(CMAKE_INCLUDE_PATH "${msys2_install_path}/mingw${ARCH_WIDTH}/include") # This is necessary because otherwise CMake will make Boost libraries -lfoo # rather than a full path. Unfortunately, this makes the shared libraries get # linked due to a bug in CMake which misses putting -static flags around the # -lfoo arguments. list(REMOVE_ITEM CMAKE_C_IMPLICIT_LINK_DIRECTORIES - "${msys2_install_path}/mingw64/lib") + "${msys2_install_path}/mingw${ARCH_WIDTH}/lib") list(REMOVE_ITEM CMAKE_CXX_IMPLICIT_LINK_DIRECTORIES - "${msys2_install_path}/mingw64/lib") + "${msys2_install_path}/mingw${ARCH_WIDTH}/lib") endif() if(STATIC) @@ -116,6 +131,27 @@ if(STATIC) endif() endif() +# default database: +# should be lmdb for testing, memory for production still +# set(DATABASE memory) +set(DATABASE lmdb) + +if (DEFINED ENV{DATABASE}) + set(DATABASE $ENV{DATABASE}) + message(STATUS "DATABASE set: ${DATABASE}") +else() + message(STATUS "Could not find DATABASE in env (not required unless you want to change database type from default: ${DATABASE})") +endif() +if (DATABASE STREQUAL "lmdb") + set(BLOCKCHAIN_DB DB_LMDB) +elseif (DATABASE STREQUAL "memory") + set(BLOCKCHAIN_DB DB_MEMORY) +else() + die("Invalid database type: ${DATABASE}") +endif() + +add_definitions("-DBLOCKCHAIN_DB=${BLOCKCHAIN_DB}") + if (UNIX AND NOT APPLE) # Note that at the time of this writing the -Wstrict-prototypes flag added below will make this fail find_package(Threads) @@ -138,6 +174,9 @@ link_directories(${UNBOUND_LIBRARY_DIRS}) # Final setup for rapidjson include_directories(external/rapidjson) +# Final setup for liblmdb +include_directories(${LMDB_INCLUDE}) + if(MSVC) add_definitions("/bigobj /MP /W3 /GS- /D_CRT_SECURE_NO_WARNINGS /wd4996 /wd4345 /D_WIN32_WINNT=0x0600 /DWIN32_LEAN_AND_MEAN /DGTEST_HAS_TR1_TUPLE=0 /FIinline_c.h /D__SSE4_1__") # set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Dinline=__inline") diff --git a/external/CMakeLists.txt b/external/CMakeLists.txt index 80e574b06..245758d9f 100644 --- a/external/CMakeLists.txt +++ b/external/CMakeLists.txt @@ -97,3 +97,5 @@ else() die("Found libunbound includes, but could not find libunbound library. Please make sure you have installed libunbound or libunbound-dev or the equivalent") endif() endif() + +add_subdirectory(db_drivers) diff --git a/external/db_drivers/CMakeLists.txt b/external/db_drivers/CMakeLists.txt new file mode 100644 index 000000000..5bc27ed1f --- /dev/null +++ b/external/db_drivers/CMakeLists.txt @@ -0,0 +1,35 @@ +# Copyright (c) 2014-2015, The Monero Project +# +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, are +# permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this list of +# conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, this list +# of conditions and the following disclaimer in the documentation and/or other +# materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its contributors may be +# used to endorse or promote products derived from this software without specific +# prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +# THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF +# THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# We aren't even going to check the system for an installed LMDB driver, as it is too +# critical a consensus component to rely on dynamically linked libraries +message(STATUS "Using ${ARCH_WIDTH}-bit LMDB from source tree") +add_subdirectory(liblmdb${ARCH_WIDTH}) +set(LMDB_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/liblmdb${ARCH_WIDTH}" CACHE STRING "LMDB Include path") + +set(LMDB_LIBRARY "lmdb" CACHE STRING "LMDB Library name") diff --git a/external/db_drivers/liblmdb32/.gitignore b/external/db_drivers/liblmdb32/.gitignore new file mode 100644 index 000000000..f3277afe4 --- /dev/null +++ b/external/db_drivers/liblmdb32/.gitignore @@ -0,0 +1,20 @@ +mtest +mtest[23456] +testdb +mdb_copy +mdb_stat +*.[ao] +*.so +*.exe +*[~#] +*.bak +*.orig +*.rej +*.gcov +*.gcda +*.gcno +core +core.* +valgrind.* +man/ +html/ diff --git a/external/db_drivers/liblmdb32/CHANGES b/external/db_drivers/liblmdb32/CHANGES new file mode 100644 index 000000000..34049ba7a --- /dev/null +++ b/external/db_drivers/liblmdb32/CHANGES @@ -0,0 +1,112 @@ +LMDB 0.9 Change Log + +LMDB 0.9.14 Release (2014/09/20) + Fix to support 64K page size (ITS#7713) + Fix to persist decreased as well as increased mapsizes (ITS#7789) + Fix cursor bug when deleting last node of a DUPSORT key + Fix mdb_env_info to return FIXEDMAP address + Fix ambiguous error code from writing to closed DBI (ITS#7825) + Fix mdb_copy copying past end of file (ITS#7886) + Fix cursor bugs from page_merge/rebalance + Fix to dirty fewer pages in deletes (mdb_page_loose()) + Fix mdb_dbi_open creating subDBs (ITS#7917) + Fix mdb_cursor_get(_DUP) with single value (ITS#7913) + Fix Windows compat issues in mtests (ITS#7879) + Add compacting variant of mdb_copy + Add BigEndian integer key compare code + Add mdb_dump/mdb_load utilities + +LMDB 0.9.13 Release (2014/06/18) + Fix mdb_page_alloc unlimited overflow page search + Documentation + Re-fix MDB_CURRENT doc (ITS#7793) + Fix MDB_GET_MULTIPLE/MDB_NEXT_MULTIPLE doc + +LMDB 0.9.12 Release (2014/06/13) + Fix MDB_GET_BOTH regression (ITS#7875,#7681) + Fix MDB_MULTIPLE writing multiple keys (ITS#7834) + Fix mdb_rebalance (ITS#7829) + Fix mdb_page_split (ITS#7815) + Fix md_entries count (ITS#7861,#7828,#7793) + Fix MDB_CURRENT (ITS#7793) + Fix possible crash on Windows DLL detach + Misc code cleanup + Documentation + mdb_cursor_put: cursor moves on error (ITS#7771) + + +LMDB 0.9.11 Release (2014/01/15) + Add mdb_env_set_assert() (ITS#7775) + Fix: invalidate txn on page allocation errors (ITS#7377) + Fix xcursor tracking in mdb_cursor_del0() (ITS#7771) + Fix corruption from deletes (ITS#7756) + Fix Windows/MSVC build issues + Raise safe limit of max MDB_MAXKEYSIZE + Misc code cleanup + Documentation + Remove spurious note about non-overlapping flags (ITS#7665) + +LMDB 0.9.10 Release (2013/11/12) + Add MDB_NOMEMINIT option + Fix mdb_page_split() again (ITS#7589) + Fix MDB_NORDAHEAD definition (ITS#7734) + Fix mdb_cursor_del() positioning (ITS#7733) + Partial fix for larger page sizes (ITS#7713) + Fix Windows64/MSVC build issues + +LMDB 0.9.9 Release (2013/10/24) + Add mdb_env_get_fd() + Add MDB_NORDAHEAD option + Add MDB_NOLOCK option + Avoid wasting space in mdb_page_split() (ITS#7589) + Fix mdb_page_merge() cursor fixup (ITS#7722) + Fix mdb_cursor_del() on last delete (ITS#7718) + Fix adding WRITEMAP on existing env (ITS#7715) + Fix nested txns (ITS#7515) + Fix mdb_env_copy() O_DIRECT bug (ITS#7682) + Fix mdb_cursor_set(SET_RANGE) return code (ITS#7681) + Fix mdb_rebalance() cursor fixup (ITS#7701) + Misc code cleanup + Documentation + Note that by default, readers need write access + + +LMDB 0.9.8 Release (2013/09/09) + Allow mdb_env_set_mapsize() on an open environment + Fix mdb_dbi_flags() (ITS#7672) + Fix mdb_page_unspill() in nested txns + Fix mdb_cursor_get(CURRENT|NEXT) after a delete + Fix mdb_cursor_get(DUP) to always return key (ITS#7671) + Fix mdb_cursor_del() to always advance to next item (ITS#7670) + Fix mdb_cursor_set(SET_RANGE) for tree with single page (ITS#7681) + Fix mdb_env_copy() retry open if O_DIRECT fails (ITS#7682) + Tweak mdb_page_spill() to be less aggressive + Documentation + Update caveats since mdb_reader_check() added in 0.9.7 + +LMDB 0.9.7 Release (2013/08/17) + Don't leave stale lockfile on failed RDONLY open (ITS#7664) + Fix mdb_page_split() ref beyond cursor depth + Fix read txn data race (ITS#7635) + Fix mdb_rebalance (ITS#7536, #7538) + Fix mdb_drop() (ITS#7561) + Misc DEBUG macro fixes + Add MDB_NOTLS envflag + Add mdb_env_copyfd() + Add mdb_txn_env() (ITS#7660) + Add mdb_dbi_flags() (ITS#7661) + Add mdb_env_get_maxkeysize() + Add mdb_env_reader_list()/mdb_env_reader_check() + Add mdb_page_spill/unspill, remove hard txn size limit + Use shorter names for semaphores (ITS#7615) + Build + Fix install target (ITS#7656) + Documentation + Misc updates for cursors, DB handles, data lifetime + +LMDB 0.9.6 Release (2013/02/25) + Many fixes/enhancements + +LMDB 0.9.5 Release (2012/11/30) + Renamed from libmdb to liblmdb + Many fixes/enhancements diff --git a/external/db_drivers/liblmdb32/CMakeLists.txt b/external/db_drivers/liblmdb32/CMakeLists.txt new file mode 100644 index 000000000..62b800135 --- /dev/null +++ b/external/db_drivers/liblmdb32/CMakeLists.txt @@ -0,0 +1,39 @@ +# Copyright (c) 2014-2015, The Monero Project +# +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, are +# permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this list of +# conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, this list +# of conditions and the following disclaimer in the documentation and/or other +# materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its contributors may be +# used to endorse or promote products derived from this software without specific +# prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +# THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF +# THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +set (lmdb_sources +mdb.c +midl.c) + +include_directories("${CMAKE_CURRENT_SOURCE_DIR}") + +add_library(lmdb + ${lmdb_sources}) +target_link_libraries(lmdb + LINK_PRIVATE + ${CMAKE_THREAD_LIBS_INIT}) diff --git a/external/db_drivers/liblmdb32/COPYRIGHT b/external/db_drivers/liblmdb32/COPYRIGHT new file mode 100644 index 000000000..e0eb484c7 --- /dev/null +++ b/external/db_drivers/liblmdb32/COPYRIGHT @@ -0,0 +1,20 @@ +Copyright 2011-2014 Howard Chu, Symas Corp. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted only as authorized by the OpenLDAP +Public License. + +A copy of this license is available in the file LICENSE in the +top-level directory of the distribution or, alternatively, at +<http://www.OpenLDAP.org/license.html>. + +OpenLDAP is a registered trademark of the OpenLDAP Foundation. + +Individual files and/or contributed packages may be copyright by +other parties and/or subject to additional restrictions. + +This work also contains materials derived from public sources. + +Additional information about OpenLDAP can be obtained at +<http://www.openldap.org/>. diff --git a/external/db_drivers/liblmdb32/Doxyfile b/external/db_drivers/liblmdb32/Doxyfile new file mode 100644 index 000000000..92d17b09e --- /dev/null +++ b/external/db_drivers/liblmdb32/Doxyfile @@ -0,0 +1,1631 @@ +# Doxyfile 1.7.1 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project +# +# All text after a hash (#) is considered a comment and will be ignored +# The format is: +# TAG = value [value, ...] +# For lists items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (" ") + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# This tag specifies the encoding used for all characters in the config file +# that follow. The default is UTF-8 which is also the encoding used for all +# text before the first occurrence of this tag. Doxygen uses libiconv (or the +# iconv built into libc) for the transcoding. See +# http://www.gnu.org/software/libiconv for the list of possible encodings. + +DOXYFILE_ENCODING = UTF-8 + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded +# by quotes) that should identify the project. + +PROJECT_NAME = LMDB + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. +# This could be handy for archiving the generated documentation or +# if some version control system is used. + +PROJECT_NUMBER = + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) +# base path where the generated documentation will be put. +# If a relative path is entered, it will be relative to the location +# where doxygen was started. If left blank the current directory will be used. + +OUTPUT_DIRECTORY = + +# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create +# 4096 sub-directories (in 2 levels) under the output directory of each output +# format and will distribute the generated files over these directories. +# Enabling this option can be useful when feeding doxygen a huge amount of +# source files, where putting all generated files in the same directory would +# otherwise cause performance problems for the file system. + +CREATE_SUBDIRS = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. +# The default language is English, other supported languages are: +# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, +# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German, +# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English +# messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, +# Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrilic, Slovak, +# Slovene, Spanish, Swedish, Ukrainian, and Vietnamese. + +OUTPUT_LANGUAGE = English + +# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will +# include brief member descriptions after the members that are listed in +# the file and class documentation (similar to JavaDoc). +# Set to NO to disable this. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend +# the brief description of a member or function before the detailed description. +# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. + +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator +# that is used to form the text in various listings. Each string +# in this list, if found as the leading text of the brief description, will be +# stripped from the text and the result after processing the whole list, is +# used as the annotated text. Otherwise, the brief description is used as-is. +# If left blank, the following values are used ("$name" is automatically +# replaced with the name of the entity): "The $name class" "The $name widget" +# "The $name file" "is" "provides" "specifies" "contains" +# "represents" "a" "an" "the" + +ABBREVIATE_BRIEF = + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# Doxygen will generate a detailed section even if there is only a brief +# description. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. + +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full +# path before files name in the file list and in the header files. If set +# to NO the shortest path that makes the file name unique will be used. + +FULL_PATH_NAMES = YES + +# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag +# can be used to strip a user-defined part of the path. Stripping is +# only done if one of the specified strings matches the left-hand part of +# the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the +# path to strip. + +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of +# the path mentioned in the documentation of a class, which tells +# the reader which header file to include in order to use a class. +# If left blank only the name of the header file containing the class +# definition is used. Otherwise one should specify the include paths that +# are normally passed to the compiler using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter +# (but less readable) file names. This can be useful is your file systems +# doesn't support long names like on DOS, Mac, or CD-ROM. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen +# will interpret the first line (until the first dot) of a JavaDoc-style +# comment as the brief description. If set to NO, the JavaDoc +# comments will behave just like regular Qt-style comments +# (thus requiring an explicit @brief command for a brief description.) + +JAVADOC_AUTOBRIEF = NO + +# If the QT_AUTOBRIEF tag is set to YES then Doxygen will +# interpret the first line (until the first dot) of a Qt-style +# comment as the brief description. If set to NO, the comments +# will behave just like regular Qt-style comments (thus requiring +# an explicit \brief command for a brief description.) + +QT_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen +# treat a multi-line C++ special comment block (i.e. a block of //! or /// +# comments) as a brief description. This used to be the default behaviour. +# The new default is to treat a multi-line C++ comment block as a detailed +# description. Set this tag to YES if you prefer the old behaviour instead. + +MULTILINE_CPP_IS_BRIEF = NO + +# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented +# member inherits the documentation from any documented member that it +# re-implements. + +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce +# a new page for each member. If set to NO, the documentation of a member will +# be part of the file/class/namespace that contains it. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. +# Doxygen uses this value to replace tabs by spaces in code fragments. + +TAB_SIZE = 4 + +# This tag can be used to specify a number of aliases that acts +# as commands in the documentation. An alias has the form "name=value". +# For example adding "sideeffect=\par Side Effects:\n" will allow you to +# put the command \sideeffect (or @sideeffect) in the documentation, which +# will result in a user-defined paragraph with heading "Side Effects:". +# You can put \n's in the value part of an alias to insert newlines. + +ALIASES = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C +# sources only. Doxygen will then generate output that is more tailored for C. +# For instance, some of the names that are used will be different. The list +# of all members will be omitted, etc. + +OPTIMIZE_OUTPUT_FOR_C = YES + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java +# sources only. Doxygen will then generate output that is more tailored for +# Java. For instance, namespaces will be presented as packages, qualified +# scopes will look different, etc. + +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran +# sources only. Doxygen will then generate output that is more tailored for +# Fortran. + +OPTIMIZE_FOR_FORTRAN = NO + +# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL +# sources. Doxygen will then generate output that is tailored for +# VHDL. + +OPTIMIZE_OUTPUT_VHDL = NO + +# Doxygen selects the parser to use depending on the extension of the files it +# parses. With this tag you can assign which parser to use for a given extension. +# Doxygen has a built-in mapping, but you can override or extend it using this +# tag. The format is ext=language, where ext is a file extension, and language +# is one of the parsers supported by doxygen: IDL, Java, Javascript, CSharp, C, +# C++, D, PHP, Objective-C, Python, Fortran, VHDL, C, C++. For instance to make +# doxygen treat .inc files as Fortran files (default is PHP), and .f files as C +# (default is Fortran), use: inc=Fortran f=C. Note that for custom extensions +# you also need to set FILE_PATTERNS otherwise the files are not read by doxygen. + +EXTENSION_MAPPING = + +# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want +# to include (a tag file for) the STL sources as input, then you should +# set this tag to YES in order to let doxygen match functions declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); v.s. +# func(std::string) {}). This also make the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. + +BUILTIN_STL_SUPPORT = NO + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support. + +CPP_CLI_SUPPORT = NO + +# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. +# Doxygen will parse them like normal C++ but will assume all classes use public +# instead of private inheritance when no explicit protection keyword is present. + +SIP_SUPPORT = NO + +# For Microsoft's IDL there are propget and propput attributes to indicate getter +# and setter methods for a property. Setting this option to YES (the default) +# will make doxygen to replace the get and set methods by a property in the +# documentation. This will only work if the methods are indeed getting or +# setting a simple type. If this is not the case, or you want to show the +# methods anyway, you should set this option to NO. + +IDL_PROPERTY_SUPPORT = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES, then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. + +DISTRIBUTE_GROUP_DOC = NO + +# Set the SUBGROUPING tag to YES (the default) to allow class member groups of +# the same type (for instance a group of public functions) to be put as a +# subgroup of that type (e.g. under the Public Functions section). Set it to +# NO to prevent subgrouping. Alternatively, this can be done per class using +# the \nosubgrouping command. + +SUBGROUPING = YES + +INLINE_GROUPED_CLASSES = YES +# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum +# is documented as struct, union, or enum with the name of the typedef. So +# typedef struct TypeS {} TypeT, will appear in the documentation as a struct +# with name TypeT. When disabled the typedef will appear as a member of a file, +# namespace, or class. And the struct will be named TypeS. This can typically +# be useful for C code in case the coding convention dictates that all compound +# types are typedef'ed and only the typedef is referenced, never the tag name. + +TYPEDEF_HIDES_STRUCT = YES + +# The SYMBOL_CACHE_SIZE determines the size of the internal cache use to +# determine which symbols to keep in memory and which to flush to disk. +# When the cache is full, less often used symbols will be written to disk. +# For small to medium size projects (<1000 input files) the default value is +# probably good enough. For larger projects a too small cache size can cause +# doxygen to be busy swapping symbols to and from disk most of the time +# causing a significant performance penality. +# If the system has enough physical memory increasing the cache will improve the +# performance by keeping more symbols in memory. Note that the value works on +# a logarithmic scale so increasing the size by one will rougly double the +# memory usage. The cache size is given by this formula: +# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0, +# corresponding to a cache size of 2^16 = 65536 symbols + +SYMBOL_CACHE_SIZE = 0 + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in +# documentation are documented, even if no documentation was available. +# Private class members and static file members will be hidden unless +# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES + +EXTRACT_ALL = NO + +# If the EXTRACT_PRIVATE tag is set to YES all private members of a class +# will be included in the documentation. + +EXTRACT_PRIVATE = NO + +# If the EXTRACT_STATIC tag is set to YES all static members of a file +# will be included in the documentation. + +EXTRACT_STATIC = YES + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) +# defined locally in source files will be included in the documentation. +# If set to NO only classes defined in header files are included. + +EXTRACT_LOCAL_CLASSES = YES + +# This flag is only useful for Objective-C code. When set to YES local +# methods, which are defined in the implementation section but not in +# the interface are included in the documentation. +# If set to NO (the default) only methods in the interface are included. + +EXTRACT_LOCAL_METHODS = NO + +# If this flag is set to YES, the members of anonymous namespaces will be +# extracted and appear in the documentation as a namespace called +# 'anonymous_namespace{file}', where file will be replaced with the base +# name of the file that contains the anonymous namespace. By default +# anonymous namespace are hidden. + +EXTRACT_ANON_NSPACES = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all +# undocumented members of documented classes, files or namespaces. +# If set to NO (the default) these members will be included in the +# various overviews, but no documentation section is generated. +# This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. +# If set to NO (the default) these classes will be included in the various +# overviews. This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all +# friend (class|struct|union) declarations. +# If set to NO (the default) these declarations will be included in the +# documentation. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any +# documentation blocks found inside the body of a function. +# If set to NO (the default) these blocks will be appended to the +# function's detailed documentation block. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation +# that is typed after a \internal command is included. If the tag is set +# to NO (the default) then the documentation will be excluded. +# Set it to YES to include the internal documentation. + +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate +# file names in lower-case letters. If set to YES upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. + +CASE_SENSE_NAMES = YES + +# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen +# will show members with their full class and namespace scopes in the +# documentation. If set to YES the scope will be hidden. + +HIDE_SCOPE_NAMES = NO + +# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen +# will put a list of the files that are included by a file in the documentation +# of that file. + +SHOW_INCLUDE_FILES = YES + +# If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen +# will list include files with double quotes in the documentation +# rather than with sharp brackets. + +FORCE_LOCAL_INCLUDES = NO + +# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] +# is inserted in the documentation for inline members. + +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen +# will sort the (detailed) documentation of file and class members +# alphabetically by member name. If set to NO the members will appear in +# declaration order. + +SORT_MEMBER_DOCS = NO + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the +# brief documentation of file, namespace and class members alphabetically +# by member name. If set to NO (the default) the members will appear in +# declaration order. + +SORT_BRIEF_DOCS = NO + +# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen +# will sort the (brief and detailed) documentation of class members so that +# constructors and destructors are listed first. If set to NO (the default) +# the constructors will appear in the respective orders defined by +# SORT_MEMBER_DOCS and SORT_BRIEF_DOCS. +# This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO +# and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO. + +SORT_MEMBERS_CTORS_1ST = NO + +# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the +# hierarchy of group names into alphabetical order. If set to NO (the default) +# the group names will appear in their defined order. + +SORT_GROUP_NAMES = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be +# sorted by fully-qualified names, including namespaces. If set to +# NO (the default), the class list will be sorted only by class name, +# not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the +# alphabetical list. + +SORT_BY_SCOPE_NAME = NO + +# The GENERATE_TODOLIST tag can be used to enable (YES) or +# disable (NO) the todo list. This list is created by putting \todo +# commands in the documentation. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable (YES) or +# disable (NO) the test list. This list is created by putting \test +# commands in the documentation. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable (YES) or +# disable (NO) the bug list. This list is created by putting \bug +# commands in the documentation. + +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or +# disable (NO) the deprecated list. This list is created by putting +# \deprecated commands in the documentation. + +GENERATE_DEPRECATEDLIST= YES + +# The ENABLED_SECTIONS tag can be used to enable conditional +# documentation sections, marked by \if sectionname ... \endif. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines +# the initial value of a variable or define consists of for it to appear in +# the documentation. If the initializer consists of more lines than specified +# here it will be hidden. Use a value of 0 to hide initializers completely. +# The appearance of the initializer of individual variables and defines in the +# documentation can be controlled using \showinitializer or \hideinitializer +# command in the documentation regardless of this setting. + +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated +# at the bottom of the documentation of classes and structs. If set to YES the +# list will mention the files that were used to generate the documentation. + +SHOW_USED_FILES = YES + +# If the sources in your project are distributed over multiple directories +# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy +# in the documentation. The default is NO. + +SHOW_DIRECTORIES = NO + +# Set the SHOW_FILES tag to NO to disable the generation of the Files page. +# This will remove the Files entry from the Quick Index and from the +# Folder Tree View (if specified). The default is YES. + +SHOW_FILES = YES + +# Set the SHOW_NAMESPACES tag to NO to disable the generation of the +# Namespaces page. +# This will remove the Namespaces entry from the Quick Index +# and from the Folder Tree View (if specified). The default is YES. + +SHOW_NAMESPACES = YES + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from +# the version control system). Doxygen will invoke the program by executing (via +# popen()) the command <command> <input-file>, where <command> is the value of +# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file +# provided by doxygen. Whatever the program writes to standard output +# is used as the file version. See the manual for examples. + +FILE_VERSION_FILTER = + +# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed +# by doxygen. The layout file controls the global structure of the generated +# output files in an output format independent way. The create the layout file +# that represents doxygen's defaults, run doxygen with the -l option. +# You can optionally specify a file name after the option, if omitted +# DoxygenLayout.xml will be used as the name of the layout file. + +LAYOUT_FILE = + +#--------------------------------------------------------------------------- +# configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated +# by doxygen. Possible values are YES and NO. If left blank NO is used. + +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated by doxygen. Possible values are YES and NO. If left blank +# NO is used. + +WARNINGS = YES + +# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings +# for undocumented members. If EXTRACT_ALL is set to YES then this flag will +# automatically be disabled. + +WARN_IF_UNDOCUMENTED = YES + +# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some +# parameters in a documented function, or documenting parameters that +# don't exist or using markup commands wrongly. + +WARN_IF_DOC_ERROR = YES + +# This WARN_NO_PARAMDOC option can be abled to get warnings for +# functions that are documented, but have no documentation for their parameters +# or return value. If set to NO (the default) doxygen will only warn about +# wrong or incomplete parameter documentation, but not about the absence of +# documentation. + +WARN_NO_PARAMDOC = NO + +# The WARN_FORMAT tag determines the format of the warning messages that +# doxygen can produce. The string should contain the $file, $line, and $text +# tags, which will be replaced by the file and line number from which the +# warning originated and the warning text. Optionally the format may contain +# $version, which will be replaced by the version of the file (if it could +# be obtained via FILE_VERSION_FILTER) + +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning +# and error messages should be written. If left blank the output is written +# to stderr. + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag can be used to specify the files and/or directories that contain +# documented source files. You may enter file names like "myfile.cpp" or +# directories like "/usr/src/myproject". Separate the files or directories +# with spaces. + +INPUT = lmdb.h midl.h mdb.c midl.c + +# This tag can be used to specify the character encoding of the source files +# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is +# also the default input encoding. Doxygen uses libiconv (or the iconv built +# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for +# the list of possible encodings. + +INPUT_ENCODING = UTF-8 + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank the following patterns are tested: +# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx +# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90 + +FILE_PATTERNS = + +# The RECURSIVE tag can be used to turn specify whether or not subdirectories +# should be searched for input files as well. Possible values are YES and NO. +# If left blank NO is used. + +RECURSIVE = NO + +# The EXCLUDE tag can be used to specify files and/or directories that should +# excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. + +EXCLUDE = + +# The EXCLUDE_SYMLINKS tag can be used select whether or not files or +# directories that are symbolic links (a Unix filesystem feature) are excluded +# from the input. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. Note that the wildcards are matched +# against the file with absolute path, so to exclude all test directories +# for example use the pattern */test/* + +EXCLUDE_PATTERNS = + +# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names +# (namespaces, classes, functions, etc.) that should be excluded from the +# output. The symbol name can be a fully qualified name, a word, or if the +# wildcard * is used, a substring. Examples: ANamespace, AClass, +# AClass::ANamespace, ANamespace::*Test + +EXCLUDE_SYMBOLS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or +# directories that contain example code fragments that are included (see +# the \include command). + +EXAMPLE_PATH = + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank all files are included. + +EXAMPLE_PATTERNS = + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude +# commands irrespective of the value of the RECURSIVE tag. +# Possible values are YES and NO. If left blank NO is used. + +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or +# directories that contain image that are included in the documentation (see +# the \image command). + +IMAGE_PATH = + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command <filter> <input-file>, where <filter> +# is the value of the INPUT_FILTER tag, and <input-file> is the name of an +# input file. Doxygen will then use the output that the filter program writes +# to standard output. +# If FILTER_PATTERNS is specified, this tag will be +# ignored. + +INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis. +# Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. +# The filters are a list of the form: +# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further +# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER +# is applied to all files. + +FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER) will be used to filter the input files when producing source +# files to browse (i.e. when SOURCE_BROWSER is set to YES). + +FILTER_SOURCE_FILES = NO + +#--------------------------------------------------------------------------- +# configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will +# be generated. Documented entities will be cross-referenced with these sources. +# Note: To get rid of all source code in the generated output, make sure also +# VERBATIM_HEADERS is set to NO. + +SOURCE_BROWSER = NO + +# Setting the INLINE_SOURCES tag to YES will include the body +# of functions and classes directly in the documentation. + +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct +# doxygen to hide any special comment blocks from generated source code +# fragments. Normal C and C++ comments will always remain visible. + +STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES +# then for each documented function all documented +# functions referencing it will be listed. + +REFERENCED_BY_RELATION = NO + +# If the REFERENCES_RELATION tag is set to YES +# then for each documented function all documented entities +# called/used by that function will be listed. + +REFERENCES_RELATION = NO + +# If the REFERENCES_LINK_SOURCE tag is set to YES (the default) +# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from +# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will +# link to the source code. +# Otherwise they will link to the documentation. + +REFERENCES_LINK_SOURCE = YES + +# If the USE_HTAGS tag is set to YES then the references to source code +# will point to the HTML generated by the htags(1) tool instead of doxygen +# built-in source browser. The htags tool is part of GNU's global source +# tagging system (see http://www.gnu.org/software/global/global.html). You +# will need version 4.8.6 or higher. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen +# will generate a verbatim copy of the header file for each class for +# which an include is specified. Set to NO to disable this. + +VERBATIM_HEADERS = YES + +#--------------------------------------------------------------------------- +# configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index +# of all compounds will be generated. Enable this if the project +# contains a lot of classes, structs, unions or interfaces. + +ALPHABETICAL_INDEX = YES + +# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then +# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns +# in which this list will be split (can be a number in the range [1..20]) + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all +# classes will be put under the same header in the alphabetical index. +# The IGNORE_PREFIX tag can be used to specify one or more prefixes that +# should be ignored while generating the index headers. + +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES (the default) Doxygen will +# generate HTML output. + +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `html' will be used as the default path. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for +# each generated HTML page (for example: .htm,.php,.asp). If it is left blank +# doxygen will generate files with .html extension. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a personal HTML header for +# each generated HTML page. If it is left blank doxygen will generate a +# standard header. + +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a personal HTML footer for +# each generated HTML page. If it is left blank doxygen will generate a +# standard footer. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading +# style sheet that is used by each HTML page. It can be used to +# fine-tune the look of the HTML output. If the tag is left blank doxygen +# will generate a default style sheet. Note that doxygen will try to copy +# the style sheet file to the HTML output directory, so don't put your own +# stylesheet in the HTML output directory as well, or it will be erased! + +HTML_STYLESHEET = + +# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. +# Doxygen will adjust the colors in the stylesheet and background images +# according to this color. Hue is specified as an angle on a colorwheel, +# see http://en.wikipedia.org/wiki/Hue for more information. +# For instance the value 0 represents red, 60 is yellow, 120 is green, +# 180 is cyan, 240 is blue, 300 purple, and 360 is red again. +# The allowed range is 0 to 359. + +HTML_COLORSTYLE_HUE = 220 + +# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of +# the colors in the HTML output. For a value of 0 the output will use +# grayscales only. A value of 255 will produce the most vivid colors. + +HTML_COLORSTYLE_SAT = 100 + +# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to +# the luminance component of the colors in the HTML output. Values below +# 100 gradually make the output lighter, whereas values above 100 make +# the output darker. The value divided by 100 is the actual gamma applied, +# so 80 represents a gamma of 0.8, The value 220 represents a gamma of 2.2, +# and 100 does not change the gamma. + +HTML_COLORSTYLE_GAMMA = 80 + +# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML +# page will contain the date and time when the page was generated. Setting +# this to NO can help when comparing the output of multiple runs. + +HTML_TIMESTAMP = YES + +# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, +# files or namespaces will be aligned in HTML using tables. If set to +# NO a bullet list will be used. + +HTML_ALIGN_MEMBERS = YES + +# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML +# documentation will contain sections that can be hidden and shown after the +# page has loaded. For this to work a browser that supports +# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox +# Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari). + +HTML_DYNAMIC_SECTIONS = NO + +# If the GENERATE_DOCSET tag is set to YES, additional index files +# will be generated that can be used as input for Apple's Xcode 3 +# integrated development environment, introduced with OSX 10.5 (Leopard). +# To create a documentation set, doxygen will generate a Makefile in the +# HTML output directory. Running make will produce the docset in that +# directory and running "make install" will install the docset in +# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find +# it at startup. +# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html +# for more information. + +GENERATE_DOCSET = NO + +# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the +# feed. A documentation feed provides an umbrella under which multiple +# documentation sets from a single provider (such as a company or product suite) +# can be grouped. + +DOCSET_FEEDNAME = "Doxygen generated docs" + +# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that +# should uniquely identify the documentation set bundle. This should be a +# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen +# will append .docset to the name. + +DOCSET_BUNDLE_ID = org.doxygen.Project + +# When GENERATE_PUBLISHER_ID tag specifies a string that should uniquely identify +# the documentation publisher. This should be a reverse domain-name style +# string, e.g. com.mycompany.MyDocSet.documentation. + +DOCSET_PUBLISHER_ID = org.doxygen.Publisher + +# The GENERATE_PUBLISHER_NAME tag identifies the documentation publisher. + +DOCSET_PUBLISHER_NAME = Publisher + +# If the GENERATE_HTMLHELP tag is set to YES, additional index files +# will be generated that can be used as input for tools like the +# Microsoft HTML help workshop to generate a compiled HTML help file (.chm) +# of the generated HTML documentation. + +GENERATE_HTMLHELP = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can +# be used to specify the file name of the resulting .chm file. You +# can add a path in front of the file if the result should not be +# written to the html output directory. + +CHM_FILE = + +# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can +# be used to specify the location (absolute path including file name) of +# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run +# the HTML help compiler on the generated index.hhp. + +HHC_LOCATION = + +# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag +# controls if a separate .chi index file is generated (YES) or that +# it should be included in the master .chm file (NO). + +GENERATE_CHI = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING +# is used to encode HtmlHelp index (hhk), content (hhc) and project file +# content. + +CHM_INDEX_ENCODING = + +# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag +# controls whether a binary table of contents is generated (YES) or a +# normal table of contents (NO) in the .chm file. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members +# to the contents of the HTML help documentation and to the tree view. + +TOC_EXPAND = NO + +# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and +# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated +# that can be used as input for Qt's qhelpgenerator to generate a +# Qt Compressed Help (.qch) of the generated HTML documentation. + +GENERATE_QHP = NO + +# If the QHG_LOCATION tag is specified, the QCH_FILE tag can +# be used to specify the file name of the resulting .qch file. +# The path specified is relative to the HTML output folder. + +QCH_FILE = + +# The QHP_NAMESPACE tag specifies the namespace to use when generating +# Qt Help Project output. For more information please see +# http://doc.trolltech.com/qthelpproject.html#namespace + +QHP_NAMESPACE = org.doxygen.Project + +# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating +# Qt Help Project output. For more information please see +# http://doc.trolltech.com/qthelpproject.html#virtual-folders + +QHP_VIRTUAL_FOLDER = doc + +# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to +# add. For more information please see +# http://doc.trolltech.com/qthelpproject.html#custom-filters + +QHP_CUST_FILTER_NAME = + +# The QHP_CUST_FILT_ATTRS tag specifies the list of the attributes of the +# custom filter to add. For more information please see +# <a href="http://doc.trolltech.com/qthelpproject.html#custom-filters"> +# Qt Help Project / Custom Filters</a>. + +QHP_CUST_FILTER_ATTRS = + +# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this +# project's +# filter section matches. +# <a href="http://doc.trolltech.com/qthelpproject.html#filter-attributes"> +# Qt Help Project / Filter Attributes</a>. + +QHP_SECT_FILTER_ATTRS = + +# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can +# be used to specify the location of Qt's qhelpgenerator. +# If non-empty doxygen will try to run qhelpgenerator on the generated +# .qhp file. + +QHG_LOCATION = + +# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files +# will be generated, which together with the HTML files, form an Eclipse help +# plugin. To install this plugin and make it available under the help contents +# menu in Eclipse, the contents of the directory containing the HTML and XML +# files needs to be copied into the plugins directory of eclipse. The name of +# the directory within the plugins directory should be the same as +# the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before +# the help appears. + +GENERATE_ECLIPSEHELP = NO + +# A unique identifier for the eclipse help plugin. When installing the plugin +# the directory name containing the HTML and XML files should also have +# this name. + +ECLIPSE_DOC_ID = org.doxygen.Project + +# The DISABLE_INDEX tag can be used to turn on/off the condensed index at +# top of each HTML page. The value NO (the default) enables the index and +# the value YES disables it. + +DISABLE_INDEX = NO + +# This tag can be used to set the number of enum values (range [1..20]) +# that doxygen will group on one line in the generated HTML documentation. + +ENUM_VALUES_PER_LINE = 4 + +# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index +# structure should be generated to display hierarchical information. +# If the tag value is set to YES, a side panel will be generated +# containing a tree-like index structure (just like the one that +# is generated for HTML Help). For this to work a browser that supports +# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser). +# Windows users are probably better off using the HTML help feature. + +GENERATE_TREEVIEW = NO + +# By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories, +# and Class Hierarchy pages using a tree view instead of an ordered list. + +USE_INLINE_TREES = NO + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be +# used to set the initial width (in pixels) of the frame in which the tree +# is shown. + +TREEVIEW_WIDTH = 250 + +# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open +# links to external symbols imported via tag files in a separate window. + +EXT_LINKS_IN_WINDOW = NO + +# Use this tag to change the font size of Latex formulas included +# as images in the HTML documentation. The default is 10. Note that +# when you change the font size after a successful doxygen run you need +# to manually remove any form_*.png images from the HTML output directory +# to force them to be regenerated. + +FORMULA_FONTSIZE = 10 + +# Use the FORMULA_TRANPARENT tag to determine whether or not the images +# generated for formulas are transparent PNGs. Transparent PNGs are +# not supported properly for IE 6.0, but are supported on all modern browsers. +# Note that when changing this option you need to delete any form_*.png files +# in the HTML output before the changes have effect. + +FORMULA_TRANSPARENT = YES + +# When the SEARCHENGINE tag is enabled doxygen will generate a search box +# for the HTML output. The underlying search engine uses javascript +# and DHTML and should work on any modern browser. Note that when using +# HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets +# (GENERATE_DOCSET) there is already a search function so this one should +# typically be disabled. For large projects the javascript based search engine +# can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution. + +SEARCHENGINE = YES + +# When the SERVER_BASED_SEARCH tag is enabled the search engine will be +# implemented using a PHP enabled web server instead of at the web client +# using Javascript. Doxygen will generate the search PHP script and index +# file to put on the web server. The advantage of the server +# based approach is that it scales better to large projects and allows +# full text search. The disadvances is that it is more difficult to setup +# and does not have live searching capabilities. + +SERVER_BASED_SEARCH = NO + +#--------------------------------------------------------------------------- +# configuration options related to the LaTeX output +#--------------------------------------------------------------------------- + +# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will +# generate Latex output. + +GENERATE_LATEX = NO + +# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `latex' will be used as the default path. + +LATEX_OUTPUT = latex + +# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be +# invoked. If left blank `latex' will be used as the default command name. +# Note that when enabling USE_PDFLATEX this option is only used for +# generating bitmaps for formulas in the HTML output, but not in the +# Makefile that is written to the output directory. + +LATEX_CMD_NAME = latex + +# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to +# generate index for LaTeX. If left blank `makeindex' will be used as the +# default command name. + +MAKEINDEX_CMD_NAME = makeindex + +# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact +# LaTeX documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_LATEX = NO + +# The PAPER_TYPE tag can be used to set the paper type that is used +# by the printer. Possible values are: a4, a4wide, letter, legal and +# executive. If left blank a4wide will be used. + +PAPER_TYPE = a4wide + +# The EXTRA_PACKAGES tag can be to specify one or more names of LaTeX +# packages that should be included in the LaTeX output. + +EXTRA_PACKAGES = + +# The LATEX_HEADER tag can be used to specify a personal LaTeX header for +# the generated latex document. The header should contain everything until +# the first chapter. If it is left blank doxygen will generate a +# standard header. Notice: only use this tag if you know what you are doing! + +LATEX_HEADER = + +# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated +# is prepared for conversion to pdf (using ps2pdf). The pdf file will +# contain links (just like the HTML output) instead of page references +# This makes the output suitable for online browsing using a pdf viewer. + +PDF_HYPERLINKS = YES + +# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of +# plain latex in the generated Makefile. Set this option to YES to get a +# higher quality PDF documentation. + +USE_PDFLATEX = YES + +# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. +# command to the generated LaTeX files. This will instruct LaTeX to keep +# running if errors occur, instead of asking the user for help. +# This option is also used when generating formulas in HTML. + +LATEX_BATCHMODE = NO + +# If LATEX_HIDE_INDICES is set to YES then doxygen will not +# include the index chapters (such as File Index, Compound Index, etc.) +# in the output. + +LATEX_HIDE_INDICES = NO + +# If LATEX_SOURCE_CODE is set to YES then doxygen will include +# source code with syntax highlighting in the LaTeX output. +# Note that which sources are shown also depends on other settings +# such as SOURCE_BROWSER. + +LATEX_SOURCE_CODE = NO + +#--------------------------------------------------------------------------- +# configuration options related to the RTF output +#--------------------------------------------------------------------------- + +# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output +# The RTF output is optimized for Word 97 and may not look very pretty with +# other RTF readers or editors. + +GENERATE_RTF = NO + +# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `rtf' will be used as the default path. + +RTF_OUTPUT = rtf + +# If the COMPACT_RTF tag is set to YES Doxygen generates more compact +# RTF documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_RTF = NO + +# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated +# will contain hyperlink fields. The RTF file will +# contain links (just like the HTML output) instead of page references. +# This makes the output suitable for online browsing using WORD or other +# programs which support those fields. +# Note: wordpad (write) and others do not support links. + +RTF_HYPERLINKS = NO + +# Load stylesheet definitions from file. Syntax is similar to doxygen's +# config file, i.e. a series of assignments. You only have to provide +# replacements, missing definitions are set to their default value. + +RTF_STYLESHEET_FILE = + +# Set optional variables used in the generation of an rtf document. +# Syntax is similar to doxygen's config file. + +RTF_EXTENSIONS_FILE = + +#--------------------------------------------------------------------------- +# configuration options related to the man page output +#--------------------------------------------------------------------------- + +# If the GENERATE_MAN tag is set to YES (the default) Doxygen will +# generate man pages + +GENERATE_MAN = YES + +# The MAN_OUTPUT tag is used to specify where the man pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `man' will be used as the default path. + +MAN_OUTPUT = man + +# The MAN_EXTENSION tag determines the extension that is added to +# the generated man pages (default is the subroutine's section .3) + +MAN_EXTENSION = .3 + +# If the MAN_LINKS tag is set to YES and Doxygen generates man output, +# then it will generate one additional man file for each entity +# documented in the real man page(s). These additional files +# only source the real man page, but without them the man command +# would be unable to find the correct page. The default is NO. + +MAN_LINKS = NO + +#--------------------------------------------------------------------------- +# configuration options related to the XML output +#--------------------------------------------------------------------------- + +# If the GENERATE_XML tag is set to YES Doxygen will +# generate an XML file that captures the structure of +# the code including all documentation. + +GENERATE_XML = NO + +# The XML_OUTPUT tag is used to specify where the XML pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `xml' will be used as the default path. + +XML_OUTPUT = xml + +# The XML_SCHEMA tag can be used to specify an XML schema, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_SCHEMA = + +# The XML_DTD tag can be used to specify an XML DTD, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_DTD = + +# If the XML_PROGRAMLISTING tag is set to YES Doxygen will +# dump the program listings (including syntax highlighting +# and cross-referencing information) to the XML output. Note that +# enabling this will significantly increase the size of the XML output. + +XML_PROGRAMLISTING = YES + +#--------------------------------------------------------------------------- +# configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- + +# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will +# generate an AutoGen Definitions (see autogen.sf.net) file +# that captures the structure of the code including all +# documentation. Note that this feature is still experimental +# and incomplete at the moment. + +GENERATE_AUTOGEN_DEF = NO + +#--------------------------------------------------------------------------- +# configuration options related to the Perl module output +#--------------------------------------------------------------------------- + +# If the GENERATE_PERLMOD tag is set to YES Doxygen will +# generate a Perl module file that captures the structure of +# the code including all documentation. Note that this +# feature is still experimental and incomplete at the +# moment. + +GENERATE_PERLMOD = NO + +# If the PERLMOD_LATEX tag is set to YES Doxygen will generate +# the necessary Makefile rules, Perl scripts and LaTeX code to be able +# to generate PDF and DVI output from the Perl module output. + +PERLMOD_LATEX = NO + +# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be +# nicely formatted so it can be parsed by a human reader. +# This is useful +# if you want to understand what is going on. +# On the other hand, if this +# tag is set to NO the size of the Perl module output will be much smaller +# and Perl will parse it just the same. + +PERLMOD_PRETTY = YES + +# The names of the make variables in the generated doxyrules.make file +# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. +# This is useful so different doxyrules.make files included by the same +# Makefile don't overwrite each other's variables. + +PERLMOD_MAKEVAR_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- + +# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will +# evaluate all C-preprocessor directives found in the sources and include +# files. + +ENABLE_PREPROCESSING = YES + +# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro +# names in the source code. If set to NO (the default) only conditional +# compilation will be performed. Macro expansion can be done in a controlled +# way by setting EXPAND_ONLY_PREDEF to YES. + +MACRO_EXPANSION = NO + +# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES +# then the macro expansion is limited to the macros specified with the +# PREDEFINED and EXPAND_AS_DEFINED tags. + +EXPAND_ONLY_PREDEF = NO + +# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files +# in the INCLUDE_PATH (see below) will be search if a #include is found. + +SEARCH_INCLUDES = YES + +# The INCLUDE_PATH tag can be used to specify one or more directories that +# contain include files that are not input files but should be processed by +# the preprocessor. + +INCLUDE_PATH = + +# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard +# patterns (like *.h and *.hpp) to filter out the header-files in the +# directories. If left blank, the patterns specified with FILE_PATTERNS will +# be used. + +INCLUDE_FILE_PATTERNS = + +# The PREDEFINED tag can be used to specify one or more macro names that +# are defined before the preprocessor is started (similar to the -D option of +# gcc). The argument of the tag is a list of macros of the form: name +# or name=definition (no spaces). If the definition and the = are +# omitted =1 is assumed. To prevent a macro definition from being +# undefined via #undef or recursively expanded use the := operator +# instead of the = operator. + +PREDEFINED = DEBUG=2 __GNUC__=1 + +# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then +# this tag can be used to specify a list of macro names that should be expanded. +# The macro definition that is found in the sources will be used. +# Use the PREDEFINED tag if you want to use a different macro definition. + +EXPAND_AS_DEFINED = + +# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then +# doxygen's preprocessor will remove all function-like macros that are alone +# on a line, have an all uppercase name, and do not end with a semicolon. Such +# function macros are typically used for boiler-plate code, and will confuse +# the parser if not removed. + +SKIP_FUNCTION_MACROS = YES + +#--------------------------------------------------------------------------- +# Configuration::additions related to external references +#--------------------------------------------------------------------------- + +# The TAGFILES option can be used to specify one or more tagfiles. +# Optionally an initial location of the external documentation +# can be added for each tagfile. The format of a tag file without +# this location is as follows: +# +# TAGFILES = file1 file2 ... +# Adding location for the tag files is done as follows: +# +# TAGFILES = file1=loc1 "file2 = loc2" ... +# where "loc1" and "loc2" can be relative or absolute paths or +# URLs. If a location is present for each tag, the installdox tool +# does not have to be run to correct the links. +# Note that each tag file must have a unique name +# (where the name does NOT include the path) +# If a tag file is not located in the directory in which doxygen +# is run, you must also specify the path to the tagfile here. + +TAGFILES = tooltag=./man1 + +# When a file name is specified after GENERATE_TAGFILE, doxygen will create +# a tag file that is based on the input files it reads. + +GENERATE_TAGFILE = + +# If the ALLEXTERNALS tag is set to YES all external classes will be listed +# in the class index. If set to NO only the inherited external classes +# will be listed. + +ALLEXTERNALS = NO + +# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed +# in the modules index. If set to NO, only the current project's groups will +# be listed. + +EXTERNAL_GROUPS = YES + +# The PERL_PATH should be the absolute path and name of the perl script +# interpreter (i.e. the result of `which perl'). + +PERL_PATH = /usr/bin/perl + +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- + +# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will +# generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base +# or super classes. Setting the tag to NO turns the diagrams off. Note that +# this option is superseded by the HAVE_DOT option below. This is only a +# fallback. It is recommended to install and use dot, since it yields more +# powerful graphs. + +CLASS_DIAGRAMS = YES + +# You can define message sequence charts within doxygen comments using the \msc +# command. Doxygen will then run the mscgen tool (see +# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the +# documentation. The MSCGEN_PATH tag allows you to specify the directory where +# the mscgen tool resides. If left empty the tool is assumed to be found in the +# default search path. + +MSCGEN_PATH = + +# If set to YES, the inheritance and collaboration graphs will hide +# inheritance and usage relations if the target is undocumented +# or is not a class. + +HIDE_UNDOC_RELATIONS = YES + +# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is +# available from the path. This tool is part of Graphviz, a graph visualization +# toolkit from AT&T and Lucent Bell Labs. The other options in this section +# have no effect if this option is set to NO (the default) + +HAVE_DOT = NO + +# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is +# allowed to run in parallel. When set to 0 (the default) doxygen will +# base this on the number of processors available in the system. You can set it +# explicitly to a value larger than 0 to get control over the balance +# between CPU load and processing speed. + +DOT_NUM_THREADS = 0 + +# By default doxygen will write a font called FreeSans.ttf to the output +# directory and reference it in all dot files that doxygen generates. This +# font does not include all possible unicode characters however, so when you need +# these (or just want a differently looking font) you can specify the font name +# using DOT_FONTNAME. You need need to make sure dot is able to find the font, +# which can be done by putting it in a standard location or by setting the +# DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory +# containing the font. + +DOT_FONTNAME = FreeSans.ttf + +# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs. +# The default size is 10pt. + +DOT_FONTSIZE = 10 + +# By default doxygen will tell dot to use the output directory to look for the +# FreeSans.ttf font (which doxygen will put there itself). If you specify a +# different font using DOT_FONTNAME you can set the path where dot +# can find it using this tag. + +DOT_FONTPATH = + +# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect inheritance relations. Setting this tag to YES will force the +# the CLASS_DIAGRAMS tag to NO. + +CLASS_GRAPH = YES + +# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect implementation dependencies (inheritance, containment, and +# class references variables) of the class with other documented classes. + +COLLABORATION_GRAPH = YES + +# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for groups, showing the direct groups dependencies + +GROUP_GRAPHS = YES + +# If the UML_LOOK tag is set to YES doxygen will generate inheritance and +# collaboration diagrams in a style similar to the OMG's Unified Modeling +# Language. + +UML_LOOK = NO + +# If set to YES, the inheritance and collaboration graphs will show the +# relations between templates and their instances. + +TEMPLATE_RELATIONS = NO + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT +# tags are set to YES then doxygen will generate a graph for each documented +# file showing the direct and indirect include dependencies of the file with +# other documented files. + +INCLUDE_GRAPH = YES + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and +# HAVE_DOT tags are set to YES then doxygen will generate a graph for each +# documented header file showing the documented files that directly or +# indirectly include this file. + +INCLUDED_BY_GRAPH = YES + +# If the CALL_GRAPH and HAVE_DOT options are set to YES then +# doxygen will generate a call dependency graph for every global function +# or class method. Note that enabling this option will significantly increase +# the time of a run. So in most cases it will be better to enable call graphs +# for selected functions only using the \callgraph command. + +CALL_GRAPH = NO + +# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then +# doxygen will generate a caller dependency graph for every global function +# or class method. Note that enabling this option will significantly increase +# the time of a run. So in most cases it will be better to enable caller +# graphs for selected functions only using the \callergraph command. + +CALLER_GRAPH = NO + +# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen +# will graphical hierarchy of all classes instead of a textual one. + +GRAPHICAL_HIERARCHY = YES + +# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES +# then doxygen will show the dependencies a directory has on other directories +# in a graphical way. The dependency relations are determined by the #include +# relations between the files in the directories. + +DIRECTORY_GRAPH = YES + +# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images +# generated by dot. Possible values are png, jpg, or gif +# If left blank png will be used. + +DOT_IMAGE_FORMAT = png + +# The tag DOT_PATH can be used to specify the path where the dot tool can be +# found. If left blank, it is assumed the dot tool can be found in the path. + +DOT_PATH = + +# The DOTFILE_DIRS tag can be used to specify one or more directories that +# contain dot files that are included in the documentation (see the +# \dotfile command). + +DOTFILE_DIRS = + +# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of +# nodes that will be shown in the graph. If the number of nodes in a graph +# becomes larger than this value, doxygen will truncate the graph, which is +# visualized by representing a node as a red box. Note that doxygen if the +# number of direct children of the root node in a graph is already larger than +# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note +# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. + +DOT_GRAPH_MAX_NODES = 50 + +# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the +# graphs generated by dot. A depth value of 3 means that only nodes reachable +# from the root by following a path via at most 3 edges will be shown. Nodes +# that lay further from the root node will be omitted. Note that setting this +# option to 1 or 2 may greatly reduce the computation time needed for large +# code bases. Also note that the size of a graph can be further restricted by +# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. + +MAX_DOT_GRAPH_DEPTH = 0 + +# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent +# background. This is disabled by default, because dot on Windows does not +# seem to support this out of the box. Warning: Depending on the platform used, +# enabling this option may lead to badly anti-aliased labels on the edges of +# a graph (i.e. they become hard to read). + +DOT_TRANSPARENT = NO + +# Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output +# files in one run (i.e. multiple -o and -T options on the command line). This +# makes dot run faster, but since only newer versions of dot (>1.8.10) +# support this, this feature is disabled by default. + +DOT_MULTI_TARGETS = YES + +# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will +# generate a legend page explaining the meaning of the various boxes and +# arrows in the dot generated graphs. + +GENERATE_LEGEND = YES + +# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will +# remove the intermediate dot files that are used to generate +# the various graphs. + +DOT_CLEANUP = YES diff --git a/external/db_drivers/liblmdb32/LICENSE b/external/db_drivers/liblmdb32/LICENSE new file mode 100644 index 000000000..05ad7571e --- /dev/null +++ b/external/db_drivers/liblmdb32/LICENSE @@ -0,0 +1,47 @@ +The OpenLDAP Public License + Version 2.8, 17 August 2003 + +Redistribution and use of this software and associated documentation +("Software"), with or without modification, are permitted provided +that the following conditions are met: + +1. Redistributions in source form must retain copyright statements + and notices, + +2. Redistributions in binary form must reproduce applicable copyright + statements and notices, this list of conditions, and the following + disclaimer in the documentation and/or other materials provided + with the distribution, and + +3. Redistributions must contain a verbatim copy of this document. + +The OpenLDAP Foundation may revise this license from time to time. +Each revision is distinguished by a version number. You may use +this Software under terms of this license revision or under the +terms of any subsequent revision of the license. + +THIS SOFTWARE IS PROVIDED BY THE OPENLDAP FOUNDATION AND ITS +CONTRIBUTORS ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, +INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY +AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +SHALL THE OPENLDAP FOUNDATION, ITS CONTRIBUTORS, OR THE AUTHOR(S) +OR OWNER(S) OF THE SOFTWARE BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +The names of the authors and copyright holders must not be used in +advertising or otherwise to promote the sale, use or other dealing +in this Software without specific, written prior permission. Title +to copyright in this Software shall at all times remain with copyright +holders. + +OpenLDAP is a registered trademark of the OpenLDAP Foundation. + +Copyright 1999-2003 The OpenLDAP Foundation, Redwood City, +California, USA. All Rights Reserved. Permission to copy and +distribute verbatim copies of this document is granted. diff --git a/external/db_drivers/liblmdb32/Makefile b/external/db_drivers/liblmdb32/Makefile new file mode 100644 index 000000000..095e65587 --- /dev/null +++ b/external/db_drivers/liblmdb32/Makefile @@ -0,0 +1,98 @@ +# Makefile for liblmdb (Lightning memory-mapped database library). + +######################################################################## +# Configuration. The compiler options must enable threaded compilation. +# +# Preprocessor macros (for CPPFLAGS) of interest... +# Note that the defaults should already be correct for most +# platforms; you should not need to change any of these. +# Read their descriptions in mdb.c if you do: +# +# - MDB_USE_POSIX_SEM +# - MDB_DSYNC +# - MDB_FDATASYNC +# - MDB_USE_PWRITEV +# +# There may be other macros in mdb.c of interest. You should +# read mdb.c before changing any of them. +# +CC = gcc +W = -W -Wall -Wno-unused-parameter -Wbad-function-cast -Wuninitialized +THREADS = -pthread +OPT = -O2 -g +CFLAGS = $(THREADS) $(OPT) $(W) $(XCFLAGS) +LDLIBS = +SOLIBS = +prefix = /usr/local +XCFLAGS = -DVL32 + +######################################################################## + +IHDRS = lmdb.h +ILIBS = liblmdb.a liblmdb.so +IPROGS = mdb_stat mdb_copy mdb_dump mdb_load +IDOCS = mdb_stat.1 mdb_copy.1 mdb_dump.1 mdb_load.1 +PROGS = $(IPROGS) mtest mtest2 mtest3 mtest4 mtest5 +all: $(ILIBS) $(PROGS) + +install: $(ILIBS) $(IPROGS) $(IHDRS) + for f in $(IPROGS); do cp $$f $(DESTDIR)$(prefix)/bin; done + for f in $(ILIBS); do cp $$f $(DESTDIR)$(prefix)/lib; done + for f in $(IHDRS); do cp $$f $(DESTDIR)$(prefix)/include; done + for f in $(IDOCS); do cp $$f $(DESTDIR)$(prefix)/man/man1; done + +clean: + rm -rf $(PROGS) *.[ao] *.so *~ testdb + +test: all + mkdir testdb + ./mtest && ./mdb_stat testdb + +liblmdb.a: mdb.o midl.o + ar rs $@ mdb.o midl.o + +liblmdb.so: mdb.o midl.o +# $(CC) $(LDFLAGS) -pthread -shared -Wl,-Bsymbolic -o $@ mdb.o midl.o $(SOLIBS) + $(CC) $(LDFLAGS) -pthread -shared -o $@ mdb.o midl.o $(SOLIBS) + +mdb_stat: mdb_stat.o liblmdb.a +mdb_copy: mdb_copy.o liblmdb.a +mdb_dump: mdb_dump.o liblmdb.a +mdb_load: mdb_load.o liblmdb.a +mtest: mtest.o liblmdb.a +mtest2: mtest2.o liblmdb.a +mtest3: mtest3.o liblmdb.a +mtest4: mtest4.o liblmdb.a +mtest5: mtest5.o liblmdb.a +mtest6: mtest6.o liblmdb.a + +mdb.o: mdb.c lmdb.h midl.h + $(CC) $(CFLAGS) -fPIC $(CPPFLAGS) -c mdb.c + +midl.o: midl.c midl.h + $(CC) $(CFLAGS) -fPIC $(CPPFLAGS) -c midl.c + +%: %.o + $(CC) $(CFLAGS) $(LDFLAGS) $^ $(LDLIBS) -o $@ + +%.o: %.c lmdb.h + $(CC) $(CFLAGS) $(CPPFLAGS) -c $< + +COV_FLAGS=-fprofile-arcs -ftest-coverage +COV_OBJS=xmdb.o xmidl.o + +coverage: xmtest + for i in mtest*.c [0-9]*.c; do j=`basename \$$i .c`; $(MAKE) $$j.o; \ + gcc -o x$$j $$j.o $(COV_OBJS) -pthread $(COV_FLAGS); \ + rm -rf testdb; mkdir testdb; ./x$$j; done + gcov xmdb.c + gcov xmidl.c + +xmtest: mtest.o xmdb.o xmidl.o + gcc -o xmtest mtest.o xmdb.o xmidl.o -pthread $(COV_FLAGS) + +xmdb.o: mdb.c lmdb.h midl.h + $(CC) $(CFLAGS) -fPIC $(CPPFLAGS) -O0 $(COV_FLAGS) -c mdb.c -o $@ + +xmidl.o: midl.c midl.h + $(CC) $(CFLAGS) -fPIC $(CPPFLAGS) -O0 $(COV_FLAGS) -c midl.c -o $@ diff --git a/external/db_drivers/liblmdb32/lmdb.h b/external/db_drivers/liblmdb32/lmdb.h new file mode 100644 index 000000000..bdbb0b909 --- /dev/null +++ b/external/db_drivers/liblmdb32/lmdb.h @@ -0,0 +1,1553 @@ +/** @file lmdb.h + * @brief Lightning memory-mapped database library + * + * @mainpage Lightning Memory-Mapped Database Manager (LMDB) + * + * @section intro_sec Introduction + * LMDB is a Btree-based database management library modeled loosely on the + * BerkeleyDB API, but much simplified. The entire database is exposed + * in a memory map, and all data fetches return data directly + * from the mapped memory, so no malloc's or memcpy's occur during + * data fetches. As such, the library is extremely simple because it + * requires no page caching layer of its own, and it is extremely high + * performance and memory-efficient. It is also fully transactional with + * full ACID semantics, and when the memory map is read-only, the + * database integrity cannot be corrupted by stray pointer writes from + * application code. + * + * The library is fully thread-aware and supports concurrent read/write + * access from multiple processes and threads. Data pages use a copy-on- + * write strategy so no active data pages are ever overwritten, which + * also provides resistance to corruption and eliminates the need of any + * special recovery procedures after a system crash. Writes are fully + * serialized; only one write transaction may be active at a time, which + * guarantees that writers can never deadlock. The database structure is + * multi-versioned so readers run with no locks; writers cannot block + * readers, and readers don't block writers. + * + * Unlike other well-known database mechanisms which use either write-ahead + * transaction logs or append-only data writes, LMDB requires no maintenance + * during operation. Both write-ahead loggers and append-only databases + * require periodic checkpointing and/or compaction of their log or database + * files otherwise they grow without bound. LMDB tracks free pages within + * the database and re-uses them for new write operations, so the database + * size does not grow without bound in normal use. + * + * The memory map can be used as a read-only or read-write map. It is + * read-only by default as this provides total immunity to corruption. + * Using read-write mode offers much higher write performance, but adds + * the possibility for stray application writes thru pointers to silently + * corrupt the database. Of course if your application code is known to + * be bug-free (...) then this is not an issue. + * + * @section caveats_sec Caveats + * Troubleshooting the lock file, plus semaphores on BSD systems: + * + * - A broken lockfile can cause sync issues. + * Stale reader transactions left behind by an aborted program + * cause further writes to grow the database quickly, and + * stale locks can block further operation. + * + * Fix: Check for stale readers periodically, using the + * #mdb_reader_check function or the \ref mdb_stat_1 "mdb_stat" tool. Or just + * make all programs using the database close it; the lockfile + * is always reset on first open of the environment. + * + * - On BSD systems or others configured with MDB_USE_POSIX_SEM, + * startup can fail due to semaphores owned by another userid. + * + * Fix: Open and close the database as the user which owns the + * semaphores (likely last user) or as root, while no other + * process is using the database. + * + * Restrictions/caveats (in addition to those listed for some functions): + * + * - Only the database owner should normally use the database on + * BSD systems or when otherwise configured with MDB_USE_POSIX_SEM. + * Multiple users can cause startup to fail later, as noted above. + * + * - There is normally no pure read-only mode, since readers need write + * access to locks and lock file. Exceptions: On read-only filesystems + * or with the #MDB_NOLOCK flag described under #mdb_env_open(). + * + * - By default, in versions before 0.9.10, unused portions of the data + * file might receive garbage data from memory freed by other code. + * (This does not happen when using the #MDB_WRITEMAP flag.) As of + * 0.9.10 the default behavior is to initialize such memory before + * writing to the data file. Since there may be a slight performance + * cost due to this initialization, applications may disable it using + * the #MDB_NOMEMINIT flag. Applications handling sensitive data + * which must not be written should not use this flag. This flag is + * irrelevant when using #MDB_WRITEMAP. + * + * - A thread can only use one transaction at a time, plus any child + * transactions. Each transaction belongs to one thread. See below. + * The #MDB_NOTLS flag changes this for read-only transactions. + * + * - Use an MDB_env* in the process which opened it, without fork()ing. + * + * - Do not have open an LMDB database twice in the same process at + * the same time. Not even from a plain open() call - close()ing it + * breaks flock() advisory locking. + * + * - Avoid long-lived transactions. Read transactions prevent + * reuse of pages freed by newer write transactions, thus the + * database can grow quickly. Write transactions prevent + * other write transactions, since writes are serialized. + * + * - Avoid suspending a process with active transactions. These + * would then be "long-lived" as above. Also read transactions + * suspended when writers commit could sometimes see wrong data. + * + * ...when several processes can use a database concurrently: + * + * - Avoid aborting a process with an active transaction. + * The transaction becomes "long-lived" as above until a check + * for stale readers is performed or the lockfile is reset, + * since the process may not remove it from the lockfile. + * + * - If you do that anyway, do a periodic check for stale readers. Or + * close the environment once in a while, so the lockfile can get reset. + * + * - Do not use LMDB databases on remote filesystems, even between + * processes on the same host. This breaks flock() on some OSes, + * possibly memory map sync, and certainly sync between programs + * on different hosts. + * + * - Opening a database can fail if another process is opening or + * closing it at exactly the same time. + * + * @author Howard Chu, Symas Corporation. + * + * @copyright Copyright 2011-2014 Howard Chu, Symas Corp. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>. + * + * @par Derived From: + * This code is derived from btree.c written by Martin Hedenfalk. + * + * Copyright (c) 2009, 2010 Martin Hedenfalk <martin@bzero.se> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifndef _LMDB_H_ +#define _LMDB_H_ + +#include <sys/types.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** Unix permissions for creating files, or dummy definition for Windows */ +#ifdef _MSC_VER +typedef int mdb_mode_t; +#else +typedef mode_t mdb_mode_t; +#endif + +/** An abstraction for a file handle. + * On POSIX systems file handles are small integers. On Windows + * they're opaque pointers. + */ +#ifdef _WIN32 +typedef void *mdb_filehandle_t; +#else +typedef int mdb_filehandle_t; +#endif + +/** @defgroup mdb LMDB API + * @{ + * @brief OpenLDAP Lightning Memory-Mapped Database Manager + */ +/** @defgroup Version Version Macros + * @{ + */ +/** Library major version */ +#define MDB_VERSION_MAJOR 0 +/** Library minor version */ +#define MDB_VERSION_MINOR 9 +/** Library patch version */ +#define MDB_VERSION_PATCH 14 + +/** Combine args a,b,c into a single integer for easy version comparisons */ +#define MDB_VERINT(a,b,c) (((a) << 24) | ((b) << 16) | (c)) + +/** The full library version as a single integer */ +#define MDB_VERSION_FULL \ + MDB_VERINT(MDB_VERSION_MAJOR,MDB_VERSION_MINOR,MDB_VERSION_PATCH) + +/** The release date of this library version */ +#define MDB_VERSION_DATE "September 20, 2014" + +/** A stringifier for the version info */ +#define MDB_VERSTR(a,b,c,d) "LMDB " #a "." #b "." #c ": (" d ")" + +/** A helper for the stringifier macro */ +#define MDB_VERFOO(a,b,c,d) MDB_VERSTR(a,b,c,d) + +/** The full library version as a C string */ +#define MDB_VERSION_STRING \ + MDB_VERFOO(MDB_VERSION_MAJOR,MDB_VERSION_MINOR,MDB_VERSION_PATCH,MDB_VERSION_DATE) +/** @} */ + +/** @brief Opaque structure for a database environment. + * + * A DB environment supports multiple databases, all residing in the same + * shared-memory map. + */ +typedef struct MDB_env MDB_env; + +/** @brief Opaque structure for a transaction handle. + * + * All database operations require a transaction handle. Transactions may be + * read-only or read-write. + */ +typedef struct MDB_txn MDB_txn; + +/** @brief A handle for an individual database in the DB environment. */ +typedef unsigned int MDB_dbi; + +/** @brief Opaque structure for navigating through a database */ +typedef struct MDB_cursor MDB_cursor; + +/** @brief Generic structure used for passing keys and data in and out + * of the database. + * + * Values returned from the database are valid only until a subsequent + * update operation, or the end of the transaction. Do not modify or + * free them, they commonly point into the database itself. + * + * Key sizes must be between 1 and #mdb_env_get_maxkeysize() inclusive. + * The same applies to data sizes in databases with the #MDB_DUPSORT flag. + * Other data items can in theory be from 0 to 0xffffffff bytes long. + */ +typedef struct MDB_val { + size_t mv_size; /**< size of the data item */ + void *mv_data; /**< address of the data item */ +} MDB_val; + +/** @brief A callback function used to compare two keys in a database */ +typedef int (MDB_cmp_func)(const MDB_val *a, const MDB_val *b); + +/** @brief A callback function used to relocate a position-dependent data item + * in a fixed-address database. + * + * The \b newptr gives the item's desired address in + * the memory map, and \b oldptr gives its previous address. The item's actual + * data resides at the address in \b item. This callback is expected to walk + * through the fields of the record in \b item and modify any + * values based at the \b oldptr address to be relative to the \b newptr address. + * @param[in,out] item The item that is to be relocated. + * @param[in] oldptr The previous address. + * @param[in] newptr The new address to relocate to. + * @param[in] relctx An application-provided context, set by #mdb_set_relctx(). + * @todo This feature is currently unimplemented. + */ +typedef void (MDB_rel_func)(MDB_val *item, void *oldptr, void *newptr, void *relctx); + +/** @defgroup mdb_env Environment Flags + * @{ + */ + /** mmap at a fixed address (experimental) */ +#define MDB_FIXEDMAP 0x01 + /** no environment directory */ +#define MDB_NOSUBDIR 0x4000 + /** don't fsync after commit */ +#define MDB_NOSYNC 0x10000 + /** read only */ +#define MDB_RDONLY 0x20000 + /** don't fsync metapage after commit */ +#define MDB_NOMETASYNC 0x40000 + /** use writable mmap */ +#define MDB_WRITEMAP 0x80000 + /** use asynchronous msync when #MDB_WRITEMAP is used */ +#define MDB_MAPASYNC 0x100000 + /** tie reader locktable slots to #MDB_txn objects instead of to threads */ +#define MDB_NOTLS 0x200000 + /** don't do any locking, caller must manage their own locks */ +#define MDB_NOLOCK 0x400000 + /** don't do readahead (no effect on Windows) */ +#define MDB_NORDAHEAD 0x800000 + /** don't initialize malloc'd memory before writing to datafile */ +#define MDB_NOMEMINIT 0x1000000 +/** @} */ + +/** @defgroup mdb_dbi_open Database Flags + * @{ + */ + /** use reverse string keys */ +#define MDB_REVERSEKEY 0x02 + /** use sorted duplicates */ +#define MDB_DUPSORT 0x04 + /** numeric keys in native byte order. + * The keys must all be of the same size. */ +#define MDB_INTEGERKEY 0x08 + /** with #MDB_DUPSORT, sorted dup items have fixed size */ +#define MDB_DUPFIXED 0x10 + /** with #MDB_DUPSORT, dups are numeric in native byte order */ +#define MDB_INTEGERDUP 0x20 + /** with #MDB_DUPSORT, use reverse string dups */ +#define MDB_REVERSEDUP 0x40 + /** create DB if not already existing */ +#define MDB_CREATE 0x40000 +/** @} */ + +/** @defgroup mdb_put Write Flags + * @{ + */ +/** For put: Don't write if the key already exists. */ +#define MDB_NOOVERWRITE 0x10 +/** Only for #MDB_DUPSORT<br> + * For put: don't write if the key and data pair already exist.<br> + * For mdb_cursor_del: remove all duplicate data items. + */ +#define MDB_NODUPDATA 0x20 +/** For mdb_cursor_put: overwrite the current key/data pair */ +#define MDB_CURRENT 0x40 +/** For put: Just reserve space for data, don't copy it. Return a + * pointer to the reserved space. + */ +#define MDB_RESERVE 0x10000 +/** Data is being appended, don't split full pages. */ +#define MDB_APPEND 0x20000 +/** Duplicate data is being appended, don't split full pages. */ +#define MDB_APPENDDUP 0x40000 +/** Store multiple data items in one call. Only for #MDB_DUPFIXED. */ +#define MDB_MULTIPLE 0x80000 +/* @} */ + +/** @defgroup mdb_copy Copy Flags + * @{ + */ +/** Compacting copy: Omit free space from copy, and renumber all + * pages sequentially. + */ +#define MDB_CP_COMPACT 0x01 +/* @} */ + +/** @brief Cursor Get operations. + * + * This is the set of all operations for retrieving data + * using a cursor. + */ +typedef enum MDB_cursor_op { + MDB_FIRST, /**< Position at first key/data item */ + MDB_FIRST_DUP, /**< Position at first data item of current key. + Only for #MDB_DUPSORT */ + MDB_GET_BOTH, /**< Position at key/data pair. Only for #MDB_DUPSORT */ + MDB_GET_BOTH_RANGE, /**< position at key, nearest data. Only for #MDB_DUPSORT */ + MDB_GET_CURRENT, /**< Return key/data at current cursor position */ + MDB_GET_MULTIPLE, /**< Return key and up to a page of duplicate data items + from current cursor position. Move cursor to prepare + for #MDB_NEXT_MULTIPLE. Only for #MDB_DUPFIXED */ + MDB_LAST, /**< Position at last key/data item */ + MDB_LAST_DUP, /**< Position at last data item of current key. + Only for #MDB_DUPSORT */ + MDB_NEXT, /**< Position at next data item */ + MDB_NEXT_DUP, /**< Position at next data item of current key. + Only for #MDB_DUPSORT */ + MDB_NEXT_MULTIPLE, /**< Return key and up to a page of duplicate data items + from next cursor position. Move cursor to prepare + for #MDB_NEXT_MULTIPLE. Only for #MDB_DUPFIXED */ + MDB_NEXT_NODUP, /**< Position at first data item of next key */ + MDB_PREV, /**< Position at previous data item */ + MDB_PREV_DUP, /**< Position at previous data item of current key. + Only for #MDB_DUPSORT */ + MDB_PREV_NODUP, /**< Position at last data item of previous key */ + MDB_SET, /**< Position at specified key */ + MDB_SET_KEY, /**< Position at specified key, return key + data */ + MDB_SET_RANGE /**< Position at first key greater than or equal to specified key. */ +} MDB_cursor_op; + +/** @defgroup errors Return Codes + * + * BerkeleyDB uses -30800 to -30999, we'll go under them + * @{ + */ + /** Successful result */ +#define MDB_SUCCESS 0 + /** key/data pair already exists */ +#define MDB_KEYEXIST (-30799) + /** key/data pair not found (EOF) */ +#define MDB_NOTFOUND (-30798) + /** Requested page not found - this usually indicates corruption */ +#define MDB_PAGE_NOTFOUND (-30797) + /** Located page was wrong type */ +#define MDB_CORRUPTED (-30796) + /** Update of meta page failed, probably I/O error */ +#define MDB_PANIC (-30795) + /** Environment version mismatch */ +#define MDB_VERSION_MISMATCH (-30794) + /** File is not a valid LMDB file */ +#define MDB_INVALID (-30793) + /** Environment mapsize reached */ +#define MDB_MAP_FULL (-30792) + /** Environment maxdbs reached */ +#define MDB_DBS_FULL (-30791) + /** Environment maxreaders reached */ +#define MDB_READERS_FULL (-30790) + /** Too many TLS keys in use - Windows only */ +#define MDB_TLS_FULL (-30789) + /** Txn has too many dirty pages */ +#define MDB_TXN_FULL (-30788) + /** Cursor stack too deep - internal error */ +#define MDB_CURSOR_FULL (-30787) + /** Page has not enough space - internal error */ +#define MDB_PAGE_FULL (-30786) + /** Database contents grew beyond environment mapsize */ +#define MDB_MAP_RESIZED (-30785) + /** MDB_INCOMPATIBLE: Operation and DB incompatible, or DB flags changed */ +#define MDB_INCOMPATIBLE (-30784) + /** Invalid reuse of reader locktable slot */ +#define MDB_BAD_RSLOT (-30783) + /** Transaction cannot recover - it must be aborted */ +#define MDB_BAD_TXN (-30782) + /** Unsupported size of key/DB name/data, or wrong DUPFIXED size */ +#define MDB_BAD_VALSIZE (-30781) + /** The specified DBI was changed unexpectedly */ +#define MDB_BAD_DBI (-30780) + /** The last defined error code */ +#define MDB_LAST_ERRCODE MDB_BAD_DBI +/** @} */ + +/** @brief Statistics for a database in the environment */ +typedef struct MDB_stat { + unsigned int ms_psize; /**< Size of a database page. + This is currently the same for all databases. */ + unsigned int ms_depth; /**< Depth (height) of the B-tree */ + size_t ms_branch_pages; /**< Number of internal (non-leaf) pages */ + size_t ms_leaf_pages; /**< Number of leaf pages */ + size_t ms_overflow_pages; /**< Number of overflow pages */ + size_t ms_entries; /**< Number of data items */ +} MDB_stat; + +/** @brief Information about the environment */ +typedef struct MDB_envinfo { + void *me_mapaddr; /**< Address of map, if fixed */ + size_t me_mapsize; /**< Size of the data memory map */ + size_t me_last_pgno; /**< ID of the last used page */ + size_t me_last_txnid; /**< ID of the last committed transaction */ + unsigned int me_maxreaders; /**< max reader slots in the environment */ + unsigned int me_numreaders; /**< max reader slots used in the environment */ +} MDB_envinfo; + + /** @brief Return the LMDB library version information. + * + * @param[out] major if non-NULL, the library major version number is copied here + * @param[out] minor if non-NULL, the library minor version number is copied here + * @param[out] patch if non-NULL, the library patch version number is copied here + * @retval "version string" The library version as a string + */ +char *mdb_version(int *major, int *minor, int *patch); + + /** @brief Return a string describing a given error code. + * + * This function is a superset of the ANSI C X3.159-1989 (ANSI C) strerror(3) + * function. If the error code is greater than or equal to 0, then the string + * returned by the system function strerror(3) is returned. If the error code + * is less than 0, an error string corresponding to the LMDB library error is + * returned. See @ref errors for a list of LMDB-specific error codes. + * @param[in] err The error code + * @retval "error message" The description of the error + */ +char *mdb_strerror(int err); + + /** @brief Create an LMDB environment handle. + * + * This function allocates memory for a #MDB_env structure. To release + * the allocated memory and discard the handle, call #mdb_env_close(). + * Before the handle may be used, it must be opened using #mdb_env_open(). + * Various other options may also need to be set before opening the handle, + * e.g. #mdb_env_set_mapsize(), #mdb_env_set_maxreaders(), #mdb_env_set_maxdbs(), + * depending on usage requirements. + * @param[out] env The address where the new handle will be stored + * @return A non-zero error value on failure and 0 on success. + */ +int mdb_env_create(MDB_env **env); + + /** @brief Open an environment handle. + * + * If this function fails, #mdb_env_close() must be called to discard the #MDB_env handle. + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[in] path The directory in which the database files reside. This + * directory must already exist and be writable. + * @param[in] flags Special options for this environment. This parameter + * must be set to 0 or by bitwise OR'ing together one or more of the + * values described here. + * Flags set by mdb_env_set_flags() are also used. + * <ul> + * <li>#MDB_FIXEDMAP + * use a fixed address for the mmap region. This flag must be specified + * when creating the environment, and is stored persistently in the environment. + * If successful, the memory map will always reside at the same virtual address + * and pointers used to reference data items in the database will be constant + * across multiple invocations. This option may not always work, depending on + * how the operating system has allocated memory to shared libraries and other uses. + * The feature is highly experimental. + * <li>#MDB_NOSUBDIR + * By default, LMDB creates its environment in a directory whose + * pathname is given in \b path, and creates its data and lock files + * under that directory. With this option, \b path is used as-is for + * the database main data file. The database lock file is the \b path + * with "-lock" appended. + * <li>#MDB_RDONLY + * Open the environment in read-only mode. No write operations will be + * allowed. LMDB will still modify the lock file - except on read-only + * filesystems, where LMDB does not use locks. + * <li>#MDB_WRITEMAP + * Use a writeable memory map unless MDB_RDONLY is set. This is faster + * and uses fewer mallocs, but loses protection from application bugs + * like wild pointer writes and other bad updates into the database. + * Incompatible with nested transactions. + * Processes with and without MDB_WRITEMAP on the same environment do + * not cooperate well. + * <li>#MDB_NOMETASYNC + * Flush system buffers to disk only once per transaction, omit the + * metadata flush. Defer that until the system flushes files to disk, + * or next non-MDB_RDONLY commit or #mdb_env_sync(). This optimization + * maintains database integrity, but a system crash may undo the last + * committed transaction. I.e. it preserves the ACI (atomicity, + * consistency, isolation) but not D (durability) database property. + * This flag may be changed at any time using #mdb_env_set_flags(). + * <li>#MDB_NOSYNC + * Don't flush system buffers to disk when committing a transaction. + * This optimization means a system crash can corrupt the database or + * lose the last transactions if buffers are not yet flushed to disk. + * The risk is governed by how often the system flushes dirty buffers + * to disk and how often #mdb_env_sync() is called. However, if the + * filesystem preserves write order and the #MDB_WRITEMAP flag is not + * used, transactions exhibit ACI (atomicity, consistency, isolation) + * properties and only lose D (durability). I.e. database integrity + * is maintained, but a system crash may undo the final transactions. + * Note that (#MDB_NOSYNC | #MDB_WRITEMAP) leaves the system with no + * hint for when to write transactions to disk, unless #mdb_env_sync() + * is called. (#MDB_MAPASYNC | #MDB_WRITEMAP) may be preferable. + * This flag may be changed at any time using #mdb_env_set_flags(). + * <li>#MDB_MAPASYNC + * When using #MDB_WRITEMAP, use asynchronous flushes to disk. + * As with #MDB_NOSYNC, a system crash can then corrupt the + * database or lose the last transactions. Calling #mdb_env_sync() + * ensures on-disk database integrity until next commit. + * This flag may be changed at any time using #mdb_env_set_flags(). + * <li>#MDB_NOTLS + * Don't use Thread-Local Storage. Tie reader locktable slots to + * #MDB_txn objects instead of to threads. I.e. #mdb_txn_reset() keeps + * the slot reseved for the #MDB_txn object. A thread may use parallel + * read-only transactions. A read-only transaction may span threads if + * the user synchronizes its use. Applications that multiplex many + * user threads over individual OS threads need this option. Such an + * application must also serialize the write transactions in an OS + * thread, since LMDB's write locking is unaware of the user threads. + * <li>#MDB_NOLOCK + * Don't do any locking. If concurrent access is anticipated, the + * caller must manage all concurrency itself. For proper operation + * the caller must enforce single-writer semantics, and must ensure + * that no readers are using old transactions while a writer is + * active. The simplest approach is to use an exclusive lock so that + * no readers may be active at all when a writer begins. + * <li>#MDB_NORDAHEAD + * Turn off readahead. Most operating systems perform readahead on + * read requests by default. This option turns it off if the OS + * supports it. Turning it off may help random read performance + * when the DB is larger than RAM and system RAM is full. + * The option is not implemented on Windows. + * <li>#MDB_NOMEMINIT + * Don't initialize malloc'd memory before writing to unused spaces + * in the data file. By default, memory for pages written to the data + * file is obtained using malloc. While these pages may be reused in + * subsequent transactions, freshly malloc'd pages will be initialized + * to zeroes before use. This avoids persisting leftover data from other + * code (that used the heap and subsequently freed the memory) into the + * data file. Note that many other system libraries may allocate + * and free memory from the heap for arbitrary uses. E.g., stdio may + * use the heap for file I/O buffers. This initialization step has a + * modest performance cost so some applications may want to disable + * it using this flag. This option can be a problem for applications + * which handle sensitive data like passwords, and it makes memory + * checkers like Valgrind noisy. This flag is not needed with #MDB_WRITEMAP, + * which writes directly to the mmap instead of using malloc for pages. The + * initialization is also skipped if #MDB_RESERVE is used; the + * caller is expected to overwrite all of the memory that was + * reserved in that case. + * This flag may be changed at any time using #mdb_env_set_flags(). + * </ul> + * @param[in] mode The UNIX permissions to set on created files. This parameter + * is ignored on Windows. + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>#MDB_VERSION_MISMATCH - the version of the LMDB library doesn't match the + * version that created the database environment. + * <li>#MDB_INVALID - the environment file headers are corrupted. + * <li>ENOENT - the directory specified by the path parameter doesn't exist. + * <li>EACCES - the user didn't have permission to access the environment files. + * <li>EAGAIN - the environment was locked by another process. + * </ul> + */ +int mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode); + + /** @brief Copy an LMDB environment to the specified path. + * + * This function may be used to make a backup of an existing environment. + * No lockfile is created, since it gets recreated at need. + * @note This call can trigger significant file size growth if run in + * parallel with write transactions, because it employs a read-only + * transaction. See long-lived transactions under @ref caveats_sec. + * @param[in] env An environment handle returned by #mdb_env_create(). It + * must have already been opened successfully. + * @param[in] path The directory in which the copy will reside. This + * directory must already exist and be writable but must otherwise be + * empty. + * @return A non-zero error value on failure and 0 on success. + */ +int mdb_env_copy(MDB_env *env, const char *path); + + /** @brief Copy an LMDB environment to the specified file descriptor. + * + * This function may be used to make a backup of an existing environment. + * No lockfile is created, since it gets recreated at need. + * @note This call can trigger significant file size growth if run in + * parallel with write transactions, because it employs a read-only + * transaction. See long-lived transactions under @ref caveats_sec. + * @param[in] env An environment handle returned by #mdb_env_create(). It + * must have already been opened successfully. + * @param[in] fd The filedescriptor to write the copy to. It must + * have already been opened for Write access. + * @return A non-zero error value on failure and 0 on success. + */ +int mdb_env_copyfd(MDB_env *env, mdb_filehandle_t fd); + + /** @brief Copy an LMDB environment to the specified path, with options. + * + * This function may be used to make a backup of an existing environment. + * No lockfile is created, since it gets recreated at need. + * @note This call can trigger significant file size growth if run in + * parallel with write transactions, because it employs a read-only + * transaction. See long-lived transactions under @ref caveats_sec. + * @param[in] env An environment handle returned by #mdb_env_create(). It + * must have already been opened successfully. + * @param[in] path The directory in which the copy will reside. This + * directory must already exist and be writable but must otherwise be + * empty. + * @param[in] flags Special options for this operation. This parameter + * must be set to 0 or by bitwise OR'ing together one or more of the + * values described here. + * <ul> + * <li>#MDB_CP_COMPACT - Perform compaction while copying: omit free + * pages and sequentially renumber all pages in output. This option + * consumes more CPU and runs more slowly than the default. + * </ul> + * @return A non-zero error value on failure and 0 on success. + */ +int mdb_env_copy2(MDB_env *env, const char *path, unsigned int flags); + + /** @brief Copy an LMDB environment to the specified file descriptor, + * with options. + * + * This function may be used to make a backup of an existing environment. + * No lockfile is created, since it gets recreated at need. See + * #mdb_env_copy2() for further details. + * @note This call can trigger significant file size growth if run in + * parallel with write transactions, because it employs a read-only + * transaction. See long-lived transactions under @ref caveats_sec. + * @param[in] env An environment handle returned by #mdb_env_create(). It + * must have already been opened successfully. + * @param[in] fd The filedescriptor to write the copy to. It must + * have already been opened for Write access. + * @param[in] flags Special options for this operation. + * See #mdb_env_copy2() for options. + * @return A non-zero error value on failure and 0 on success. + */ +int mdb_env_copyfd2(MDB_env *env, mdb_filehandle_t fd, unsigned int flags); + + /** @brief Return statistics about the LMDB environment. + * + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[out] stat The address of an #MDB_stat structure + * where the statistics will be copied + */ +int mdb_env_stat(MDB_env *env, MDB_stat *stat); + + /** @brief Return information about the LMDB environment. + * + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[out] stat The address of an #MDB_envinfo structure + * where the information will be copied + */ +int mdb_env_info(MDB_env *env, MDB_envinfo *stat); + + /** @brief Flush the data buffers to disk. + * + * Data is always written to disk when #mdb_txn_commit() is called, + * but the operating system may keep it buffered. LMDB always flushes + * the OS buffers upon commit as well, unless the environment was + * opened with #MDB_NOSYNC or in part #MDB_NOMETASYNC. + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[in] force If non-zero, force a synchronous flush. Otherwise + * if the environment has the #MDB_NOSYNC flag set the flushes + * will be omitted, and with #MDB_MAPASYNC they will be asynchronous. + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>EINVAL - an invalid parameter was specified. + * <li>EIO - an error occurred during synchronization. + * </ul> + */ +int mdb_env_sync(MDB_env *env, int force); + + /** @brief Close the environment and release the memory map. + * + * Only a single thread may call this function. All transactions, databases, + * and cursors must already be closed before calling this function. Attempts to + * use any such handles after calling this function will cause a SIGSEGV. + * The environment handle will be freed and must not be used again after this call. + * @param[in] env An environment handle returned by #mdb_env_create() + */ +void mdb_env_close(MDB_env *env); + + /** @brief Set environment flags. + * + * This may be used to set some flags in addition to those from + * #mdb_env_open(), or to unset these flags. If several threads + * change the flags at the same time, the result is undefined. + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[in] flags The flags to change, bitwise OR'ed together + * @param[in] onoff A non-zero value sets the flags, zero clears them. + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>EINVAL - an invalid parameter was specified. + * </ul> + */ +int mdb_env_set_flags(MDB_env *env, unsigned int flags, int onoff); + + /** @brief Get environment flags. + * + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[out] flags The address of an integer to store the flags + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>EINVAL - an invalid parameter was specified. + * </ul> + */ +int mdb_env_get_flags(MDB_env *env, unsigned int *flags); + + /** @brief Return the path that was used in #mdb_env_open(). + * + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[out] path Address of a string pointer to contain the path. This + * is the actual string in the environment, not a copy. It should not be + * altered in any way. + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>EINVAL - an invalid parameter was specified. + * </ul> + */ +int mdb_env_get_path(MDB_env *env, const char **path); + + /** @brief Return the filedescriptor for the given environment. + * + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[out] fd Address of a mdb_filehandle_t to contain the descriptor. + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>EINVAL - an invalid parameter was specified. + * </ul> + */ +int mdb_env_get_fd(MDB_env *env, mdb_filehandle_t *fd); + + /** @brief Set the size of the memory map to use for this environment. + * + * The size should be a multiple of the OS page size. The default is + * 10485760 bytes. The size of the memory map is also the maximum size + * of the database. The value should be chosen as large as possible, + * to accommodate future growth of the database. + * This function should be called after #mdb_env_create() and before #mdb_env_open(). + * It may be called at later times if no transactions are active in + * this process. Note that the library does not check for this condition, + * the caller must ensure it explicitly. + * + * The new size takes effect immediately for the current process but + * will not be persisted to any others until a write transaction has been + * committed by the current process. Also, only mapsize increases are + * persisted into the environment. + * + * If the mapsize is increased by another process, and data has grown + * beyond the range of the current mapsize, #mdb_txn_begin() will + * return #MDB_MAP_RESIZED. This function may be called with a size + * of zero to adopt the new size. + * + * Any attempt to set a size smaller than the space already consumed + * by the environment will be silently changed to the current size of the used space. + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[in] size The size in bytes + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>EINVAL - an invalid parameter was specified, or the environment has + * an active write transaction. + * </ul> + */ +int mdb_env_set_mapsize(MDB_env *env, size_t size); + + /** @brief Set the maximum number of threads/reader slots for the environment. + * + * This defines the number of slots in the lock table that is used to track readers in the + * the environment. The default is 126. + * Starting a read-only transaction normally ties a lock table slot to the + * current thread until the environment closes or the thread exits. If + * MDB_NOTLS is in use, #mdb_txn_begin() instead ties the slot to the + * MDB_txn object until it or the #MDB_env object is destroyed. + * This function may only be called after #mdb_env_create() and before #mdb_env_open(). + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[in] readers The maximum number of reader lock table slots + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>EINVAL - an invalid parameter was specified, or the environment is already open. + * </ul> + */ +int mdb_env_set_maxreaders(MDB_env *env, unsigned int readers); + + /** @brief Get the maximum number of threads/reader slots for the environment. + * + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[out] readers Address of an integer to store the number of readers + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>EINVAL - an invalid parameter was specified. + * </ul> + */ +int mdb_env_get_maxreaders(MDB_env *env, unsigned int *readers); + + /** @brief Set the maximum number of named databases for the environment. + * + * This function is only needed if multiple databases will be used in the + * environment. Simpler applications that use the environment as a single + * unnamed database can ignore this option. + * This function may only be called after #mdb_env_create() and before #mdb_env_open(). + * + * Currently a moderate number of slots are cheap but a huge number gets + * expensive: 7-120 words per transaction, and every #mdb_dbi_open() + * does a linear search of the opened slots. + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[in] dbs The maximum number of databases + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>EINVAL - an invalid parameter was specified, or the environment is already open. + * </ul> + */ +int mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs); + + /** @brief Get the maximum size of keys and #MDB_DUPSORT data we can write. + * + * Depends on the compile-time constant #MDB_MAXKEYSIZE. Default 511. + * See @ref MDB_val. + * @param[in] env An environment handle returned by #mdb_env_create() + * @return The maximum size of a key we can write + */ +int mdb_env_get_maxkeysize(MDB_env *env); + + /** @brief Set application information associated with the #MDB_env. + * + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[in] ctx An arbitrary pointer for whatever the application needs. + * @return A non-zero error value on failure and 0 on success. + */ +int mdb_env_set_userctx(MDB_env *env, void *ctx); + + /** @brief Get the application information associated with the #MDB_env. + * + * @param[in] env An environment handle returned by #mdb_env_create() + * @return The pointer set by #mdb_env_set_userctx(). + */ +void *mdb_env_get_userctx(MDB_env *env); + + /** @brief A callback function for most LMDB assert() failures, + * called before printing the message and aborting. + * + * @param[in] env An environment handle returned by #mdb_env_create(). + * @param[in] msg The assertion message, not including newline. + */ +typedef void MDB_assert_func(MDB_env *env, const char *msg); + + /** Set or reset the assert() callback of the environment. + * Disabled if liblmdb is buillt with NDEBUG. + * @note This hack should become obsolete as lmdb's error handling matures. + * @param[in] env An environment handle returned by #mdb_env_create(). + * @param[in] func An #MDB_assert_func function, or 0. + * @return A non-zero error value on failure and 0 on success. + */ +int mdb_env_set_assert(MDB_env *env, MDB_assert_func *func); + + /** @brief Create a transaction for use with the environment. + * + * The transaction handle may be discarded using #mdb_txn_abort() or #mdb_txn_commit(). + * @note A transaction and its cursors must only be used by a single + * thread, and a thread may only have a single transaction at a time. + * If #MDB_NOTLS is in use, this does not apply to read-only transactions. + * @note Cursors may not span transactions. + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[in] parent If this parameter is non-NULL, the new transaction + * will be a nested transaction, with the transaction indicated by \b parent + * as its parent. Transactions may be nested to any level. A parent + * transaction and its cursors may not issue any other operations than + * mdb_txn_commit and mdb_txn_abort while it has active child transactions. + * @param[in] flags Special options for this transaction. This parameter + * must be set to 0 or by bitwise OR'ing together one or more of the + * values described here. + * <ul> + * <li>#MDB_RDONLY + * This transaction will not perform any write operations. + * </ul> + * @param[out] txn Address where the new #MDB_txn handle will be stored + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>#MDB_PANIC - a fatal error occurred earlier and the environment + * must be shut down. + * <li>#MDB_MAP_RESIZED - another process wrote data beyond this MDB_env's + * mapsize and this environment's map must be resized as well. + * See #mdb_env_set_mapsize(). + * <li>#MDB_READERS_FULL - a read-only transaction was requested and + * the reader lock table is full. See #mdb_env_set_maxreaders(). + * <li>ENOMEM - out of memory. + * </ul> + */ +int mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **txn); + + /** @brief Returns the transaction's #MDB_env + * + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + */ +MDB_env *mdb_txn_env(MDB_txn *txn); + + /** @brief Commit all the operations of a transaction into the database. + * + * The transaction handle is freed. It and its cursors must not be used + * again after this call, except with #mdb_cursor_renew(). + * @note Earlier documentation incorrectly said all cursors would be freed. + * Only write-transactions free cursors. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>EINVAL - an invalid parameter was specified. + * <li>ENOSPC - no more disk space. + * <li>EIO - a low-level I/O error occurred while writing. + * <li>ENOMEM - out of memory. + * </ul> + */ +int mdb_txn_commit(MDB_txn *txn); + + /** @brief Abandon all the operations of the transaction instead of saving them. + * + * The transaction handle is freed. It and its cursors must not be used + * again after this call, except with #mdb_cursor_renew(). + * @note Earlier documentation incorrectly said all cursors would be freed. + * Only write-transactions free cursors. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + */ +void mdb_txn_abort(MDB_txn *txn); + + /** @brief Reset a read-only transaction. + * + * Abort the transaction like #mdb_txn_abort(), but keep the transaction + * handle. #mdb_txn_renew() may reuse the handle. This saves allocation + * overhead if the process will start a new read-only transaction soon, + * and also locking overhead if #MDB_NOTLS is in use. The reader table + * lock is released, but the table slot stays tied to its thread or + * #MDB_txn. Use mdb_txn_abort() to discard a reset handle, and to free + * its lock table slot if MDB_NOTLS is in use. + * Cursors opened within the transaction must not be used + * again after this call, except with #mdb_cursor_renew(). + * Reader locks generally don't interfere with writers, but they keep old + * versions of database pages allocated. Thus they prevent the old pages + * from being reused when writers commit new data, and so under heavy load + * the database size may grow much more rapidly than otherwise. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + */ +void mdb_txn_reset(MDB_txn *txn); + + /** @brief Renew a read-only transaction. + * + * This acquires a new reader lock for a transaction handle that had been + * released by #mdb_txn_reset(). It must be called before a reset transaction + * may be used again. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>#MDB_PANIC - a fatal error occurred earlier and the environment + * must be shut down. + * <li>EINVAL - an invalid parameter was specified. + * </ul> + */ +int mdb_txn_renew(MDB_txn *txn); + +/** Compat with version <= 0.9.4, avoid clash with libmdb from MDB Tools project */ +#define mdb_open(txn,name,flags,dbi) mdb_dbi_open(txn,name,flags,dbi) +/** Compat with version <= 0.9.4, avoid clash with libmdb from MDB Tools project */ +#define mdb_close(env,dbi) mdb_dbi_close(env,dbi) + + /** @brief Open a database in the environment. + * + * A database handle denotes the name and parameters of a database, + * independently of whether such a database exists. + * The database handle may be discarded by calling #mdb_dbi_close(). + * The old database handle is returned if the database was already open. + * The handle may only be closed once. + * The database handle will be private to the current transaction until + * the transaction is successfully committed. If the transaction is + * aborted the handle will be closed automatically. + * After a successful commit the + * handle will reside in the shared environment, and may be used + * by other transactions. This function must not be called from + * multiple concurrent transactions. A transaction that uses this function + * must finish (either commit or abort) before any other transaction may + * use this function. + * + * To use named databases (with name != NULL), #mdb_env_set_maxdbs() + * must be called before opening the environment. Database names + * are kept as keys in the unnamed database. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] name The name of the database to open. If only a single + * database is needed in the environment, this value may be NULL. + * @param[in] flags Special options for this database. This parameter + * must be set to 0 or by bitwise OR'ing together one or more of the + * values described here. + * <ul> + * <li>#MDB_REVERSEKEY + * Keys are strings to be compared in reverse order, from the end + * of the strings to the beginning. By default, Keys are treated as strings and + * compared from beginning to end. + * <li>#MDB_DUPSORT + * Duplicate keys may be used in the database. (Or, from another perspective, + * keys may have multiple data items, stored in sorted order.) By default + * keys must be unique and may have only a single data item. + * <li>#MDB_INTEGERKEY + * Keys are binary integers in native byte order. Setting this option + * requires all keys to be the same size, typically sizeof(int) + * or sizeof(size_t). + * <li>#MDB_DUPFIXED + * This flag may only be used in combination with #MDB_DUPSORT. This option + * tells the library that the data items for this database are all the same + * size, which allows further optimizations in storage and retrieval. When + * all data items are the same size, the #MDB_GET_MULTIPLE and #MDB_NEXT_MULTIPLE + * cursor operations may be used to retrieve multiple items at once. + * <li>#MDB_INTEGERDUP + * This option specifies that duplicate data items are also integers, and + * should be sorted as such. + * <li>#MDB_REVERSEDUP + * This option specifies that duplicate data items should be compared as + * strings in reverse order. + * <li>#MDB_CREATE + * Create the named database if it doesn't exist. This option is not + * allowed in a read-only transaction or a read-only environment. + * </ul> + * @param[out] dbi Address where the new #MDB_dbi handle will be stored + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>#MDB_NOTFOUND - the specified database doesn't exist in the environment + * and #MDB_CREATE was not specified. + * <li>#MDB_DBS_FULL - too many databases have been opened. See #mdb_env_set_maxdbs(). + * </ul> + */ +int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *dbi); + + /** @brief Retrieve statistics for a database. + * + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[out] stat The address of an #MDB_stat structure + * where the statistics will be copied + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>EINVAL - an invalid parameter was specified. + * </ul> + */ +int mdb_stat(MDB_txn *txn, MDB_dbi dbi, MDB_stat *stat); + + /** @brief Retrieve the DB flags for a database handle. + * + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[out] flags Address where the flags will be returned. + * @return A non-zero error value on failure and 0 on success. + */ +int mdb_dbi_flags(MDB_txn *txn, MDB_dbi dbi, unsigned int *flags); + + /** @brief Close a database handle. Normally unnecessary. Use with care: + * + * This call is not mutex protected. Handles should only be closed by + * a single thread, and only if no other threads are going to reference + * the database handle or one of its cursors any further. Do not close + * a handle if an existing transaction has modified its database. + * Doing so can cause misbehavior from database corruption to errors + * like MDB_BAD_VALSIZE (since the DB name is gone). + * + * Closing a database handle is not necessary, but lets #mdb_dbi_open() + * reuse the handle value. Usually it's better to set a bigger + * #mdb_env_set_maxdbs(), unless that value would be large. + * + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + */ +void mdb_dbi_close(MDB_env *env, MDB_dbi dbi); + + /** @brief Empty or delete+close a database. + * + * See #mdb_dbi_close() for restrictions about closing the DB handle. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[in] del 0 to empty the DB, 1 to delete it from the + * environment and close the DB handle. + * @return A non-zero error value on failure and 0 on success. + */ +int mdb_drop(MDB_txn *txn, MDB_dbi dbi, int del); + + /** @brief Set a custom key comparison function for a database. + * + * The comparison function is called whenever it is necessary to compare a + * key specified by the application with a key currently stored in the database. + * If no comparison function is specified, and no special key flags were specified + * with #mdb_dbi_open(), the keys are compared lexically, with shorter keys collating + * before longer keys. + * @warning This function must be called before any data access functions are used, + * otherwise data corruption may occur. The same comparison function must be used by every + * program accessing the database, every time the database is used. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[in] cmp A #MDB_cmp_func function + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>EINVAL - an invalid parameter was specified. + * </ul> + */ +int mdb_set_compare(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp); + + /** @brief Set a custom data comparison function for a #MDB_DUPSORT database. + * + * This comparison function is called whenever it is necessary to compare a data + * item specified by the application with a data item currently stored in the database. + * This function only takes effect if the database was opened with the #MDB_DUPSORT + * flag. + * If no comparison function is specified, and no special key flags were specified + * with #mdb_dbi_open(), the data items are compared lexically, with shorter items collating + * before longer items. + * @warning This function must be called before any data access functions are used, + * otherwise data corruption may occur. The same comparison function must be used by every + * program accessing the database, every time the database is used. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[in] cmp A #MDB_cmp_func function + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>EINVAL - an invalid parameter was specified. + * </ul> + */ +int mdb_set_dupsort(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp); + + /** @brief Set a relocation function for a #MDB_FIXEDMAP database. + * + * @todo The relocation function is called whenever it is necessary to move the data + * of an item to a different position in the database (e.g. through tree + * balancing operations, shifts as a result of adds or deletes, etc.). It is + * intended to allow address/position-dependent data items to be stored in + * a database in an environment opened with the #MDB_FIXEDMAP option. + * Currently the relocation feature is unimplemented and setting + * this function has no effect. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[in] rel A #MDB_rel_func function + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>EINVAL - an invalid parameter was specified. + * </ul> + */ +int mdb_set_relfunc(MDB_txn *txn, MDB_dbi dbi, MDB_rel_func *rel); + + /** @brief Set a context pointer for a #MDB_FIXEDMAP database's relocation function. + * + * See #mdb_set_relfunc and #MDB_rel_func for more details. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[in] ctx An arbitrary pointer for whatever the application needs. + * It will be passed to the callback function set by #mdb_set_relfunc + * as its \b relctx parameter whenever the callback is invoked. + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>EINVAL - an invalid parameter was specified. + * </ul> + */ +int mdb_set_relctx(MDB_txn *txn, MDB_dbi dbi, void *ctx); + + /** @brief Get items from a database. + * + * This function retrieves key/data pairs from the database. The address + * and length of the data associated with the specified \b key are returned + * in the structure to which \b data refers. + * If the database supports duplicate keys (#MDB_DUPSORT) then the + * first data item for the key will be returned. Retrieval of other + * items requires the use of #mdb_cursor_get(). + * + * @note The memory pointed to by the returned values is owned by the + * database. The caller need not dispose of the memory, and may not + * modify it in any way. For values returned in a read-only transaction + * any modification attempts will cause a SIGSEGV. + * @note Values returned from the database are valid only until a + * subsequent update operation, or the end of the transaction. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[in] key The key to search for in the database + * @param[out] data The data corresponding to the key + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>#MDB_NOTFOUND - the key was not in the database. + * <li>EINVAL - an invalid parameter was specified. + * </ul> + */ +int mdb_get(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data); + + /** @brief Store items into a database. + * + * This function stores key/data pairs in the database. The default behavior + * is to enter the new key/data pair, replacing any previously existing key + * if duplicates are disallowed, or adding a duplicate data item if + * duplicates are allowed (#MDB_DUPSORT). + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[in] key The key to store in the database + * @param[in,out] data The data to store + * @param[in] flags Special options for this operation. This parameter + * must be set to 0 or by bitwise OR'ing together one or more of the + * values described here. + * <ul> + * <li>#MDB_NODUPDATA - enter the new key/data pair only if it does not + * already appear in the database. This flag may only be specified + * if the database was opened with #MDB_DUPSORT. The function will + * return #MDB_KEYEXIST if the key/data pair already appears in the + * database. + * <li>#MDB_NOOVERWRITE - enter the new key/data pair only if the key + * does not already appear in the database. The function will return + * #MDB_KEYEXIST if the key already appears in the database, even if + * the database supports duplicates (#MDB_DUPSORT). The \b data + * parameter will be set to point to the existing item. + * <li>#MDB_RESERVE - reserve space for data of the given size, but + * don't copy the given data. Instead, return a pointer to the + * reserved space, which the caller can fill in later - before + * the next update operation or the transaction ends. This saves + * an extra memcpy if the data is being generated later. + * LMDB does nothing else with this memory, the caller is expected + * to modify all of the space requested. + * <li>#MDB_APPEND - append the given key/data pair to the end of the + * database. No key comparisons are performed. This option allows + * fast bulk loading when keys are already known to be in the + * correct order. Loading unsorted keys with this flag will cause + * data corruption. + * <li>#MDB_APPENDDUP - as above, but for sorted dup data. + * </ul> + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>#MDB_MAP_FULL - the database is full, see #mdb_env_set_mapsize(). + * <li>#MDB_TXN_FULL - the transaction has too many dirty pages. + * <li>EACCES - an attempt was made to write in a read-only transaction. + * <li>EINVAL - an invalid parameter was specified. + * </ul> + */ +int mdb_put(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data, + unsigned int flags); + + /** @brief Delete items from a database. + * + * This function removes key/data pairs from the database. + * If the database does not support sorted duplicate data items + * (#MDB_DUPSORT) the data parameter is ignored. + * If the database supports sorted duplicates and the data parameter + * is NULL, all of the duplicate data items for the key will be + * deleted. Otherwise, if the data parameter is non-NULL + * only the matching data item will be deleted. + * This function will return #MDB_NOTFOUND if the specified key/data + * pair is not in the database. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[in] key The key to delete from the database + * @param[in] data The data to delete + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>EACCES - an attempt was made to write in a read-only transaction. + * <li>EINVAL - an invalid parameter was specified. + * </ul> + */ +int mdb_del(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data); + + /** @brief Create a cursor handle. + * + * A cursor is associated with a specific transaction and database. + * A cursor cannot be used when its database handle is closed. Nor + * when its transaction has ended, except with #mdb_cursor_renew(). + * It can be discarded with #mdb_cursor_close(). + * A cursor in a write-transaction can be closed before its transaction + * ends, and will otherwise be closed when its transaction ends. + * A cursor in a read-only transaction must be closed explicitly, before + * or after its transaction ends. It can be reused with + * #mdb_cursor_renew() before finally closing it. + * @note Earlier documentation said that cursors in every transaction + * were closed when the transaction committed or aborted. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[out] cursor Address where the new #MDB_cursor handle will be stored + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>EINVAL - an invalid parameter was specified. + * </ul> + */ +int mdb_cursor_open(MDB_txn *txn, MDB_dbi dbi, MDB_cursor **cursor); + + /** @brief Close a cursor handle. + * + * The cursor handle will be freed and must not be used again after this call. + * Its transaction must still be live if it is a write-transaction. + * @param[in] cursor A cursor handle returned by #mdb_cursor_open() + */ +void mdb_cursor_close(MDB_cursor *cursor); + + /** @brief Renew a cursor handle. + * + * A cursor is associated with a specific transaction and database. + * Cursors that are only used in read-only + * transactions may be re-used, to avoid unnecessary malloc/free overhead. + * The cursor may be associated with a new read-only transaction, and + * referencing the same database handle as it was created with. + * This may be done whether the previous transaction is live or dead. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] cursor A cursor handle returned by #mdb_cursor_open() + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>EINVAL - an invalid parameter was specified. + * </ul> + */ +int mdb_cursor_renew(MDB_txn *txn, MDB_cursor *cursor); + + /** @brief Return the cursor's transaction handle. + * + * @param[in] cursor A cursor handle returned by #mdb_cursor_open() + */ +MDB_txn *mdb_cursor_txn(MDB_cursor *cursor); + + /** @brief Return the cursor's database handle. + * + * @param[in] cursor A cursor handle returned by #mdb_cursor_open() + */ +MDB_dbi mdb_cursor_dbi(MDB_cursor *cursor); + + /** @brief Retrieve by cursor. + * + * This function retrieves key/data pairs from the database. The address and length + * of the key are returned in the object to which \b key refers (except for the + * case of the #MDB_SET option, in which the \b key object is unchanged), and + * the address and length of the data are returned in the object to which \b data + * refers. + * See #mdb_get() for restrictions on using the output values. + * @param[in] cursor A cursor handle returned by #mdb_cursor_open() + * @param[in,out] key The key for a retrieved item + * @param[in,out] data The data of a retrieved item + * @param[in] op A cursor operation #MDB_cursor_op + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>#MDB_NOTFOUND - no matching key found. + * <li>EINVAL - an invalid parameter was specified. + * </ul> + */ +int mdb_cursor_get(MDB_cursor *cursor, MDB_val *key, MDB_val *data, + MDB_cursor_op op); + + /** @brief Store by cursor. + * + * This function stores key/data pairs into the database. + * The cursor is positioned at the new item, or on failure usually near it. + * @note Earlier documentation incorrectly said errors would leave the + * state of the cursor unchanged. + * @param[in] cursor A cursor handle returned by #mdb_cursor_open() + * @param[in] key The key operated on. + * @param[in] data The data operated on. + * @param[in] flags Options for this operation. This parameter + * must be set to 0 or one of the values described here. + * <ul> + * <li>#MDB_CURRENT - replace the item at the current cursor position. + * The \b key parameter must still be provided, and must match it. + * If using sorted duplicates (#MDB_DUPSORT) the data item must still + * sort into the same place. This is intended to be used when the + * new data is the same size as the old. Otherwise it will simply + * perform a delete of the old record followed by an insert. + * <li>#MDB_NODUPDATA - enter the new key/data pair only if it does not + * already appear in the database. This flag may only be specified + * if the database was opened with #MDB_DUPSORT. The function will + * return #MDB_KEYEXIST if the key/data pair already appears in the + * database. + * <li>#MDB_NOOVERWRITE - enter the new key/data pair only if the key + * does not already appear in the database. The function will return + * #MDB_KEYEXIST if the key already appears in the database, even if + * the database supports duplicates (#MDB_DUPSORT). + * <li>#MDB_RESERVE - reserve space for data of the given size, but + * don't copy the given data. Instead, return a pointer to the + * reserved space, which the caller can fill in later. This saves + * an extra memcpy if the data is being generated later. + * <li>#MDB_APPEND - append the given key/data pair to the end of the + * database. No key comparisons are performed. This option allows + * fast bulk loading when keys are already known to be in the + * correct order. Loading unsorted keys with this flag will cause + * data corruption. + * <li>#MDB_APPENDDUP - as above, but for sorted dup data. + * <li>#MDB_MULTIPLE - store multiple contiguous data elements in a + * single request. This flag may only be specified if the database + * was opened with #MDB_DUPFIXED. The \b data argument must be an + * array of two MDB_vals. The mv_size of the first MDB_val must be + * the size of a single data element. The mv_data of the first MDB_val + * must point to the beginning of the array of contiguous data elements. + * The mv_size of the second MDB_val must be the count of the number + * of data elements to store. On return this field will be set to + * the count of the number of elements actually written. The mv_data + * of the second MDB_val is unused. + * </ul> + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>#MDB_MAP_FULL - the database is full, see #mdb_env_set_mapsize(). + * <li>#MDB_TXN_FULL - the transaction has too many dirty pages. + * <li>EACCES - an attempt was made to modify a read-only database. + * <li>EINVAL - an invalid parameter was specified. + * </ul> + */ +int mdb_cursor_put(MDB_cursor *cursor, MDB_val *key, MDB_val *data, + unsigned int flags); + + /** @brief Delete current key/data pair + * + * This function deletes the key/data pair to which the cursor refers. + * @param[in] cursor A cursor handle returned by #mdb_cursor_open() + * @param[in] flags Options for this operation. This parameter + * must be set to 0 or one of the values described here. + * <ul> + * <li>#MDB_NODUPDATA - delete all of the data items for the current key. + * This flag may only be specified if the database was opened with #MDB_DUPSORT. + * </ul> + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>EACCES - an attempt was made to modify a read-only database. + * <li>EINVAL - an invalid parameter was specified. + * </ul> + */ +int mdb_cursor_del(MDB_cursor *cursor, unsigned int flags); + + /** @brief Return count of duplicates for current key. + * + * This call is only valid on databases that support sorted duplicate + * data items #MDB_DUPSORT. + * @param[in] cursor A cursor handle returned by #mdb_cursor_open() + * @param[out] countp Address where the count will be stored + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>EINVAL - cursor is not initialized, or an invalid parameter was specified. + * </ul> + */ +int mdb_cursor_count(MDB_cursor *cursor, size_t *countp); + + /** @brief Compare two data items according to a particular database. + * + * This returns a comparison as if the two data items were keys in the + * specified database. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[in] a The first item to compare + * @param[in] b The second item to compare + * @return < 0 if a < b, 0 if a == b, > 0 if a > b + */ +int mdb_cmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b); + + /** @brief Compare two data items according to a particular database. + * + * This returns a comparison as if the two items were data items of + * the specified database. The database must have the #MDB_DUPSORT flag. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[in] a The first item to compare + * @param[in] b The second item to compare + * @return < 0 if a < b, 0 if a == b, > 0 if a > b + */ +int mdb_dcmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b); + + /** @brief A callback function used to print a message from the library. + * + * @param[in] msg The string to be printed. + * @param[in] ctx An arbitrary context pointer for the callback. + * @return < 0 on failure, >= 0 on success. + */ +typedef int (MDB_msg_func)(const char *msg, void *ctx); + + /** @brief Dump the entries in the reader lock table. + * + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[in] func A #MDB_msg_func function + * @param[in] ctx Anything the message function needs + * @return < 0 on failure, >= 0 on success. + */ +int mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx); + + /** @brief Check for stale entries in the reader lock table. + * + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[out] dead Number of stale slots that were cleared + * @return 0 on success, non-zero on failure. + */ +int mdb_reader_check(MDB_env *env, int *dead); +/** @} */ + +#ifdef __cplusplus +} +#endif +/** @page tools LMDB Command Line Tools + The following describes the command line tools that are available for LMDB. + \li \ref mdb_copy_1 + \li \ref mdb_dump_1 + \li \ref mdb_load_1 + \li \ref mdb_stat_1 +*/ + +#endif /* _LMDB_H_ */ diff --git a/external/db_drivers/liblmdb32/mdb.c b/external/db_drivers/liblmdb32/mdb.c new file mode 100644 index 000000000..63fece3d7 --- /dev/null +++ b/external/db_drivers/liblmdb32/mdb.c @@ -0,0 +1,9572 @@ +/** @file mdb.c + * @brief Lightning memory-mapped database library + * + * A Btree-based database management library modeled loosely on the + * BerkeleyDB API, but much simplified. + */ +/* + * Copyright 2011-2014 Howard Chu, Symas Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>. + * + * This code is derived from btree.c written by Martin Hedenfalk. + * + * Copyright (c) 2009, 2010 Martin Hedenfalk <martin@bzero.se> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE 1 +#endif +#ifdef _WIN32 +#include <malloc.h> +#include <windows.h> +/** getpid() returns int; MinGW defines pid_t but MinGW64 typedefs it + * as int64 which is wrong. MSVC doesn't define it at all, so just + * don't use it. + */ +#define MDB_PID_T int +#define MDB_THR_T DWORD +#include <sys/types.h> +#include <sys/stat.h> +#ifdef __GNUC__ +# include <sys/param.h> +#else +# define LITTLE_ENDIAN 1234 +# define BIG_ENDIAN 4321 +# define BYTE_ORDER LITTLE_ENDIAN +# ifndef SSIZE_MAX +# define SSIZE_MAX INT_MAX +# endif +#endif +#else +#include <sys/types.h> +#include <sys/stat.h> +#define MDB_PID_T pid_t +#define MDB_THR_T pthread_t +#include <sys/param.h> +#include <sys/uio.h> +#include <sys/mman.h> +#ifdef HAVE_SYS_FILE_H +#include <sys/file.h> +#endif +#include <fcntl.h> +#endif + +#if defined(__mips) && defined(__linux) +/* MIPS has cache coherency issues, requires explicit cache control */ +#include <asm/cachectl.h> +extern int cacheflush(char *addr, int nbytes, int cache); +#define CACHEFLUSH(addr, bytes, cache) cacheflush(addr, bytes, cache) +#else +#define CACHEFLUSH(addr, bytes, cache) +#endif + + +#include <errno.h> +#include <limits.h> +#include <stddef.h> +#include <inttypes.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> + +#if defined(__sun) || defined(ANDROID) +/* Most platforms have posix_memalign, older may only have memalign */ +#define HAVE_MEMALIGN 1 +#include <malloc.h> +#endif + +#if !(defined(BYTE_ORDER) || defined(__BYTE_ORDER)) +#include <netinet/in.h> +#include <resolv.h> /* defines BYTE_ORDER on HPUX and Solaris */ +#endif + +#if defined(__APPLE__) || defined (BSD) +# define MDB_USE_POSIX_SEM 1 +# define MDB_FDATASYNC fsync +#elif defined(ANDROID) +# define MDB_FDATASYNC fsync +#endif + +#ifndef _WIN32 +#include <pthread.h> +#ifdef MDB_USE_POSIX_SEM +# define MDB_USE_HASH 1 +#include <semaphore.h> +#endif +#endif + +#ifdef USE_VALGRIND +#include <valgrind/memcheck.h> +#define VGMEMP_CREATE(h,r,z) VALGRIND_CREATE_MEMPOOL(h,r,z) +#define VGMEMP_ALLOC(h,a,s) VALGRIND_MEMPOOL_ALLOC(h,a,s) +#define VGMEMP_FREE(h,a) VALGRIND_MEMPOOL_FREE(h,a) +#define VGMEMP_DESTROY(h) VALGRIND_DESTROY_MEMPOOL(h) +#define VGMEMP_DEFINED(a,s) VALGRIND_MAKE_MEM_DEFINED(a,s) +#else +#define VGMEMP_CREATE(h,r,z) +#define VGMEMP_ALLOC(h,a,s) +#define VGMEMP_FREE(h,a) +#define VGMEMP_DESTROY(h) +#define VGMEMP_DEFINED(a,s) +#endif + +#ifndef BYTE_ORDER +# if (defined(_LITTLE_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(_LITTLE_ENDIAN) && defined(_BIG_ENDIAN)) +/* Solaris just defines one or the other */ +# define LITTLE_ENDIAN 1234 +# define BIG_ENDIAN 4321 +# ifdef _LITTLE_ENDIAN +# define BYTE_ORDER LITTLE_ENDIAN +# else +# define BYTE_ORDER BIG_ENDIAN +# endif +# else +# define BYTE_ORDER __BYTE_ORDER +# endif +#endif + +#ifndef LITTLE_ENDIAN +#define LITTLE_ENDIAN __LITTLE_ENDIAN +#endif +#ifndef BIG_ENDIAN +#define BIG_ENDIAN __BIG_ENDIAN +#endif + +#if defined(__i386) || defined(__x86_64) || defined(_M_IX86) +#define MISALIGNED_OK 1 +#endif + +#include "lmdb.h" +#include "midl.h" + +#if (BYTE_ORDER == LITTLE_ENDIAN) == (BYTE_ORDER == BIG_ENDIAN) +# error "Unknown or unsupported endianness (BYTE_ORDER)" +#elif (-6 & 5) || CHAR_BIT != 8 || UINT_MAX < 0xffffffff || ULONG_MAX % 0xFFFF +# error "Two's complement, reasonably sized integer types, please" +#endif + +#ifdef __GNUC__ +/** Put infrequently used env functions in separate section */ +# ifdef __APPLE__ +# define ESECT __attribute__ ((section("__TEXT,text_env"))) +# else +# define ESECT __attribute__ ((section("text_env"))) +# endif +#else +#define ESECT +#endif + +/** @defgroup internal LMDB Internals + * @{ + */ +/** @defgroup compat Compatibility Macros + * A bunch of macros to minimize the amount of platform-specific ifdefs + * needed throughout the rest of the code. When the features this library + * needs are similar enough to POSIX to be hidden in a one-or-two line + * replacement, this macro approach is used. + * @{ + */ + + /** Features under development */ +#ifndef MDB_DEVEL +#define MDB_DEVEL 0 +#endif + + /** Wrapper around __func__, which is a C99 feature */ +#if __STDC_VERSION__ >= 199901L +# define mdb_func_ __func__ +#elif __GNUC__ >= 2 || _MSC_VER >= 1300 +# define mdb_func_ __FUNCTION__ +#else +/* If a debug message says <mdb_unknown>(), update the #if statements above */ +# define mdb_func_ "<mdb_unknown>" +#endif + +#ifdef _WIN32 +#define MDB_USE_HASH 1 +#define MDB_PIDLOCK 0 +#define THREAD_RET DWORD +#define pthread_t HANDLE +#define pthread_mutex_t HANDLE +#define pthread_cond_t HANDLE +#define pthread_key_t DWORD +#define pthread_self() GetCurrentThreadId() +#define pthread_key_create(x,y) \ + ((*(x) = TlsAlloc()) == TLS_OUT_OF_INDEXES ? ErrCode() : 0) +#define pthread_key_delete(x) TlsFree(x) +#define pthread_getspecific(x) TlsGetValue(x) +#define pthread_setspecific(x,y) (TlsSetValue(x,y) ? 0 : ErrCode()) +#define pthread_mutex_unlock(x) ReleaseMutex(*x) +#define pthread_mutex_lock(x) WaitForSingleObject(*x, INFINITE) +#define pthread_cond_signal(x) SetEvent(*x) +#define pthread_cond_wait(cond,mutex) do{SignalObjectAndWait(*mutex, *cond, INFINITE, FALSE); WaitForSingleObject(*mutex, INFINITE);}while(0) +#define THREAD_CREATE(thr,start,arg) thr=CreateThread(NULL,0,start,arg,0,NULL) +#define THREAD_FINISH(thr) WaitForSingleObject(thr, INFINITE) +#define LOCK_MUTEX_R(env) pthread_mutex_lock(&(env)->me_rmutex) +#define UNLOCK_MUTEX_R(env) pthread_mutex_unlock(&(env)->me_rmutex) +#define LOCK_MUTEX_W(env) pthread_mutex_lock(&(env)->me_wmutex) +#define UNLOCK_MUTEX_W(env) pthread_mutex_unlock(&(env)->me_wmutex) +#define getpid() GetCurrentProcessId() +#define MDB_FDATASYNC(fd) (!FlushFileBuffers(fd)) +#define MDB_MSYNC(addr,len,flags) (!FlushViewOfFile(addr,len)) +#define ErrCode() GetLastError() +#define GET_PAGESIZE(x) {SYSTEM_INFO si; GetSystemInfo(&si); (x) = si.dwPageSize;} +#define close(fd) (CloseHandle(fd) ? 0 : -1) +#define munmap(ptr,len) UnmapViewOfFile(ptr) +#ifdef PROCESS_QUERY_LIMITED_INFORMATION +#define MDB_PROCESS_QUERY_LIMITED_INFORMATION PROCESS_QUERY_LIMITED_INFORMATION +#else +#define MDB_PROCESS_QUERY_LIMITED_INFORMATION 0x1000 +#endif +#define Z "I" +#else +#define THREAD_RET void * +#define THREAD_CREATE(thr,start,arg) pthread_create(&thr,NULL,start,arg) +#define THREAD_FINISH(thr) pthread_join(thr,NULL) +#define Z "z" /**< printf format modifier for size_t */ + + /** For MDB_LOCK_FORMAT: True if readers take a pid lock in the lockfile */ +#define MDB_PIDLOCK 1 + +#ifdef MDB_USE_POSIX_SEM + +#define LOCK_MUTEX_R(env) mdb_sem_wait((env)->me_rmutex) +#define UNLOCK_MUTEX_R(env) sem_post((env)->me_rmutex) +#define LOCK_MUTEX_W(env) mdb_sem_wait((env)->me_wmutex) +#define UNLOCK_MUTEX_W(env) sem_post((env)->me_wmutex) + +static int +mdb_sem_wait(sem_t *sem) +{ + int rc; + while ((rc = sem_wait(sem)) && (rc = errno) == EINTR) ; + return rc; +} + +#else + /** Lock the reader mutex. + */ +#define LOCK_MUTEX_R(env) pthread_mutex_lock(&(env)->me_txns->mti_mutex) + /** Unlock the reader mutex. + */ +#define UNLOCK_MUTEX_R(env) pthread_mutex_unlock(&(env)->me_txns->mti_mutex) + + /** Lock the writer mutex. + * Only a single write transaction is allowed at a time. Other writers + * will block waiting for this mutex. + */ +#define LOCK_MUTEX_W(env) pthread_mutex_lock(&(env)->me_txns->mti_wmutex) + /** Unlock the writer mutex. + */ +#define UNLOCK_MUTEX_W(env) pthread_mutex_unlock(&(env)->me_txns->mti_wmutex) +#endif /* MDB_USE_POSIX_SEM */ + + /** Get the error code for the last failed system function. + */ +#define ErrCode() errno + + /** An abstraction for a file handle. + * On POSIX systems file handles are small integers. On Windows + * they're opaque pointers. + */ +#define HANDLE int + + /** A value for an invalid file handle. + * Mainly used to initialize file variables and signify that they are + * unused. + */ +#define INVALID_HANDLE_VALUE (-1) + + /** Get the size of a memory page for the system. + * This is the basic size that the platform's memory manager uses, and is + * fundamental to the use of memory-mapped files. + */ +#define GET_PAGESIZE(x) ((x) = sysconf(_SC_PAGE_SIZE)) +#endif + +#if defined(_WIN32) || defined(MDB_USE_POSIX_SEM) +#define MNAME_LEN 32 +#else +#define MNAME_LEN (sizeof(pthread_mutex_t)) +#endif + +/** @} */ + +#ifndef _WIN32 +/** A flag for opening a file and requesting synchronous data writes. + * This is only used when writing a meta page. It's not strictly needed; + * we could just do a normal write and then immediately perform a flush. + * But if this flag is available it saves us an extra system call. + * + * @note If O_DSYNC is undefined but exists in /usr/include, + * preferably set some compiler flag to get the definition. + * Otherwise compile with the less efficient -DMDB_DSYNC=O_SYNC. + */ +#ifndef MDB_DSYNC +# define MDB_DSYNC O_DSYNC +#endif +#endif + +/** Function for flushing the data of a file. Define this to fsync + * if fdatasync() is not supported. + */ +#ifndef MDB_FDATASYNC +# define MDB_FDATASYNC fdatasync +#endif + +#ifndef MDB_MSYNC +# define MDB_MSYNC(addr,len,flags) msync(addr,len,flags) +#endif + +#ifndef MS_SYNC +#define MS_SYNC 1 +#endif + +#ifndef MS_ASYNC +#define MS_ASYNC 0 +#endif + + /** A page number in the database. + * Note that 64 bit page numbers are overkill, since pages themselves + * already represent 12-13 bits of addressable memory, and the OS will + * always limit applications to a maximum of 63 bits of address space. + * + * @note In the #MDB_node structure, we only store 48 bits of this value, + * which thus limits us to only 60 bits of addressable data. + */ +typedef MDB_ID pgno_t; + + /** A transaction ID. + * See struct MDB_txn.mt_txnid for details. + */ +typedef MDB_ID txnid_t; + +/** @defgroup debug Debug Macros + * @{ + */ +#ifndef MDB_DEBUG + /** Enable debug output. Needs variable argument macros (a C99 feature). + * Set this to 1 for copious tracing. Set to 2 to add dumps of all IDLs + * read from and written to the database (used for free space management). + */ +#define MDB_DEBUG 0 +#endif + +#if MDB_DEBUG +static int mdb_debug; +static txnid_t mdb_debug_start; + + /** Print a debug message with printf formatting. + * Requires double parenthesis around 2 or more args. + */ +# define DPRINTF(args) ((void) ((mdb_debug) && DPRINTF0 args)) +# define DPRINTF0(fmt, ...) \ + fprintf(stderr, "%s:%d " fmt "\n", mdb_func_, __LINE__, __VA_ARGS__) +#else +# define DPRINTF(args) ((void) 0) +#endif + /** Print a debug string. + * The string is printed literally, with no format processing. + */ +#define DPUTS(arg) DPRINTF(("%s", arg)) + /** Debuging output value of a cursor DBI: Negative in a sub-cursor. */ +#define DDBI(mc) \ + (((mc)->mc_flags & C_SUB) ? -(int)(mc)->mc_dbi : (int)(mc)->mc_dbi) +/** @} */ + + /** @brief The maximum size of a database page. + * + * It is 32k or 64k, since value-PAGEBASE must fit in + * #MDB_page.%mp_upper. + * + * LMDB will use database pages < OS pages if needed. + * That causes more I/O in write transactions: The OS must + * know (read) the whole page before writing a partial page. + * + * Note that we don't currently support Huge pages. On Linux, + * regular data files cannot use Huge pages, and in general + * Huge pages aren't actually pageable. We rely on the OS + * demand-pager to read our data and page it out when memory + * pressure from other processes is high. So until OSs have + * actual paging support for Huge pages, they're not viable. + */ +#define MAX_PAGESIZE (PAGEBASE ? 0x10000 : 0x8000) + + /** The minimum number of keys required in a database page. + * Setting this to a larger value will place a smaller bound on the + * maximum size of a data item. Data items larger than this size will + * be pushed into overflow pages instead of being stored directly in + * the B-tree node. This value used to default to 4. With a page size + * of 4096 bytes that meant that any item larger than 1024 bytes would + * go into an overflow page. That also meant that on average 2-3KB of + * each overflow page was wasted space. The value cannot be lower than + * 2 because then there would no longer be a tree structure. With this + * value, items larger than 2KB will go into overflow pages, and on + * average only 1KB will be wasted. + */ +#define MDB_MINKEYS 2 + + /** A stamp that identifies a file as an LMDB file. + * There's nothing special about this value other than that it is easily + * recognizable, and it will reflect any byte order mismatches. + */ +#define MDB_MAGIC 0xBEEFC0DE + + /** The version number for a database's datafile format. */ +#define MDB_DATA_VERSION ((MDB_DEVEL) ? 999 : 1) + /** The version number for a database's lockfile format. */ +#define MDB_LOCK_VERSION 1 + + /** @brief The max size of a key we can write, or 0 for dynamic max. + * + * Define this as 0 to compute the max from the page size. 511 + * is default for backwards compat: liblmdb <= 0.9.10 can break + * when modifying a DB with keys/dupsort data bigger than its max. + * #MDB_DEVEL sets the default to 0. + * + * Data items in an #MDB_DUPSORT database are also limited to + * this size, since they're actually keys of a sub-DB. Keys and + * #MDB_DUPSORT data items must fit on a node in a regular page. + */ +#ifndef MDB_MAXKEYSIZE +#define MDB_MAXKEYSIZE ((MDB_DEVEL) ? 0 : 511) +#endif + + /** The maximum size of a key we can write to the environment. */ +#if MDB_MAXKEYSIZE +#define ENV_MAXKEY(env) (MDB_MAXKEYSIZE) +#else +#define ENV_MAXKEY(env) ((env)->me_maxkey) +#endif + + /** @brief The maximum size of a data item. + * + * We only store a 32 bit value for node sizes. + */ +#define MAXDATASIZE 0xffffffffUL + +#if MDB_DEBUG + /** Key size which fits in a #DKBUF. + * @ingroup debug + */ +#define DKBUF_MAXKEYSIZE ((MDB_MAXKEYSIZE) > 0 ? (MDB_MAXKEYSIZE) : 511) + /** A key buffer. + * @ingroup debug + * This is used for printing a hex dump of a key's contents. + */ +#define DKBUF char kbuf[DKBUF_MAXKEYSIZE*2+1] + /** Display a key in hex. + * @ingroup debug + * Invoke a function to display a key in hex. + */ +#define DKEY(x) mdb_dkey(x, kbuf) +#else +#define DKBUF +#define DKEY(x) 0 +#endif + + /** An invalid page number. + * Mainly used to denote an empty tree. + */ +#define P_INVALID (~(pgno_t)0) + + /** Test if the flags \b f are set in a flag word \b w. */ +#define F_ISSET(w, f) (((w) & (f)) == (f)) + + /** Round \b n up to an even number. */ +#define EVEN(n) (((n) + 1U) & -2) /* sign-extending -2 to match n+1U */ + + /** Used for offsets within a single page. + * Since memory pages are typically 4 or 8KB in size, 12-13 bits, + * this is plenty. + */ +typedef uint16_t indx_t; + + /** Default size of memory map. + * This is certainly too small for any actual applications. Apps should always set + * the size explicitly using #mdb_env_set_mapsize(). + */ +#define DEFAULT_MAPSIZE 1048576 + +/** @defgroup readers Reader Lock Table + * Readers don't acquire any locks for their data access. Instead, they + * simply record their transaction ID in the reader table. The reader + * mutex is needed just to find an empty slot in the reader table. The + * slot's address is saved in thread-specific data so that subsequent read + * transactions started by the same thread need no further locking to proceed. + * + * If #MDB_NOTLS is set, the slot address is not saved in thread-specific data. + * + * No reader table is used if the database is on a read-only filesystem, or + * if #MDB_NOLOCK is set. + * + * Since the database uses multi-version concurrency control, readers don't + * actually need any locking. This table is used to keep track of which + * readers are using data from which old transactions, so that we'll know + * when a particular old transaction is no longer in use. Old transactions + * that have discarded any data pages can then have those pages reclaimed + * for use by a later write transaction. + * + * The lock table is constructed such that reader slots are aligned with the + * processor's cache line size. Any slot is only ever used by one thread. + * This alignment guarantees that there will be no contention or cache + * thrashing as threads update their own slot info, and also eliminates + * any need for locking when accessing a slot. + * + * A writer thread will scan every slot in the table to determine the oldest + * outstanding reader transaction. Any freed pages older than this will be + * reclaimed by the writer. The writer doesn't use any locks when scanning + * this table. This means that there's no guarantee that the writer will + * see the most up-to-date reader info, but that's not required for correct + * operation - all we need is to know the upper bound on the oldest reader, + * we don't care at all about the newest reader. So the only consequence of + * reading stale information here is that old pages might hang around a + * while longer before being reclaimed. That's actually good anyway, because + * the longer we delay reclaiming old pages, the more likely it is that a + * string of contiguous pages can be found after coalescing old pages from + * many old transactions together. + * @{ + */ + /** Number of slots in the reader table. + * This value was chosen somewhat arbitrarily. 126 readers plus a + * couple mutexes fit exactly into 8KB on my development machine. + * Applications should set the table size using #mdb_env_set_maxreaders(). + */ +#define DEFAULT_READERS 126 + + /** The size of a CPU cache line in bytes. We want our lock structures + * aligned to this size to avoid false cache line sharing in the + * lock table. + * This value works for most CPUs. For Itanium this should be 128. + */ +#ifndef CACHELINE +#define CACHELINE 64 +#endif + + /** The information we store in a single slot of the reader table. + * In addition to a transaction ID, we also record the process and + * thread ID that owns a slot, so that we can detect stale information, + * e.g. threads or processes that went away without cleaning up. + * @note We currently don't check for stale records. We simply re-init + * the table when we know that we're the only process opening the + * lock file. + */ +typedef struct MDB_rxbody { + /** Current Transaction ID when this transaction began, or (txnid_t)-1. + * Multiple readers that start at the same time will probably have the + * same ID here. Again, it's not important to exclude them from + * anything; all we need to know is which version of the DB they + * started from so we can avoid overwriting any data used in that + * particular version. + */ + txnid_t mrb_txnid; + /** The process ID of the process owning this reader txn. */ + MDB_PID_T mrb_pid; + /** The thread ID of the thread owning this txn. */ + MDB_THR_T mrb_tid; +} MDB_rxbody; + + /** The actual reader record, with cacheline padding. */ +typedef struct MDB_reader { + union { + MDB_rxbody mrx; + /** shorthand for mrb_txnid */ +#define mr_txnid mru.mrx.mrb_txnid +#define mr_pid mru.mrx.mrb_pid +#define mr_tid mru.mrx.mrb_tid + /** cache line alignment */ + char pad[(sizeof(MDB_rxbody)+CACHELINE-1) & ~(CACHELINE-1)]; + } mru; +} MDB_reader; + + /** The header for the reader table. + * The table resides in a memory-mapped file. (This is a different file + * than is used for the main database.) + * + * For POSIX the actual mutexes reside in the shared memory of this + * mapped file. On Windows, mutexes are named objects allocated by the + * kernel; we store the mutex names in this mapped file so that other + * processes can grab them. This same approach is also used on + * MacOSX/Darwin (using named semaphores) since MacOSX doesn't support + * process-shared POSIX mutexes. For these cases where a named object + * is used, the object name is derived from a 64 bit FNV hash of the + * environment pathname. As such, naming collisions are extremely + * unlikely. If a collision occurs, the results are unpredictable. + */ +typedef struct MDB_txbody { + /** Stamp identifying this as an LMDB file. It must be set + * to #MDB_MAGIC. */ + uint32_t mtb_magic; + /** Format of this lock file. Must be set to #MDB_LOCK_FORMAT. */ + uint32_t mtb_format; +#if defined(_WIN32) || defined(MDB_USE_POSIX_SEM) + char mtb_rmname[MNAME_LEN]; +#else + /** Mutex protecting access to this table. + * This is the reader lock that #LOCK_MUTEX_R acquires. + */ + pthread_mutex_t mtb_mutex; +#endif + /** The ID of the last transaction committed to the database. + * This is recorded here only for convenience; the value can always + * be determined by reading the main database meta pages. + */ + txnid_t mtb_txnid; + /** The number of slots that have been used in the reader table. + * This always records the maximum count, it is not decremented + * when readers release their slots. + */ + unsigned mtb_numreaders; +} MDB_txbody; + + /** The actual reader table definition. */ +typedef struct MDB_txninfo { + union { + MDB_txbody mtb; +#define mti_magic mt1.mtb.mtb_magic +#define mti_format mt1.mtb.mtb_format +#define mti_mutex mt1.mtb.mtb_mutex +#define mti_rmname mt1.mtb.mtb_rmname +#define mti_txnid mt1.mtb.mtb_txnid +#define mti_numreaders mt1.mtb.mtb_numreaders + char pad[(sizeof(MDB_txbody)+CACHELINE-1) & ~(CACHELINE-1)]; + } mt1; + union { +#if defined(_WIN32) || defined(MDB_USE_POSIX_SEM) + char mt2_wmname[MNAME_LEN]; +#define mti_wmname mt2.mt2_wmname +#else + pthread_mutex_t mt2_wmutex; +#define mti_wmutex mt2.mt2_wmutex +#endif + char pad[(MNAME_LEN+CACHELINE-1) & ~(CACHELINE-1)]; + } mt2; + MDB_reader mti_readers[1]; +} MDB_txninfo; + + /** Lockfile format signature: version, features and field layout */ +#define MDB_LOCK_FORMAT \ + ((uint32_t) \ + ((MDB_LOCK_VERSION) \ + /* Flags which describe functionality */ \ + + (((MDB_PIDLOCK) != 0) << 16))) +/** @} */ + +/** Common header for all page types. + * Overflow records occupy a number of contiguous pages with no + * headers on any page after the first. + */ +typedef struct MDB_page { +#define mp_pgno mp_p.p_pgno +#define mp_next mp_p.p_next + union { + pgno_t p_pgno; /**< page number */ + struct MDB_page *p_next; /**< for in-memory list of freed pages */ + } mp_p; + uint16_t mp_pad; +/** @defgroup mdb_page Page Flags + * @ingroup internal + * Flags for the page headers. + * @{ + */ +#define P_BRANCH 0x01 /**< branch page */ +#define P_LEAF 0x02 /**< leaf page */ +#define P_OVERFLOW 0x04 /**< overflow page */ +#define P_META 0x08 /**< meta page */ +#define P_DIRTY 0x10 /**< dirty page, also set for #P_SUBP pages */ +#define P_LEAF2 0x20 /**< for #MDB_DUPFIXED records */ +#define P_SUBP 0x40 /**< for #MDB_DUPSORT sub-pages */ +#define P_LOOSE 0x4000 /**< page was dirtied then freed, can be reused */ +#define P_KEEP 0x8000 /**< leave this page alone during spill */ +/** @} */ + uint16_t mp_flags; /**< @ref mdb_page */ +#define mp_lower mp_pb.pb.pb_lower +#define mp_upper mp_pb.pb.pb_upper +#define mp_pages mp_pb.pb_pages + union { + struct { + indx_t pb_lower; /**< lower bound of free space */ + indx_t pb_upper; /**< upper bound of free space */ + } pb; + uint32_t pb_pages; /**< number of overflow pages */ + } mp_pb; + indx_t mp_ptrs[1]; /**< dynamic size */ +} MDB_page; + + /** Size of the page header, excluding dynamic data at the end */ +#define PAGEHDRSZ ((unsigned) offsetof(MDB_page, mp_ptrs)) + + /** Address of first usable data byte in a page, after the header */ +#define METADATA(p) ((void *)((char *)(p) + PAGEHDRSZ)) + + /** ITS#7713, change PAGEBASE to handle 65536 byte pages */ +#define PAGEBASE ((MDB_DEVEL) ? PAGEHDRSZ : 0) + + /** Number of nodes on a page */ +#define NUMKEYS(p) (((p)->mp_lower - (PAGEHDRSZ-PAGEBASE)) >> 1) + + /** The amount of space remaining in the page */ +#define SIZELEFT(p) (indx_t)((p)->mp_upper - (p)->mp_lower) + + /** The percentage of space used in the page, in tenths of a percent. */ +#define PAGEFILL(env, p) (1000L * ((env)->me_psize - PAGEHDRSZ - SIZELEFT(p)) / \ + ((env)->me_psize - PAGEHDRSZ)) + /** The minimum page fill factor, in tenths of a percent. + * Pages emptier than this are candidates for merging. + */ +#define FILL_THRESHOLD 250 + + /** Test if a page is a leaf page */ +#define IS_LEAF(p) F_ISSET((p)->mp_flags, P_LEAF) + /** Test if a page is a LEAF2 page */ +#define IS_LEAF2(p) F_ISSET((p)->mp_flags, P_LEAF2) + /** Test if a page is a branch page */ +#define IS_BRANCH(p) F_ISSET((p)->mp_flags, P_BRANCH) + /** Test if a page is an overflow page */ +#define IS_OVERFLOW(p) F_ISSET((p)->mp_flags, P_OVERFLOW) + /** Test if a page is a sub page */ +#define IS_SUBP(p) F_ISSET((p)->mp_flags, P_SUBP) + + /** The number of overflow pages needed to store the given size. */ +#define OVPAGES(size, psize) ((PAGEHDRSZ-1 + (size)) / (psize) + 1) + + /** Link in #MDB_txn.%mt_loose_pgs list */ +#define NEXT_LOOSE_PAGE(p) (*(MDB_page **)((p) + 2)) + + /** Header for a single key/data pair within a page. + * Used in pages of type #P_BRANCH and #P_LEAF without #P_LEAF2. + * We guarantee 2-byte alignment for 'MDB_node's. + */ +typedef struct MDB_node { + /** lo and hi are used for data size on leaf nodes and for + * child pgno on branch nodes. On 64 bit platforms, flags + * is also used for pgno. (Branch nodes have no flags). + * They are in host byte order in case that lets some + * accesses be optimized into a 32-bit word access. + */ +#if BYTE_ORDER == LITTLE_ENDIAN + unsigned short mn_lo, mn_hi; /**< part of data size or pgno */ +#else + unsigned short mn_hi, mn_lo; +#endif +/** @defgroup mdb_node Node Flags + * @ingroup internal + * Flags for node headers. + * @{ + */ +#define F_BIGDATA 0x01 /**< data put on overflow page */ +#define F_SUBDATA 0x02 /**< data is a sub-database */ +#define F_DUPDATA 0x04 /**< data has duplicates */ + +/** valid flags for #mdb_node_add() */ +#define NODE_ADD_FLAGS (F_DUPDATA|F_SUBDATA|MDB_RESERVE|MDB_APPEND) + +/** @} */ + unsigned short mn_flags; /**< @ref mdb_node */ + unsigned short mn_ksize; /**< key size */ + char mn_data[1]; /**< key and data are appended here */ +} MDB_node; + + /** Size of the node header, excluding dynamic data at the end */ +#define NODESIZE offsetof(MDB_node, mn_data) + + /** Bit position of top word in page number, for shifting mn_flags */ +#define PGNO_TOPWORD ((pgno_t)-1 > 0xffffffffu ? 32 : 0) + + /** Size of a node in a branch page with a given key. + * This is just the node header plus the key, there is no data. + */ +#define INDXSIZE(k) (NODESIZE + ((k) == NULL ? 0 : (k)->mv_size)) + + /** Size of a node in a leaf page with a given key and data. + * This is node header plus key plus data size. + */ +#define LEAFSIZE(k, d) (NODESIZE + (k)->mv_size + (d)->mv_size) + + /** Address of node \b i in page \b p */ +#define NODEPTR(p, i) ((MDB_node *)((char *)(p) + (p)->mp_ptrs[i] + PAGEBASE)) + + /** Address of the key for the node */ +#define NODEKEY(node) (void *)((node)->mn_data) + + /** Address of the data for a node */ +#define NODEDATA(node) (void *)((char *)(node)->mn_data + (node)->mn_ksize) + + /** Get the page number pointed to by a branch node */ +#define NODEPGNO(node) \ + ((node)->mn_lo | ((pgno_t) (node)->mn_hi << 16) | \ + (PGNO_TOPWORD ? ((pgno_t) (node)->mn_flags << PGNO_TOPWORD) : 0)) + /** Set the page number in a branch node */ +#define SETPGNO(node,pgno) do { \ + (node)->mn_lo = (pgno) & 0xffff; (node)->mn_hi = (pgno) >> 16; \ + if (PGNO_TOPWORD) (node)->mn_flags = (pgno) >> PGNO_TOPWORD; } while(0) + + /** Get the size of the data in a leaf node */ +#define NODEDSZ(node) ((node)->mn_lo | ((unsigned)(node)->mn_hi << 16)) + /** Set the size of the data for a leaf node */ +#define SETDSZ(node,size) do { \ + (node)->mn_lo = (size) & 0xffff; (node)->mn_hi = (size) >> 16;} while(0) + /** The size of a key in a node */ +#define NODEKSZ(node) ((node)->mn_ksize) + + /** Copy a page number from src to dst */ +#ifdef MISALIGNED_OK +#define COPY_PGNO(dst,src) dst = src +#else +#if SIZE_MAX > 4294967295UL +#define COPY_PGNO(dst,src) do { \ + unsigned short *s, *d; \ + s = (unsigned short *)&(src); \ + d = (unsigned short *)&(dst); \ + *d++ = *s++; \ + *d++ = *s++; \ + *d++ = *s++; \ + *d = *s; \ +} while (0) +#else +#define COPY_PGNO(dst,src) do { \ + unsigned short *s, *d; \ + s = (unsigned short *)&(src); \ + d = (unsigned short *)&(dst); \ + *d++ = *s++; \ + *d = *s; \ +} while (0) +#endif +#endif + /** The address of a key in a LEAF2 page. + * LEAF2 pages are used for #MDB_DUPFIXED sorted-duplicate sub-DBs. + * There are no node headers, keys are stored contiguously. + */ +#define LEAF2KEY(p, i, ks) ((char *)(p) + PAGEHDRSZ + ((i)*(ks))) + + /** Set the \b node's key into \b keyptr, if requested. */ +#define MDB_GET_KEY(node, keyptr) { if ((keyptr) != NULL) { \ + (keyptr)->mv_size = NODEKSZ(node); (keyptr)->mv_data = NODEKEY(node); } } + + /** Set the \b node's key into \b key. */ +#define MDB_GET_KEY2(node, key) { key.mv_size = NODEKSZ(node); key.mv_data = NODEKEY(node); } + + /** Information about a single database in the environment. */ +typedef struct MDB_db { + uint32_t md_pad; /**< also ksize for LEAF2 pages */ + uint16_t md_flags; /**< @ref mdb_dbi_open */ + uint16_t md_depth; /**< depth of this tree */ + pgno_t md_branch_pages; /**< number of internal pages */ + pgno_t md_leaf_pages; /**< number of leaf pages */ + pgno_t md_overflow_pages; /**< number of overflow pages */ + size_t md_entries; /**< number of data items */ + pgno_t md_root; /**< the root page of this tree */ +} MDB_db; + + /** mdb_dbi_open flags */ +#define MDB_VALID 0x8000 /**< DB handle is valid, for me_dbflags */ +#define PERSISTENT_FLAGS (0xffff & ~(MDB_VALID)) +#define VALID_FLAGS (MDB_REVERSEKEY|MDB_DUPSORT|MDB_INTEGERKEY|MDB_DUPFIXED|\ + MDB_INTEGERDUP|MDB_REVERSEDUP|MDB_CREATE) + + /** Handle for the DB used to track free pages. */ +#define FREE_DBI 0 + /** Handle for the default DB. */ +#define MAIN_DBI 1 + + /** Meta page content. + * A meta page is the start point for accessing a database snapshot. + * Pages 0-1 are meta pages. Transaction N writes meta page #(N % 2). + */ +typedef struct MDB_meta { + /** Stamp identifying this as an LMDB file. It must be set + * to #MDB_MAGIC. */ + uint32_t mm_magic; + /** Version number of this lock file. Must be set to #MDB_DATA_VERSION. */ + uint32_t mm_version; + void *mm_address; /**< address for fixed mapping */ + size_t mm_mapsize; /**< size of mmap region */ + MDB_db mm_dbs[2]; /**< first is free space, 2nd is main db */ + /** The size of pages used in this DB */ +#define mm_psize mm_dbs[0].md_pad + /** Any persistent environment flags. @ref mdb_env */ +#define mm_flags mm_dbs[0].md_flags + pgno_t mm_last_pg; /**< last used page in file */ + txnid_t mm_txnid; /**< txnid that committed this page */ +} MDB_meta; + + /** Buffer for a stack-allocated meta page. + * The members define size and alignment, and silence type + * aliasing warnings. They are not used directly; that could + * mean incorrectly using several union members in parallel. + */ +typedef union MDB_metabuf { + MDB_page mb_page; + struct { + char mm_pad[PAGEHDRSZ]; + MDB_meta mm_meta; + } mb_metabuf; +} MDB_metabuf; + + /** Auxiliary DB info. + * The information here is mostly static/read-only. There is + * only a single copy of this record in the environment. + */ +typedef struct MDB_dbx { + MDB_val md_name; /**< name of the database */ + MDB_cmp_func *md_cmp; /**< function for comparing keys */ + MDB_cmp_func *md_dcmp; /**< function for comparing data items */ + MDB_rel_func *md_rel; /**< user relocate function */ + void *md_relctx; /**< user-provided context for md_rel */ +} MDB_dbx; + + /** A database transaction. + * Every operation requires a transaction handle. + */ +struct MDB_txn { + MDB_txn *mt_parent; /**< parent of a nested txn */ + MDB_txn *mt_child; /**< nested txn under this txn */ + pgno_t mt_next_pgno; /**< next unallocated page */ + /** The ID of this transaction. IDs are integers incrementing from 1. + * Only committed write transactions increment the ID. If a transaction + * aborts, the ID may be re-used by the next writer. + */ + txnid_t mt_txnid; + MDB_env *mt_env; /**< the DB environment */ + /** The list of pages that became unused during this transaction. + */ + MDB_IDL mt_free_pgs; + /** The list of loose pages that became unused and may be reused + * in this transaction, linked through #NEXT_LOOSE_PAGE(page). + */ + MDB_page *mt_loose_pgs; + /* #Number of loose pages (#mt_loose_pgs) */ + int mt_loose_count; + /** The sorted list of dirty pages we temporarily wrote to disk + * because the dirty list was full. page numbers in here are + * shifted left by 1, deleted slots have the LSB set. + */ + MDB_IDL mt_spill_pgs; + union { + /** For write txns: Modified pages. Sorted when not MDB_WRITEMAP. */ + MDB_ID2L dirty_list; + /** For read txns: This thread/txn's reader table slot, or NULL. */ + MDB_reader *reader; + } mt_u; + /** Array of records for each DB known in the environment. */ + MDB_dbx *mt_dbxs; + /** Array of MDB_db records for each known DB */ + MDB_db *mt_dbs; + /** Array of sequence numbers for each DB handle */ + unsigned int *mt_dbiseqs; +/** @defgroup mt_dbflag Transaction DB Flags + * @ingroup internal + * @{ + */ +#define DB_DIRTY 0x01 /**< DB was modified or is DUPSORT data */ +#define DB_STALE 0x02 /**< Named-DB record is older than txnID */ +#define DB_NEW 0x04 /**< Named-DB handle opened in this txn */ +#define DB_VALID 0x08 /**< DB handle is valid, see also #MDB_VALID */ +/** @} */ + /** In write txns, array of cursors for each DB */ + MDB_cursor **mt_cursors; + /** Array of flags for each DB */ + unsigned char *mt_dbflags; +#ifdef VL32 + /** List of read-only pages */ + MDB_ID2L mt_rpages; +#endif + /** Number of DB records in use. This number only ever increments; + * we don't decrement it when individual DB handles are closed. + */ + MDB_dbi mt_numdbs; + +/** @defgroup mdb_txn Transaction Flags + * @ingroup internal + * @{ + */ +#define MDB_TXN_RDONLY 0x01 /**< read-only transaction */ +#define MDB_TXN_ERROR 0x02 /**< txn is unusable after an error */ +#define MDB_TXN_DIRTY 0x04 /**< must write, even if dirty list is empty */ +#define MDB_TXN_SPILLS 0x08 /**< txn or a parent has spilled pages */ +/** @} */ + unsigned int mt_flags; /**< @ref mdb_txn */ + /** #dirty_list room: Array size - \#dirty pages visible to this txn. + * Includes ancestor txns' dirty pages not hidden by other txns' + * dirty/spilled pages. Thus commit(nested txn) has room to merge + * dirty_list into mt_parent after freeing hidden mt_parent pages. + */ + unsigned int mt_dirty_room; +}; + +/** Enough space for 2^32 nodes with minimum of 2 keys per node. I.e., plenty. + * At 4 keys per node, enough for 2^64 nodes, so there's probably no need to + * raise this on a 64 bit machine. + */ +#define CURSOR_STACK 32 + +struct MDB_xcursor; + + /** Cursors are used for all DB operations. + * A cursor holds a path of (page pointer, key index) from the DB + * root to a position in the DB, plus other state. #MDB_DUPSORT + * cursors include an xcursor to the current data item. Write txns + * track their cursors and keep them up to date when data moves. + * Exception: An xcursor's pointer to a #P_SUBP page can be stale. + * (A node with #F_DUPDATA but no #F_SUBDATA contains a subpage). + */ +struct MDB_cursor { + /** Next cursor on this DB in this txn */ + MDB_cursor *mc_next; + /** Backup of the original cursor if this cursor is a shadow */ + MDB_cursor *mc_backup; + /** Context used for databases with #MDB_DUPSORT, otherwise NULL */ + struct MDB_xcursor *mc_xcursor; + /** The transaction that owns this cursor */ + MDB_txn *mc_txn; + /** The database handle this cursor operates on */ + MDB_dbi mc_dbi; + /** The database record for this cursor */ + MDB_db *mc_db; + /** The database auxiliary record for this cursor */ + MDB_dbx *mc_dbx; + /** The @ref mt_dbflag for this database */ + unsigned char *mc_dbflag; + unsigned short mc_snum; /**< number of pushed pages */ + unsigned short mc_top; /**< index of top page, normally mc_snum-1 */ +/** @defgroup mdb_cursor Cursor Flags + * @ingroup internal + * Cursor state flags. + * @{ + */ +#define C_INITIALIZED 0x01 /**< cursor has been initialized and is valid */ +#define C_EOF 0x02 /**< No more data */ +#define C_SUB 0x04 /**< Cursor is a sub-cursor */ +#define C_DEL 0x08 /**< last op was a cursor_del */ +#define C_SPLITTING 0x20 /**< Cursor is in page_split */ +#define C_UNTRACK 0x40 /**< Un-track cursor when closing */ +/** @} */ + unsigned int mc_flags; /**< @ref mdb_cursor */ + MDB_page *mc_pg[CURSOR_STACK]; /**< stack of pushed pages */ + indx_t mc_ki[CURSOR_STACK]; /**< stack of page indices */ +}; + + /** Context for sorted-dup records. + * We could have gone to a fully recursive design, with arbitrarily + * deep nesting of sub-databases. But for now we only handle these + * levels - main DB, optional sub-DB, sorted-duplicate DB. + */ +typedef struct MDB_xcursor { + /** A sub-cursor for traversing the Dup DB */ + MDB_cursor mx_cursor; + /** The database record for this Dup DB */ + MDB_db mx_db; + /** The auxiliary DB record for this Dup DB */ + MDB_dbx mx_dbx; + /** The @ref mt_dbflag for this Dup DB */ + unsigned char mx_dbflag; +} MDB_xcursor; + + /** State of FreeDB old pages, stored in the MDB_env */ +typedef struct MDB_pgstate { + pgno_t *mf_pghead; /**< Reclaimed freeDB pages, or NULL before use */ + txnid_t mf_pglast; /**< ID of last used record, or 0 if !mf_pghead */ +} MDB_pgstate; + + /** The database environment. */ +struct MDB_env { + HANDLE me_fd; /**< The main data file */ + HANDLE me_lfd; /**< The lock file */ + HANDLE me_mfd; /**< just for writing the meta pages */ +#if defined(VL32) && defined(_WIN32) + HANDLE me_fmh; /**< File Mapping handle */ +#endif + /** Failed to update the meta page. Probably an I/O error. */ +#define MDB_FATAL_ERROR 0x80000000U + /** Some fields are initialized. */ +#define MDB_ENV_ACTIVE 0x20000000U + /** me_txkey is set */ +#define MDB_ENV_TXKEY 0x10000000U + uint32_t me_flags; /**< @ref mdb_env */ + unsigned int me_psize; /**< DB page size, inited from me_os_psize */ + unsigned int me_os_psize; /**< OS page size, from #GET_PAGESIZE */ + unsigned int me_maxreaders; /**< size of the reader table */ + unsigned int me_numreaders; /**< max numreaders set by this env */ + MDB_dbi me_numdbs; /**< number of DBs opened */ + MDB_dbi me_maxdbs; /**< size of the DB table */ + MDB_PID_T me_pid; /**< process ID of this env */ + char *me_path; /**< path to the DB files */ + char *me_map; /**< the memory map of the data file */ + MDB_txninfo *me_txns; /**< the memory map of the lock file or NULL */ + MDB_meta *me_metas[2]; /**< pointers to the two meta pages */ + void *me_pbuf; /**< scratch area for DUPSORT put() */ + MDB_txn *me_txn; /**< current write transaction */ + MDB_txn *me_txn0; /**< prealloc'd write transaction */ + size_t me_mapsize; /**< size of the data memory map */ + off_t me_size; /**< current file size */ + pgno_t me_maxpg; /**< me_mapsize / me_psize */ + MDB_dbx *me_dbxs; /**< array of static DB info */ + uint16_t *me_dbflags; /**< array of flags from MDB_db.md_flags */ + unsigned int *me_dbiseqs; /**< array of dbi sequence numbers */ + pthread_key_t me_txkey; /**< thread-key for readers */ + txnid_t me_pgoldest; /**< ID of oldest reader last time we looked */ + MDB_pgstate me_pgstate; /**< state of old pages from freeDB */ +# define me_pglast me_pgstate.mf_pglast +# define me_pghead me_pgstate.mf_pghead + MDB_page *me_dpages; /**< list of malloc'd blocks for re-use */ + /** IDL of pages that became unused in a write txn */ + MDB_IDL me_free_pgs; + /** ID2L of pages written during a write txn. Length MDB_IDL_UM_SIZE. */ + MDB_ID2L me_dirty_list; + /** Max number of freelist items that can fit in a single overflow page */ + int me_maxfree_1pg; + /** Max size of a node on a page */ + unsigned int me_nodemax; +#if !(MDB_MAXKEYSIZE) + unsigned int me_maxkey; /**< max size of a key */ +#endif + int me_live_reader; /**< have liveness lock in reader table */ +#ifdef _WIN32 + int me_pidquery; /**< Used in OpenProcess */ + HANDLE me_rmutex; /* Windows mutexes don't reside in shared mem */ + HANDLE me_wmutex; +#elif defined(MDB_USE_POSIX_SEM) + sem_t *me_rmutex; /* Shared mutexes are not supported */ + sem_t *me_wmutex; +#endif + void *me_userctx; /**< User-settable context */ + MDB_assert_func *me_assert_func; /**< Callback for assertion failures */ +}; + + /** Nested transaction */ +typedef struct MDB_ntxn { + MDB_txn mnt_txn; /**< the transaction */ + MDB_pgstate mnt_pgstate; /**< parent transaction's saved freestate */ +} MDB_ntxn; + + /** max number of pages to commit in one writev() call */ +#define MDB_COMMIT_PAGES 64 +#if defined(IOV_MAX) && IOV_MAX < MDB_COMMIT_PAGES +#undef MDB_COMMIT_PAGES +#define MDB_COMMIT_PAGES IOV_MAX +#endif + + /** max bytes to write in one call */ +#define MAX_WRITE (0x80000000U >> (sizeof(ssize_t) == 4)) + + /** Check \b txn and \b dbi arguments to a function */ +#define TXN_DBI_EXIST(txn, dbi) \ + ((txn) && (dbi) < (txn)->mt_numdbs && ((txn)->mt_dbflags[dbi] & DB_VALID)) + + /** Check for misused \b dbi handles */ +#define TXN_DBI_CHANGED(txn, dbi) \ + ((txn)->mt_dbiseqs[dbi] != (txn)->mt_env->me_dbiseqs[dbi]) + +static int mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp); +static int mdb_page_new(MDB_cursor *mc, uint32_t flags, int num, MDB_page **mp); +static int mdb_page_touch(MDB_cursor *mc); + +static int mdb_page_get(MDB_txn *txn, pgno_t pgno, MDB_page **mp, int *lvl); +static int mdb_page_search_root(MDB_cursor *mc, + MDB_val *key, int modify); +#define MDB_PS_MODIFY 1 +#define MDB_PS_ROOTONLY 2 +#define MDB_PS_FIRST 4 +#define MDB_PS_LAST 8 +static int mdb_page_search(MDB_cursor *mc, + MDB_val *key, int flags); +static int mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst); + +#define MDB_SPLIT_REPLACE MDB_APPENDDUP /**< newkey is not new */ +static int mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, + pgno_t newpgno, unsigned int nflags); + +static int mdb_env_read_header(MDB_env *env, MDB_meta *meta); +static int mdb_env_pick_meta(const MDB_env *env); +static int mdb_env_write_meta(MDB_txn *txn); +#if !(defined(_WIN32) || defined(MDB_USE_POSIX_SEM)) /* Drop unused excl arg */ +# define mdb_env_close0(env, excl) mdb_env_close1(env) +#endif +static void mdb_env_close0(MDB_env *env, int excl); + +static MDB_node *mdb_node_search(MDB_cursor *mc, MDB_val *key, int *exactp); +static int mdb_node_add(MDB_cursor *mc, indx_t indx, + MDB_val *key, MDB_val *data, pgno_t pgno, unsigned int flags); +static void mdb_node_del(MDB_cursor *mc, int ksize); +static void mdb_node_shrink(MDB_page *mp, indx_t indx); +static int mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst); +static int mdb_node_read(MDB_txn *txn, MDB_node *leaf, MDB_val *data); +static size_t mdb_leaf_size(MDB_env *env, MDB_val *key, MDB_val *data); +static size_t mdb_branch_size(MDB_env *env, MDB_val *key); + +static int mdb_rebalance(MDB_cursor *mc); +static int mdb_update_key(MDB_cursor *mc, MDB_val *key); + +static void mdb_cursor_pop(MDB_cursor *mc); +static int mdb_cursor_push(MDB_cursor *mc, MDB_page *mp); + +static int mdb_cursor_del0(MDB_cursor *mc); +static int mdb_del0(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data, unsigned flags); +static int mdb_cursor_sibling(MDB_cursor *mc, int move_right); +static int mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op); +static int mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op); +static int mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op, + int *exactp); +static int mdb_cursor_first(MDB_cursor *mc, MDB_val *key, MDB_val *data); +static int mdb_cursor_last(MDB_cursor *mc, MDB_val *key, MDB_val *data); + +static void mdb_cursor_init(MDB_cursor *mc, MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx); +static void mdb_xcursor_init0(MDB_cursor *mc); +static void mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node); + +static int mdb_drop0(MDB_cursor *mc, int subs); +static void mdb_default_cmp(MDB_txn *txn, MDB_dbi dbi); + +/** @cond */ +static MDB_cmp_func mdb_cmp_memn, mdb_cmp_memnr, mdb_cmp_int, mdb_cmp_cint, mdb_cmp_long; +/** @endcond */ + +#ifdef _WIN32 +static SECURITY_DESCRIPTOR mdb_null_sd; +static SECURITY_ATTRIBUTES mdb_all_sa; +static int mdb_sec_inited; +#endif + +/** Return the library version info. */ +char * +mdb_version(int *major, int *minor, int *patch) +{ + if (major) *major = MDB_VERSION_MAJOR; + if (minor) *minor = MDB_VERSION_MINOR; + if (patch) *patch = MDB_VERSION_PATCH; + return MDB_VERSION_STRING; +} + +/** Table of descriptions for LMDB @ref errors */ +static char *const mdb_errstr[] = { + "MDB_KEYEXIST: Key/data pair already exists", + "MDB_NOTFOUND: No matching key/data pair found", + "MDB_PAGE_NOTFOUND: Requested page not found", + "MDB_CORRUPTED: Located page was wrong type", + "MDB_PANIC: Update of meta page failed", + "MDB_VERSION_MISMATCH: Database environment version mismatch", + "MDB_INVALID: File is not an LMDB file", + "MDB_MAP_FULL: Environment mapsize limit reached", + "MDB_DBS_FULL: Environment maxdbs limit reached", + "MDB_READERS_FULL: Environment maxreaders limit reached", + "MDB_TLS_FULL: Thread-local storage keys full - too many environments open", + "MDB_TXN_FULL: Transaction has too many dirty pages - transaction too big", + "MDB_CURSOR_FULL: Internal error - cursor stack limit reached", + "MDB_PAGE_FULL: Internal error - page has no more space", + "MDB_MAP_RESIZED: Database contents grew beyond environment mapsize", + "MDB_INCOMPATIBLE: Operation and DB incompatible, or DB flags changed", + "MDB_BAD_RSLOT: Invalid reuse of reader locktable slot", + "MDB_BAD_TXN: Transaction cannot recover - it must be aborted", + "MDB_BAD_VALSIZE: Unsupported size of key/DB name/data, or wrong DUPFIXED size", + "MDB_BAD_DBI: The specified DBI handle was closed/changed unexpectedly", +}; + +char * +mdb_strerror(int err) +{ +#ifdef _WIN32 + /** HACK: pad 4KB on stack over the buf. Return system msgs in buf. + * This works as long as no function between the call to mdb_strerror + * and the actual use of the message uses more than 4K of stack. + */ + char pad[4096]; + char buf[1024], *ptr = buf; +#endif + int i; + if (!err) + return ("Successful return: 0"); + + if (err >= MDB_KEYEXIST && err <= MDB_LAST_ERRCODE) { + i = err - MDB_KEYEXIST; + return mdb_errstr[i]; + } + +#ifdef _WIN32 + /* These are the C-runtime error codes we use. The comment indicates + * their numeric value, and the Win32 error they would correspond to + * if the error actually came from a Win32 API. A major mess, we should + * have used LMDB-specific error codes for everything. + */ + switch(err) { + case ENOENT: /* 2, FILE_NOT_FOUND */ + case EIO: /* 5, ACCESS_DENIED */ + case ENOMEM: /* 12, INVALID_ACCESS */ + case EACCES: /* 13, INVALID_DATA */ + case EBUSY: /* 16, CURRENT_DIRECTORY */ + case EINVAL: /* 22, BAD_COMMAND */ + case ENOSPC: /* 28, OUT_OF_PAPER */ + return strerror(err); + default: + ; + } + buf[0] = 0; + FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, err, 0, ptr, sizeof(buf), pad); + return ptr; +#else + return strerror(err); +#endif +} + +/** assert(3) variant in cursor context */ +#define mdb_cassert(mc, expr) mdb_assert0((mc)->mc_txn->mt_env, expr, #expr) +/** assert(3) variant in transaction context */ +#define mdb_tassert(mc, expr) mdb_assert0((txn)->mt_env, expr, #expr) +/** assert(3) variant in environment context */ +#define mdb_eassert(env, expr) mdb_assert0(env, expr, #expr) + +#ifndef NDEBUG +# define mdb_assert0(env, expr, expr_txt) ((expr) ? (void)0 : \ + mdb_assert_fail(env, expr_txt, mdb_func_, __FILE__, __LINE__)) + +static void +mdb_assert_fail(MDB_env *env, const char *expr_txt, + const char *func, const char *file, int line) +{ + char buf[400]; + sprintf(buf, "%.100s:%d: Assertion '%.200s' failed in %.40s()", + file, line, expr_txt, func); + if (env->me_assert_func) + env->me_assert_func(env, buf); + fprintf(stderr, "%s\n", buf); + abort(); +} +#else +# define mdb_assert0(env, expr, expr_txt) ((void) 0) +#endif /* NDEBUG */ + +#if MDB_DEBUG +/** Return the page number of \b mp which may be sub-page, for debug output */ +static pgno_t +mdb_dbg_pgno(MDB_page *mp) +{ + pgno_t ret; + COPY_PGNO(ret, mp->mp_pgno); + return ret; +} + +/** Display a key in hexadecimal and return the address of the result. + * @param[in] key the key to display + * @param[in] buf the buffer to write into. Should always be #DKBUF. + * @return The key in hexadecimal form. + */ +char * +mdb_dkey(MDB_val *key, char *buf) +{ + char *ptr = buf; + unsigned char *c = key->mv_data; + unsigned int i; + + if (!key) + return ""; + + if (key->mv_size > DKBUF_MAXKEYSIZE) + return "MDB_MAXKEYSIZE"; + /* may want to make this a dynamic check: if the key is mostly + * printable characters, print it as-is instead of converting to hex. + */ +#if 1 + buf[0] = '\0'; + for (i=0; i<key->mv_size; i++) + ptr += sprintf(ptr, "%02x", *c++); +#else + sprintf(buf, "%.*s", key->mv_size, key->mv_data); +#endif + return buf; +} + +static const char * +mdb_leafnode_type(MDB_node *n) +{ + static char *const tp[2][2] = {{"", ": DB"}, {": sub-page", ": sub-DB"}}; + return F_ISSET(n->mn_flags, F_BIGDATA) ? ": overflow page" : + tp[F_ISSET(n->mn_flags, F_DUPDATA)][F_ISSET(n->mn_flags, F_SUBDATA)]; +} + +/** Display all the keys in the page. */ +void +mdb_page_list(MDB_page *mp) +{ + pgno_t pgno = mdb_dbg_pgno(mp); + const char *type, *state = (mp->mp_flags & P_DIRTY) ? ", dirty" : ""; + MDB_node *node; + unsigned int i, nkeys, nsize, total = 0; + MDB_val key; + DKBUF; + + switch (mp->mp_flags & (P_BRANCH|P_LEAF|P_LEAF2|P_META|P_OVERFLOW|P_SUBP)) { + case P_BRANCH: type = "Branch page"; break; + case P_LEAF: type = "Leaf page"; break; + case P_LEAF|P_SUBP: type = "Sub-page"; break; + case P_LEAF|P_LEAF2: type = "LEAF2 page"; break; + case P_LEAF|P_LEAF2|P_SUBP: type = "LEAF2 sub-page"; break; + case P_OVERFLOW: + fprintf(stderr, "Overflow page %"Z"u pages %u%s\n", + pgno, mp->mp_pages, state); + return; + case P_META: + fprintf(stderr, "Meta-page %"Z"u txnid %"Z"u\n", + pgno, ((MDB_meta *)METADATA(mp))->mm_txnid); + return; + default: + fprintf(stderr, "Bad page %"Z"u flags 0x%u\n", pgno, mp->mp_flags); + return; + } + + nkeys = NUMKEYS(mp); + fprintf(stderr, "%s %"Z"u numkeys %d%s\n", type, pgno, nkeys, state); + + for (i=0; i<nkeys; i++) { + if (IS_LEAF2(mp)) { /* LEAF2 pages have no mp_ptrs[] or node headers */ + key.mv_size = nsize = mp->mp_pad; + key.mv_data = LEAF2KEY(mp, i, nsize); + total += nsize; + fprintf(stderr, "key %d: nsize %d, %s\n", i, nsize, DKEY(&key)); + continue; + } + node = NODEPTR(mp, i); + key.mv_size = node->mn_ksize; + key.mv_data = node->mn_data; + nsize = NODESIZE + key.mv_size; + if (IS_BRANCH(mp)) { + fprintf(stderr, "key %d: page %"Z"u, %s\n", i, NODEPGNO(node), + DKEY(&key)); + total += nsize; + } else { + if (F_ISSET(node->mn_flags, F_BIGDATA)) + nsize += sizeof(pgno_t); + else + nsize += NODEDSZ(node); + total += nsize; + nsize += sizeof(indx_t); + fprintf(stderr, "key %d: nsize %d, %s%s\n", + i, nsize, DKEY(&key), mdb_leafnode_type(node)); + } + total = EVEN(total); + } + fprintf(stderr, "Total: header %d + contents %d + unused %d\n", + IS_LEAF2(mp) ? PAGEHDRSZ : PAGEBASE + mp->mp_lower, total, SIZELEFT(mp)); +} + +void +mdb_cursor_chk(MDB_cursor *mc) +{ + unsigned int i; + MDB_node *node; + MDB_page *mp; + + if (!mc->mc_snum && !(mc->mc_flags & C_INITIALIZED)) return; + for (i=0; i<mc->mc_top; i++) { + mp = mc->mc_pg[i]; + node = NODEPTR(mp, mc->mc_ki[i]); + if (NODEPGNO(node) != mc->mc_pg[i+1]->mp_pgno) + printf("oops!\n"); + } + if (mc->mc_ki[i] >= NUMKEYS(mc->mc_pg[i])) + printf("ack!\n"); +} +#endif + +#if (MDB_DEBUG) > 2 +/** Count all the pages in each DB and in the freelist + * and make sure it matches the actual number of pages + * being used. + * All named DBs must be open for a correct count. + */ +static void mdb_audit(MDB_txn *txn) +{ + MDB_cursor mc; + MDB_val key, data; + MDB_ID freecount, count; + MDB_dbi i; + int rc; + + freecount = 0; + mdb_cursor_init(&mc, txn, FREE_DBI, NULL); + while ((rc = mdb_cursor_get(&mc, &key, &data, MDB_NEXT)) == 0) + freecount += *(MDB_ID *)data.mv_data; + mdb_tassert(txn, rc == MDB_NOTFOUND); + + count = 0; + for (i = 0; i<txn->mt_numdbs; i++) { + MDB_xcursor mx; + if (!(txn->mt_dbflags[i] & DB_VALID)) + continue; + mdb_cursor_init(&mc, txn, i, &mx); + if (txn->mt_dbs[i].md_root == P_INVALID) + continue; + count += txn->mt_dbs[i].md_branch_pages + + txn->mt_dbs[i].md_leaf_pages + + txn->mt_dbs[i].md_overflow_pages; + if (txn->mt_dbs[i].md_flags & MDB_DUPSORT) { + rc = mdb_page_search(&mc, NULL, MDB_PS_FIRST); + for (; rc == MDB_SUCCESS; rc = mdb_cursor_sibling(&mc, 1)) { + unsigned j; + MDB_page *mp; + mp = mc.mc_pg[mc.mc_top]; + for (j=0; j<NUMKEYS(mp); j++) { + MDB_node *leaf = NODEPTR(mp, j); + if (leaf->mn_flags & F_SUBDATA) { + MDB_db db; + memcpy(&db, NODEDATA(leaf), sizeof(db)); + count += db.md_branch_pages + db.md_leaf_pages + + db.md_overflow_pages; + } + } + } + mdb_tassert(txn, rc == MDB_NOTFOUND); + } + } + if (freecount + count + 2 /* metapages */ != txn->mt_next_pgno) { + fprintf(stderr, "audit: %lu freecount: %lu count: %lu total: %lu next_pgno: %lu\n", + txn->mt_txnid, freecount, count+2, freecount+count+2, txn->mt_next_pgno); + } +} +#endif + +int +mdb_cmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b) +{ + return txn->mt_dbxs[dbi].md_cmp(a, b); +} + +int +mdb_dcmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b) +{ + return txn->mt_dbxs[dbi].md_dcmp(a, b); +} + +/** Allocate memory for a page. + * Re-use old malloc'd pages first for singletons, otherwise just malloc. + */ +static MDB_page * +mdb_page_malloc(MDB_txn *txn, unsigned num) +{ + MDB_env *env = txn->mt_env; + MDB_page *ret = env->me_dpages; + size_t psize = env->me_psize, sz = psize, off; + /* For ! #MDB_NOMEMINIT, psize counts how much to init. + * For a single page alloc, we init everything after the page header. + * For multi-page, we init the final page; if the caller needed that + * many pages they will be filling in at least up to the last page. + */ + if (num == 1) { + if (ret) { + VGMEMP_ALLOC(env, ret, sz); + VGMEMP_DEFINED(ret, sizeof(ret->mp_next)); + env->me_dpages = ret->mp_next; + return ret; + } + psize -= off = PAGEHDRSZ; + } else { + sz *= num; + off = sz - psize; + } + if ((ret = malloc(sz)) != NULL) { + VGMEMP_ALLOC(env, ret, sz); + if (!(env->me_flags & MDB_NOMEMINIT)) { + memset((char *)ret + off, 0, psize); + ret->mp_pad = 0; + } + } else { + txn->mt_flags |= MDB_TXN_ERROR; + } + return ret; +} +/** Free a single page. + * Saves single pages to a list, for future reuse. + * (This is not used for multi-page overflow pages.) + */ +static void +mdb_page_free(MDB_env *env, MDB_page *mp) +{ + mp->mp_next = env->me_dpages; + VGMEMP_FREE(env, mp); + env->me_dpages = mp; +} + +/** Free a dirty page */ +static void +mdb_dpage_free(MDB_env *env, MDB_page *dp) +{ + if (!IS_OVERFLOW(dp) || dp->mp_pages == 1) { + mdb_page_free(env, dp); + } else { + /* large pages just get freed directly */ + VGMEMP_FREE(env, dp); + free(dp); + } +} + +/** Return all dirty pages to dpage list */ +static void +mdb_dlist_free(MDB_txn *txn) +{ + MDB_env *env = txn->mt_env; + MDB_ID2L dl = txn->mt_u.dirty_list; + unsigned i, n = dl[0].mid; + + for (i = 1; i <= n; i++) { + mdb_dpage_free(env, dl[i].mptr); + } + dl[0].mid = 0; +} + +/** Loosen or free a single page. + * Saves single pages to a list for future reuse + * in this same txn. It has been pulled from the freeDB + * and already resides on the dirty list, but has been + * deleted. Use these pages first before pulling again + * from the freeDB. + * + * If the page wasn't dirtied in this txn, just add it + * to this txn's free list. + */ +static int +mdb_page_loose(MDB_cursor *mc, MDB_page *mp) +{ + int loose = 0; + pgno_t pgno = mp->mp_pgno; + MDB_txn *txn = mc->mc_txn; + + if ((mp->mp_flags & P_DIRTY) && mc->mc_dbi != FREE_DBI) { + if (txn->mt_parent) { + MDB_ID2 *dl = txn->mt_u.dirty_list; + /* If txn has a parent, make sure the page is in our + * dirty list. + */ + if (dl[0].mid) { + unsigned x = mdb_mid2l_search(dl, pgno); + if (x <= dl[0].mid && dl[x].mid == pgno) { + if (mp != dl[x].mptr) { /* bad cursor? */ + mc->mc_flags &= ~(C_INITIALIZED|C_EOF); + txn->mt_flags |= MDB_TXN_ERROR; + return MDB_CORRUPTED; + } + /* ok, it's ours */ + loose = 1; + } + } + } else { + /* no parent txn, so it's just ours */ + loose = 1; + } + } + if (loose) { + DPRINTF(("loosen db %d page %"Z"u", DDBI(mc), + mp->mp_pgno)); + NEXT_LOOSE_PAGE(mp) = txn->mt_loose_pgs; + txn->mt_loose_pgs = mp; + txn->mt_loose_count++; + mp->mp_flags |= P_LOOSE; + } else { + int rc = mdb_midl_append(&txn->mt_free_pgs, pgno); + if (rc) + return rc; + } + + return MDB_SUCCESS; +} + +/** Set or clear P_KEEP in dirty, non-overflow, non-sub pages watched by txn. + * @param[in] mc A cursor handle for the current operation. + * @param[in] pflags Flags of the pages to update: + * P_DIRTY to set P_KEEP, P_DIRTY|P_KEEP to clear it. + * @param[in] all No shortcuts. Needed except after a full #mdb_page_flush(). + * @return 0 on success, non-zero on failure. + */ +static int +mdb_pages_xkeep(MDB_cursor *mc, unsigned pflags, int all) +{ + enum { Mask = P_SUBP|P_DIRTY|P_LOOSE|P_KEEP }; + MDB_txn *txn = mc->mc_txn; + MDB_cursor *m3; + MDB_xcursor *mx; + MDB_page *dp, *mp; + MDB_node *leaf; + unsigned i, j; + int rc = MDB_SUCCESS, level; + + /* Mark pages seen by cursors */ + if (mc->mc_flags & C_UNTRACK) + mc = NULL; /* will find mc in mt_cursors */ + for (i = txn->mt_numdbs;; mc = txn->mt_cursors[--i]) { + for (; mc; mc=mc->mc_next) { + if (!(mc->mc_flags & C_INITIALIZED)) + continue; + for (m3 = mc;; m3 = &mx->mx_cursor) { + mp = NULL; + for (j=0; j<m3->mc_snum; j++) { + mp = m3->mc_pg[j]; + if ((mp->mp_flags & Mask) == pflags) + mp->mp_flags ^= P_KEEP; + } + mx = m3->mc_xcursor; + /* Proceed to mx if it is at a sub-database */ + if (! (mx && (mx->mx_cursor.mc_flags & C_INITIALIZED))) + break; + if (! (mp && (mp->mp_flags & P_LEAF))) + break; + leaf = NODEPTR(mp, m3->mc_ki[j-1]); + if (!(leaf->mn_flags & F_SUBDATA)) + break; + } + } + if (i == 0) + break; + } + + if (all) { + /* Mark dirty root pages */ + for (i=0; i<txn->mt_numdbs; i++) { + if (txn->mt_dbflags[i] & DB_DIRTY) { + pgno_t pgno = txn->mt_dbs[i].md_root; + if (pgno == P_INVALID) + continue; + if ((rc = mdb_page_get(txn, pgno, &dp, &level)) != MDB_SUCCESS) + break; + if ((dp->mp_flags & Mask) == pflags && level <= 1) + dp->mp_flags ^= P_KEEP; + } + } + } + + return rc; +} + +static int mdb_page_flush(MDB_txn *txn, int keep); + +/** Spill pages from the dirty list back to disk. + * This is intended to prevent running into #MDB_TXN_FULL situations, + * but note that they may still occur in a few cases: + * 1) our estimate of the txn size could be too small. Currently this + * seems unlikely, except with a large number of #MDB_MULTIPLE items. + * 2) child txns may run out of space if their parents dirtied a + * lot of pages and never spilled them. TODO: we probably should do + * a preemptive spill during #mdb_txn_begin() of a child txn, if + * the parent's dirty_room is below a given threshold. + * + * Otherwise, if not using nested txns, it is expected that apps will + * not run into #MDB_TXN_FULL any more. The pages are flushed to disk + * the same way as for a txn commit, e.g. their P_DIRTY flag is cleared. + * If the txn never references them again, they can be left alone. + * If the txn only reads them, they can be used without any fuss. + * If the txn writes them again, they can be dirtied immediately without + * going thru all of the work of #mdb_page_touch(). Such references are + * handled by #mdb_page_unspill(). + * + * Also note, we never spill DB root pages, nor pages of active cursors, + * because we'll need these back again soon anyway. And in nested txns, + * we can't spill a page in a child txn if it was already spilled in a + * parent txn. That would alter the parent txns' data even though + * the child hasn't committed yet, and we'd have no way to undo it if + * the child aborted. + * + * @param[in] m0 cursor A cursor handle identifying the transaction and + * database for which we are checking space. + * @param[in] key For a put operation, the key being stored. + * @param[in] data For a put operation, the data being stored. + * @return 0 on success, non-zero on failure. + */ +static int +mdb_page_spill(MDB_cursor *m0, MDB_val *key, MDB_val *data) +{ + MDB_txn *txn = m0->mc_txn; + MDB_page *dp; + MDB_ID2L dl = txn->mt_u.dirty_list; + unsigned int i, j, need; + int rc; + + if (m0->mc_flags & C_SUB) + return MDB_SUCCESS; + + /* Estimate how much space this op will take */ + i = m0->mc_db->md_depth; + /* Named DBs also dirty the main DB */ + if (m0->mc_dbi > MAIN_DBI) + i += txn->mt_dbs[MAIN_DBI].md_depth; + /* For puts, roughly factor in the key+data size */ + if (key) + i += (LEAFSIZE(key, data) + txn->mt_env->me_psize) / txn->mt_env->me_psize; + i += i; /* double it for good measure */ + need = i; + + if (txn->mt_dirty_room > i) + return MDB_SUCCESS; + + if (!txn->mt_spill_pgs) { + txn->mt_spill_pgs = mdb_midl_alloc(MDB_IDL_UM_MAX); + if (!txn->mt_spill_pgs) + return ENOMEM; + } else { + /* purge deleted slots */ + MDB_IDL sl = txn->mt_spill_pgs; + unsigned int num = sl[0]; + j=0; + for (i=1; i<=num; i++) { + if (!(sl[i] & 1)) + sl[++j] = sl[i]; + } + sl[0] = j; + } + + /* Preserve pages which may soon be dirtied again */ + if ((rc = mdb_pages_xkeep(m0, P_DIRTY, 1)) != MDB_SUCCESS) + goto done; + + /* Less aggressive spill - we originally spilled the entire dirty list, + * with a few exceptions for cursor pages and DB root pages. But this + * turns out to be a lot of wasted effort because in a large txn many + * of those pages will need to be used again. So now we spill only 1/8th + * of the dirty pages. Testing revealed this to be a good tradeoff, + * better than 1/2, 1/4, or 1/10. + */ + if (need < MDB_IDL_UM_MAX / 8) + need = MDB_IDL_UM_MAX / 8; + + /* Save the page IDs of all the pages we're flushing */ + /* flush from the tail forward, this saves a lot of shifting later on. */ + for (i=dl[0].mid; i && need; i--) { + MDB_ID pn = dl[i].mid << 1; + dp = dl[i].mptr; + if (dp->mp_flags & (P_LOOSE|P_KEEP)) + continue; + /* Can't spill twice, make sure it's not already in a parent's + * spill list. + */ + if (txn->mt_parent) { + MDB_txn *tx2; + for (tx2 = txn->mt_parent; tx2; tx2 = tx2->mt_parent) { + if (tx2->mt_spill_pgs) { + j = mdb_midl_search(tx2->mt_spill_pgs, pn); + if (j <= tx2->mt_spill_pgs[0] && tx2->mt_spill_pgs[j] == pn) { + dp->mp_flags |= P_KEEP; + break; + } + } + } + if (tx2) + continue; + } + if ((rc = mdb_midl_append(&txn->mt_spill_pgs, pn))) + goto done; + need--; + } + mdb_midl_sort(txn->mt_spill_pgs); + + /* Flush the spilled part of dirty list */ + if ((rc = mdb_page_flush(txn, i)) != MDB_SUCCESS) + goto done; + + /* Reset any dirty pages we kept that page_flush didn't see */ + rc = mdb_pages_xkeep(m0, P_DIRTY|P_KEEP, i); + +done: + txn->mt_flags |= rc ? MDB_TXN_ERROR : MDB_TXN_SPILLS; + return rc; +} + +/** Find oldest txnid still referenced. Expects txn->mt_txnid > 0. */ +static txnid_t +mdb_find_oldest(MDB_txn *txn) +{ + int i; + txnid_t mr, oldest = txn->mt_txnid - 1; + if (txn->mt_env->me_txns) { + MDB_reader *r = txn->mt_env->me_txns->mti_readers; + for (i = txn->mt_env->me_txns->mti_numreaders; --i >= 0; ) { + if (r[i].mr_pid) { + mr = r[i].mr_txnid; + if (oldest > mr) + oldest = mr; + } + } + } + return oldest; +} + +/** Add a page to the txn's dirty list */ +static void +mdb_page_dirty(MDB_txn *txn, MDB_page *mp) +{ + MDB_ID2 mid; + int rc, (*insert)(MDB_ID2L, MDB_ID2 *); + + if (txn->mt_env->me_flags & MDB_WRITEMAP) { + insert = mdb_mid2l_append; + } else { + insert = mdb_mid2l_insert; + } + mid.mid = mp->mp_pgno; + mid.mptr = mp; + rc = insert(txn->mt_u.dirty_list, &mid); + mdb_tassert(txn, rc == 0); + txn->mt_dirty_room--; +} + +/** Allocate page numbers and memory for writing. Maintain me_pglast, + * me_pghead and mt_next_pgno. + * + * If there are free pages available from older transactions, they + * are re-used first. Otherwise allocate a new page at mt_next_pgno. + * Do not modify the freedB, just merge freeDB records into me_pghead[] + * and move me_pglast to say which records were consumed. Only this + * function can create me_pghead and move me_pglast/mt_next_pgno. + * @param[in] mc cursor A cursor handle identifying the transaction and + * database for which we are allocating. + * @param[in] num the number of pages to allocate. + * @param[out] mp Address of the allocated page(s). Requests for multiple pages + * will always be satisfied by a single contiguous chunk of memory. + * @return 0 on success, non-zero on failure. + */ +static int +mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp) +{ +#ifdef MDB_PARANOID /* Seems like we can ignore this now */ + /* Get at most <Max_retries> more freeDB records once me_pghead + * has enough pages. If not enough, use new pages from the map. + * If <Paranoid> and mc is updating the freeDB, only get new + * records if me_pghead is empty. Then the freelist cannot play + * catch-up with itself by growing while trying to save it. + */ + enum { Paranoid = 1, Max_retries = 500 }; +#else + enum { Paranoid = 0, Max_retries = INT_MAX /*infinite*/ }; +#endif + int rc, retry = num * 60; + MDB_txn *txn = mc->mc_txn; + MDB_env *env = txn->mt_env; + pgno_t pgno, *mop = env->me_pghead; + unsigned i, j, mop_len = mop ? mop[0] : 0, n2 = num-1; + MDB_page *np; + txnid_t oldest = 0, last; + MDB_cursor_op op; + MDB_cursor m2; + int found_old = 0; + + /* If there are any loose pages, just use them */ + if (num == 1 && txn->mt_loose_pgs) { + np = txn->mt_loose_pgs; + txn->mt_loose_pgs = NEXT_LOOSE_PAGE(np); + txn->mt_loose_count--; + DPRINTF(("db %d use loose page %"Z"u", DDBI(mc), + np->mp_pgno)); + *mp = np; + return MDB_SUCCESS; + } + + *mp = NULL; + + /* If our dirty list is already full, we can't do anything */ + if (txn->mt_dirty_room == 0) { + rc = MDB_TXN_FULL; + goto fail; + } + + for (op = MDB_FIRST;; op = MDB_NEXT) { + MDB_val key, data; + MDB_node *leaf; + pgno_t *idl; + + /* Seek a big enough contiguous page range. Prefer + * pages at the tail, just truncating the list. + */ + if (mop_len > n2) { + i = mop_len; + do { + pgno = mop[i]; + if (mop[i-n2] == pgno+n2) + goto search_done; + } while (--i > n2); + if (--retry < 0) + break; + } + + if (op == MDB_FIRST) { /* 1st iteration */ + /* Prepare to fetch more and coalesce */ + last = env->me_pglast; + oldest = env->me_pgoldest; + mdb_cursor_init(&m2, txn, FREE_DBI, NULL); + if (last) { + op = MDB_SET_RANGE; + key.mv_data = &last; /* will look up last+1 */ + key.mv_size = sizeof(last); + } + if (Paranoid && mc->mc_dbi == FREE_DBI) + retry = -1; + } + if (Paranoid && retry < 0 && mop_len) + break; + + last++; + /* Do not fetch more if the record will be too recent */ + if (oldest <= last) { + if (!found_old) { + oldest = mdb_find_oldest(txn); + env->me_pgoldest = oldest; + found_old = 1; + } + if (oldest <= last) + break; + } + rc = mdb_cursor_get(&m2, &key, NULL, op); + if (rc) { + if (rc == MDB_NOTFOUND) + break; + goto fail; + } + last = *(txnid_t*)key.mv_data; + if (oldest <= last) { + if (!found_old) { + oldest = mdb_find_oldest(txn); + env->me_pgoldest = oldest; + found_old = 1; + } + if (oldest <= last) + break; + } + np = m2.mc_pg[m2.mc_top]; + leaf = NODEPTR(np, m2.mc_ki[m2.mc_top]); + if ((rc = mdb_node_read(txn, leaf, &data)) != MDB_SUCCESS) + return rc; + + idl = (MDB_ID *) data.mv_data; + i = idl[0]; + if (!mop) { + if (!(env->me_pghead = mop = mdb_midl_alloc(i))) { + rc = ENOMEM; + goto fail; + } + } else { + if ((rc = mdb_midl_need(&env->me_pghead, i)) != 0) + goto fail; + mop = env->me_pghead; + } + env->me_pglast = last; +#if (MDB_DEBUG) > 1 + DPRINTF(("IDL read txn %"Z"u root %"Z"u num %u", + last, txn->mt_dbs[FREE_DBI].md_root, i)); + for (j = i; j; j--) + DPRINTF(("IDL %"Z"u", idl[j])); +#endif + /* Merge in descending sorted order */ + mdb_midl_xmerge(mop, idl); + mop_len = mop[0]; + } + + /* Use new pages from the map when nothing suitable in the freeDB */ + i = 0; + pgno = txn->mt_next_pgno; + if (pgno + num >= env->me_maxpg) { + DPUTS("DB size maxed out"); + rc = MDB_MAP_FULL; + goto fail; + } + +search_done: + if (env->me_flags & MDB_WRITEMAP) { + np = (MDB_page *)(env->me_map + env->me_psize * pgno); + } else { + if (!(np = mdb_page_malloc(txn, num))) { + rc = ENOMEM; + goto fail; + } + } + if (i) { + mop[0] = mop_len -= num; + /* Move any stragglers down */ + for (j = i-num; j < mop_len; ) + mop[++j] = mop[++i]; + } else { + txn->mt_next_pgno = pgno + num; + } + np->mp_pgno = pgno; + mdb_page_dirty(txn, np); + *mp = np; + + return MDB_SUCCESS; + +fail: + txn->mt_flags |= MDB_TXN_ERROR; + return rc; +} + +/** Copy the used portions of a non-overflow page. + * @param[in] dst page to copy into + * @param[in] src page to copy from + * @param[in] psize size of a page + */ +static void +mdb_page_copy(MDB_page *dst, MDB_page *src, unsigned int psize) +{ + enum { Align = sizeof(pgno_t) }; + indx_t upper = src->mp_upper, lower = src->mp_lower, unused = upper-lower; + + /* If page isn't full, just copy the used portion. Adjust + * alignment so memcpy may copy words instead of bytes. + */ + if ((unused &= -Align) && !IS_LEAF2(src)) { + upper = (upper + PAGEBASE) & -Align; + memcpy(dst, src, (lower + PAGEBASE + (Align-1)) & -Align); + memcpy((pgno_t *)((char *)dst+upper), (pgno_t *)((char *)src+upper), + psize - upper); + } else { + memcpy(dst, src, psize - unused); + } +} + +/** Pull a page off the txn's spill list, if present. + * If a page being referenced was spilled to disk in this txn, bring + * it back and make it dirty/writable again. + * @param[in] txn the transaction handle. + * @param[in] mp the page being referenced. It must not be dirty. + * @param[out] ret the writable page, if any. ret is unchanged if + * mp wasn't spilled. + */ +static int +mdb_page_unspill(MDB_txn *txn, MDB_page *mp, MDB_page **ret) +{ + MDB_env *env = txn->mt_env; + const MDB_txn *tx2; + unsigned x; + pgno_t pgno = mp->mp_pgno, pn = pgno << 1; + + for (tx2 = txn; tx2; tx2=tx2->mt_parent) { + if (!tx2->mt_spill_pgs) + continue; + x = mdb_midl_search(tx2->mt_spill_pgs, pn); + if (x <= tx2->mt_spill_pgs[0] && tx2->mt_spill_pgs[x] == pn) { + MDB_page *np; + int num; + if (txn->mt_dirty_room == 0) + return MDB_TXN_FULL; + if (IS_OVERFLOW(mp)) + num = mp->mp_pages; + else + num = 1; + if (env->me_flags & MDB_WRITEMAP) { + np = mp; + } else { + np = mdb_page_malloc(txn, num); + if (!np) + return ENOMEM; + if (num > 1) + memcpy(np, mp, num * env->me_psize); + else + mdb_page_copy(np, mp, env->me_psize); + } + if (tx2 == txn) { + /* If in current txn, this page is no longer spilled. + * If it happens to be the last page, truncate the spill list. + * Otherwise mark it as deleted by setting the LSB. + */ + if (x == txn->mt_spill_pgs[0]) + txn->mt_spill_pgs[0]--; + else + txn->mt_spill_pgs[x] |= 1; + } /* otherwise, if belonging to a parent txn, the + * page remains spilled until child commits + */ + + mdb_page_dirty(txn, np); + np->mp_flags |= P_DIRTY; + *ret = np; + break; + } + } + return MDB_SUCCESS; +} + +/** Touch a page: make it dirty and re-insert into tree with updated pgno. + * @param[in] mc cursor pointing to the page to be touched + * @return 0 on success, non-zero on failure. + */ +static int +mdb_page_touch(MDB_cursor *mc) +{ + MDB_page *mp = mc->mc_pg[mc->mc_top], *np; + MDB_txn *txn = mc->mc_txn; + MDB_cursor *m2, *m3; + pgno_t pgno; + int rc; + + if (!F_ISSET(mp->mp_flags, P_DIRTY)) { + if (txn->mt_flags & MDB_TXN_SPILLS) { + np = NULL; + rc = mdb_page_unspill(txn, mp, &np); + if (rc) + goto fail; + if (np) + goto done; + } + if ((rc = mdb_midl_need(&txn->mt_free_pgs, 1)) || + (rc = mdb_page_alloc(mc, 1, &np))) + goto fail; + pgno = np->mp_pgno; + DPRINTF(("touched db %d page %"Z"u -> %"Z"u", DDBI(mc), + mp->mp_pgno, pgno)); + mdb_cassert(mc, mp->mp_pgno != pgno); + mdb_midl_xappend(txn->mt_free_pgs, mp->mp_pgno); + /* Update the parent page, if any, to point to the new page */ + if (mc->mc_top) { + MDB_page *parent = mc->mc_pg[mc->mc_top-1]; + MDB_node *node = NODEPTR(parent, mc->mc_ki[mc->mc_top-1]); + SETPGNO(node, pgno); + } else { + mc->mc_db->md_root = pgno; + } + } else if (txn->mt_parent && !IS_SUBP(mp)) { + MDB_ID2 mid, *dl = txn->mt_u.dirty_list; + pgno = mp->mp_pgno; + /* If txn has a parent, make sure the page is in our + * dirty list. + */ + if (dl[0].mid) { + unsigned x = mdb_mid2l_search(dl, pgno); + if (x <= dl[0].mid && dl[x].mid == pgno) { + if (mp != dl[x].mptr) { /* bad cursor? */ + mc->mc_flags &= ~(C_INITIALIZED|C_EOF); + txn->mt_flags |= MDB_TXN_ERROR; + return MDB_CORRUPTED; + } + return 0; + } + } + mdb_cassert(mc, dl[0].mid < MDB_IDL_UM_MAX); + /* No - copy it */ + np = mdb_page_malloc(txn, 1); + if (!np) + return ENOMEM; + mid.mid = pgno; + mid.mptr = np; + rc = mdb_mid2l_insert(dl, &mid); + mdb_cassert(mc, rc == 0); + } else { + return 0; + } + + mdb_page_copy(np, mp, txn->mt_env->me_psize); + np->mp_pgno = pgno; + np->mp_flags |= P_DIRTY; + +done: + /* Adjust cursors pointing to mp */ + mc->mc_pg[mc->mc_top] = np; + m2 = txn->mt_cursors[mc->mc_dbi]; + if (mc->mc_flags & C_SUB) { + for (; m2; m2=m2->mc_next) { + m3 = &m2->mc_xcursor->mx_cursor; + if (m3->mc_snum < mc->mc_snum) continue; + if (m3->mc_pg[mc->mc_top] == mp) + m3->mc_pg[mc->mc_top] = np; + } + } else { + for (; m2; m2=m2->mc_next) { + if (m2->mc_snum < mc->mc_snum) continue; + if (m2->mc_pg[mc->mc_top] == mp) { + m2->mc_pg[mc->mc_top] = np; + if ((mc->mc_db->md_flags & MDB_DUPSORT) && + IS_LEAF(np) && + m2->mc_ki[mc->mc_top] == mc->mc_ki[mc->mc_top]) + { + MDB_node *leaf = NODEPTR(np, mc->mc_ki[mc->mc_top]); + if (!(leaf->mn_flags & F_SUBDATA)) + m2->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(leaf); + } + } + } + } +#ifdef VL32 + { + MDB_ID2L rl = mc->mc_txn->mt_rpages; + unsigned x = mdb_mid2l_search(rl, mp->mp_pgno); + if (x <= rl[0].mid && rl[x].mid == mp->mp_pgno) { + munmap(mp, mc->mc_txn->mt_env->me_psize); + while (x < rl[0].mid) { + rl[x] = rl[x+1]; + x++; + } + rl[0].mid--; + } + } +#endif + return 0; + +fail: + txn->mt_flags |= MDB_TXN_ERROR; + return rc; +} + +int +mdb_env_sync(MDB_env *env, int force) +{ + int rc = 0; + if (force || !F_ISSET(env->me_flags, MDB_NOSYNC)) { + if (env->me_flags & MDB_WRITEMAP) { + int flags = ((env->me_flags & MDB_MAPASYNC) && !force) + ? MS_ASYNC : MS_SYNC; + if (MDB_MSYNC(env->me_map, env->me_mapsize, flags)) + rc = ErrCode(); +#ifdef _WIN32 + else if (flags == MS_SYNC && MDB_FDATASYNC(env->me_fd)) + rc = ErrCode(); +#endif + } else { + if (MDB_FDATASYNC(env->me_fd)) + rc = ErrCode(); + } + } + return rc; +} + +/** Back up parent txn's cursors, then grab the originals for tracking */ +static int +mdb_cursor_shadow(MDB_txn *src, MDB_txn *dst) +{ + MDB_cursor *mc, *bk; + MDB_xcursor *mx; + size_t size; + int i; + + for (i = src->mt_numdbs; --i >= 0; ) { + if ((mc = src->mt_cursors[i]) != NULL) { + size = sizeof(MDB_cursor); + if (mc->mc_xcursor) + size += sizeof(MDB_xcursor); + for (; mc; mc = bk->mc_next) { + bk = malloc(size); + if (!bk) + return ENOMEM; + *bk = *mc; + mc->mc_backup = bk; + mc->mc_db = &dst->mt_dbs[i]; + /* Kill pointers into src - and dst to reduce abuse: The + * user may not use mc until dst ends. Otherwise we'd... + */ + mc->mc_txn = NULL; /* ...set this to dst */ + mc->mc_dbflag = NULL; /* ...and &dst->mt_dbflags[i] */ + if ((mx = mc->mc_xcursor) != NULL) { + *(MDB_xcursor *)(bk+1) = *mx; + mx->mx_cursor.mc_txn = NULL; /* ...and dst. */ + } + mc->mc_next = dst->mt_cursors[i]; + dst->mt_cursors[i] = mc; + } + } + } + return MDB_SUCCESS; +} + +/** Close this write txn's cursors, give parent txn's cursors back to parent. + * @param[in] txn the transaction handle. + * @param[in] merge true to keep changes to parent cursors, false to revert. + * @return 0 on success, non-zero on failure. + */ +static void +mdb_cursors_close(MDB_txn *txn, unsigned merge) +{ + MDB_cursor **cursors = txn->mt_cursors, *mc, *next, *bk; + MDB_xcursor *mx; + int i; + + for (i = txn->mt_numdbs; --i >= 0; ) { + for (mc = cursors[i]; mc; mc = next) { + next = mc->mc_next; + if ((bk = mc->mc_backup) != NULL) { + if (merge) { + /* Commit changes to parent txn */ + mc->mc_next = bk->mc_next; + mc->mc_backup = bk->mc_backup; + mc->mc_txn = bk->mc_txn; + mc->mc_db = bk->mc_db; + mc->mc_dbflag = bk->mc_dbflag; + if ((mx = mc->mc_xcursor) != NULL) + mx->mx_cursor.mc_txn = bk->mc_txn; + } else { + /* Abort nested txn */ + *mc = *bk; + if ((mx = mc->mc_xcursor) != NULL) + *mx = *(MDB_xcursor *)(bk+1); + } + mc = bk; + } + /* Only malloced cursors are permanently tracked. */ + free(mc); + } + cursors[i] = NULL; + } +} + +#if !(MDB_DEBUG) +#define mdb_txn_reset0(txn, act) mdb_txn_reset0(txn) +#endif +static void +mdb_txn_reset0(MDB_txn *txn, const char *act); + +#if !(MDB_PIDLOCK) /* Currently the same as defined(_WIN32) */ +enum Pidlock_op { + Pidset, Pidcheck +}; +#else +enum Pidlock_op { + Pidset = F_SETLK, Pidcheck = F_GETLK +}; +#endif + +/** Set or check a pid lock. Set returns 0 on success. + * Check returns 0 if the process is certainly dead, nonzero if it may + * be alive (the lock exists or an error happened so we do not know). + * + * On Windows Pidset is a no-op, we merely check for the existence + * of the process with the given pid. On POSIX we use a single byte + * lock on the lockfile, set at an offset equal to the pid. + */ +static int +mdb_reader_pid(MDB_env *env, enum Pidlock_op op, MDB_PID_T pid) +{ +#if !(MDB_PIDLOCK) /* Currently the same as defined(_WIN32) */ + int ret = 0; + HANDLE h; + if (op == Pidcheck) { + h = OpenProcess(env->me_pidquery, FALSE, pid); + /* No documented "no such process" code, but other program use this: */ + if (!h) + return ErrCode() != ERROR_INVALID_PARAMETER; + /* A process exists until all handles to it close. Has it exited? */ + ret = WaitForSingleObject(h, 0) != 0; + CloseHandle(h); + } + return ret; +#else + for (;;) { + int rc; + struct flock lock_info; + memset(&lock_info, 0, sizeof(lock_info)); + lock_info.l_type = F_WRLCK; + lock_info.l_whence = SEEK_SET; + lock_info.l_start = pid; + lock_info.l_len = 1; + if ((rc = fcntl(env->me_lfd, op, &lock_info)) == 0) { + if (op == F_GETLK && lock_info.l_type != F_UNLCK) + rc = -1; + } else if ((rc = ErrCode()) == EINTR) { + continue; + } + return rc; + } +#endif +} + +/** Common code for #mdb_txn_begin() and #mdb_txn_renew(). + * @param[in] txn the transaction handle to initialize + * @return 0 on success, non-zero on failure. + */ +static int +mdb_txn_renew0(MDB_txn *txn) +{ + MDB_env *env = txn->mt_env; + MDB_txninfo *ti = env->me_txns; + MDB_meta *meta; + unsigned int i, nr; + uint16_t x; + int rc, new_notls = 0; + + /* Setup db info */ + txn->mt_numdbs = env->me_numdbs; + txn->mt_dbxs = env->me_dbxs; /* mostly static anyway */ + + if (txn->mt_flags & MDB_TXN_RDONLY) { + if (!ti) { + meta = env->me_metas[ mdb_env_pick_meta(env) ]; + txn->mt_txnid = meta->mm_txnid; + txn->mt_u.reader = NULL; + } else { + MDB_reader *r = (env->me_flags & MDB_NOTLS) ? txn->mt_u.reader : + pthread_getspecific(env->me_txkey); + if (r) { + if (r->mr_pid != env->me_pid || r->mr_txnid != (txnid_t)-1) + return MDB_BAD_RSLOT; + } else { + MDB_PID_T pid = env->me_pid; + MDB_THR_T tid = pthread_self(); + + if (!env->me_live_reader) { + rc = mdb_reader_pid(env, Pidset, pid); + if (rc) + return rc; + env->me_live_reader = 1; + } + + LOCK_MUTEX_R(env); + nr = ti->mti_numreaders; + for (i=0; i<nr; i++) + if (ti->mti_readers[i].mr_pid == 0) + break; + if (i == env->me_maxreaders) { + UNLOCK_MUTEX_R(env); + return MDB_READERS_FULL; + } + ti->mti_readers[i].mr_pid = pid; + ti->mti_readers[i].mr_tid = tid; + if (i == nr) + ti->mti_numreaders = ++nr; + /* Save numreaders for un-mutexed mdb_env_close() */ + env->me_numreaders = nr; + UNLOCK_MUTEX_R(env); + + r = &ti->mti_readers[i]; + new_notls = (env->me_flags & MDB_NOTLS); + if (!new_notls && (rc=pthread_setspecific(env->me_txkey, r))) { + r->mr_pid = 0; + return rc; + } + } + txn->mt_txnid = r->mr_txnid = ti->mti_txnid; + txn->mt_u.reader = r; + meta = env->me_metas[txn->mt_txnid & 1]; + } + } else { + if (ti) { + LOCK_MUTEX_W(env); + + txn->mt_txnid = ti->mti_txnid; + meta = env->me_metas[txn->mt_txnid & 1]; + } else { + meta = env->me_metas[ mdb_env_pick_meta(env) ]; + txn->mt_txnid = meta->mm_txnid; + } + txn->mt_txnid++; +#if MDB_DEBUG + if (txn->mt_txnid == mdb_debug_start) + mdb_debug = 1; +#endif + txn->mt_dirty_room = MDB_IDL_UM_MAX; + txn->mt_u.dirty_list = env->me_dirty_list; + txn->mt_u.dirty_list[0].mid = 0; + txn->mt_free_pgs = env->me_free_pgs; + txn->mt_free_pgs[0] = 0; + txn->mt_spill_pgs = NULL; + env->me_txn = txn; + memcpy(txn->mt_dbiseqs, env->me_dbiseqs, env->me_maxdbs * sizeof(unsigned int)); + } + + /* Copy the DB info and flags */ + memcpy(txn->mt_dbs, meta->mm_dbs, 2 * sizeof(MDB_db)); + + /* Moved to here to avoid a data race in read TXNs */ + txn->mt_next_pgno = meta->mm_last_pg+1; + + for (i=2; i<txn->mt_numdbs; i++) { + x = env->me_dbflags[i]; + txn->mt_dbs[i].md_flags = x & PERSISTENT_FLAGS; + txn->mt_dbflags[i] = (x & MDB_VALID) ? DB_VALID|DB_STALE : 0; + } + txn->mt_dbflags[0] = txn->mt_dbflags[1] = DB_VALID; + + if (env->me_maxpg < txn->mt_next_pgno) { + mdb_txn_reset0(txn, "renew0-mapfail"); + if (new_notls) { + txn->mt_u.reader->mr_pid = 0; + txn->mt_u.reader = NULL; + } + return MDB_MAP_RESIZED; + } + + return MDB_SUCCESS; +} + +int +mdb_txn_renew(MDB_txn *txn) +{ + int rc; + + if (!txn || txn->mt_dbxs) /* A reset txn has mt_dbxs==NULL */ + return EINVAL; + + if (txn->mt_env->me_flags & MDB_FATAL_ERROR) { + DPUTS("environment had fatal error, must shutdown!"); + return MDB_PANIC; + } + + rc = mdb_txn_renew0(txn); + if (rc == MDB_SUCCESS) { + DPRINTF(("renew txn %"Z"u%c %p on mdbenv %p, root page %"Z"u", + txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w', + (void *)txn, (void *)txn->mt_env, txn->mt_dbs[MAIN_DBI].md_root)); + } + return rc; +} + +int +mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret) +{ + MDB_txn *txn; + MDB_ntxn *ntxn; + int rc, size, tsize = sizeof(MDB_txn); + + if (env->me_flags & MDB_FATAL_ERROR) { + DPUTS("environment had fatal error, must shutdown!"); + return MDB_PANIC; + } + if ((env->me_flags & MDB_RDONLY) && !(flags & MDB_RDONLY)) + return EACCES; + if (parent) { + /* Nested transactions: Max 1 child, write txns only, no writemap */ + if (parent->mt_child || + (flags & MDB_RDONLY) || + (parent->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_ERROR)) || + (env->me_flags & MDB_WRITEMAP)) + { + return (parent->mt_flags & MDB_TXN_RDONLY) ? EINVAL : MDB_BAD_TXN; + } + tsize = sizeof(MDB_ntxn); + } + size = tsize + env->me_maxdbs * (sizeof(MDB_db)+1); + if (!(flags & MDB_RDONLY)) { + if (!parent) { + txn = env->me_txn0; + txn->mt_flags = 0; + goto ok; + } + size += env->me_maxdbs * sizeof(MDB_cursor *); + /* child txns use parent's dbiseqs */ + if (!parent) + size += env->me_maxdbs * sizeof(unsigned int); + } + + if ((txn = calloc(1, size)) == NULL) { + DPRINTF(("calloc: %s", strerror(errno))); + return ENOMEM; + } +#ifdef VL32 + if (!parent) { + txn->mt_rpages = calloc(MDB_IDL_UM_SIZE, sizeof(MDB_ID2)); + if (!txn->mt_rpages) { + free(txn); + return ENOMEM; + } + } +#endif + txn->mt_dbs = (MDB_db *) ((char *)txn + tsize); + if (flags & MDB_RDONLY) { + txn->mt_flags |= MDB_TXN_RDONLY; + txn->mt_dbflags = (unsigned char *)(txn->mt_dbs + env->me_maxdbs); + txn->mt_dbiseqs = env->me_dbiseqs; + } else { + txn->mt_cursors = (MDB_cursor **)(txn->mt_dbs + env->me_maxdbs); + if (parent) { + txn->mt_dbiseqs = parent->mt_dbiseqs; + txn->mt_dbflags = (unsigned char *)(txn->mt_cursors + env->me_maxdbs); + } else { + txn->mt_dbiseqs = (unsigned int *)(txn->mt_cursors + env->me_maxdbs); + txn->mt_dbflags = (unsigned char *)(txn->mt_dbiseqs + env->me_maxdbs); + } + } + txn->mt_env = env; + +ok: + if (parent) { + unsigned int i; + txn->mt_u.dirty_list = malloc(sizeof(MDB_ID2)*MDB_IDL_UM_SIZE); + if (!txn->mt_u.dirty_list || + !(txn->mt_free_pgs = mdb_midl_alloc(MDB_IDL_UM_MAX))) + { + free(txn->mt_u.dirty_list); + free(txn); + return ENOMEM; + } + txn->mt_txnid = parent->mt_txnid; + txn->mt_dirty_room = parent->mt_dirty_room; + txn->mt_u.dirty_list[0].mid = 0; + txn->mt_spill_pgs = NULL; + txn->mt_next_pgno = parent->mt_next_pgno; + parent->mt_child = txn; + txn->mt_parent = parent; + txn->mt_numdbs = parent->mt_numdbs; + txn->mt_flags = parent->mt_flags; + txn->mt_dbxs = parent->mt_dbxs; +#ifdef VL32 + txn->mt_rpages = parent->mt_rpages; +#endif + memcpy(txn->mt_dbs, parent->mt_dbs, txn->mt_numdbs * sizeof(MDB_db)); + /* Copy parent's mt_dbflags, but clear DB_NEW */ + for (i=0; i<txn->mt_numdbs; i++) + txn->mt_dbflags[i] = parent->mt_dbflags[i] & ~DB_NEW; + rc = 0; + ntxn = (MDB_ntxn *)txn; + ntxn->mnt_pgstate = env->me_pgstate; /* save parent me_pghead & co */ + if (env->me_pghead) { + size = MDB_IDL_SIZEOF(env->me_pghead); + env->me_pghead = mdb_midl_alloc(env->me_pghead[0]); + if (env->me_pghead) + memcpy(env->me_pghead, ntxn->mnt_pgstate.mf_pghead, size); + else + rc = ENOMEM; + } + if (!rc) + rc = mdb_cursor_shadow(parent, txn); + if (rc) + mdb_txn_reset0(txn, "beginchild-fail"); + } else { + rc = mdb_txn_renew0(txn); + } + if (rc) { + if (txn != env->me_txn0) + free(txn); + } else { + *ret = txn; + DPRINTF(("begin txn %"Z"u%c %p on mdbenv %p, root page %"Z"u", + txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w', + (void *) txn, (void *) env, txn->mt_dbs[MAIN_DBI].md_root)); + } + + return rc; +} + +MDB_env * +mdb_txn_env(MDB_txn *txn) +{ + if(!txn) return NULL; + return txn->mt_env; +} + +/** Export or close DBI handles opened in this txn. */ +static void +mdb_dbis_update(MDB_txn *txn, int keep) +{ + int i; + MDB_dbi n = txn->mt_numdbs; + MDB_env *env = txn->mt_env; + unsigned char *tdbflags = txn->mt_dbflags; + + for (i = n; --i >= 2;) { + if (tdbflags[i] & DB_NEW) { + if (keep) { + env->me_dbflags[i] = txn->mt_dbs[i].md_flags | MDB_VALID; + } else { + char *ptr = env->me_dbxs[i].md_name.mv_data; + if (ptr) { + env->me_dbxs[i].md_name.mv_data = NULL; + env->me_dbxs[i].md_name.mv_size = 0; + env->me_dbflags[i] = 0; + env->me_dbiseqs[i]++; + free(ptr); + } + } + } + } + if (keep && env->me_numdbs < n) + env->me_numdbs = n; +} + +/** Common code for #mdb_txn_reset() and #mdb_txn_abort(). + * May be called twice for readonly txns: First reset it, then abort. + * @param[in] txn the transaction handle to reset + * @param[in] act why the transaction is being reset + */ +static void +mdb_txn_reset0(MDB_txn *txn, const char *act) +{ + MDB_env *env = txn->mt_env; + + /* Close any DBI handles opened in this txn */ + mdb_dbis_update(txn, 0); + + DPRINTF(("%s txn %"Z"u%c %p on mdbenv %p, root page %"Z"u", + act, txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w', + (void *) txn, (void *)env, txn->mt_dbs[MAIN_DBI].md_root)); + + if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) { + if (txn->mt_u.reader) { + txn->mt_u.reader->mr_txnid = (txnid_t)-1; + if (!(env->me_flags & MDB_NOTLS)) + txn->mt_u.reader = NULL; /* txn does not own reader */ + } + txn->mt_numdbs = 0; /* close nothing if called again */ + txn->mt_dbxs = NULL; /* mark txn as reset */ + } else { + mdb_cursors_close(txn, 0); + + if (!(env->me_flags & MDB_WRITEMAP)) { + mdb_dlist_free(txn); + } + mdb_midl_free(env->me_pghead); + + if (txn->mt_parent) { + txn->mt_parent->mt_child = NULL; + env->me_pgstate = ((MDB_ntxn *)txn)->mnt_pgstate; + mdb_midl_free(txn->mt_free_pgs); + mdb_midl_free(txn->mt_spill_pgs); + free(txn->mt_u.dirty_list); + return; + } + + if (mdb_midl_shrink(&txn->mt_free_pgs)) + env->me_free_pgs = txn->mt_free_pgs; + env->me_pghead = NULL; + env->me_pglast = 0; + + env->me_txn = NULL; + /* The writer mutex was locked in mdb_txn_begin. */ + if (env->me_txns) + UNLOCK_MUTEX_W(env); + } +#ifdef VL32 + { + unsigned i, n = txn->mt_rpages[0].mid; + for (i = 1; i <= n; i++) { +#ifdef _WIN32 + UnmapViewOfFile(txn->mt_rpages[i].mptr);) +#else + MDB_page *mp = txn->mt_rpages[i].mptr; + int size = txn->mt_env->me_psize; + if (IS_OVERFLOW(mp)) size *= mp->mp_pages; + munmap(mp, size); +#endif + } + } + txn->mt_rpages[0].mid = 0; +#endif +} + +void +mdb_txn_reset(MDB_txn *txn) +{ + if (txn == NULL) + return; + + /* This call is only valid for read-only txns */ + if (!(txn->mt_flags & MDB_TXN_RDONLY)) + return; + + mdb_txn_reset0(txn, "reset"); +} + +void +mdb_txn_abort(MDB_txn *txn) +{ + if (txn == NULL) + return; + + if (txn->mt_child) + mdb_txn_abort(txn->mt_child); + + mdb_txn_reset0(txn, "abort"); + /* Free reader slot tied to this txn (if MDB_NOTLS && writable FS) */ + if ((txn->mt_flags & MDB_TXN_RDONLY) && txn->mt_u.reader) + txn->mt_u.reader->mr_pid = 0; + + if (txn != txn->mt_env->me_txn0) { +#ifdef VL32 + free(txn->mt_rpages); +#endif + free(txn); + } +} + +/** Save the freelist as of this transaction to the freeDB. + * This changes the freelist. Keep trying until it stabilizes. + */ +static int +mdb_freelist_save(MDB_txn *txn) +{ + /* env->me_pghead[] can grow and shrink during this call. + * env->me_pglast and txn->mt_free_pgs[] can only grow. + * Page numbers cannot disappear from txn->mt_free_pgs[]. + */ + MDB_cursor mc; + MDB_env *env = txn->mt_env; + int rc, maxfree_1pg = env->me_maxfree_1pg, more = 1; + txnid_t pglast = 0, head_id = 0; + pgno_t freecnt = 0, *free_pgs, *mop; + ssize_t head_room = 0, total_room = 0, mop_len, clean_limit; + + mdb_cursor_init(&mc, txn, FREE_DBI, NULL); + + if (env->me_pghead) { + /* Make sure first page of freeDB is touched and on freelist */ + rc = mdb_page_search(&mc, NULL, MDB_PS_FIRST|MDB_PS_MODIFY); + if (rc && rc != MDB_NOTFOUND) + return rc; + } + + if (!env->me_pghead && txn->mt_loose_pgs) { + /* Put loose page numbers in mt_free_pgs, since + * we may be unable to return them to me_pghead. + */ + MDB_page *mp = txn->mt_loose_pgs; + if ((rc = mdb_midl_need(&txn->mt_free_pgs, txn->mt_loose_count)) != 0) + return rc; + for (; mp; mp = NEXT_LOOSE_PAGE(mp)) + mdb_midl_xappend(txn->mt_free_pgs, mp->mp_pgno); + txn->mt_loose_pgs = NULL; + txn->mt_loose_count = 0; + } + + /* MDB_RESERVE cancels meminit in ovpage malloc (when no WRITEMAP) */ + clean_limit = (env->me_flags & (MDB_NOMEMINIT|MDB_WRITEMAP)) + ? SSIZE_MAX : maxfree_1pg; + + for (;;) { + /* Come back here after each Put() in case freelist changed */ + MDB_val key, data; + pgno_t *pgs; + ssize_t j; + + /* If using records from freeDB which we have not yet + * deleted, delete them and any we reserved for me_pghead. + */ + while (pglast < env->me_pglast) { + rc = mdb_cursor_first(&mc, &key, NULL); + if (rc) + return rc; + pglast = head_id = *(txnid_t *)key.mv_data; + total_room = head_room = 0; + mdb_tassert(txn, pglast <= env->me_pglast); + rc = mdb_cursor_del(&mc, 0); + if (rc) + return rc; + } + + /* Save the IDL of pages freed by this txn, to a single record */ + if (freecnt < txn->mt_free_pgs[0]) { + if (!freecnt) { + /* Make sure last page of freeDB is touched and on freelist */ + rc = mdb_page_search(&mc, NULL, MDB_PS_LAST|MDB_PS_MODIFY); + if (rc && rc != MDB_NOTFOUND) + return rc; + } + free_pgs = txn->mt_free_pgs; + /* Write to last page of freeDB */ + key.mv_size = sizeof(txn->mt_txnid); + key.mv_data = &txn->mt_txnid; + do { + freecnt = free_pgs[0]; + data.mv_size = MDB_IDL_SIZEOF(free_pgs); + rc = mdb_cursor_put(&mc, &key, &data, MDB_RESERVE); + if (rc) + return rc; + /* Retry if mt_free_pgs[] grew during the Put() */ + free_pgs = txn->mt_free_pgs; + } while (freecnt < free_pgs[0]); + mdb_midl_sort(free_pgs); + memcpy(data.mv_data, free_pgs, data.mv_size); +#if (MDB_DEBUG) > 1 + { + unsigned int i = free_pgs[0]; + DPRINTF(("IDL write txn %"Z"u root %"Z"u num %u", + txn->mt_txnid, txn->mt_dbs[FREE_DBI].md_root, i)); + for (; i; i--) + DPRINTF(("IDL %"Z"u", free_pgs[i])); + } +#endif + continue; + } + + mop = env->me_pghead; + mop_len = (mop ? mop[0] : 0) + txn->mt_loose_count; + + /* Reserve records for me_pghead[]. Split it if multi-page, + * to avoid searching freeDB for a page range. Use keys in + * range [1,me_pglast]: Smaller than txnid of oldest reader. + */ + if (total_room >= mop_len) { + if (total_room == mop_len || --more < 0) + break; + } else if (head_room >= maxfree_1pg && head_id > 1) { + /* Keep current record (overflow page), add a new one */ + head_id--; + head_room = 0; + } + /* (Re)write {key = head_id, IDL length = head_room} */ + total_room -= head_room; + head_room = mop_len - total_room; + if (head_room > maxfree_1pg && head_id > 1) { + /* Overflow multi-page for part of me_pghead */ + head_room /= head_id; /* amortize page sizes */ + head_room += maxfree_1pg - head_room % (maxfree_1pg + 1); + } else if (head_room < 0) { + /* Rare case, not bothering to delete this record */ + head_room = 0; + } + key.mv_size = sizeof(head_id); + key.mv_data = &head_id; + data.mv_size = (head_room + 1) * sizeof(pgno_t); + rc = mdb_cursor_put(&mc, &key, &data, MDB_RESERVE); + if (rc) + return rc; + /* IDL is initially empty, zero out at least the length */ + pgs = (pgno_t *)data.mv_data; + j = head_room > clean_limit ? head_room : 0; + do { + pgs[j] = 0; + } while (--j >= 0); + total_room += head_room; + } + + /* Return loose page numbers to me_pghead, though usually none are + * left at this point. The pages themselves remain in dirty_list. + */ + if (txn->mt_loose_pgs) { + MDB_page *mp = txn->mt_loose_pgs; + unsigned count = txn->mt_loose_count; + MDB_IDL loose; + /* Room for loose pages + temp IDL with same */ + if ((rc = mdb_midl_need(&env->me_pghead, 2*count+1)) != 0) + return rc; + mop = env->me_pghead; + loose = mop + MDB_IDL_ALLOCLEN(mop) - count; + for (count = 0; mp; mp = NEXT_LOOSE_PAGE(mp)) + loose[ ++count ] = mp->mp_pgno; + loose[0] = count; + mdb_midl_sort(loose); + mdb_midl_xmerge(mop, loose); + txn->mt_loose_pgs = NULL; + txn->mt_loose_count = 0; + mop_len = mop[0]; + } + + /* Fill in the reserved me_pghead records */ + rc = MDB_SUCCESS; + if (mop_len) { + MDB_val key, data; + + mop += mop_len; + rc = mdb_cursor_first(&mc, &key, &data); + for (; !rc; rc = mdb_cursor_next(&mc, &key, &data, MDB_NEXT)) { + txnid_t id = *(txnid_t *)key.mv_data; + ssize_t len = (ssize_t)(data.mv_size / sizeof(MDB_ID)) - 1; + MDB_ID save; + + mdb_tassert(txn, len >= 0 && id <= env->me_pglast); + key.mv_data = &id; + if (len > mop_len) { + len = mop_len; + data.mv_size = (len + 1) * sizeof(MDB_ID); + } + data.mv_data = mop -= len; + save = mop[0]; + mop[0] = len; + rc = mdb_cursor_put(&mc, &key, &data, MDB_CURRENT); + mop[0] = save; + if (rc || !(mop_len -= len)) + break; + } + } + return rc; +} + +/** Flush (some) dirty pages to the map, after clearing their dirty flag. + * @param[in] txn the transaction that's being committed + * @param[in] keep number of initial pages in dirty_list to keep dirty. + * @return 0 on success, non-zero on failure. + */ +static int +mdb_page_flush(MDB_txn *txn, int keep) +{ + MDB_env *env = txn->mt_env; + MDB_ID2L dl = txn->mt_u.dirty_list; + unsigned psize = env->me_psize, j; + int i, pagecount = dl[0].mid, rc; + size_t size = 0, pos = 0; + pgno_t pgno = 0; + MDB_page *dp = NULL; +#ifdef _WIN32 + OVERLAPPED ov; +#else + struct iovec iov[MDB_COMMIT_PAGES]; + ssize_t wpos = 0, wsize = 0, wres; + size_t next_pos = 1; /* impossible pos, so pos != next_pos */ + int n = 0; +#endif + + j = i = keep; + + if (env->me_flags & MDB_WRITEMAP) { + /* Clear dirty flags */ + while (++i <= pagecount) { + dp = dl[i].mptr; + /* Don't flush this page yet */ + if (dp->mp_flags & (P_LOOSE|P_KEEP)) { + dp->mp_flags &= ~P_KEEP; + dl[++j] = dl[i]; + continue; + } + dp->mp_flags &= ~P_DIRTY; + } + goto done; + } + + /* Write the pages */ + for (;;) { + if (++i <= pagecount) { + dp = dl[i].mptr; + /* Don't flush this page yet */ + if (dp->mp_flags & (P_LOOSE|P_KEEP)) { + dp->mp_flags &= ~P_KEEP; + dl[i].mid = 0; + continue; + } + pgno = dl[i].mid; + /* clear dirty flag */ + dp->mp_flags &= ~P_DIRTY; + pos = pgno * psize; + size = psize; + if (IS_OVERFLOW(dp)) size *= dp->mp_pages; + } +#ifdef _WIN32 + else break; + + /* Windows actually supports scatter/gather I/O, but only on + * unbuffered file handles. Since we're relying on the OS page + * cache for all our data, that's self-defeating. So we just + * write pages one at a time. We use the ov structure to set + * the write offset, to at least save the overhead of a Seek + * system call. + */ + DPRINTF(("committing page %"Z"u", pgno)); + memset(&ov, 0, sizeof(ov)); + ov.Offset = pos & 0xffffffff; + ov.OffsetHigh = pos >> 16 >> 16; + if (!WriteFile(env->me_fd, dp, size, NULL, &ov)) { + rc = ErrCode(); + DPRINTF(("WriteFile: %d", rc)); + return rc; + } +#else + /* Write up to MDB_COMMIT_PAGES dirty pages at a time. */ + if (pos!=next_pos || n==MDB_COMMIT_PAGES || wsize+size>MAX_WRITE) { + if (n) { + /* Write previous page(s) */ +#ifdef MDB_USE_PWRITEV + wres = pwritev(env->me_fd, iov, n, wpos); +#else + if (n == 1) { + wres = pwrite(env->me_fd, iov[0].iov_base, wsize, wpos); + } else { + if (lseek(env->me_fd, wpos, SEEK_SET) == -1) { + rc = ErrCode(); + DPRINTF(("lseek: %s", strerror(rc))); + return rc; + } + wres = writev(env->me_fd, iov, n); + } +#endif + if (wres != wsize) { + if (wres < 0) { + rc = ErrCode(); + DPRINTF(("Write error: %s", strerror(rc))); + } else { + rc = EIO; /* TODO: Use which error code? */ + DPUTS("short write, filesystem full?"); + } + return rc; + } + n = 0; + } + if (i > pagecount) + break; + wpos = pos; + wsize = 0; + } + DPRINTF(("committing page %"Z"u", pgno)); + next_pos = pos + size; + iov[n].iov_len = size; + iov[n].iov_base = (char *)dp; + wsize += size; + n++; +#endif /* _WIN32 */ + } + + /* MIPS has cache coherency issues, this is a no-op everywhere else + * Note: for any size >= on-chip cache size, entire on-chip cache is + * flushed. + */ + CACHEFLUSH(env->me_map, txn->mt_next_pgno * env->me_psize, DCACHE); + + for (i = keep; ++i <= pagecount; ) { + dp = dl[i].mptr; + /* This is a page we skipped above */ + if (!dl[i].mid) { + dl[++j] = dl[i]; + dl[j].mid = dp->mp_pgno; + continue; + } + mdb_dpage_free(env, dp); + } + +done: + i--; + txn->mt_dirty_room += i - j; + dl[0].mid = j; + return MDB_SUCCESS; +} + +int +mdb_txn_commit(MDB_txn *txn) +{ + int rc; + unsigned int i; + MDB_env *env; + + if (txn == NULL || txn->mt_env == NULL) + return EINVAL; + + if (txn->mt_child) { + rc = mdb_txn_commit(txn->mt_child); + txn->mt_child = NULL; + if (rc) + goto fail; + } + + env = txn->mt_env; + + if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) { + mdb_dbis_update(txn, 1); + txn->mt_numdbs = 2; /* so txn_abort() doesn't close any new handles */ + mdb_txn_abort(txn); + return MDB_SUCCESS; + } + + if (F_ISSET(txn->mt_flags, MDB_TXN_ERROR)) { + DPUTS("error flag is set, can't commit"); + if (txn->mt_parent) + txn->mt_parent->mt_flags |= MDB_TXN_ERROR; + rc = MDB_BAD_TXN; + goto fail; + } + + if (txn->mt_parent) { + MDB_txn *parent = txn->mt_parent; + MDB_page **lp; + MDB_ID2L dst, src; + MDB_IDL pspill; + unsigned x, y, len, ps_len; + + /* Append our free list to parent's */ + rc = mdb_midl_append_list(&parent->mt_free_pgs, txn->mt_free_pgs); + if (rc) + goto fail; + mdb_midl_free(txn->mt_free_pgs); + /* Failures after this must either undo the changes + * to the parent or set MDB_TXN_ERROR in the parent. + */ + + parent->mt_next_pgno = txn->mt_next_pgno; + parent->mt_flags = txn->mt_flags; + + /* Merge our cursors into parent's and close them */ + mdb_cursors_close(txn, 1); + + /* Update parent's DB table. */ + memcpy(parent->mt_dbs, txn->mt_dbs, txn->mt_numdbs * sizeof(MDB_db)); + parent->mt_numdbs = txn->mt_numdbs; + parent->mt_dbflags[0] = txn->mt_dbflags[0]; + parent->mt_dbflags[1] = txn->mt_dbflags[1]; + for (i=2; i<txn->mt_numdbs; i++) { + /* preserve parent's DB_NEW status */ + x = parent->mt_dbflags[i] & DB_NEW; + parent->mt_dbflags[i] = txn->mt_dbflags[i] | x; + } + + dst = parent->mt_u.dirty_list; + src = txn->mt_u.dirty_list; + /* Remove anything in our dirty list from parent's spill list */ + if ((pspill = parent->mt_spill_pgs) && (ps_len = pspill[0])) { + x = y = ps_len; + pspill[0] = (pgno_t)-1; + /* Mark our dirty pages as deleted in parent spill list */ + for (i=0, len=src[0].mid; ++i <= len; ) { + MDB_ID pn = src[i].mid << 1; + while (pn > pspill[x]) + x--; + if (pn == pspill[x]) { + pspill[x] = 1; + y = --x; + } + } + /* Squash deleted pagenums if we deleted any */ + for (x=y; ++x <= ps_len; ) + if (!(pspill[x] & 1)) + pspill[++y] = pspill[x]; + pspill[0] = y; + } + + /* Find len = length of merging our dirty list with parent's */ + x = dst[0].mid; + dst[0].mid = 0; /* simplify loops */ + if (parent->mt_parent) { + len = x + src[0].mid; + y = mdb_mid2l_search(src, dst[x].mid + 1) - 1; + for (i = x; y && i; y--) { + pgno_t yp = src[y].mid; + while (yp < dst[i].mid) + i--; + if (yp == dst[i].mid) { + i--; + len--; + } + } + } else { /* Simplify the above for single-ancestor case */ + len = MDB_IDL_UM_MAX - txn->mt_dirty_room; + } + /* Merge our dirty list with parent's */ + y = src[0].mid; + for (i = len; y; dst[i--] = src[y--]) { + pgno_t yp = src[y].mid; + while (yp < dst[x].mid) + dst[i--] = dst[x--]; + if (yp == dst[x].mid) + free(dst[x--].mptr); + } + mdb_tassert(txn, i == x); + dst[0].mid = len; + free(txn->mt_u.dirty_list); + parent->mt_dirty_room = txn->mt_dirty_room; + if (txn->mt_spill_pgs) { + if (parent->mt_spill_pgs) { + /* TODO: Prevent failure here, so parent does not fail */ + rc = mdb_midl_append_list(&parent->mt_spill_pgs, txn->mt_spill_pgs); + if (rc) + parent->mt_flags |= MDB_TXN_ERROR; + mdb_midl_free(txn->mt_spill_pgs); + mdb_midl_sort(parent->mt_spill_pgs); + } else { + parent->mt_spill_pgs = txn->mt_spill_pgs; + } + } + + /* Append our loose page list to parent's */ + for (lp = &parent->mt_loose_pgs; *lp; lp = &NEXT_LOOSE_PAGE(lp)) + ; + *lp = txn->mt_loose_pgs; + parent->mt_loose_count += txn->mt_loose_count; + + parent->mt_child = NULL; + mdb_midl_free(((MDB_ntxn *)txn)->mnt_pgstate.mf_pghead); + free(txn); + return rc; + } + + if (txn != env->me_txn) { + DPUTS("attempt to commit unknown transaction"); + rc = EINVAL; + goto fail; + } + + mdb_cursors_close(txn, 0); + + if (!txn->mt_u.dirty_list[0].mid && + !(txn->mt_flags & (MDB_TXN_DIRTY|MDB_TXN_SPILLS))) + goto done; + + DPRINTF(("committing txn %"Z"u %p on mdbenv %p, root page %"Z"u", + txn->mt_txnid, (void*)txn, (void*)env, txn->mt_dbs[MAIN_DBI].md_root)); + + /* Update DB root pointers */ + if (txn->mt_numdbs > 2) { + MDB_cursor mc; + MDB_dbi i; + MDB_val data; + data.mv_size = sizeof(MDB_db); + + mdb_cursor_init(&mc, txn, MAIN_DBI, NULL); + for (i = 2; i < txn->mt_numdbs; i++) { + if (txn->mt_dbflags[i] & DB_DIRTY) { + if (TXN_DBI_CHANGED(txn, i)) { + rc = MDB_BAD_DBI; + goto fail; + } + data.mv_data = &txn->mt_dbs[i]; + rc = mdb_cursor_put(&mc, &txn->mt_dbxs[i].md_name, &data, 0); + if (rc) + goto fail; + } + } + } + + rc = mdb_freelist_save(txn); + if (rc) + goto fail; + + mdb_midl_free(env->me_pghead); + env->me_pghead = NULL; + if (mdb_midl_shrink(&txn->mt_free_pgs)) + env->me_free_pgs = txn->mt_free_pgs; + +#if (MDB_DEBUG) > 2 + mdb_audit(txn); +#endif + + if ((rc = mdb_page_flush(txn, 0)) || + (rc = mdb_env_sync(env, 0)) || + (rc = mdb_env_write_meta(txn))) + goto fail; + + /* Free P_LOOSE pages left behind in dirty_list */ + if (!(env->me_flags & MDB_WRITEMAP)) + mdb_dlist_free(txn); + +done: + env->me_pglast = 0; + env->me_txn = NULL; + mdb_dbis_update(txn, 1); + + if (env->me_txns) + UNLOCK_MUTEX_W(env); + if (txn != env->me_txn0) + free(txn); + + return MDB_SUCCESS; + +fail: + mdb_txn_abort(txn); + return rc; +} + +/** Read the environment parameters of a DB environment before + * mapping it into memory. + * @param[in] env the environment handle + * @param[out] meta address of where to store the meta information + * @return 0 on success, non-zero on failure. + */ +static int ESECT +mdb_env_read_header(MDB_env *env, MDB_meta *meta) +{ + MDB_metabuf pbuf; + MDB_page *p; + MDB_meta *m; + int i, rc, off; + enum { Size = sizeof(pbuf) }; + + /* We don't know the page size yet, so use a minimum value. + * Read both meta pages so we can use the latest one. + */ + + for (i=off=0; i<2; i++, off = meta->mm_psize) { +#ifdef _WIN32 + DWORD len; + OVERLAPPED ov; + memset(&ov, 0, sizeof(ov)); + ov.Offset = off; + rc = ReadFile(env->me_fd, &pbuf, Size, &len, &ov) ? (int)len : -1; + if (rc == -1 && ErrCode() == ERROR_HANDLE_EOF) + rc = 0; +#else + rc = pread(env->me_fd, &pbuf, Size, off); +#endif + if (rc != Size) { + if (rc == 0 && off == 0) + return ENOENT; + rc = rc < 0 ? (int) ErrCode() : MDB_INVALID; + DPRINTF(("read: %s", mdb_strerror(rc))); + return rc; + } + + p = (MDB_page *)&pbuf; + + if (!F_ISSET(p->mp_flags, P_META)) { + DPRINTF(("page %"Z"u not a meta page", p->mp_pgno)); + return MDB_INVALID; + } + + m = METADATA(p); + if (m->mm_magic != MDB_MAGIC) { + DPUTS("meta has invalid magic"); + return MDB_INVALID; + } + + if (m->mm_version != MDB_DATA_VERSION) { + DPRINTF(("database is version %u, expected version %u", + m->mm_version, MDB_DATA_VERSION)); + return MDB_VERSION_MISMATCH; + } + + if (off == 0 || m->mm_txnid > meta->mm_txnid) + *meta = *m; + } + return 0; +} + +static void ESECT +mdb_env_init_meta0(MDB_env *env, MDB_meta *meta) +{ + meta->mm_magic = MDB_MAGIC; + meta->mm_version = MDB_DATA_VERSION; + meta->mm_mapsize = env->me_mapsize; + meta->mm_psize = env->me_psize; + meta->mm_last_pg = 1; + meta->mm_flags = env->me_flags & 0xffff; + meta->mm_flags |= MDB_INTEGERKEY; + meta->mm_dbs[0].md_root = P_INVALID; + meta->mm_dbs[1].md_root = P_INVALID; +} + +/** Write the environment parameters of a freshly created DB environment. + * @param[in] env the environment handle + * @param[out] meta address of where to store the meta information + * @return 0 on success, non-zero on failure. + */ +static int ESECT +mdb_env_init_meta(MDB_env *env, MDB_meta *meta) +{ + MDB_page *p, *q; + int rc; + unsigned int psize; +#ifdef _WIN32 + DWORD len; + OVERLAPPED ov; + memset(&ov, 0, sizeof(ov)); +#define DO_PWRITE(rc, fd, ptr, size, len, pos) do { \ + ov.Offset = pos; \ + rc = WriteFile(fd, ptr, size, &len, &ov); } while(0) +#else + int len; +#define DO_PWRITE(rc, fd, ptr, size, len, pos) do { \ + len = pwrite(fd, ptr, size, pos); \ + rc = (len >= 0); } while(0) +#endif + + DPUTS("writing new meta page"); + + psize = env->me_psize; + + mdb_env_init_meta0(env, meta); + + p = calloc(2, psize); + p->mp_pgno = 0; + p->mp_flags = P_META; + *(MDB_meta *)METADATA(p) = *meta; + + q = (MDB_page *)((char *)p + psize); + q->mp_pgno = 1; + q->mp_flags = P_META; + *(MDB_meta *)METADATA(q) = *meta; + + DO_PWRITE(rc, env->me_fd, p, psize * 2, len, 0); + if (!rc) + rc = ErrCode(); + else if ((unsigned) len == psize * 2) + rc = MDB_SUCCESS; + else + rc = ENOSPC; + free(p); + return rc; +} + +/** Update the environment info to commit a transaction. + * @param[in] txn the transaction that's being committed + * @return 0 on success, non-zero on failure. + */ +static int +mdb_env_write_meta(MDB_txn *txn) +{ + MDB_env *env; + MDB_meta meta, metab, *mp; + size_t mapsize; + off_t off; + int rc, len, toggle; + char *ptr; + HANDLE mfd; +#ifdef _WIN32 + OVERLAPPED ov; +#else + int r2; +#endif + + toggle = txn->mt_txnid & 1; + DPRINTF(("writing meta page %d for root page %"Z"u", + toggle, txn->mt_dbs[MAIN_DBI].md_root)); + + env = txn->mt_env; + mp = env->me_metas[toggle]; + mapsize = env->me_metas[toggle ^ 1]->mm_mapsize; + /* Persist any increases of mapsize config */ + if (mapsize < env->me_mapsize) + mapsize = env->me_mapsize; + + if (env->me_flags & MDB_WRITEMAP) { + mp->mm_mapsize = mapsize; + mp->mm_dbs[0] = txn->mt_dbs[0]; + mp->mm_dbs[1] = txn->mt_dbs[1]; + mp->mm_last_pg = txn->mt_next_pgno - 1; + mp->mm_txnid = txn->mt_txnid; + if (!(env->me_flags & (MDB_NOMETASYNC|MDB_NOSYNC))) { + unsigned meta_size = env->me_psize; + rc = (env->me_flags & MDB_MAPASYNC) ? MS_ASYNC : MS_SYNC; + ptr = env->me_map; + if (toggle) { +#ifndef _WIN32 /* POSIX msync() requires ptr = start of OS page */ + if (meta_size < env->me_os_psize) + meta_size += meta_size; + else +#endif + ptr += meta_size; + } + if (MDB_MSYNC(ptr, meta_size, rc)) { + rc = ErrCode(); + goto fail; + } + } + goto done; + } + metab.mm_txnid = env->me_metas[toggle]->mm_txnid; + metab.mm_last_pg = env->me_metas[toggle]->mm_last_pg; + + meta.mm_mapsize = mapsize; + meta.mm_dbs[0] = txn->mt_dbs[0]; + meta.mm_dbs[1] = txn->mt_dbs[1]; + meta.mm_last_pg = txn->mt_next_pgno - 1; + meta.mm_txnid = txn->mt_txnid; + + off = offsetof(MDB_meta, mm_mapsize); + ptr = (char *)&meta + off; + len = sizeof(MDB_meta) - off; + if (toggle) + off += env->me_psize; + off += PAGEHDRSZ; + + /* Write to the SYNC fd */ + mfd = env->me_flags & (MDB_NOSYNC|MDB_NOMETASYNC) ? + env->me_fd : env->me_mfd; +#ifdef _WIN32 + { + memset(&ov, 0, sizeof(ov)); + ov.Offset = off; + if (!WriteFile(mfd, ptr, len, (DWORD *)&rc, &ov)) + rc = -1; + } +#else + rc = pwrite(mfd, ptr, len, off); +#endif + if (rc != len) { + rc = rc < 0 ? ErrCode() : EIO; + DPUTS("write failed, disk error?"); + /* On a failure, the pagecache still contains the new data. + * Write some old data back, to prevent it from being used. + * Use the non-SYNC fd; we know it will fail anyway. + */ + meta.mm_last_pg = metab.mm_last_pg; + meta.mm_txnid = metab.mm_txnid; +#ifdef _WIN32 + memset(&ov, 0, sizeof(ov)); + ov.Offset = off; + WriteFile(env->me_fd, ptr, len, NULL, &ov); +#else + r2 = pwrite(env->me_fd, ptr, len, off); + (void)r2; /* Silence warnings. We don't care about pwrite's return value */ +#endif +fail: + env->me_flags |= MDB_FATAL_ERROR; + return rc; + } + /* MIPS has cache coherency issues, this is a no-op everywhere else */ + CACHEFLUSH(env->me_map + off, len, DCACHE); +done: + /* Memory ordering issues are irrelevant; since the entire writer + * is wrapped by wmutex, all of these changes will become visible + * after the wmutex is unlocked. Since the DB is multi-version, + * readers will get consistent data regardless of how fresh or + * how stale their view of these values is. + */ + if (env->me_txns) + env->me_txns->mti_txnid = txn->mt_txnid; + + return MDB_SUCCESS; +} + +/** Check both meta pages to see which one is newer. + * @param[in] env the environment handle + * @return meta toggle (0 or 1). + */ +static int +mdb_env_pick_meta(const MDB_env *env) +{ + return (env->me_metas[0]->mm_txnid < env->me_metas[1]->mm_txnid); +} + +int ESECT +mdb_env_create(MDB_env **env) +{ + MDB_env *e; + + e = calloc(1, sizeof(MDB_env)); + if (!e) + return ENOMEM; + + e->me_maxreaders = DEFAULT_READERS; + e->me_maxdbs = e->me_numdbs = 2; + e->me_fd = INVALID_HANDLE_VALUE; + e->me_lfd = INVALID_HANDLE_VALUE; + e->me_mfd = INVALID_HANDLE_VALUE; +#ifdef MDB_USE_POSIX_SEM + e->me_rmutex = SEM_FAILED; + e->me_wmutex = SEM_FAILED; +#endif + e->me_pid = getpid(); + GET_PAGESIZE(e->me_os_psize); + VGMEMP_CREATE(e,0,0); + *env = e; + return MDB_SUCCESS; +} + +static int ESECT +mdb_env_map(MDB_env *env, void *addr) +{ + MDB_page *p; + unsigned int flags = env->me_flags; +#ifdef _WIN32 + int rc; + HANDLE mh; + LONG sizelo, sizehi; + size_t msize; + + if (flags & MDB_RDONLY) { + /* Don't set explicit map size, use whatever exists */ + msize = 0; + sizelo = 0; + sizehi = 0; + } else { + msize = env->me_mapsize; + sizelo = msize & 0xffffffff; + sizehi = msize >> 16 >> 16; /* only needed on Win64 */ + + /* Windows won't create mappings for zero length files. + * and won't map more than the file size. + * Just set the maxsize right now. + */ + if (SetFilePointer(env->me_fd, sizelo, &sizehi, 0) != (DWORD)sizelo + || !SetEndOfFile(env->me_fd) + || SetFilePointer(env->me_fd, 0, NULL, 0) != 0) + return ErrCode(); + } + + mh = CreateFileMapping(env->me_fd, NULL, flags & MDB_WRITEMAP ? + PAGE_READWRITE : PAGE_READONLY, + sizehi, sizelo, NULL); + if (!mh) + return ErrCode(); +#ifdef VL32 + msize = 2 * env->me_psize; +#endif + env->me_map = MapViewOfFileEx(mh, flags & MDB_WRITEMAP ? + FILE_MAP_WRITE : FILE_MAP_READ, + 0, 0, msize, addr); + rc = env->me_map ? 0 : ErrCode(); +#ifdef VL32 + env->me_fmh = mh; +#else + CloseHandle(mh); +#endif + if (rc) + return rc; +#else +#ifdef VL32 + (void) flags; + env->me_map = mmap(addr, 2 * env->me_psize, PROT_READ, MAP_SHARED, + env->me_fd, 0); + if (env->me_map == MAP_FAILED) { + env->me_map = NULL; + return ErrCode(); + } +#else + int prot = PROT_READ; + if (flags & MDB_WRITEMAP) { + prot |= PROT_WRITE; + if (ftruncate(env->me_fd, env->me_mapsize) < 0) + return ErrCode(); + } + env->me_map = mmap(addr, env->me_mapsize, prot, MAP_SHARED, + env->me_fd, 0); + if (env->me_map == MAP_FAILED) { + env->me_map = NULL; + return ErrCode(); + } + + if (flags & MDB_NORDAHEAD) { + /* Turn off readahead. It's harmful when the DB is larger than RAM. */ +#ifdef MADV_RANDOM + madvise(env->me_map, env->me_mapsize, MADV_RANDOM); +#else +#ifdef POSIX_MADV_RANDOM + posix_madvise(env->me_map, env->me_mapsize, POSIX_MADV_RANDOM); +#endif /* POSIX_MADV_RANDOM */ +#endif /* MADV_RANDOM */ + } +#endif /* _WIN32 */ + + /* Can happen because the address argument to mmap() is just a + * hint. mmap() can pick another, e.g. if the range is in use. + * The MAP_FIXED flag would prevent that, but then mmap could + * instead unmap existing pages to make room for the new map. + */ + if (addr && env->me_map != addr) + return EBUSY; /* TODO: Make a new MDB_* error code? */ +#endif + + p = (MDB_page *)env->me_map; + env->me_metas[0] = METADATA(p); + env->me_metas[1] = (MDB_meta *)((char *)env->me_metas[0] + env->me_psize); + + return MDB_SUCCESS; +} + +int ESECT +mdb_env_set_mapsize(MDB_env *env, size_t size) +{ + /* If env is already open, caller is responsible for making + * sure there are no active txns. + */ + if (env->me_map) { + int rc; + void *old; + if (env->me_txn) + return EINVAL; + if (!size) + size = env->me_metas[mdb_env_pick_meta(env)]->mm_mapsize; + else if (size < env->me_mapsize) { + /* If the configured size is smaller, make sure it's + * still big enough. Silently round up to minimum if not. + */ + size_t minsize = (env->me_metas[mdb_env_pick_meta(env)]->mm_last_pg + 1) * env->me_psize; + if (size < minsize) + size = minsize; + } + munmap(env->me_map, env->me_mapsize); + env->me_mapsize = size; + old = (env->me_flags & MDB_FIXEDMAP) ? env->me_map : NULL; + rc = mdb_env_map(env, old); + if (rc) + return rc; + } + env->me_mapsize = size; + if (env->me_psize) + env->me_maxpg = env->me_mapsize / env->me_psize; + return MDB_SUCCESS; +} + +int ESECT +mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs) +{ + if (env->me_map) + return EINVAL; + env->me_maxdbs = dbs + 2; /* Named databases + main and free DB */ + return MDB_SUCCESS; +} + +int ESECT +mdb_env_set_maxreaders(MDB_env *env, unsigned int readers) +{ + if (env->me_map || readers < 1) + return EINVAL; + env->me_maxreaders = readers; + return MDB_SUCCESS; +} + +int ESECT +mdb_env_get_maxreaders(MDB_env *env, unsigned int *readers) +{ + if (!env || !readers) + return EINVAL; + *readers = env->me_maxreaders; + return MDB_SUCCESS; +} + +/** Further setup required for opening an LMDB environment + */ +static int ESECT +mdb_env_open2(MDB_env *env) +{ + unsigned int flags = env->me_flags; + int i, newenv = 0, rc; + MDB_meta meta; + +#ifdef _WIN32 + /* See if we should use QueryLimited */ + rc = GetVersion(); + if ((rc & 0xff) > 5) + env->me_pidquery = MDB_PROCESS_QUERY_LIMITED_INFORMATION; + else + env->me_pidquery = PROCESS_QUERY_INFORMATION; +#endif /* _WIN32 */ + + memset(&meta, 0, sizeof(meta)); + + if ((i = mdb_env_read_header(env, &meta)) != 0) { + if (i != ENOENT) + return i; + DPUTS("new mdbenv"); + newenv = 1; + env->me_psize = env->me_os_psize; + if (env->me_psize > MAX_PAGESIZE) + env->me_psize = MAX_PAGESIZE; + } else { + env->me_psize = meta.mm_psize; + } + + /* Was a mapsize configured? */ + if (!env->me_mapsize) { + /* If this is a new environment, take the default, + * else use the size recorded in the existing env. + */ + env->me_mapsize = newenv ? DEFAULT_MAPSIZE : meta.mm_mapsize; + } else if (env->me_mapsize < meta.mm_mapsize) { + /* If the configured size is smaller, make sure it's + * still big enough. Silently round up to minimum if not. + */ + size_t minsize = (meta.mm_last_pg + 1) * meta.mm_psize; + if (env->me_mapsize < minsize) + env->me_mapsize = minsize; + } + + rc = mdb_env_map(env, (flags & MDB_FIXEDMAP) ? meta.mm_address : NULL); + if (rc) + return rc; + + if (newenv) { + if (flags & MDB_FIXEDMAP) + meta.mm_address = env->me_map; + i = mdb_env_init_meta(env, &meta); + if (i != MDB_SUCCESS) { + return i; + } + } + + env->me_maxfree_1pg = (env->me_psize - PAGEHDRSZ) / sizeof(pgno_t) - 1; + env->me_nodemax = (((env->me_psize - PAGEHDRSZ) / MDB_MINKEYS) & -2) + - sizeof(indx_t); +#if !(MDB_MAXKEYSIZE) + env->me_maxkey = env->me_nodemax - (NODESIZE + sizeof(MDB_db)); +#endif + env->me_maxpg = env->me_mapsize / env->me_psize; + +#if MDB_DEBUG + { + int toggle = mdb_env_pick_meta(env); + MDB_db *db = &env->me_metas[toggle]->mm_dbs[MAIN_DBI]; + + DPRINTF(("opened database version %u, pagesize %u", + env->me_metas[0]->mm_version, env->me_psize)); + DPRINTF(("using meta page %d", toggle)); + DPRINTF(("depth: %u", db->md_depth)); + DPRINTF(("entries: %"Z"u", db->md_entries)); + DPRINTF(("branch pages: %"Z"u", db->md_branch_pages)); + DPRINTF(("leaf pages: %"Z"u", db->md_leaf_pages)); + DPRINTF(("overflow pages: %"Z"u", db->md_overflow_pages)); + DPRINTF(("root: %"Z"u", db->md_root)); + } +#endif + + return MDB_SUCCESS; +} + + +/** Release a reader thread's slot in the reader lock table. + * This function is called automatically when a thread exits. + * @param[in] ptr This points to the slot in the reader lock table. + */ +static void +mdb_env_reader_dest(void *ptr) +{ + MDB_reader *reader = ptr; + + reader->mr_pid = 0; +} + +#ifdef _WIN32 +/** Junk for arranging thread-specific callbacks on Windows. This is + * necessarily platform and compiler-specific. Windows supports up + * to 1088 keys. Let's assume nobody opens more than 64 environments + * in a single process, for now. They can override this if needed. + */ +#ifndef MAX_TLS_KEYS +#define MAX_TLS_KEYS 64 +#endif +static pthread_key_t mdb_tls_keys[MAX_TLS_KEYS]; +static int mdb_tls_nkeys; + +static void NTAPI mdb_tls_callback(PVOID module, DWORD reason, PVOID ptr) +{ + int i; + switch(reason) { + case DLL_PROCESS_ATTACH: break; + case DLL_THREAD_ATTACH: break; + case DLL_THREAD_DETACH: + for (i=0; i<mdb_tls_nkeys; i++) { + MDB_reader *r = pthread_getspecific(mdb_tls_keys[i]); + if (r) { + mdb_env_reader_dest(r); + } + } + break; + case DLL_PROCESS_DETACH: break; + } +} +#ifdef __GNUC__ +#ifdef _WIN64 +const PIMAGE_TLS_CALLBACK mdb_tls_cbp __attribute__((section (".CRT$XLB"))) = mdb_tls_callback; +#else +PIMAGE_TLS_CALLBACK mdb_tls_cbp __attribute__((section (".CRT$XLB"))) = mdb_tls_callback; +#endif +#else +#ifdef _WIN64 +/* Force some symbol references. + * _tls_used forces the linker to create the TLS directory if not already done + * mdb_tls_cbp prevents whole-program-optimizer from dropping the symbol. + */ +#pragma comment(linker, "/INCLUDE:_tls_used") +#pragma comment(linker, "/INCLUDE:mdb_tls_cbp") +#pragma const_seg(".CRT$XLB") +extern const PIMAGE_TLS_CALLBACK mdb_tls_cbp; +const PIMAGE_TLS_CALLBACK mdb_tls_cbp = mdb_tls_callback; +#pragma const_seg() +#else /* WIN32 */ +#pragma comment(linker, "/INCLUDE:__tls_used") +#pragma comment(linker, "/INCLUDE:_mdb_tls_cbp") +#pragma data_seg(".CRT$XLB") +PIMAGE_TLS_CALLBACK mdb_tls_cbp = mdb_tls_callback; +#pragma data_seg() +#endif /* WIN 32/64 */ +#endif /* !__GNUC__ */ +#endif + +/** Downgrade the exclusive lock on the region back to shared */ +static int ESECT +mdb_env_share_locks(MDB_env *env, int *excl) +{ + int rc = 0, toggle = mdb_env_pick_meta(env); + + env->me_txns->mti_txnid = env->me_metas[toggle]->mm_txnid; + +#ifdef _WIN32 + { + OVERLAPPED ov; + /* First acquire a shared lock. The Unlock will + * then release the existing exclusive lock. + */ + memset(&ov, 0, sizeof(ov)); + if (!LockFileEx(env->me_lfd, 0, 0, 1, 0, &ov)) { + rc = ErrCode(); + } else { + UnlockFile(env->me_lfd, 0, 0, 1, 0); + *excl = 0; + } + } +#else + { + struct flock lock_info; + /* The shared lock replaces the existing lock */ + memset((void *)&lock_info, 0, sizeof(lock_info)); + lock_info.l_type = F_RDLCK; + lock_info.l_whence = SEEK_SET; + lock_info.l_start = 0; + lock_info.l_len = 1; + while ((rc = fcntl(env->me_lfd, F_SETLK, &lock_info)) && + (rc = ErrCode()) == EINTR) ; + *excl = rc ? -1 : 0; /* error may mean we lost the lock */ + } +#endif + + return rc; +} + +/** Try to get exlusive lock, otherwise shared. + * Maintain *excl = -1: no/unknown lock, 0: shared, 1: exclusive. + */ +static int ESECT +mdb_env_excl_lock(MDB_env *env, int *excl) +{ + int rc = 0; +#ifdef _WIN32 + if (LockFile(env->me_lfd, 0, 0, 1, 0)) { + *excl = 1; + } else { + OVERLAPPED ov; + memset(&ov, 0, sizeof(ov)); + if (LockFileEx(env->me_lfd, 0, 0, 1, 0, &ov)) { + *excl = 0; + } else { + rc = ErrCode(); + } + } +#else + struct flock lock_info; + memset((void *)&lock_info, 0, sizeof(lock_info)); + lock_info.l_type = F_WRLCK; + lock_info.l_whence = SEEK_SET; + lock_info.l_start = 0; + lock_info.l_len = 1; + while ((rc = fcntl(env->me_lfd, F_SETLK, &lock_info)) && + (rc = ErrCode()) == EINTR) ; + if (!rc) { + *excl = 1; + } else +# ifdef MDB_USE_POSIX_SEM + if (*excl < 0) /* always true when !MDB_USE_POSIX_SEM */ +# endif + { + lock_info.l_type = F_RDLCK; + while ((rc = fcntl(env->me_lfd, F_SETLKW, &lock_info)) && + (rc = ErrCode()) == EINTR) ; + if (rc == 0) + *excl = 0; + } +#endif + return rc; +} + +#ifdef MDB_USE_HASH +/* + * hash_64 - 64 bit Fowler/Noll/Vo-0 FNV-1a hash code + * + * @(#) $Revision: 5.1 $ + * @(#) $Id: hash_64a.c,v 5.1 2009/06/30 09:01:38 chongo Exp $ + * @(#) $Source: /usr/local/src/cmd/fnv/RCS/hash_64a.c,v $ + * + * http://www.isthe.com/chongo/tech/comp/fnv/index.html + * + *** + * + * Please do not copyright this code. This code is in the public domain. + * + * LANDON CURT NOLL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO + * EVENT SHALL LANDON CURT NOLL BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF + * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR + * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + * + * By: + * chongo <Landon Curt Noll> /\oo/\ + * http://www.isthe.com/chongo/ + * + * Share and Enjoy! :-) + */ + +typedef unsigned long long mdb_hash_t; +#define MDB_HASH_INIT ((mdb_hash_t)0xcbf29ce484222325ULL) + +/** perform a 64 bit Fowler/Noll/Vo FNV-1a hash on a buffer + * @param[in] val value to hash + * @param[in] hval initial value for hash + * @return 64 bit hash + * + * NOTE: To use the recommended 64 bit FNV-1a hash, use MDB_HASH_INIT as the + * hval arg on the first call. + */ +static mdb_hash_t +mdb_hash_val(MDB_val *val, mdb_hash_t hval) +{ + unsigned char *s = (unsigned char *)val->mv_data; /* unsigned string */ + unsigned char *end = s + val->mv_size; + /* + * FNV-1a hash each octet of the string + */ + while (s < end) { + /* xor the bottom with the current octet */ + hval ^= (mdb_hash_t)*s++; + + /* multiply by the 64 bit FNV magic prime mod 2^64 */ + hval += (hval << 1) + (hval << 4) + (hval << 5) + + (hval << 7) + (hval << 8) + (hval << 40); + } + /* return our new hash value */ + return hval; +} + +/** Hash the string and output the encoded hash. + * This uses modified RFC1924 Ascii85 encoding to accommodate systems with + * very short name limits. We don't care about the encoding being reversible, + * we just want to preserve as many bits of the input as possible in a + * small printable string. + * @param[in] str string to hash + * @param[out] encbuf an array of 11 chars to hold the hash + */ +static const char mdb_a85[]= "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~"; + +static void +mdb_pack85(unsigned long l, char *out) +{ + int i; + + for (i=0; i<5; i++) { + *out++ = mdb_a85[l % 85]; + l /= 85; + } +} + +static void +mdb_hash_enc(MDB_val *val, char *encbuf) +{ + mdb_hash_t h = mdb_hash_val(val, MDB_HASH_INIT); + + mdb_pack85(h, encbuf); + mdb_pack85(h>>32, encbuf+5); + encbuf[10] = '\0'; +} +#endif + +/** Open and/or initialize the lock region for the environment. + * @param[in] env The LMDB environment. + * @param[in] lpath The pathname of the file used for the lock region. + * @param[in] mode The Unix permissions for the file, if we create it. + * @param[out] excl Resulting file lock type: -1 none, 0 shared, 1 exclusive + * @param[in,out] excl In -1, out lock type: -1 none, 0 shared, 1 exclusive + * @return 0 on success, non-zero on failure. + */ +static int ESECT +mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) +{ +#ifdef _WIN32 +# define MDB_ERRCODE_ROFS ERROR_WRITE_PROTECT +#else +# define MDB_ERRCODE_ROFS EROFS +#ifdef O_CLOEXEC /* Linux: Open file and set FD_CLOEXEC atomically */ +# define MDB_CLOEXEC O_CLOEXEC +#else + int fdflags; +# define MDB_CLOEXEC 0 +#endif +#endif + int rc; + off_t size, rsize; + +#ifdef _WIN32 + env->me_lfd = CreateFile(lpath, GENERIC_READ|GENERIC_WRITE, + FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, OPEN_ALWAYS, + FILE_ATTRIBUTE_NORMAL, NULL); +#else + env->me_lfd = open(lpath, O_RDWR|O_CREAT|MDB_CLOEXEC, mode); +#endif + if (env->me_lfd == INVALID_HANDLE_VALUE) { + rc = ErrCode(); + if (rc == MDB_ERRCODE_ROFS && (env->me_flags & MDB_RDONLY)) { + return MDB_SUCCESS; + } + goto fail_errno; + } +#if ! ((MDB_CLOEXEC) || defined(_WIN32)) + /* Lose record locks when exec*() */ + if ((fdflags = fcntl(env->me_lfd, F_GETFD) | FD_CLOEXEC) >= 0) + fcntl(env->me_lfd, F_SETFD, fdflags); +#endif + + if (!(env->me_flags & MDB_NOTLS)) { + rc = pthread_key_create(&env->me_txkey, mdb_env_reader_dest); + if (rc) + goto fail; + env->me_flags |= MDB_ENV_TXKEY; +#ifdef _WIN32 + /* Windows TLS callbacks need help finding their TLS info. */ + if (mdb_tls_nkeys >= MAX_TLS_KEYS) { + rc = MDB_TLS_FULL; + goto fail; + } + mdb_tls_keys[mdb_tls_nkeys++] = env->me_txkey; +#endif + } + + /* Try to get exclusive lock. If we succeed, then + * nobody is using the lock region and we should initialize it. + */ + if ((rc = mdb_env_excl_lock(env, excl))) goto fail; + +#ifdef _WIN32 + size = GetFileSize(env->me_lfd, NULL); +#else + size = lseek(env->me_lfd, 0, SEEK_END); + if (size == -1) goto fail_errno; +#endif + rsize = (env->me_maxreaders-1) * sizeof(MDB_reader) + sizeof(MDB_txninfo); + if (size < rsize && *excl > 0) { +#ifdef _WIN32 + if (SetFilePointer(env->me_lfd, rsize, NULL, FILE_BEGIN) != (DWORD)rsize + || !SetEndOfFile(env->me_lfd)) + goto fail_errno; +#else + if (ftruncate(env->me_lfd, rsize) != 0) goto fail_errno; +#endif + } else { + rsize = size; + size = rsize - sizeof(MDB_txninfo); + env->me_maxreaders = size/sizeof(MDB_reader) + 1; + } + { +#ifdef _WIN32 + HANDLE mh; + mh = CreateFileMapping(env->me_lfd, NULL, PAGE_READWRITE, + 0, 0, NULL); + if (!mh) goto fail_errno; + env->me_txns = MapViewOfFileEx(mh, FILE_MAP_WRITE, 0, 0, rsize, NULL); + CloseHandle(mh); + if (!env->me_txns) goto fail_errno; +#else + void *m = mmap(NULL, rsize, PROT_READ|PROT_WRITE, MAP_SHARED, + env->me_lfd, 0); + if (m == MAP_FAILED) goto fail_errno; + env->me_txns = m; +#endif + } + if (*excl > 0) { +#ifdef _WIN32 + BY_HANDLE_FILE_INFORMATION stbuf; + struct { + DWORD volume; + DWORD nhigh; + DWORD nlow; + } idbuf; + MDB_val val; + char encbuf[11]; + + if (!mdb_sec_inited) { + InitializeSecurityDescriptor(&mdb_null_sd, + SECURITY_DESCRIPTOR_REVISION); + SetSecurityDescriptorDacl(&mdb_null_sd, TRUE, 0, FALSE); + mdb_all_sa.nLength = sizeof(SECURITY_ATTRIBUTES); + mdb_all_sa.bInheritHandle = FALSE; + mdb_all_sa.lpSecurityDescriptor = &mdb_null_sd; + mdb_sec_inited = 1; + } + if (!GetFileInformationByHandle(env->me_lfd, &stbuf)) goto fail_errno; + idbuf.volume = stbuf.dwVolumeSerialNumber; + idbuf.nhigh = stbuf.nFileIndexHigh; + idbuf.nlow = stbuf.nFileIndexLow; + val.mv_data = &idbuf; + val.mv_size = sizeof(idbuf); + mdb_hash_enc(&val, encbuf); + sprintf(env->me_txns->mti_rmname, "Global\\MDBr%s", encbuf); + sprintf(env->me_txns->mti_wmname, "Global\\MDBw%s", encbuf); + env->me_rmutex = CreateMutex(&mdb_all_sa, FALSE, env->me_txns->mti_rmname); + if (!env->me_rmutex) goto fail_errno; + env->me_wmutex = CreateMutex(&mdb_all_sa, FALSE, env->me_txns->mti_wmname); + if (!env->me_wmutex) goto fail_errno; +#elif defined(MDB_USE_POSIX_SEM) + struct stat stbuf; + struct { + dev_t dev; + ino_t ino; + } idbuf; + MDB_val val; + char encbuf[11]; + +#if defined(__NetBSD__) +#define MDB_SHORT_SEMNAMES 1 /* limited to 14 chars */ +#endif + if (fstat(env->me_lfd, &stbuf)) goto fail_errno; + idbuf.dev = stbuf.st_dev; + idbuf.ino = stbuf.st_ino; + val.mv_data = &idbuf; + val.mv_size = sizeof(idbuf); + mdb_hash_enc(&val, encbuf); +#ifdef MDB_SHORT_SEMNAMES + encbuf[9] = '\0'; /* drop name from 15 chars to 14 chars */ +#endif + sprintf(env->me_txns->mti_rmname, "/MDBr%s", encbuf); + sprintf(env->me_txns->mti_wmname, "/MDBw%s", encbuf); + /* Clean up after a previous run, if needed: Try to + * remove both semaphores before doing anything else. + */ + sem_unlink(env->me_txns->mti_rmname); + sem_unlink(env->me_txns->mti_wmname); + env->me_rmutex = sem_open(env->me_txns->mti_rmname, + O_CREAT|O_EXCL, mode, 1); + if (env->me_rmutex == SEM_FAILED) goto fail_errno; + env->me_wmutex = sem_open(env->me_txns->mti_wmname, + O_CREAT|O_EXCL, mode, 1); + if (env->me_wmutex == SEM_FAILED) goto fail_errno; +#else /* MDB_USE_POSIX_SEM */ + pthread_mutexattr_t mattr; + + if ((rc = pthread_mutexattr_init(&mattr)) + || (rc = pthread_mutexattr_setpshared(&mattr, PTHREAD_PROCESS_SHARED)) + || (rc = pthread_mutex_init(&env->me_txns->mti_mutex, &mattr)) + || (rc = pthread_mutex_init(&env->me_txns->mti_wmutex, &mattr))) + goto fail; + pthread_mutexattr_destroy(&mattr); +#endif /* _WIN32 || MDB_USE_POSIX_SEM */ + + env->me_txns->mti_magic = MDB_MAGIC; + env->me_txns->mti_format = MDB_LOCK_FORMAT; + env->me_txns->mti_txnid = 0; + env->me_txns->mti_numreaders = 0; + + } else { + if (env->me_txns->mti_magic != MDB_MAGIC) { + DPUTS("lock region has invalid magic"); + rc = MDB_INVALID; + goto fail; + } + if (env->me_txns->mti_format != MDB_LOCK_FORMAT) { + DPRINTF(("lock region has format+version 0x%x, expected 0x%x", + env->me_txns->mti_format, MDB_LOCK_FORMAT)); + rc = MDB_VERSION_MISMATCH; + goto fail; + } + rc = ErrCode(); + if (rc && rc != EACCES && rc != EAGAIN) { + goto fail; + } +#ifdef _WIN32 + env->me_rmutex = OpenMutex(SYNCHRONIZE, FALSE, env->me_txns->mti_rmname); + if (!env->me_rmutex) goto fail_errno; + env->me_wmutex = OpenMutex(SYNCHRONIZE, FALSE, env->me_txns->mti_wmname); + if (!env->me_wmutex) goto fail_errno; +#elif defined(MDB_USE_POSIX_SEM) + env->me_rmutex = sem_open(env->me_txns->mti_rmname, 0); + if (env->me_rmutex == SEM_FAILED) goto fail_errno; + env->me_wmutex = sem_open(env->me_txns->mti_wmname, 0); + if (env->me_wmutex == SEM_FAILED) goto fail_errno; +#endif + } + return MDB_SUCCESS; + +fail_errno: + rc = ErrCode(); +fail: + return rc; +} + + /** The name of the lock file in the DB environment */ +#define LOCKNAME "/lock.mdb" + /** The name of the data file in the DB environment */ +#define DATANAME "/data.mdb" + /** The suffix of the lock file when no subdir is used */ +#define LOCKSUFF "-lock" + /** Only a subset of the @ref mdb_env flags can be changed + * at runtime. Changing other flags requires closing the + * environment and re-opening it with the new flags. + */ +#define CHANGEABLE (MDB_NOSYNC|MDB_NOMETASYNC|MDB_MAPASYNC|MDB_NOMEMINIT) +#define CHANGELESS (MDB_FIXEDMAP|MDB_NOSUBDIR|MDB_RDONLY|MDB_WRITEMAP| \ + MDB_NOTLS|MDB_NOLOCK|MDB_NORDAHEAD) + +#if VALID_FLAGS & PERSISTENT_FLAGS & (CHANGEABLE|CHANGELESS) +# error "Persistent DB flags & env flags overlap, but both go in mm_flags" +#endif + +int ESECT +mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode) +{ + int oflags, rc, len, excl = -1; + char *lpath, *dpath; + + if (env->me_fd!=INVALID_HANDLE_VALUE || (flags & ~(CHANGEABLE|CHANGELESS))) + return EINVAL; + +#ifdef VL32 + if (flags & MDB_WRITEMAP) { + /* silently ignore WRITEMAP in 32 bit mode */ + flags ^= MDB_WRITEMAP; + } + if (flags & MDB_FIXEDMAP) { + /* cannot support FIXEDMAP */ + return EINVAL; + } +#endif + + len = strlen(path); + if (flags & MDB_NOSUBDIR) { + rc = len + sizeof(LOCKSUFF) + len + 1; + } else { + rc = len + sizeof(LOCKNAME) + len + sizeof(DATANAME); + } + lpath = malloc(rc); + if (!lpath) + return ENOMEM; + if (flags & MDB_NOSUBDIR) { + dpath = lpath + len + sizeof(LOCKSUFF); + sprintf(lpath, "%s" LOCKSUFF, path); + strcpy(dpath, path); + } else { + dpath = lpath + len + sizeof(LOCKNAME); + sprintf(lpath, "%s" LOCKNAME, path); + sprintf(dpath, "%s" DATANAME, path); + } + + rc = MDB_SUCCESS; + flags |= env->me_flags; + if (flags & MDB_RDONLY) { + /* silently ignore WRITEMAP when we're only getting read access */ + flags &= ~MDB_WRITEMAP; + } else { + if (!((env->me_free_pgs = mdb_midl_alloc(MDB_IDL_UM_MAX)) && + (env->me_dirty_list = calloc(MDB_IDL_UM_SIZE, sizeof(MDB_ID2))))) + rc = ENOMEM; + } + env->me_flags = flags |= MDB_ENV_ACTIVE; + if (rc) + goto leave; + + env->me_path = strdup(path); + env->me_dbxs = calloc(env->me_maxdbs, sizeof(MDB_dbx)); + env->me_dbflags = calloc(env->me_maxdbs, sizeof(uint16_t)); + env->me_dbiseqs = calloc(env->me_maxdbs, sizeof(unsigned int)); + if (!(env->me_dbxs && env->me_path && env->me_dbflags && env->me_dbiseqs)) { + rc = ENOMEM; + goto leave; + } + + /* For RDONLY, get lockfile after we know datafile exists */ + if (!(flags & (MDB_RDONLY|MDB_NOLOCK))) { + rc = mdb_env_setup_locks(env, lpath, mode, &excl); + if (rc) + goto leave; + } + +#ifdef _WIN32 + if (F_ISSET(flags, MDB_RDONLY)) { + oflags = GENERIC_READ; + len = OPEN_EXISTING; + } else { + oflags = GENERIC_READ|GENERIC_WRITE; + len = OPEN_ALWAYS; + } + mode = FILE_ATTRIBUTE_NORMAL; + env->me_fd = CreateFile(dpath, oflags, FILE_SHARE_READ|FILE_SHARE_WRITE, + NULL, len, mode, NULL); +#else + if (F_ISSET(flags, MDB_RDONLY)) + oflags = O_RDONLY; + else + oflags = O_RDWR | O_CREAT; + + env->me_fd = open(dpath, oflags, mode); +#endif + if (env->me_fd == INVALID_HANDLE_VALUE) { + rc = ErrCode(); + goto leave; + } + + if ((flags & (MDB_RDONLY|MDB_NOLOCK)) == MDB_RDONLY) { + rc = mdb_env_setup_locks(env, lpath, mode, &excl); + if (rc) + goto leave; + } + + if ((rc = mdb_env_open2(env)) == MDB_SUCCESS) { + if (flags & (MDB_RDONLY|MDB_WRITEMAP)) { + env->me_mfd = env->me_fd; + } else { + /* Synchronous fd for meta writes. Needed even with + * MDB_NOSYNC/MDB_NOMETASYNC, in case these get reset. + */ +#ifdef _WIN32 + len = OPEN_EXISTING; + env->me_mfd = CreateFile(dpath, oflags, + FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, len, + mode | FILE_FLAG_WRITE_THROUGH, NULL); +#else + oflags &= ~O_CREAT; + env->me_mfd = open(dpath, oflags | MDB_DSYNC, mode); +#endif + if (env->me_mfd == INVALID_HANDLE_VALUE) { + rc = ErrCode(); + goto leave; + } + } + DPRINTF(("opened dbenv %p", (void *) env)); + if (excl > 0) { + rc = mdb_env_share_locks(env, &excl); + if (rc) + goto leave; + } + if (!((flags & MDB_RDONLY) || + (env->me_pbuf = calloc(1, env->me_psize)))) + rc = ENOMEM; + if (!(flags & MDB_RDONLY)) { + MDB_txn *txn; + int tsize = sizeof(MDB_txn), size = tsize + env->me_maxdbs * + (sizeof(MDB_db)+sizeof(MDB_cursor)+sizeof(unsigned int)+1); + txn = calloc(1, size); + if (txn) { + txn->mt_dbs = (MDB_db *)((char *)txn + tsize); + txn->mt_cursors = (MDB_cursor **)(txn->mt_dbs + env->me_maxdbs); + txn->mt_dbiseqs = (unsigned int *)(txn->mt_cursors + env->me_maxdbs); + txn->mt_dbflags = (unsigned char *)(txn->mt_dbiseqs + env->me_maxdbs); + txn->mt_env = env; +#ifdef VL32 + txn->mt_rpages = calloc(MDB_IDL_UM_SIZE, sizeof(MDB_ID2)); + if (!txn->mt_rpages) { + free(txn); + rc = ENOMEM; + } +#endif + env->me_txn0 = txn; + } else { + rc = ENOMEM; + } + } + } + +leave: + if (rc) { + mdb_env_close0(env, excl); + } + free(lpath); + return rc; +} + +/** Destroy resources from mdb_env_open(), clear our readers & DBIs */ +static void ESECT +mdb_env_close0(MDB_env *env, int excl) +{ + int i; + + if (!(env->me_flags & MDB_ENV_ACTIVE)) + return; + + /* Doing this here since me_dbxs may not exist during mdb_env_close */ + for (i = env->me_maxdbs; --i > MAIN_DBI; ) + free(env->me_dbxs[i].md_name.mv_data); + + free(env->me_pbuf); + free(env->me_dbiseqs); + free(env->me_dbflags); + free(env->me_dbxs); + free(env->me_path); + free(env->me_dirty_list); + free(env->me_txn0); + mdb_midl_free(env->me_free_pgs); + + if (env->me_flags & MDB_ENV_TXKEY) { + pthread_key_delete(env->me_txkey); +#ifdef _WIN32 + /* Delete our key from the global list */ + for (i=0; i<mdb_tls_nkeys; i++) + if (mdb_tls_keys[i] == env->me_txkey) { + mdb_tls_keys[i] = mdb_tls_keys[mdb_tls_nkeys-1]; + mdb_tls_nkeys--; + break; + } +#endif + } + + if (env->me_map) { +#ifdef VL32 + munmap(env->me_map, 2*env->me_psize); +#else + munmap(env->me_map, env->me_mapsize); +#endif + } + if (env->me_mfd != env->me_fd && env->me_mfd != INVALID_HANDLE_VALUE) + (void) close(env->me_mfd); + if (env->me_fd != INVALID_HANDLE_VALUE) + (void) close(env->me_fd); + if (env->me_txns) { + MDB_PID_T pid = env->me_pid; + /* Clearing readers is done in this function because + * me_txkey with its destructor must be disabled first. + */ + for (i = env->me_numreaders; --i >= 0; ) + if (env->me_txns->mti_readers[i].mr_pid == pid) + env->me_txns->mti_readers[i].mr_pid = 0; +#ifdef _WIN32 + if (env->me_rmutex) { + CloseHandle(env->me_rmutex); + if (env->me_wmutex) CloseHandle(env->me_wmutex); + } + /* Windows automatically destroys the mutexes when + * the last handle closes. + */ +#elif defined(MDB_USE_POSIX_SEM) + if (env->me_rmutex != SEM_FAILED) { + sem_close(env->me_rmutex); + if (env->me_wmutex != SEM_FAILED) + sem_close(env->me_wmutex); + /* If we have the filelock: If we are the + * only remaining user, clean up semaphores. + */ + if (excl == 0) + mdb_env_excl_lock(env, &excl); + if (excl > 0) { + sem_unlink(env->me_txns->mti_rmname); + sem_unlink(env->me_txns->mti_wmname); + } + } +#endif + munmap((void *)env->me_txns, (env->me_maxreaders-1)*sizeof(MDB_reader)+sizeof(MDB_txninfo)); + } + if (env->me_lfd != INVALID_HANDLE_VALUE) { +#ifdef _WIN32 + if (excl >= 0) { + /* Unlock the lockfile. Windows would have unlocked it + * after closing anyway, but not necessarily at once. + */ + UnlockFile(env->me_lfd, 0, 0, 1, 0); + } +#endif + (void) close(env->me_lfd); + } + + env->me_flags &= ~(MDB_ENV_ACTIVE|MDB_ENV_TXKEY); +} + + +void ESECT +mdb_env_close(MDB_env *env) +{ + MDB_page *dp; + + if (env == NULL) + return; + + VGMEMP_DESTROY(env); + while ((dp = env->me_dpages) != NULL) { + VGMEMP_DEFINED(&dp->mp_next, sizeof(dp->mp_next)); + env->me_dpages = dp->mp_next; + free(dp); + } + + mdb_env_close0(env, 0); + free(env); +} + +/** Compare two items pointing at aligned size_t's */ +static int +mdb_cmp_long(const MDB_val *a, const MDB_val *b) +{ + return (*(size_t *)a->mv_data < *(size_t *)b->mv_data) ? -1 : + *(size_t *)a->mv_data > *(size_t *)b->mv_data; +} + +/** Compare two items pointing at aligned unsigned int's */ +static int +mdb_cmp_int(const MDB_val *a, const MDB_val *b) +{ + return (*(unsigned int *)a->mv_data < *(unsigned int *)b->mv_data) ? -1 : + *(unsigned int *)a->mv_data > *(unsigned int *)b->mv_data; +} + +/** Compare two items pointing at unsigned ints of unknown alignment. + * Nodes and keys are guaranteed to be 2-byte aligned. + */ +static int +mdb_cmp_cint(const MDB_val *a, const MDB_val *b) +{ +#if BYTE_ORDER == LITTLE_ENDIAN + unsigned short *u, *c; + int x; + + u = (unsigned short *) ((char *) a->mv_data + a->mv_size); + c = (unsigned short *) ((char *) b->mv_data + a->mv_size); + do { + x = *--u - *--c; + } while(!x && u > (unsigned short *)a->mv_data); + return x; +#else + unsigned short *u, *c, *end; + int x; + + end = (unsigned short *) ((char *) a->mv_data + a->mv_size); + u = (unsigned short *)a->mv_data; + c = (unsigned short *)b->mv_data; + do { + x = *u++ - *c++; + } while(!x && u < end); + return x; +#endif +} + +/** Compare two items pointing at size_t's of unknown alignment. */ +#ifdef MISALIGNED_OK +# define mdb_cmp_clong mdb_cmp_long +#else +# define mdb_cmp_clong mdb_cmp_cint +#endif + +/** Compare two items lexically */ +static int +mdb_cmp_memn(const MDB_val *a, const MDB_val *b) +{ + int diff; + ssize_t len_diff; + unsigned int len; + + len = a->mv_size; + len_diff = (ssize_t) a->mv_size - (ssize_t) b->mv_size; + if (len_diff > 0) { + len = b->mv_size; + len_diff = 1; + } + + diff = memcmp(a->mv_data, b->mv_data, len); + return diff ? diff : len_diff<0 ? -1 : len_diff; +} + +/** Compare two items in reverse byte order */ +static int +mdb_cmp_memnr(const MDB_val *a, const MDB_val *b) +{ + const unsigned char *p1, *p2, *p1_lim; + ssize_t len_diff; + int diff; + + p1_lim = (const unsigned char *)a->mv_data; + p1 = (const unsigned char *)a->mv_data + a->mv_size; + p2 = (const unsigned char *)b->mv_data + b->mv_size; + + len_diff = (ssize_t) a->mv_size - (ssize_t) b->mv_size; + if (len_diff > 0) { + p1_lim += len_diff; + len_diff = 1; + } + + while (p1 > p1_lim) { + diff = *--p1 - *--p2; + if (diff) + return diff; + } + return len_diff<0 ? -1 : len_diff; +} + +/** Search for key within a page, using binary search. + * Returns the smallest entry larger or equal to the key. + * If exactp is non-null, stores whether the found entry was an exact match + * in *exactp (1 or 0). + * Updates the cursor index with the index of the found entry. + * If no entry larger or equal to the key is found, returns NULL. + */ +static MDB_node * +mdb_node_search(MDB_cursor *mc, MDB_val *key, int *exactp) +{ + unsigned int i = 0, nkeys; + int low, high; + int rc = 0; + MDB_page *mp = mc->mc_pg[mc->mc_top]; + MDB_node *node = NULL; + MDB_val nodekey; + MDB_cmp_func *cmp; + DKBUF; + + nkeys = NUMKEYS(mp); + + DPRINTF(("searching %u keys in %s %spage %"Z"u", + nkeys, IS_LEAF(mp) ? "leaf" : "branch", IS_SUBP(mp) ? "sub-" : "", + mdb_dbg_pgno(mp))); + + low = IS_LEAF(mp) ? 0 : 1; + high = nkeys - 1; + cmp = mc->mc_dbx->md_cmp; + + /* Branch pages have no data, so if using integer keys, + * alignment is guaranteed. Use faster mdb_cmp_int. + */ + if (cmp == mdb_cmp_cint && IS_BRANCH(mp)) { + if (NODEPTR(mp, 1)->mn_ksize == sizeof(size_t)) + cmp = mdb_cmp_long; + else + cmp = mdb_cmp_int; + } + + if (IS_LEAF2(mp)) { + nodekey.mv_size = mc->mc_db->md_pad; + node = NODEPTR(mp, 0); /* fake */ + while (low <= high) { + i = (low + high) >> 1; + nodekey.mv_data = LEAF2KEY(mp, i, nodekey.mv_size); + rc = cmp(key, &nodekey); + DPRINTF(("found leaf index %u [%s], rc = %i", + i, DKEY(&nodekey), rc)); + if (rc == 0) + break; + if (rc > 0) + low = i + 1; + else + high = i - 1; + } + } else { + while (low <= high) { + i = (low + high) >> 1; + + node = NODEPTR(mp, i); + nodekey.mv_size = NODEKSZ(node); + nodekey.mv_data = NODEKEY(node); + + rc = cmp(key, &nodekey); +#if MDB_DEBUG + if (IS_LEAF(mp)) + DPRINTF(("found leaf index %u [%s], rc = %i", + i, DKEY(&nodekey), rc)); + else + DPRINTF(("found branch index %u [%s -> %"Z"u], rc = %i", + i, DKEY(&nodekey), NODEPGNO(node), rc)); +#endif + if (rc == 0) + break; + if (rc > 0) + low = i + 1; + else + high = i - 1; + } + } + + if (rc > 0) { /* Found entry is less than the key. */ + i++; /* Skip to get the smallest entry larger than key. */ + if (!IS_LEAF2(mp)) + node = NODEPTR(mp, i); + } + if (exactp) + *exactp = (rc == 0 && nkeys > 0); + /* store the key index */ + mc->mc_ki[mc->mc_top] = i; + if (i >= nkeys) + /* There is no entry larger or equal to the key. */ + return NULL; + + /* nodeptr is fake for LEAF2 */ + return node; +} + +#if 0 +static void +mdb_cursor_adjust(MDB_cursor *mc, func) +{ + MDB_cursor *m2; + + for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2=m2->mc_next) { + if (m2->mc_pg[m2->mc_top] == mc->mc_pg[mc->mc_top]) { + func(mc, m2); + } + } +} +#endif + +/** Pop a page off the top of the cursor's stack. */ +static void +mdb_cursor_pop(MDB_cursor *mc) +{ + if (mc->mc_snum) { +#if MDB_DEBUG + MDB_page *top = mc->mc_pg[mc->mc_top]; +#endif + mc->mc_snum--; + if (mc->mc_snum) + mc->mc_top--; + + DPRINTF(("popped page %"Z"u off db %d cursor %p", top->mp_pgno, + DDBI(mc), (void *) mc)); + } +} + +/** Push a page onto the top of the cursor's stack. */ +static int +mdb_cursor_push(MDB_cursor *mc, MDB_page *mp) +{ + DPRINTF(("pushing page %"Z"u on db %d cursor %p", mp->mp_pgno, + DDBI(mc), (void *) mc)); + + if (mc->mc_snum >= CURSOR_STACK) { + mc->mc_txn->mt_flags |= MDB_TXN_ERROR; + return MDB_CURSOR_FULL; + } + + mc->mc_top = mc->mc_snum++; + mc->mc_pg[mc->mc_top] = mp; + mc->mc_ki[mc->mc_top] = 0; + + return MDB_SUCCESS; +} + +/** Find the address of the page corresponding to a given page number. + * @param[in] txn the transaction for this access. + * @param[in] pgno the page number for the page to retrieve. + * @param[out] ret address of a pointer where the page's address will be stored. + * @param[out] lvl dirty_list inheritance level of found page. 1=current txn, 0=mapped page. + * @return 0 on success, non-zero on failure. + */ +static int +mdb_page_get(MDB_txn *txn, pgno_t pgno, MDB_page **ret, int *lvl) +{ + MDB_env *env = txn->mt_env; + MDB_page *p = NULL; + int level; + + if (!((txn->mt_flags & MDB_TXN_RDONLY) | (env->me_flags & MDB_WRITEMAP))) { + MDB_txn *tx2 = txn; + level = 1; + do { + MDB_ID2L dl = tx2->mt_u.dirty_list; + unsigned x; + /* Spilled pages were dirtied in this txn and flushed + * because the dirty list got full. Bring this page + * back in from the map (but don't unspill it here, + * leave that unless page_touch happens again). + */ + if (tx2->mt_spill_pgs) { + MDB_ID pn = pgno << 1; + x = mdb_midl_search(tx2->mt_spill_pgs, pn); + if (x <= tx2->mt_spill_pgs[0] && tx2->mt_spill_pgs[x] == pn) { + p = (MDB_page *)(env->me_map + env->me_psize * pgno); + goto done; + } + } + if (dl[0].mid) { + unsigned x = mdb_mid2l_search(dl, pgno); + if (x <= dl[0].mid && dl[x].mid == pgno) { + p = dl[x].mptr; + goto done; + } + } + level++; + } while ((tx2 = tx2->mt_parent) != NULL); + } + + if (pgno < txn->mt_next_pgno) { + level = 0; +#ifdef VL32 + { + unsigned x = mdb_mid2l_search(txn->mt_rpages, pgno); + if (x <= txn->mt_rpages[0].mid && txn->mt_rpages[x].mid == pgno) { + p = txn->mt_rpages[x].mptr; + goto done; + } + if (txn->mt_rpages[0].mid >= MDB_IDL_UM_MAX) { + /* unmap some other page */ + mdb_tassert(txn, 0); + } + if (txn->mt_rpages[0].mid < MDB_IDL_UM_SIZE) { + MDB_ID2 id2; + size_t len = env->me_psize; + int np; +#ifdef _WIN32 + size_t off = pgno * env->me_psize; + DWORD lo, hi; + lo = off & 0xffffffff; + hi = off >> 16 >> 16; + p = MapViewOfFile(env->me_fmh, FILE_MAP_READ, hi, lo, len); + if (p == NULL) + return ErrCode(); + if (IS_OVERFLOW(p)) { + np = p->mp_pages; + UnmapViewOfFile(p); + len *= np; + p = MapViewOfFile(env->me_fmh, FILE_MAP_READ, hi, lo, len); + if (p == NULL) + return ErrCode(); + } +#else + off_t off = pgno * env->me_psize; + p = mmap(NULL, len, PROT_READ, MAP_SHARED, env->me_fd, off); + if (p == MAP_FAILED) + return errno; + if (IS_OVERFLOW(p)) { + np = p->mp_pages; + munmap(p, len); + len *= np; + p = mmap(NULL, len, PROT_READ, MAP_SHARED, env->me_fd, off); + if (p == MAP_FAILED) + return errno; + } +#endif + id2.mid = pgno; + id2.mptr = p; + mdb_mid2l_insert(txn->mt_rpages, &id2); + goto done; + } + } +#else + p = (MDB_page *)(env->me_map + env->me_psize * pgno); +#endif + } else { + DPRINTF(("page %"Z"u not found", pgno)); + txn->mt_flags |= MDB_TXN_ERROR; + return MDB_PAGE_NOTFOUND; + } + +done: + *ret = p; + if (lvl) + *lvl = level; + return MDB_SUCCESS; +} + +/** Finish #mdb_page_search() / #mdb_page_search_lowest(). + * The cursor is at the root page, set up the rest of it. + */ +static int +mdb_page_search_root(MDB_cursor *mc, MDB_val *key, int flags) +{ + MDB_page *mp = mc->mc_pg[mc->mc_top]; + int rc; + DKBUF; + + while (IS_BRANCH(mp)) { + MDB_node *node; + indx_t i; + + DPRINTF(("branch page %"Z"u has %u keys", mp->mp_pgno, NUMKEYS(mp))); + mdb_cassert(mc, NUMKEYS(mp) > 1); + DPRINTF(("found index 0 to page %"Z"u", NODEPGNO(NODEPTR(mp, 0)))); + + if (flags & (MDB_PS_FIRST|MDB_PS_LAST)) { + i = 0; + if (flags & MDB_PS_LAST) + i = NUMKEYS(mp) - 1; + } else { + int exact; + node = mdb_node_search(mc, key, &exact); + if (node == NULL) + i = NUMKEYS(mp) - 1; + else { + i = mc->mc_ki[mc->mc_top]; + if (!exact) { + mdb_cassert(mc, i > 0); + i--; + } + } + DPRINTF(("following index %u for key [%s]", i, DKEY(key))); + } + + mdb_cassert(mc, i < NUMKEYS(mp)); + node = NODEPTR(mp, i); + + if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(node), &mp, NULL)) != 0) + return rc; + + mc->mc_ki[mc->mc_top] = i; + if ((rc = mdb_cursor_push(mc, mp))) + return rc; + + if (flags & MDB_PS_MODIFY) { + if ((rc = mdb_page_touch(mc)) != 0) + return rc; + mp = mc->mc_pg[mc->mc_top]; + } + } + + if (!IS_LEAF(mp)) { + DPRINTF(("internal error, index points to a %02X page!?", + mp->mp_flags)); + mc->mc_txn->mt_flags |= MDB_TXN_ERROR; + return MDB_CORRUPTED; + } + + DPRINTF(("found leaf page %"Z"u for key [%s]", mp->mp_pgno, + key ? DKEY(key) : "null")); + mc->mc_flags |= C_INITIALIZED; + mc->mc_flags &= ~C_EOF; + + return MDB_SUCCESS; +} + +/** Search for the lowest key under the current branch page. + * This just bypasses a NUMKEYS check in the current page + * before calling mdb_page_search_root(), because the callers + * are all in situations where the current page is known to + * be underfilled. + */ +static int +mdb_page_search_lowest(MDB_cursor *mc) +{ + MDB_page *mp = mc->mc_pg[mc->mc_top]; + MDB_node *node = NODEPTR(mp, 0); + int rc; + + if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(node), &mp, NULL)) != 0) + return rc; + + mc->mc_ki[mc->mc_top] = 0; + if ((rc = mdb_cursor_push(mc, mp))) + return rc; + return mdb_page_search_root(mc, NULL, MDB_PS_FIRST); +} + +/** Search for the page a given key should be in. + * Push it and its parent pages on the cursor stack. + * @param[in,out] mc the cursor for this operation. + * @param[in] key the key to search for, or NULL for first/last page. + * @param[in] flags If MDB_PS_MODIFY is set, visited pages in the DB + * are touched (updated with new page numbers). + * If MDB_PS_FIRST or MDB_PS_LAST is set, find first or last leaf. + * This is used by #mdb_cursor_first() and #mdb_cursor_last(). + * If MDB_PS_ROOTONLY set, just fetch root node, no further lookups. + * @return 0 on success, non-zero on failure. + */ +static int +mdb_page_search(MDB_cursor *mc, MDB_val *key, int flags) +{ + int rc; + pgno_t root; + + /* Make sure the txn is still viable, then find the root from + * the txn's db table and set it as the root of the cursor's stack. + */ + if (F_ISSET(mc->mc_txn->mt_flags, MDB_TXN_ERROR)) { + DPUTS("transaction has failed, must abort"); + return MDB_BAD_TXN; + } else { + /* Make sure we're using an up-to-date root */ + if (*mc->mc_dbflag & DB_STALE) { + MDB_cursor mc2; + if (TXN_DBI_CHANGED(mc->mc_txn, mc->mc_dbi)) + return MDB_BAD_DBI; + mdb_cursor_init(&mc2, mc->mc_txn, MAIN_DBI, NULL); + rc = mdb_page_search(&mc2, &mc->mc_dbx->md_name, 0); + if (rc) + return rc; + { + MDB_val data; + int exact = 0; + uint16_t flags; + MDB_node *leaf = mdb_node_search(&mc2, + &mc->mc_dbx->md_name, &exact); + if (!exact) + return MDB_NOTFOUND; + rc = mdb_node_read(mc->mc_txn, leaf, &data); + if (rc) + return rc; + memcpy(&flags, ((char *) data.mv_data + offsetof(MDB_db, md_flags)), + sizeof(uint16_t)); + /* The txn may not know this DBI, or another process may + * have dropped and recreated the DB with other flags. + */ + if ((mc->mc_db->md_flags & PERSISTENT_FLAGS) != flags) + return MDB_INCOMPATIBLE; + memcpy(mc->mc_db, data.mv_data, sizeof(MDB_db)); + } + *mc->mc_dbflag &= ~DB_STALE; + } + root = mc->mc_db->md_root; + + if (root == P_INVALID) { /* Tree is empty. */ + DPUTS("tree is empty"); + return MDB_NOTFOUND; + } + } + + mdb_cassert(mc, root > 1); + if (!mc->mc_pg[0] || mc->mc_pg[0]->mp_pgno != root) + if ((rc = mdb_page_get(mc->mc_txn, root, &mc->mc_pg[0], NULL)) != 0) + return rc; + + mc->mc_snum = 1; + mc->mc_top = 0; + + DPRINTF(("db %d root page %"Z"u has flags 0x%X", + DDBI(mc), root, mc->mc_pg[0]->mp_flags)); + + if (flags & MDB_PS_MODIFY) { + if ((rc = mdb_page_touch(mc))) + return rc; + } + + if (flags & MDB_PS_ROOTONLY) + return MDB_SUCCESS; + + return mdb_page_search_root(mc, key, flags); +} + +static int +mdb_ovpage_free(MDB_cursor *mc, MDB_page *mp) +{ + MDB_txn *txn = mc->mc_txn; + pgno_t pg = mp->mp_pgno; + unsigned x = 0, ovpages = mp->mp_pages; + MDB_env *env = txn->mt_env; + MDB_IDL sl = txn->mt_spill_pgs; + MDB_ID pn = pg << 1; + int rc; + + DPRINTF(("free ov page %"Z"u (%d)", pg, ovpages)); + /* If the page is dirty or on the spill list we just acquired it, + * so we should give it back to our current free list, if any. + * Otherwise put it onto the list of pages we freed in this txn. + * + * Won't create me_pghead: me_pglast must be inited along with it. + * Unsupported in nested txns: They would need to hide the page + * range in ancestor txns' dirty and spilled lists. + */ + if (env->me_pghead && + !txn->mt_parent && + ((mp->mp_flags & P_DIRTY) || + (sl && (x = mdb_midl_search(sl, pn)) <= sl[0] && sl[x] == pn))) + { + unsigned i, j; + pgno_t *mop; + MDB_ID2 *dl, ix, iy; + rc = mdb_midl_need(&env->me_pghead, ovpages); + if (rc) + return rc; + if (!(mp->mp_flags & P_DIRTY)) { + /* This page is no longer spilled */ + if (x == sl[0]) + sl[0]--; + else + sl[x] |= 1; + goto release; + } + /* Remove from dirty list */ + dl = txn->mt_u.dirty_list; + x = dl[0].mid--; + for (ix = dl[x]; ix.mptr != mp; ix = iy) { + if (x > 1) { + x--; + iy = dl[x]; + dl[x] = ix; + } else { + mdb_cassert(mc, x > 1); + j = ++(dl[0].mid); + dl[j] = ix; /* Unsorted. OK when MDB_TXN_ERROR. */ + txn->mt_flags |= MDB_TXN_ERROR; + return MDB_CORRUPTED; + } + } + if (!(env->me_flags & MDB_WRITEMAP)) + mdb_dpage_free(env, mp); +release: + /* Insert in me_pghead */ + mop = env->me_pghead; + j = mop[0] + ovpages; + for (i = mop[0]; i && mop[i] < pg; i--) + mop[j--] = mop[i]; + while (j>i) + mop[j--] = pg++; + mop[0] += ovpages; + } else { + rc = mdb_midl_append_range(&txn->mt_free_pgs, pg, ovpages); + if (rc) + return rc; + } + mc->mc_db->md_overflow_pages -= ovpages; + return 0; +} + +/** Return the data associated with a given node. + * @param[in] txn The transaction for this operation. + * @param[in] leaf The node being read. + * @param[out] data Updated to point to the node's data. + * @return 0 on success, non-zero on failure. + */ +static int +mdb_node_read(MDB_txn *txn, MDB_node *leaf, MDB_val *data) +{ + MDB_page *omp; /* overflow page */ + pgno_t pgno; + int rc; + + if (!F_ISSET(leaf->mn_flags, F_BIGDATA)) { + data->mv_size = NODEDSZ(leaf); + data->mv_data = NODEDATA(leaf); + return MDB_SUCCESS; + } + + /* Read overflow data. + */ + data->mv_size = NODEDSZ(leaf); + memcpy(&pgno, NODEDATA(leaf), sizeof(pgno)); + if ((rc = mdb_page_get(txn, pgno, &omp, NULL)) != 0) { + DPRINTF(("read overflow page %"Z"u failed", pgno)); + return rc; + } + data->mv_data = METADATA(omp); + + return MDB_SUCCESS; +} + +int +mdb_get(MDB_txn *txn, MDB_dbi dbi, + MDB_val *key, MDB_val *data) +{ + MDB_cursor mc; + MDB_xcursor mx; + int exact = 0; + DKBUF; + + DPRINTF(("===> get db %u key [%s]", dbi, DKEY(key))); + + if (!key || !data || dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi)) + return EINVAL; + + if (txn->mt_flags & MDB_TXN_ERROR) + return MDB_BAD_TXN; + + mdb_cursor_init(&mc, txn, dbi, &mx); + return mdb_cursor_set(&mc, key, data, MDB_SET, &exact); +} + +/** Find a sibling for a page. + * Replaces the page at the top of the cursor's stack with the + * specified sibling, if one exists. + * @param[in] mc The cursor for this operation. + * @param[in] move_right Non-zero if the right sibling is requested, + * otherwise the left sibling. + * @return 0 on success, non-zero on failure. + */ +static int +mdb_cursor_sibling(MDB_cursor *mc, int move_right) +{ + int rc; + MDB_node *indx; + MDB_page *mp; +#ifdef VL32 + MDB_page *op; +#endif + + if (mc->mc_snum < 2) { + return MDB_NOTFOUND; /* root has no siblings */ + } + +#ifdef VL32 + op = mc->mc_pg[mc->mc_top]; +#endif + mdb_cursor_pop(mc); + DPRINTF(("parent page is page %"Z"u, index %u", + mc->mc_pg[mc->mc_top]->mp_pgno, mc->mc_ki[mc->mc_top])); + + if (move_right ? (mc->mc_ki[mc->mc_top] + 1u >= NUMKEYS(mc->mc_pg[mc->mc_top])) + : (mc->mc_ki[mc->mc_top] == 0)) { + DPRINTF(("no more keys left, moving to %s sibling", + move_right ? "right" : "left")); + if ((rc = mdb_cursor_sibling(mc, move_right)) != MDB_SUCCESS) { + /* undo cursor_pop before returning */ + mc->mc_top++; + mc->mc_snum++; + return rc; + } + } else { + if (move_right) + mc->mc_ki[mc->mc_top]++; + else + mc->mc_ki[mc->mc_top]--; + DPRINTF(("just moving to %s index key %u", + move_right ? "right" : "left", mc->mc_ki[mc->mc_top])); + } + mdb_cassert(mc, IS_BRANCH(mc->mc_pg[mc->mc_top])); + +#ifdef VL32 + { + MDB_ID2L rl = mc->mc_txn->mt_rpages; + unsigned x = mdb_mid2l_search(rl, op->mp_pgno); + if (x <= rl[0].mid && rl[x].mid == op->mp_pgno) { + munmap(op, mc->mc_txn->mt_env->me_psize); + while (x < rl[0].mid) { + rl[x] = rl[x+1]; + x++; + } + rl[0].mid--; + } + } +#endif + + indx = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); + if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(indx), &mp, NULL)) != 0) { + /* mc will be inconsistent if caller does mc_snum++ as above */ + mc->mc_flags &= ~(C_INITIALIZED|C_EOF); + return rc; + } + + mdb_cursor_push(mc, mp); + if (!move_right) + mc->mc_ki[mc->mc_top] = NUMKEYS(mp)-1; + + return MDB_SUCCESS; +} + +/** Move the cursor to the next data item. */ +static int +mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) +{ + MDB_page *mp; + MDB_node *leaf; + int rc; + + if (mc->mc_flags & C_EOF) { + return MDB_NOTFOUND; + } + + mdb_cassert(mc, mc->mc_flags & C_INITIALIZED); + + mp = mc->mc_pg[mc->mc_top]; + + if (mc->mc_db->md_flags & MDB_DUPSORT) { + leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); + if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { + if (op == MDB_NEXT || op == MDB_NEXT_DUP) { + rc = mdb_cursor_next(&mc->mc_xcursor->mx_cursor, data, NULL, MDB_NEXT); + if (op != MDB_NEXT || rc != MDB_NOTFOUND) { + if (rc == MDB_SUCCESS) + MDB_GET_KEY(leaf, key); + return rc; + } + } + } else { + mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); + if (op == MDB_NEXT_DUP) + return MDB_NOTFOUND; + } + } + + DPRINTF(("cursor_next: top page is %"Z"u in cursor %p", + mdb_dbg_pgno(mp), (void *) mc)); + if (mc->mc_flags & C_DEL) + goto skip; + + if (mc->mc_ki[mc->mc_top] + 1u >= NUMKEYS(mp)) { + DPUTS("=====> move to next sibling page"); + if ((rc = mdb_cursor_sibling(mc, 1)) != MDB_SUCCESS) { + mc->mc_flags |= C_EOF; + return rc; + } + mp = mc->mc_pg[mc->mc_top]; + DPRINTF(("next page is %"Z"u, key index %u", mp->mp_pgno, mc->mc_ki[mc->mc_top])); + } else + mc->mc_ki[mc->mc_top]++; + +skip: + DPRINTF(("==> cursor points to page %"Z"u with %u keys, key index %u", + mdb_dbg_pgno(mp), NUMKEYS(mp), mc->mc_ki[mc->mc_top])); + + if (IS_LEAF2(mp)) { + key->mv_size = mc->mc_db->md_pad; + key->mv_data = LEAF2KEY(mp, mc->mc_ki[mc->mc_top], key->mv_size); + return MDB_SUCCESS; + } + + mdb_cassert(mc, IS_LEAF(mp)); + leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); + + if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { + mdb_xcursor_init1(mc, leaf); + } + if (data) { + if ((rc = mdb_node_read(mc->mc_txn, leaf, data)) != MDB_SUCCESS) + return rc; + + if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { + rc = mdb_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL); + if (rc != MDB_SUCCESS) + return rc; + } + } + + MDB_GET_KEY(leaf, key); + return MDB_SUCCESS; +} + +/** Move the cursor to the previous data item. */ +static int +mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) +{ + MDB_page *mp; + MDB_node *leaf; + int rc; + + mdb_cassert(mc, mc->mc_flags & C_INITIALIZED); + + mp = mc->mc_pg[mc->mc_top]; + + if (mc->mc_db->md_flags & MDB_DUPSORT) { + leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); + if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { + if (op == MDB_PREV || op == MDB_PREV_DUP) { + rc = mdb_cursor_prev(&mc->mc_xcursor->mx_cursor, data, NULL, MDB_PREV); + if (op != MDB_PREV || rc != MDB_NOTFOUND) { + if (rc == MDB_SUCCESS) { + MDB_GET_KEY(leaf, key); + mc->mc_flags &= ~C_EOF; + } + return rc; + } + } else { + mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); + if (op == MDB_PREV_DUP) + return MDB_NOTFOUND; + } + } + } + + DPRINTF(("cursor_prev: top page is %"Z"u in cursor %p", + mdb_dbg_pgno(mp), (void *) mc)); + + if (mc->mc_ki[mc->mc_top] == 0) { + DPUTS("=====> move to prev sibling page"); + if ((rc = mdb_cursor_sibling(mc, 0)) != MDB_SUCCESS) { + return rc; + } + mp = mc->mc_pg[mc->mc_top]; + mc->mc_ki[mc->mc_top] = NUMKEYS(mp) - 1; + DPRINTF(("prev page is %"Z"u, key index %u", mp->mp_pgno, mc->mc_ki[mc->mc_top])); + } else + mc->mc_ki[mc->mc_top]--; + + mc->mc_flags &= ~C_EOF; + + DPRINTF(("==> cursor points to page %"Z"u with %u keys, key index %u", + mdb_dbg_pgno(mp), NUMKEYS(mp), mc->mc_ki[mc->mc_top])); + + if (IS_LEAF2(mp)) { + key->mv_size = mc->mc_db->md_pad; + key->mv_data = LEAF2KEY(mp, mc->mc_ki[mc->mc_top], key->mv_size); + return MDB_SUCCESS; + } + + mdb_cassert(mc, IS_LEAF(mp)); + leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); + + if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { + mdb_xcursor_init1(mc, leaf); + } + if (data) { + if ((rc = mdb_node_read(mc->mc_txn, leaf, data)) != MDB_SUCCESS) + return rc; + + if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { + rc = mdb_cursor_last(&mc->mc_xcursor->mx_cursor, data, NULL); + if (rc != MDB_SUCCESS) + return rc; + } + } + + MDB_GET_KEY(leaf, key); + return MDB_SUCCESS; +} + +/** Set the cursor on a specific data item. */ +static int +mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data, + MDB_cursor_op op, int *exactp) +{ + int rc; + MDB_page *mp; + MDB_node *leaf = NULL; + DKBUF; + + if (key->mv_size == 0) + return MDB_BAD_VALSIZE; + + if (mc->mc_xcursor) + mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); + + /* See if we're already on the right page */ + if (mc->mc_flags & C_INITIALIZED) { + MDB_val nodekey; + + mp = mc->mc_pg[mc->mc_top]; + if (!NUMKEYS(mp)) { + mc->mc_ki[mc->mc_top] = 0; + return MDB_NOTFOUND; + } + if (mp->mp_flags & P_LEAF2) { + nodekey.mv_size = mc->mc_db->md_pad; + nodekey.mv_data = LEAF2KEY(mp, 0, nodekey.mv_size); + } else { + leaf = NODEPTR(mp, 0); + MDB_GET_KEY2(leaf, nodekey); + } + rc = mc->mc_dbx->md_cmp(key, &nodekey); + if (rc == 0) { + /* Probably happens rarely, but first node on the page + * was the one we wanted. + */ + mc->mc_ki[mc->mc_top] = 0; + if (exactp) + *exactp = 1; + goto set1; + } + if (rc > 0) { + unsigned int i; + unsigned int nkeys = NUMKEYS(mp); + if (nkeys > 1) { + if (mp->mp_flags & P_LEAF2) { + nodekey.mv_data = LEAF2KEY(mp, + nkeys-1, nodekey.mv_size); + } else { + leaf = NODEPTR(mp, nkeys-1); + MDB_GET_KEY2(leaf, nodekey); + } + rc = mc->mc_dbx->md_cmp(key, &nodekey); + if (rc == 0) { + /* last node was the one we wanted */ + mc->mc_ki[mc->mc_top] = nkeys-1; + if (exactp) + *exactp = 1; + goto set1; + } + if (rc < 0) { + if (mc->mc_ki[mc->mc_top] < NUMKEYS(mp)) { + /* This is definitely the right page, skip search_page */ + if (mp->mp_flags & P_LEAF2) { + nodekey.mv_data = LEAF2KEY(mp, + mc->mc_ki[mc->mc_top], nodekey.mv_size); + } else { + leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); + MDB_GET_KEY2(leaf, nodekey); + } + rc = mc->mc_dbx->md_cmp(key, &nodekey); + if (rc == 0) { + /* current node was the one we wanted */ + if (exactp) + *exactp = 1; + goto set1; + } + } + rc = 0; + goto set2; + } + } + /* If any parents have right-sibs, search. + * Otherwise, there's nothing further. + */ + for (i=0; i<mc->mc_top; i++) + if (mc->mc_ki[i] < + NUMKEYS(mc->mc_pg[i])-1) + break; + if (i == mc->mc_top) { + /* There are no other pages */ + mc->mc_ki[mc->mc_top] = nkeys; + return MDB_NOTFOUND; + } + } + if (!mc->mc_top) { + /* There are no other pages */ + mc->mc_ki[mc->mc_top] = 0; + if (op == MDB_SET_RANGE && !exactp) { + rc = 0; + goto set1; + } else + return MDB_NOTFOUND; + } + } + + rc = mdb_page_search(mc, key, 0); + if (rc != MDB_SUCCESS) + return rc; + + mp = mc->mc_pg[mc->mc_top]; + mdb_cassert(mc, IS_LEAF(mp)); + +set2: + leaf = mdb_node_search(mc, key, exactp); + if (exactp != NULL && !*exactp) { + /* MDB_SET specified and not an exact match. */ + return MDB_NOTFOUND; + } + + if (leaf == NULL) { + DPUTS("===> inexact leaf not found, goto sibling"); + if ((rc = mdb_cursor_sibling(mc, 1)) != MDB_SUCCESS) + return rc; /* no entries matched */ + mp = mc->mc_pg[mc->mc_top]; + mdb_cassert(mc, IS_LEAF(mp)); + leaf = NODEPTR(mp, 0); + } + +set1: + mc->mc_flags |= C_INITIALIZED; + mc->mc_flags &= ~C_EOF; + + if (IS_LEAF2(mp)) { + if (op == MDB_SET_RANGE || op == MDB_SET_KEY) { + key->mv_size = mc->mc_db->md_pad; + key->mv_data = LEAF2KEY(mp, mc->mc_ki[mc->mc_top], key->mv_size); + } + return MDB_SUCCESS; + } + + if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { + mdb_xcursor_init1(mc, leaf); + } + if (data) { + if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { + if (op == MDB_SET || op == MDB_SET_KEY || op == MDB_SET_RANGE) { + rc = mdb_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL); + } else { + int ex2, *ex2p; + if (op == MDB_GET_BOTH) { + ex2p = &ex2; + ex2 = 0; + } else { + ex2p = NULL; + } + rc = mdb_cursor_set(&mc->mc_xcursor->mx_cursor, data, NULL, MDB_SET_RANGE, ex2p); + if (rc != MDB_SUCCESS) + return rc; + } + } else if (op == MDB_GET_BOTH || op == MDB_GET_BOTH_RANGE) { + MDB_val d2; + if ((rc = mdb_node_read(mc->mc_txn, leaf, &d2)) != MDB_SUCCESS) + return rc; + rc = mc->mc_dbx->md_dcmp(data, &d2); + if (rc) { + if (op == MDB_GET_BOTH || rc > 0) + return MDB_NOTFOUND; + rc = 0; + *data = d2; + } + + } else { + if (mc->mc_xcursor) + mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); + if ((rc = mdb_node_read(mc->mc_txn, leaf, data)) != MDB_SUCCESS) + return rc; + } + } + + /* The key already matches in all other cases */ + if (op == MDB_SET_RANGE || op == MDB_SET_KEY) + MDB_GET_KEY(leaf, key); + DPRINTF(("==> cursor placed on key [%s]", DKEY(key))); + + return rc; +} + +/** Move the cursor to the first item in the database. */ +static int +mdb_cursor_first(MDB_cursor *mc, MDB_val *key, MDB_val *data) +{ + int rc; + MDB_node *leaf; + + if (mc->mc_xcursor) + mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); + + if (!(mc->mc_flags & C_INITIALIZED) || mc->mc_top) { + rc = mdb_page_search(mc, NULL, MDB_PS_FIRST); + if (rc != MDB_SUCCESS) + return rc; + } + mdb_cassert(mc, IS_LEAF(mc->mc_pg[mc->mc_top])); + + leaf = NODEPTR(mc->mc_pg[mc->mc_top], 0); + mc->mc_flags |= C_INITIALIZED; + mc->mc_flags &= ~C_EOF; + + mc->mc_ki[mc->mc_top] = 0; + + if (IS_LEAF2(mc->mc_pg[mc->mc_top])) { + key->mv_size = mc->mc_db->md_pad; + key->mv_data = LEAF2KEY(mc->mc_pg[mc->mc_top], 0, key->mv_size); + return MDB_SUCCESS; + } + + if (data) { + if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { + mdb_xcursor_init1(mc, leaf); + rc = mdb_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL); + if (rc) + return rc; + } else { + if ((rc = mdb_node_read(mc->mc_txn, leaf, data)) != MDB_SUCCESS) + return rc; + } + } + MDB_GET_KEY(leaf, key); + return MDB_SUCCESS; +} + +/** Move the cursor to the last item in the database. */ +static int +mdb_cursor_last(MDB_cursor *mc, MDB_val *key, MDB_val *data) +{ + int rc; + MDB_node *leaf; + + if (mc->mc_xcursor) + mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); + + if (!(mc->mc_flags & C_EOF)) { + + if (!(mc->mc_flags & C_INITIALIZED) || mc->mc_top) { + rc = mdb_page_search(mc, NULL, MDB_PS_LAST); + if (rc != MDB_SUCCESS) + return rc; + } + mdb_cassert(mc, IS_LEAF(mc->mc_pg[mc->mc_top])); + + } + mc->mc_ki[mc->mc_top] = NUMKEYS(mc->mc_pg[mc->mc_top]) - 1; + mc->mc_flags |= C_INITIALIZED|C_EOF; + leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); + + if (IS_LEAF2(mc->mc_pg[mc->mc_top])) { + key->mv_size = mc->mc_db->md_pad; + key->mv_data = LEAF2KEY(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], key->mv_size); + return MDB_SUCCESS; + } + + if (data) { + if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { + mdb_xcursor_init1(mc, leaf); + rc = mdb_cursor_last(&mc->mc_xcursor->mx_cursor, data, NULL); + if (rc) + return rc; + } else { + if ((rc = mdb_node_read(mc->mc_txn, leaf, data)) != MDB_SUCCESS) + return rc; + } + } + + MDB_GET_KEY(leaf, key); + return MDB_SUCCESS; +} + +int +mdb_cursor_get(MDB_cursor *mc, MDB_val *key, MDB_val *data, + MDB_cursor_op op) +{ + int rc; + int exact = 0; + int (*mfunc)(MDB_cursor *mc, MDB_val *key, MDB_val *data); + + if (mc == NULL) + return EINVAL; + + if (mc->mc_txn->mt_flags & MDB_TXN_ERROR) + return MDB_BAD_TXN; + + switch (op) { + case MDB_GET_CURRENT: + if (!(mc->mc_flags & C_INITIALIZED)) { + rc = EINVAL; + } else { + MDB_page *mp = mc->mc_pg[mc->mc_top]; + int nkeys = NUMKEYS(mp); + if (!nkeys || mc->mc_ki[mc->mc_top] >= nkeys) { + mc->mc_ki[mc->mc_top] = nkeys; + rc = MDB_NOTFOUND; + break; + } + rc = MDB_SUCCESS; + if (IS_LEAF2(mp)) { + key->mv_size = mc->mc_db->md_pad; + key->mv_data = LEAF2KEY(mp, mc->mc_ki[mc->mc_top], key->mv_size); + } else { + MDB_node *leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); + MDB_GET_KEY(leaf, key); + if (data) { + if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { + if (mc->mc_flags & C_DEL) + mdb_xcursor_init1(mc, leaf); + rc = mdb_cursor_get(&mc->mc_xcursor->mx_cursor, data, NULL, MDB_GET_CURRENT); + } else { + rc = mdb_node_read(mc->mc_txn, leaf, data); + } + } + } + } + break; + case MDB_GET_BOTH: + case MDB_GET_BOTH_RANGE: + if (data == NULL) { + rc = EINVAL; + break; + } + if (mc->mc_xcursor == NULL) { + rc = MDB_INCOMPATIBLE; + break; + } + /* FALLTHRU */ + case MDB_SET: + case MDB_SET_KEY: + case MDB_SET_RANGE: + if (key == NULL) { + rc = EINVAL; + } else { + rc = mdb_cursor_set(mc, key, data, op, + op == MDB_SET_RANGE ? NULL : &exact); + } + break; + case MDB_GET_MULTIPLE: + if (data == NULL || !(mc->mc_flags & C_INITIALIZED)) { + rc = EINVAL; + break; + } + if (!(mc->mc_db->md_flags & MDB_DUPFIXED)) { + rc = MDB_INCOMPATIBLE; + break; + } + rc = MDB_SUCCESS; + if (!(mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) || + (mc->mc_xcursor->mx_cursor.mc_flags & C_EOF)) + break; + goto fetchm; + case MDB_NEXT_MULTIPLE: + if (data == NULL) { + rc = EINVAL; + break; + } + if (!(mc->mc_db->md_flags & MDB_DUPFIXED)) { + rc = MDB_INCOMPATIBLE; + break; + } + if (!(mc->mc_flags & C_INITIALIZED)) + rc = mdb_cursor_first(mc, key, data); + else + rc = mdb_cursor_next(mc, key, data, MDB_NEXT_DUP); + if (rc == MDB_SUCCESS) { + if (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) { + MDB_cursor *mx; +fetchm: + mx = &mc->mc_xcursor->mx_cursor; + data->mv_size = NUMKEYS(mx->mc_pg[mx->mc_top]) * + mx->mc_db->md_pad; + data->mv_data = METADATA(mx->mc_pg[mx->mc_top]); + mx->mc_ki[mx->mc_top] = NUMKEYS(mx->mc_pg[mx->mc_top])-1; + } else { + rc = MDB_NOTFOUND; + } + } + break; + case MDB_NEXT: + case MDB_NEXT_DUP: + case MDB_NEXT_NODUP: + if (!(mc->mc_flags & C_INITIALIZED)) + rc = mdb_cursor_first(mc, key, data); + else + rc = mdb_cursor_next(mc, key, data, op); + break; + case MDB_PREV: + case MDB_PREV_DUP: + case MDB_PREV_NODUP: + if (!(mc->mc_flags & C_INITIALIZED)) { + rc = mdb_cursor_last(mc, key, data); + if (rc) + break; + mc->mc_flags |= C_INITIALIZED; + mc->mc_ki[mc->mc_top]++; + } + rc = mdb_cursor_prev(mc, key, data, op); + break; + case MDB_FIRST: + rc = mdb_cursor_first(mc, key, data); + break; + case MDB_FIRST_DUP: + mfunc = mdb_cursor_first; + mmove: + if (data == NULL || !(mc->mc_flags & C_INITIALIZED)) { + rc = EINVAL; + break; + } + if (mc->mc_xcursor == NULL) { + rc = MDB_INCOMPATIBLE; + break; + } + { + MDB_node *leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); + if (!F_ISSET(leaf->mn_flags, F_DUPDATA)) { + MDB_GET_KEY(leaf, key); + rc = mdb_node_read(mc->mc_txn, leaf, data); + break; + } + } + if (!(mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) { + rc = EINVAL; + break; + } + rc = mfunc(&mc->mc_xcursor->mx_cursor, data, NULL); + break; + case MDB_LAST: + rc = mdb_cursor_last(mc, key, data); + break; + case MDB_LAST_DUP: + mfunc = mdb_cursor_last; + goto mmove; + default: + DPRINTF(("unhandled/unimplemented cursor operation %u", op)); + rc = EINVAL; + break; + } + + if (mc->mc_flags & C_DEL) + mc->mc_flags ^= C_DEL; + + return rc; +} + +/** Touch all the pages in the cursor stack. Set mc_top. + * Makes sure all the pages are writable, before attempting a write operation. + * @param[in] mc The cursor to operate on. + */ +static int +mdb_cursor_touch(MDB_cursor *mc) +{ + int rc = MDB_SUCCESS; + + if (mc->mc_dbi > MAIN_DBI && !(*mc->mc_dbflag & DB_DIRTY)) { + MDB_cursor mc2; + MDB_xcursor mcx; + if (TXN_DBI_CHANGED(mc->mc_txn, mc->mc_dbi)) + return MDB_BAD_DBI; + mdb_cursor_init(&mc2, mc->mc_txn, MAIN_DBI, &mcx); + rc = mdb_page_search(&mc2, &mc->mc_dbx->md_name, MDB_PS_MODIFY); + if (rc) + return rc; + *mc->mc_dbflag |= DB_DIRTY; + } + mc->mc_top = 0; + if (mc->mc_snum) { + do { + rc = mdb_page_touch(mc); + } while (!rc && ++(mc->mc_top) < mc->mc_snum); + mc->mc_top = mc->mc_snum-1; + } + return rc; +} + +/** Do not spill pages to disk if txn is getting full, may fail instead */ +#define MDB_NOSPILL 0x8000 + +int +mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, + unsigned int flags) +{ + enum { MDB_NO_ROOT = MDB_LAST_ERRCODE+10 }; /* internal code */ + MDB_env *env; + MDB_node *leaf = NULL; + MDB_page *fp, *mp; + uint16_t fp_flags; + MDB_val xdata, *rdata, dkey, olddata; + MDB_db dummy; + int do_sub = 0, insert_key, insert_data; + unsigned int mcount = 0, dcount = 0, nospill; + size_t nsize; + int rc, rc2; + unsigned int nflags; + DKBUF; + + if (mc == NULL || key == NULL) + return EINVAL; + + env = mc->mc_txn->mt_env; + + /* Check this first so counter will always be zero on any + * early failures. + */ + if (flags & MDB_MULTIPLE) { + dcount = data[1].mv_size; + data[1].mv_size = 0; + if (!F_ISSET(mc->mc_db->md_flags, MDB_DUPFIXED)) + return MDB_INCOMPATIBLE; + } + + nospill = flags & MDB_NOSPILL; + flags &= ~MDB_NOSPILL; + + if (mc->mc_txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_ERROR)) + return (mc->mc_txn->mt_flags & MDB_TXN_RDONLY) ? EACCES : MDB_BAD_TXN; + + if (key->mv_size-1 >= ENV_MAXKEY(env)) + return MDB_BAD_VALSIZE; + +#if SIZE_MAX > MAXDATASIZE + if (data->mv_size > ((mc->mc_db->md_flags & MDB_DUPSORT) ? ENV_MAXKEY(env) : MAXDATASIZE)) + return MDB_BAD_VALSIZE; +#else + if ((mc->mc_db->md_flags & MDB_DUPSORT) && data->mv_size > ENV_MAXKEY(env)) + return MDB_BAD_VALSIZE; +#endif + + DPRINTF(("==> put db %d key [%s], size %"Z"u, data size %"Z"u", + DDBI(mc), DKEY(key), key ? key->mv_size : 0, data->mv_size)); + + dkey.mv_size = 0; + + if (flags == MDB_CURRENT) { + if (!(mc->mc_flags & C_INITIALIZED)) + return EINVAL; + rc = MDB_SUCCESS; + } else if (mc->mc_db->md_root == P_INVALID) { + /* new database, cursor has nothing to point to */ + mc->mc_snum = 0; + mc->mc_top = 0; + mc->mc_flags &= ~C_INITIALIZED; + rc = MDB_NO_ROOT; + } else { + int exact = 0; + MDB_val d2; + if (flags & MDB_APPEND) { + MDB_val k2; + rc = mdb_cursor_last(mc, &k2, &d2); + if (rc == 0) { + rc = mc->mc_dbx->md_cmp(key, &k2); + if (rc > 0) { + rc = MDB_NOTFOUND; + mc->mc_ki[mc->mc_top]++; + } else { + /* new key is <= last key */ + rc = MDB_KEYEXIST; + } + } + } else { + rc = mdb_cursor_set(mc, key, &d2, MDB_SET, &exact); + } + if ((flags & MDB_NOOVERWRITE) && rc == 0) { + DPRINTF(("duplicate key [%s]", DKEY(key))); + *data = d2; + return MDB_KEYEXIST; + } + if (rc && rc != MDB_NOTFOUND) + return rc; + } + + if (mc->mc_flags & C_DEL) + mc->mc_flags ^= C_DEL; + + /* Cursor is positioned, check for room in the dirty list */ + if (!nospill) { + if (flags & MDB_MULTIPLE) { + rdata = &xdata; + xdata.mv_size = data->mv_size * dcount; + } else { + rdata = data; + } + if ((rc2 = mdb_page_spill(mc, key, rdata))) + return rc2; + } + + if (rc == MDB_NO_ROOT) { + MDB_page *np; + /* new database, write a root leaf page */ + DPUTS("allocating new root leaf page"); + if ((rc2 = mdb_page_new(mc, P_LEAF, 1, &np))) { + return rc2; + } + mdb_cursor_push(mc, np); + mc->mc_db->md_root = np->mp_pgno; + mc->mc_db->md_depth++; + *mc->mc_dbflag |= DB_DIRTY; + if ((mc->mc_db->md_flags & (MDB_DUPSORT|MDB_DUPFIXED)) + == MDB_DUPFIXED) + np->mp_flags |= P_LEAF2; + mc->mc_flags |= C_INITIALIZED; + } else { + /* make sure all cursor pages are writable */ + rc2 = mdb_cursor_touch(mc); + if (rc2) + return rc2; + } + + insert_key = insert_data = rc; + if (insert_key) { + /* The key does not exist */ + DPRINTF(("inserting key at index %i", mc->mc_ki[mc->mc_top])); + if ((mc->mc_db->md_flags & MDB_DUPSORT) && + LEAFSIZE(key, data) > env->me_nodemax) + { + /* Too big for a node, insert in sub-DB. Set up an empty + * "old sub-page" for prep_subDB to expand to a full page. + */ + fp_flags = P_LEAF|P_DIRTY; + fp = env->me_pbuf; + fp->mp_pad = data->mv_size; /* used if MDB_DUPFIXED */ + fp->mp_lower = fp->mp_upper = (PAGEHDRSZ-PAGEBASE); + olddata.mv_size = PAGEHDRSZ; + goto prep_subDB; + } + } else { + /* there's only a key anyway, so this is a no-op */ + if (IS_LEAF2(mc->mc_pg[mc->mc_top])) { + char *ptr; + unsigned int ksize = mc->mc_db->md_pad; + if (key->mv_size != ksize) + return MDB_BAD_VALSIZE; + ptr = LEAF2KEY(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], ksize); + memcpy(ptr, key->mv_data, ksize); +fix_parent: + /* if overwriting slot 0 of leaf, need to + * update branch key if there is a parent page + */ + if (mc->mc_top && !mc->mc_ki[mc->mc_top]) { + unsigned short top = mc->mc_top; + mc->mc_top--; + /* slot 0 is always an empty key, find real slot */ + while (mc->mc_top && !mc->mc_ki[mc->mc_top]) + mc->mc_top--; + if (mc->mc_ki[mc->mc_top]) + rc2 = mdb_update_key(mc, key); + else + rc2 = MDB_SUCCESS; + mc->mc_top = top; + if (rc2) + return rc2; + } + return MDB_SUCCESS; + } + +more: + leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); + olddata.mv_size = NODEDSZ(leaf); + olddata.mv_data = NODEDATA(leaf); + + /* DB has dups? */ + if (F_ISSET(mc->mc_db->md_flags, MDB_DUPSORT)) { + /* Prepare (sub-)page/sub-DB to accept the new item, + * if needed. fp: old sub-page or a header faking + * it. mp: new (sub-)page. offset: growth in page + * size. xdata: node data with new page or DB. + */ + unsigned i, offset = 0; + mp = fp = xdata.mv_data = env->me_pbuf; + mp->mp_pgno = mc->mc_pg[mc->mc_top]->mp_pgno; + + /* Was a single item before, must convert now */ + if (!F_ISSET(leaf->mn_flags, F_DUPDATA)) { + /* Just overwrite the current item */ + if (flags == MDB_CURRENT) + goto current; + +#if UINT_MAX < SIZE_MAX + if (mc->mc_dbx->md_dcmp == mdb_cmp_int && olddata.mv_size == sizeof(size_t)) + mc->mc_dbx->md_dcmp = mdb_cmp_clong; +#endif + /* does data match? */ + if (!mc->mc_dbx->md_dcmp(data, &olddata)) { + if (flags & MDB_NODUPDATA) + return MDB_KEYEXIST; + /* overwrite it */ + goto current; + } + + /* Back up original data item */ + dkey.mv_size = olddata.mv_size; + dkey.mv_data = memcpy(fp+1, olddata.mv_data, olddata.mv_size); + + /* Make sub-page header for the dup items, with dummy body */ + fp->mp_flags = P_LEAF|P_DIRTY|P_SUBP; + fp->mp_lower = (PAGEHDRSZ-PAGEBASE); + xdata.mv_size = PAGEHDRSZ + dkey.mv_size + data->mv_size; + if (mc->mc_db->md_flags & MDB_DUPFIXED) { + fp->mp_flags |= P_LEAF2; + fp->mp_pad = data->mv_size; + xdata.mv_size += 2 * data->mv_size; /* leave space for 2 more */ + } else { + xdata.mv_size += 2 * (sizeof(indx_t) + NODESIZE) + + (dkey.mv_size & 1) + (data->mv_size & 1); + } + fp->mp_upper = xdata.mv_size - PAGEBASE; + olddata.mv_size = xdata.mv_size; /* pretend olddata is fp */ + } else if (leaf->mn_flags & F_SUBDATA) { + /* Data is on sub-DB, just store it */ + flags |= F_DUPDATA|F_SUBDATA; + goto put_sub; + } else { + /* Data is on sub-page */ + fp = olddata.mv_data; + switch (flags) { + default: + if (!(mc->mc_db->md_flags & MDB_DUPFIXED)) { + offset = EVEN(NODESIZE + sizeof(indx_t) + + data->mv_size); + break; + } + offset = fp->mp_pad; + if (SIZELEFT(fp) < offset) { + offset *= 4; /* space for 4 more */ + break; + } + /* FALLTHRU: Big enough MDB_DUPFIXED sub-page */ + case MDB_CURRENT: + fp->mp_flags |= P_DIRTY; + COPY_PGNO(fp->mp_pgno, mp->mp_pgno); + mc->mc_xcursor->mx_cursor.mc_pg[0] = fp; + flags |= F_DUPDATA; + goto put_sub; + } + xdata.mv_size = olddata.mv_size + offset; + } + + fp_flags = fp->mp_flags; + if (NODESIZE + NODEKSZ(leaf) + xdata.mv_size > env->me_nodemax) { + /* Too big for a sub-page, convert to sub-DB */ + fp_flags &= ~P_SUBP; +prep_subDB: + if (mc->mc_db->md_flags & MDB_DUPFIXED) { + fp_flags |= P_LEAF2; + dummy.md_pad = fp->mp_pad; + dummy.md_flags = MDB_DUPFIXED; + if (mc->mc_db->md_flags & MDB_INTEGERDUP) + dummy.md_flags |= MDB_INTEGERKEY; + } else { + dummy.md_pad = 0; + dummy.md_flags = 0; + } + dummy.md_depth = 1; + dummy.md_branch_pages = 0; + dummy.md_leaf_pages = 1; + dummy.md_overflow_pages = 0; + dummy.md_entries = NUMKEYS(fp); + xdata.mv_size = sizeof(MDB_db); + xdata.mv_data = &dummy; + if ((rc = mdb_page_alloc(mc, 1, &mp))) + return rc; + offset = env->me_psize - olddata.mv_size; + flags |= F_DUPDATA|F_SUBDATA; + dummy.md_root = mp->mp_pgno; + } + if (mp != fp) { + mp->mp_flags = fp_flags | P_DIRTY; + mp->mp_pad = fp->mp_pad; + mp->mp_lower = fp->mp_lower; + mp->mp_upper = fp->mp_upper + offset; + if (fp_flags & P_LEAF2) { + memcpy(METADATA(mp), METADATA(fp), NUMKEYS(fp) * fp->mp_pad); + } else { + memcpy((char *)mp + mp->mp_upper + PAGEBASE, (char *)fp + fp->mp_upper + PAGEBASE, + olddata.mv_size - fp->mp_upper - PAGEBASE); + for (i=0; i<NUMKEYS(fp); i++) + mp->mp_ptrs[i] = fp->mp_ptrs[i] + offset; + } + } + + rdata = &xdata; + flags |= F_DUPDATA; + do_sub = 1; + if (!insert_key) + mdb_node_del(mc, 0); + goto new_sub; + } +current: + /* overflow page overwrites need special handling */ + if (F_ISSET(leaf->mn_flags, F_BIGDATA)) { + MDB_page *omp; + pgno_t pg; + int level, ovpages, dpages = OVPAGES(data->mv_size, env->me_psize); + + memcpy(&pg, olddata.mv_data, sizeof(pg)); + if ((rc2 = mdb_page_get(mc->mc_txn, pg, &omp, &level)) != 0) + return rc2; + ovpages = omp->mp_pages; + + /* Is the ov page large enough? */ + if (ovpages >= dpages) { + if (!(omp->mp_flags & P_DIRTY) && + (level || (env->me_flags & MDB_WRITEMAP))) + { + rc = mdb_page_unspill(mc->mc_txn, omp, &omp); + if (rc) + return rc; + level = 0; /* dirty in this txn or clean */ + } + /* Is it dirty? */ + if (omp->mp_flags & P_DIRTY) { + /* yes, overwrite it. Note in this case we don't + * bother to try shrinking the page if the new data + * is smaller than the overflow threshold. + */ + if (level > 1) { + /* It is writable only in a parent txn */ + size_t sz = (size_t) env->me_psize * ovpages, off; + MDB_page *np = mdb_page_malloc(mc->mc_txn, ovpages); + MDB_ID2 id2; + if (!np) + return ENOMEM; + id2.mid = pg; + id2.mptr = np; + rc2 = mdb_mid2l_insert(mc->mc_txn->mt_u.dirty_list, &id2); + mdb_cassert(mc, rc2 == 0); + if (!(flags & MDB_RESERVE)) { + /* Copy end of page, adjusting alignment so + * compiler may copy words instead of bytes. + */ + off = (PAGEHDRSZ + data->mv_size) & -sizeof(size_t); + memcpy((size_t *)((char *)np + off), + (size_t *)((char *)omp + off), sz - off); + sz = PAGEHDRSZ; + } + memcpy(np, omp, sz); /* Copy beginning of page */ + omp = np; + } + SETDSZ(leaf, data->mv_size); + if (F_ISSET(flags, MDB_RESERVE)) + data->mv_data = METADATA(omp); + else + memcpy(METADATA(omp), data->mv_data, data->mv_size); + return MDB_SUCCESS; + } + } + if ((rc2 = mdb_ovpage_free(mc, omp)) != MDB_SUCCESS) + return rc2; + } else if (data->mv_size == olddata.mv_size) { + /* same size, just replace it. Note that we could + * also reuse this node if the new data is smaller, + * but instead we opt to shrink the node in that case. + */ + if (F_ISSET(flags, MDB_RESERVE)) + data->mv_data = olddata.mv_data; + else if (!(mc->mc_flags & C_SUB)) + memcpy(olddata.mv_data, data->mv_data, data->mv_size); + else { + memcpy(NODEKEY(leaf), key->mv_data, key->mv_size); + goto fix_parent; + } + return MDB_SUCCESS; + } + mdb_node_del(mc, 0); + } + + rdata = data; + +new_sub: + nflags = flags & NODE_ADD_FLAGS; + nsize = IS_LEAF2(mc->mc_pg[mc->mc_top]) ? key->mv_size : mdb_leaf_size(env, key, rdata); + if (SIZELEFT(mc->mc_pg[mc->mc_top]) < nsize) { + if (( flags & (F_DUPDATA|F_SUBDATA)) == F_DUPDATA ) + nflags &= ~MDB_APPEND; /* sub-page may need room to grow */ + if (!insert_key) + nflags |= MDB_SPLIT_REPLACE; + rc = mdb_page_split(mc, key, rdata, P_INVALID, nflags); + } else { + /* There is room already in this leaf page. */ + rc = mdb_node_add(mc, mc->mc_ki[mc->mc_top], key, rdata, 0, nflags); + if (rc == 0 && insert_key) { + /* Adjust other cursors pointing to mp */ + MDB_cursor *m2, *m3; + MDB_dbi dbi = mc->mc_dbi; + unsigned i = mc->mc_top; + MDB_page *mp = mc->mc_pg[i]; + + for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { + if (mc->mc_flags & C_SUB) + m3 = &m2->mc_xcursor->mx_cursor; + else + m3 = m2; + if (m3 == mc || m3->mc_snum < mc->mc_snum) continue; + if (m3->mc_pg[i] == mp && m3->mc_ki[i] >= mc->mc_ki[i]) { + m3->mc_ki[i]++; + } + } + } + } + + if (rc == MDB_SUCCESS) { + /* Now store the actual data in the child DB. Note that we're + * storing the user data in the keys field, so there are strict + * size limits on dupdata. The actual data fields of the child + * DB are all zero size. + */ + if (do_sub) { + int xflags; + size_t ecount; +put_sub: + xdata.mv_size = 0; + xdata.mv_data = ""; + leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); + if (flags & MDB_CURRENT) { + xflags = MDB_CURRENT|MDB_NOSPILL; + } else { + mdb_xcursor_init1(mc, leaf); + xflags = (flags & MDB_NODUPDATA) ? + MDB_NOOVERWRITE|MDB_NOSPILL : MDB_NOSPILL; + } + /* converted, write the original data first */ + if (dkey.mv_size) { + rc = mdb_cursor_put(&mc->mc_xcursor->mx_cursor, &dkey, &xdata, xflags); + if (rc) + goto bad_sub; + { + /* Adjust other cursors pointing to mp */ + MDB_cursor *m2; + unsigned i = mc->mc_top; + MDB_page *mp = mc->mc_pg[i]; + + for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2=m2->mc_next) { + if (m2 == mc || m2->mc_snum < mc->mc_snum) continue; + if (!(m2->mc_flags & C_INITIALIZED)) continue; + if (m2->mc_pg[i] == mp && m2->mc_ki[i] == mc->mc_ki[i]) { + mdb_xcursor_init1(m2, leaf); + } + } + } + /* we've done our job */ + dkey.mv_size = 0; + } + ecount = mc->mc_xcursor->mx_db.md_entries; + if (flags & MDB_APPENDDUP) + xflags |= MDB_APPEND; + rc = mdb_cursor_put(&mc->mc_xcursor->mx_cursor, data, &xdata, xflags); + if (flags & F_SUBDATA) { + void *db = NODEDATA(leaf); + memcpy(db, &mc->mc_xcursor->mx_db, sizeof(MDB_db)); + } + insert_data = mc->mc_xcursor->mx_db.md_entries - ecount; + } + /* Increment count unless we just replaced an existing item. */ + if (insert_data) + mc->mc_db->md_entries++; + if (insert_key) { + /* Invalidate txn if we created an empty sub-DB */ + if (rc) + goto bad_sub; + /* If we succeeded and the key didn't exist before, + * make sure the cursor is marked valid. + */ + mc->mc_flags |= C_INITIALIZED; + } + if (flags & MDB_MULTIPLE) { + if (!rc) { + mcount++; + /* let caller know how many succeeded, if any */ + data[1].mv_size = mcount; + if (mcount < dcount) { + data[0].mv_data = (char *)data[0].mv_data + data[0].mv_size; + insert_key = insert_data = 0; + goto more; + } + } + } + return rc; +bad_sub: + if (rc == MDB_KEYEXIST) /* should not happen, we deleted that item */ + rc = MDB_CORRUPTED; + } + mc->mc_txn->mt_flags |= MDB_TXN_ERROR; + return rc; +} + +int +mdb_cursor_del(MDB_cursor *mc, unsigned int flags) +{ + MDB_node *leaf; + MDB_page *mp; + int rc; + + if (mc->mc_txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_ERROR)) + return (mc->mc_txn->mt_flags & MDB_TXN_RDONLY) ? EACCES : MDB_BAD_TXN; + + if (!(mc->mc_flags & C_INITIALIZED)) + return EINVAL; + + if (mc->mc_ki[mc->mc_top] >= NUMKEYS(mc->mc_pg[mc->mc_top])) + return MDB_NOTFOUND; + + if (!(flags & MDB_NOSPILL) && (rc = mdb_page_spill(mc, NULL, NULL))) + return rc; + + rc = mdb_cursor_touch(mc); + if (rc) + return rc; + + mp = mc->mc_pg[mc->mc_top]; + if (IS_LEAF2(mp)) + goto del_key; + leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); + + if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { + if (flags & MDB_NODUPDATA) { + /* mdb_cursor_del0() will subtract the final entry */ + mc->mc_db->md_entries -= mc->mc_xcursor->mx_db.md_entries - 1; + } else { + if (!F_ISSET(leaf->mn_flags, F_SUBDATA)) { + mc->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(leaf); + } + rc = mdb_cursor_del(&mc->mc_xcursor->mx_cursor, MDB_NOSPILL); + if (rc) + return rc; + /* If sub-DB still has entries, we're done */ + if (mc->mc_xcursor->mx_db.md_entries) { + if (leaf->mn_flags & F_SUBDATA) { + /* update subDB info */ + void *db = NODEDATA(leaf); + memcpy(db, &mc->mc_xcursor->mx_db, sizeof(MDB_db)); + } else { + MDB_cursor *m2; + /* shrink fake page */ + mdb_node_shrink(mp, mc->mc_ki[mc->mc_top]); + leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); + mc->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(leaf); + /* fix other sub-DB cursors pointed at this fake page */ + for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2=m2->mc_next) { + if (m2 == mc || m2->mc_snum < mc->mc_snum) continue; + if (m2->mc_pg[mc->mc_top] == mp && + m2->mc_ki[mc->mc_top] == mc->mc_ki[mc->mc_top]) + m2->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(leaf); + } + } + mc->mc_db->md_entries--; + mc->mc_flags |= C_DEL; + return rc; + } + /* otherwise fall thru and delete the sub-DB */ + } + + if (leaf->mn_flags & F_SUBDATA) { + /* add all the child DB's pages to the free list */ + rc = mdb_drop0(&mc->mc_xcursor->mx_cursor, 0); + if (rc) + goto fail; + } + } + + /* add overflow pages to free list */ + if (F_ISSET(leaf->mn_flags, F_BIGDATA)) { + MDB_page *omp; + pgno_t pg; + + memcpy(&pg, NODEDATA(leaf), sizeof(pg)); + if ((rc = mdb_page_get(mc->mc_txn, pg, &omp, NULL)) || + (rc = mdb_ovpage_free(mc, omp))) + goto fail; + } + +del_key: + return mdb_cursor_del0(mc); + +fail: + mc->mc_txn->mt_flags |= MDB_TXN_ERROR; + return rc; +} + +/** Allocate and initialize new pages for a database. + * @param[in] mc a cursor on the database being added to. + * @param[in] flags flags defining what type of page is being allocated. + * @param[in] num the number of pages to allocate. This is usually 1, + * unless allocating overflow pages for a large record. + * @param[out] mp Address of a page, or NULL on failure. + * @return 0 on success, non-zero on failure. + */ +static int +mdb_page_new(MDB_cursor *mc, uint32_t flags, int num, MDB_page **mp) +{ + MDB_page *np; + int rc; + + if ((rc = mdb_page_alloc(mc, num, &np))) + return rc; + DPRINTF(("allocated new mpage %"Z"u, page size %u", + np->mp_pgno, mc->mc_txn->mt_env->me_psize)); + np->mp_flags = flags | P_DIRTY; + np->mp_lower = (PAGEHDRSZ-PAGEBASE); + np->mp_upper = mc->mc_txn->mt_env->me_psize - PAGEBASE; + + if (IS_BRANCH(np)) + mc->mc_db->md_branch_pages++; + else if (IS_LEAF(np)) + mc->mc_db->md_leaf_pages++; + else if (IS_OVERFLOW(np)) { + mc->mc_db->md_overflow_pages += num; + np->mp_pages = num; + } + *mp = np; + + return 0; +} + +/** Calculate the size of a leaf node. + * The size depends on the environment's page size; if a data item + * is too large it will be put onto an overflow page and the node + * size will only include the key and not the data. Sizes are always + * rounded up to an even number of bytes, to guarantee 2-byte alignment + * of the #MDB_node headers. + * @param[in] env The environment handle. + * @param[in] key The key for the node. + * @param[in] data The data for the node. + * @return The number of bytes needed to store the node. + */ +static size_t +mdb_leaf_size(MDB_env *env, MDB_val *key, MDB_val *data) +{ + size_t sz; + + sz = LEAFSIZE(key, data); + if (sz > env->me_nodemax) { + /* put on overflow page */ + sz -= data->mv_size - sizeof(pgno_t); + } + + return EVEN(sz + sizeof(indx_t)); +} + +/** Calculate the size of a branch node. + * The size should depend on the environment's page size but since + * we currently don't support spilling large keys onto overflow + * pages, it's simply the size of the #MDB_node header plus the + * size of the key. Sizes are always rounded up to an even number + * of bytes, to guarantee 2-byte alignment of the #MDB_node headers. + * @param[in] env The environment handle. + * @param[in] key The key for the node. + * @return The number of bytes needed to store the node. + */ +static size_t +mdb_branch_size(MDB_env *env, MDB_val *key) +{ + size_t sz; + + sz = INDXSIZE(key); + if (sz > env->me_nodemax) { + /* put on overflow page */ + /* not implemented */ + /* sz -= key->size - sizeof(pgno_t); */ + } + + return sz + sizeof(indx_t); +} + +/** Add a node to the page pointed to by the cursor. + * @param[in] mc The cursor for this operation. + * @param[in] indx The index on the page where the new node should be added. + * @param[in] key The key for the new node. + * @param[in] data The data for the new node, if any. + * @param[in] pgno The page number, if adding a branch node. + * @param[in] flags Flags for the node. + * @return 0 on success, non-zero on failure. Possible errors are: + * <ul> + * <li>ENOMEM - failed to allocate overflow pages for the node. + * <li>MDB_PAGE_FULL - there is insufficient room in the page. This error + * should never happen since all callers already calculate the + * page's free space before calling this function. + * </ul> + */ +static int +mdb_node_add(MDB_cursor *mc, indx_t indx, + MDB_val *key, MDB_val *data, pgno_t pgno, unsigned int flags) +{ + unsigned int i; + size_t node_size = NODESIZE; + ssize_t room; + indx_t ofs; + MDB_node *node; + MDB_page *mp = mc->mc_pg[mc->mc_top]; + MDB_page *ofp = NULL; /* overflow page */ + DKBUF; + + mdb_cassert(mc, mp->mp_upper >= mp->mp_lower); + + DPRINTF(("add to %s %spage %"Z"u index %i, data size %"Z"u key size %"Z"u [%s]", + IS_LEAF(mp) ? "leaf" : "branch", + IS_SUBP(mp) ? "sub-" : "", + mdb_dbg_pgno(mp), indx, data ? data->mv_size : 0, + key ? key->mv_size : 0, key ? DKEY(key) : "null")); + + if (IS_LEAF2(mp)) { + /* Move higher keys up one slot. */ + int ksize = mc->mc_db->md_pad, dif; + char *ptr = LEAF2KEY(mp, indx, ksize); + dif = NUMKEYS(mp) - indx; + if (dif > 0) + memmove(ptr+ksize, ptr, dif*ksize); + /* insert new key */ + memcpy(ptr, key->mv_data, ksize); + + /* Just using these for counting */ + mp->mp_lower += sizeof(indx_t); + mp->mp_upper -= ksize - sizeof(indx_t); + return MDB_SUCCESS; + } + + room = (ssize_t)SIZELEFT(mp) - (ssize_t)sizeof(indx_t); + if (key != NULL) + node_size += key->mv_size; + if (IS_LEAF(mp)) { + mdb_cassert(mc, data); + if (F_ISSET(flags, F_BIGDATA)) { + /* Data already on overflow page. */ + node_size += sizeof(pgno_t); + } else if (node_size + data->mv_size > mc->mc_txn->mt_env->me_nodemax) { + int ovpages = OVPAGES(data->mv_size, mc->mc_txn->mt_env->me_psize); + int rc; + /* Put data on overflow page. */ + DPRINTF(("data size is %"Z"u, node would be %"Z"u, put data on overflow page", + data->mv_size, node_size+data->mv_size)); + node_size = EVEN(node_size + sizeof(pgno_t)); + if ((ssize_t)node_size > room) + goto full; + if ((rc = mdb_page_new(mc, P_OVERFLOW, ovpages, &ofp))) + return rc; + DPRINTF(("allocated overflow page %"Z"u", ofp->mp_pgno)); + flags |= F_BIGDATA; + goto update; + } else { + node_size += data->mv_size; + } + } + node_size = EVEN(node_size); + if ((ssize_t)node_size > room) + goto full; + +update: + /* Move higher pointers up one slot. */ + for (i = NUMKEYS(mp); i > indx; i--) + mp->mp_ptrs[i] = mp->mp_ptrs[i - 1]; + + /* Adjust free space offsets. */ + ofs = mp->mp_upper - node_size; + mdb_cassert(mc, ofs >= mp->mp_lower + sizeof(indx_t)); + mp->mp_ptrs[indx] = ofs; + mp->mp_upper = ofs; + mp->mp_lower += sizeof(indx_t); + + /* Write the node data. */ + node = NODEPTR(mp, indx); + node->mn_ksize = (key == NULL) ? 0 : key->mv_size; + node->mn_flags = flags; + if (IS_LEAF(mp)) + SETDSZ(node,data->mv_size); + else + SETPGNO(node,pgno); + + if (key) + memcpy(NODEKEY(node), key->mv_data, key->mv_size); + + if (IS_LEAF(mp)) { + mdb_cassert(mc, key); + if (ofp == NULL) { + if (F_ISSET(flags, F_BIGDATA)) + memcpy(node->mn_data + key->mv_size, data->mv_data, + sizeof(pgno_t)); + else if (F_ISSET(flags, MDB_RESERVE)) + data->mv_data = node->mn_data + key->mv_size; + else + memcpy(node->mn_data + key->mv_size, data->mv_data, + data->mv_size); + } else { + memcpy(node->mn_data + key->mv_size, &ofp->mp_pgno, + sizeof(pgno_t)); + if (F_ISSET(flags, MDB_RESERVE)) + data->mv_data = METADATA(ofp); + else + memcpy(METADATA(ofp), data->mv_data, data->mv_size); + } + } + + return MDB_SUCCESS; + +full: + DPRINTF(("not enough room in page %"Z"u, got %u ptrs", + mdb_dbg_pgno(mp), NUMKEYS(mp))); + DPRINTF(("upper-lower = %u - %u = %"Z"d", mp->mp_upper,mp->mp_lower,room)); + DPRINTF(("node size = %"Z"u", node_size)); + mc->mc_txn->mt_flags |= MDB_TXN_ERROR; + return MDB_PAGE_FULL; +} + +/** Delete the specified node from a page. + * @param[in] mc Cursor pointing to the node to delete. + * @param[in] ksize The size of a node. Only used if the page is + * part of a #MDB_DUPFIXED database. + */ +static void +mdb_node_del(MDB_cursor *mc, int ksize) +{ + MDB_page *mp = mc->mc_pg[mc->mc_top]; + indx_t indx = mc->mc_ki[mc->mc_top]; + unsigned int sz; + indx_t i, j, numkeys, ptr; + MDB_node *node; + char *base; + + DPRINTF(("delete node %u on %s page %"Z"u", indx, + IS_LEAF(mp) ? "leaf" : "branch", mdb_dbg_pgno(mp))); + numkeys = NUMKEYS(mp); + mdb_cassert(mc, indx < numkeys); + + if (IS_LEAF2(mp)) { + int x = numkeys - 1 - indx; + base = LEAF2KEY(mp, indx, ksize); + if (x) + memmove(base, base + ksize, x * ksize); + mp->mp_lower -= sizeof(indx_t); + mp->mp_upper += ksize - sizeof(indx_t); + return; + } + + node = NODEPTR(mp, indx); + sz = NODESIZE + node->mn_ksize; + if (IS_LEAF(mp)) { + if (F_ISSET(node->mn_flags, F_BIGDATA)) + sz += sizeof(pgno_t); + else + sz += NODEDSZ(node); + } + sz = EVEN(sz); + + ptr = mp->mp_ptrs[indx]; + for (i = j = 0; i < numkeys; i++) { + if (i != indx) { + mp->mp_ptrs[j] = mp->mp_ptrs[i]; + if (mp->mp_ptrs[i] < ptr) + mp->mp_ptrs[j] += sz; + j++; + } + } + + base = (char *)mp + mp->mp_upper + PAGEBASE; + memmove(base + sz, base, ptr - mp->mp_upper); + + mp->mp_lower -= sizeof(indx_t); + mp->mp_upper += sz; +} + +/** Compact the main page after deleting a node on a subpage. + * @param[in] mp The main page to operate on. + * @param[in] indx The index of the subpage on the main page. + */ +static void +mdb_node_shrink(MDB_page *mp, indx_t indx) +{ + MDB_node *node; + MDB_page *sp, *xp; + char *base; + int nsize, delta; + indx_t i, numkeys, ptr; + + node = NODEPTR(mp, indx); + sp = (MDB_page *)NODEDATA(node); + delta = SIZELEFT(sp); + xp = (MDB_page *)((char *)sp + delta); + + /* shift subpage upward */ + if (IS_LEAF2(sp)) { + nsize = NUMKEYS(sp) * sp->mp_pad; + if (nsize & 1) + return; /* do not make the node uneven-sized */ + memmove(METADATA(xp), METADATA(sp), nsize); + } else { + int i; + numkeys = NUMKEYS(sp); + for (i=numkeys-1; i>=0; i--) + xp->mp_ptrs[i] = sp->mp_ptrs[i] - delta; + } + xp->mp_upper = sp->mp_lower; + xp->mp_lower = sp->mp_lower; + xp->mp_flags = sp->mp_flags; + xp->mp_pad = sp->mp_pad; + COPY_PGNO(xp->mp_pgno, mp->mp_pgno); + + nsize = NODEDSZ(node) - delta; + SETDSZ(node, nsize); + + /* shift lower nodes upward */ + ptr = mp->mp_ptrs[indx]; + numkeys = NUMKEYS(mp); + for (i = 0; i < numkeys; i++) { + if (mp->mp_ptrs[i] <= ptr) + mp->mp_ptrs[i] += delta; + } + + base = (char *)mp + mp->mp_upper + PAGEBASE; + memmove(base + delta, base, ptr - mp->mp_upper + NODESIZE + NODEKSZ(node)); + mp->mp_upper += delta; +} + +/** Initial setup of a sorted-dups cursor. + * Sorted duplicates are implemented as a sub-database for the given key. + * The duplicate data items are actually keys of the sub-database. + * Operations on the duplicate data items are performed using a sub-cursor + * initialized when the sub-database is first accessed. This function does + * the preliminary setup of the sub-cursor, filling in the fields that + * depend only on the parent DB. + * @param[in] mc The main cursor whose sorted-dups cursor is to be initialized. + */ +static void +mdb_xcursor_init0(MDB_cursor *mc) +{ + MDB_xcursor *mx = mc->mc_xcursor; + + mx->mx_cursor.mc_xcursor = NULL; + mx->mx_cursor.mc_txn = mc->mc_txn; + mx->mx_cursor.mc_db = &mx->mx_db; + mx->mx_cursor.mc_dbx = &mx->mx_dbx; + mx->mx_cursor.mc_dbi = mc->mc_dbi; + mx->mx_cursor.mc_dbflag = &mx->mx_dbflag; + mx->mx_cursor.mc_snum = 0; + mx->mx_cursor.mc_top = 0; + mx->mx_cursor.mc_flags = C_SUB; + mx->mx_dbx.md_name.mv_size = 0; + mx->mx_dbx.md_name.mv_data = NULL; + mx->mx_dbx.md_cmp = mc->mc_dbx->md_dcmp; + mx->mx_dbx.md_dcmp = NULL; + mx->mx_dbx.md_rel = mc->mc_dbx->md_rel; +} + +/** Final setup of a sorted-dups cursor. + * Sets up the fields that depend on the data from the main cursor. + * @param[in] mc The main cursor whose sorted-dups cursor is to be initialized. + * @param[in] node The data containing the #MDB_db record for the + * sorted-dup database. + */ +static void +mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node) +{ + MDB_xcursor *mx = mc->mc_xcursor; + + if (node->mn_flags & F_SUBDATA) { + memcpy(&mx->mx_db, NODEDATA(node), sizeof(MDB_db)); + mx->mx_cursor.mc_pg[0] = 0; + mx->mx_cursor.mc_snum = 0; + mx->mx_cursor.mc_top = 0; + mx->mx_cursor.mc_flags = C_SUB; + } else { + MDB_page *fp = NODEDATA(node); + mx->mx_db.md_pad = mc->mc_pg[mc->mc_top]->mp_pad; + mx->mx_db.md_flags = 0; + mx->mx_db.md_depth = 1; + mx->mx_db.md_branch_pages = 0; + mx->mx_db.md_leaf_pages = 1; + mx->mx_db.md_overflow_pages = 0; + mx->mx_db.md_entries = NUMKEYS(fp); + COPY_PGNO(mx->mx_db.md_root, fp->mp_pgno); + mx->mx_cursor.mc_snum = 1; + mx->mx_cursor.mc_top = 0; + mx->mx_cursor.mc_flags = C_INITIALIZED|C_SUB; + mx->mx_cursor.mc_pg[0] = fp; + mx->mx_cursor.mc_ki[0] = 0; + if (mc->mc_db->md_flags & MDB_DUPFIXED) { + mx->mx_db.md_flags = MDB_DUPFIXED; + mx->mx_db.md_pad = fp->mp_pad; + if (mc->mc_db->md_flags & MDB_INTEGERDUP) + mx->mx_db.md_flags |= MDB_INTEGERKEY; + } + } + DPRINTF(("Sub-db -%u root page %"Z"u", mx->mx_cursor.mc_dbi, + mx->mx_db.md_root)); + mx->mx_dbflag = DB_VALID|DB_DIRTY; /* DB_DIRTY guides mdb_cursor_touch */ +#if UINT_MAX < SIZE_MAX + if (mx->mx_dbx.md_cmp == mdb_cmp_int && mx->mx_db.md_pad == sizeof(size_t)) + mx->mx_dbx.md_cmp = mdb_cmp_clong; +#endif +} + +/** Initialize a cursor for a given transaction and database. */ +static void +mdb_cursor_init(MDB_cursor *mc, MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx) +{ + mc->mc_next = NULL; + mc->mc_backup = NULL; + mc->mc_dbi = dbi; + mc->mc_txn = txn; + mc->mc_db = &txn->mt_dbs[dbi]; + mc->mc_dbx = &txn->mt_dbxs[dbi]; + mc->mc_dbflag = &txn->mt_dbflags[dbi]; + mc->mc_snum = 0; + mc->mc_top = 0; + mc->mc_pg[0] = 0; + mc->mc_flags = 0; + if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT) { + mdb_tassert(txn, mx != NULL); + mc->mc_xcursor = mx; + mdb_xcursor_init0(mc); + } else { + mc->mc_xcursor = NULL; + } + if (*mc->mc_dbflag & DB_STALE) { + mdb_page_search(mc, NULL, MDB_PS_ROOTONLY); + } +} + +int +mdb_cursor_open(MDB_txn *txn, MDB_dbi dbi, MDB_cursor **ret) +{ + MDB_cursor *mc; + size_t size = sizeof(MDB_cursor); + + if (!ret || !TXN_DBI_EXIST(txn, dbi)) + return EINVAL; + + if (txn->mt_flags & MDB_TXN_ERROR) + return MDB_BAD_TXN; + + /* Allow read access to the freelist */ + if (!dbi && !F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) + return EINVAL; + + if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT) + size += sizeof(MDB_xcursor); + + if ((mc = malloc(size)) != NULL) { + mdb_cursor_init(mc, txn, dbi, (MDB_xcursor *)(mc + 1)); + if (txn->mt_cursors) { + mc->mc_next = txn->mt_cursors[dbi]; + txn->mt_cursors[dbi] = mc; + mc->mc_flags |= C_UNTRACK; + } + } else { + return ENOMEM; + } + + *ret = mc; + + return MDB_SUCCESS; +} + +int +mdb_cursor_renew(MDB_txn *txn, MDB_cursor *mc) +{ + if (!mc || !TXN_DBI_EXIST(txn, mc->mc_dbi)) + return EINVAL; + + if ((mc->mc_flags & C_UNTRACK) || txn->mt_cursors) + return EINVAL; + + if (txn->mt_flags & MDB_TXN_ERROR) + return MDB_BAD_TXN; + + mdb_cursor_init(mc, txn, mc->mc_dbi, mc->mc_xcursor); + return MDB_SUCCESS; +} + +/* Return the count of duplicate data items for the current key */ +int +mdb_cursor_count(MDB_cursor *mc, size_t *countp) +{ + MDB_node *leaf; + + if (mc == NULL || countp == NULL) + return EINVAL; + + if (mc->mc_xcursor == NULL) + return MDB_INCOMPATIBLE; + + if (mc->mc_txn->mt_flags & MDB_TXN_ERROR) + return MDB_BAD_TXN; + + if (!(mc->mc_flags & C_INITIALIZED)) + return EINVAL; + + if (!mc->mc_snum || (mc->mc_flags & C_EOF)) + return MDB_NOTFOUND; + + leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); + if (!F_ISSET(leaf->mn_flags, F_DUPDATA)) { + *countp = 1; + } else { + if (!(mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) + return EINVAL; + + *countp = mc->mc_xcursor->mx_db.md_entries; + } + return MDB_SUCCESS; +} + +void +mdb_cursor_close(MDB_cursor *mc) +{ + if (mc && !mc->mc_backup) { + /* remove from txn, if tracked */ + if ((mc->mc_flags & C_UNTRACK) && mc->mc_txn->mt_cursors) { + MDB_cursor **prev = &mc->mc_txn->mt_cursors[mc->mc_dbi]; + while (*prev && *prev != mc) prev = &(*prev)->mc_next; + if (*prev == mc) + *prev = mc->mc_next; + } + free(mc); + } +} + +MDB_txn * +mdb_cursor_txn(MDB_cursor *mc) +{ + if (!mc) return NULL; + return mc->mc_txn; +} + +MDB_dbi +mdb_cursor_dbi(MDB_cursor *mc) +{ + return mc->mc_dbi; +} + +/** Replace the key for a branch node with a new key. + * @param[in] mc Cursor pointing to the node to operate on. + * @param[in] key The new key to use. + * @return 0 on success, non-zero on failure. + */ +static int +mdb_update_key(MDB_cursor *mc, MDB_val *key) +{ + MDB_page *mp; + MDB_node *node; + char *base; + size_t len; + int delta, ksize, oksize; + indx_t ptr, i, numkeys, indx; + DKBUF; + + indx = mc->mc_ki[mc->mc_top]; + mp = mc->mc_pg[mc->mc_top]; + node = NODEPTR(mp, indx); + ptr = mp->mp_ptrs[indx]; +#if MDB_DEBUG + { + MDB_val k2; + char kbuf2[DKBUF_MAXKEYSIZE*2+1]; + k2.mv_data = NODEKEY(node); + k2.mv_size = node->mn_ksize; + DPRINTF(("update key %u (ofs %u) [%s] to [%s] on page %"Z"u", + indx, ptr, + mdb_dkey(&k2, kbuf2), + DKEY(key), + mp->mp_pgno)); + } +#endif + + /* Sizes must be 2-byte aligned. */ + ksize = EVEN(key->mv_size); + oksize = EVEN(node->mn_ksize); + delta = ksize - oksize; + + /* Shift node contents if EVEN(key length) changed. */ + if (delta) { + if (delta > 0 && SIZELEFT(mp) < delta) { + pgno_t pgno; + /* not enough space left, do a delete and split */ + DPRINTF(("Not enough room, delta = %d, splitting...", delta)); + pgno = NODEPGNO(node); + mdb_node_del(mc, 0); + return mdb_page_split(mc, key, NULL, pgno, MDB_SPLIT_REPLACE); + } + + numkeys = NUMKEYS(mp); + for (i = 0; i < numkeys; i++) { + if (mp->mp_ptrs[i] <= ptr) + mp->mp_ptrs[i] -= delta; + } + + base = (char *)mp + mp->mp_upper + PAGEBASE; + len = ptr - mp->mp_upper + NODESIZE; + memmove(base - delta, base, len); + mp->mp_upper -= delta; + + node = NODEPTR(mp, indx); + } + + /* But even if no shift was needed, update ksize */ + if (node->mn_ksize != key->mv_size) + node->mn_ksize = key->mv_size; + + if (key->mv_size) + memcpy(NODEKEY(node), key->mv_data, key->mv_size); + + return MDB_SUCCESS; +} + +static void +mdb_cursor_copy(const MDB_cursor *csrc, MDB_cursor *cdst); + +/** Move a node from csrc to cdst. + */ +static int +mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst) +{ + MDB_node *srcnode; + MDB_val key, data; + pgno_t srcpg; + MDB_cursor mn; + int rc; + unsigned short flags; + + DKBUF; + + /* Mark src and dst as dirty. */ + if ((rc = mdb_page_touch(csrc)) || + (rc = mdb_page_touch(cdst))) + return rc; + + if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) { + key.mv_size = csrc->mc_db->md_pad; + key.mv_data = LEAF2KEY(csrc->mc_pg[csrc->mc_top], csrc->mc_ki[csrc->mc_top], key.mv_size); + data.mv_size = 0; + data.mv_data = NULL; + srcpg = 0; + flags = 0; + } else { + srcnode = NODEPTR(csrc->mc_pg[csrc->mc_top], csrc->mc_ki[csrc->mc_top]); + mdb_cassert(csrc, !((size_t)srcnode & 1)); + srcpg = NODEPGNO(srcnode); + flags = srcnode->mn_flags; + if (csrc->mc_ki[csrc->mc_top] == 0 && IS_BRANCH(csrc->mc_pg[csrc->mc_top])) { + unsigned int snum = csrc->mc_snum; + MDB_node *s2; + /* must find the lowest key below src */ + rc = mdb_page_search_lowest(csrc); + if (rc) + return rc; + if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) { + key.mv_size = csrc->mc_db->md_pad; + key.mv_data = LEAF2KEY(csrc->mc_pg[csrc->mc_top], 0, key.mv_size); + } else { + s2 = NODEPTR(csrc->mc_pg[csrc->mc_top], 0); + key.mv_size = NODEKSZ(s2); + key.mv_data = NODEKEY(s2); + } + csrc->mc_snum = snum--; + csrc->mc_top = snum; + } else { + key.mv_size = NODEKSZ(srcnode); + key.mv_data = NODEKEY(srcnode); + } + data.mv_size = NODEDSZ(srcnode); + data.mv_data = NODEDATA(srcnode); + } + if (IS_BRANCH(cdst->mc_pg[cdst->mc_top]) && cdst->mc_ki[cdst->mc_top] == 0) { + unsigned int snum = cdst->mc_snum; + MDB_node *s2; + MDB_val bkey; + /* must find the lowest key below dst */ + mdb_cursor_copy(cdst, &mn); + rc = mdb_page_search_lowest(&mn); + if (rc) + return rc; + if (IS_LEAF2(mn.mc_pg[mn.mc_top])) { + bkey.mv_size = mn.mc_db->md_pad; + bkey.mv_data = LEAF2KEY(mn.mc_pg[mn.mc_top], 0, bkey.mv_size); + } else { + s2 = NODEPTR(mn.mc_pg[mn.mc_top], 0); + bkey.mv_size = NODEKSZ(s2); + bkey.mv_data = NODEKEY(s2); + } + mn.mc_snum = snum--; + mn.mc_top = snum; + mn.mc_ki[snum] = 0; + rc = mdb_update_key(&mn, &bkey); + if (rc) + return rc; + } + + DPRINTF(("moving %s node %u [%s] on page %"Z"u to node %u on page %"Z"u", + IS_LEAF(csrc->mc_pg[csrc->mc_top]) ? "leaf" : "branch", + csrc->mc_ki[csrc->mc_top], + DKEY(&key), + csrc->mc_pg[csrc->mc_top]->mp_pgno, + cdst->mc_ki[cdst->mc_top], cdst->mc_pg[cdst->mc_top]->mp_pgno)); + + /* Add the node to the destination page. + */ + rc = mdb_node_add(cdst, cdst->mc_ki[cdst->mc_top], &key, &data, srcpg, flags); + if (rc != MDB_SUCCESS) + return rc; + + /* Delete the node from the source page. + */ + mdb_node_del(csrc, key.mv_size); + + { + /* Adjust other cursors pointing to mp */ + MDB_cursor *m2, *m3; + MDB_dbi dbi = csrc->mc_dbi; + MDB_page *mp = csrc->mc_pg[csrc->mc_top]; + + for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { + if (csrc->mc_flags & C_SUB) + m3 = &m2->mc_xcursor->mx_cursor; + else + m3 = m2; + if (m3 == csrc) continue; + if (m3->mc_pg[csrc->mc_top] == mp && m3->mc_ki[csrc->mc_top] == + csrc->mc_ki[csrc->mc_top]) { + m3->mc_pg[csrc->mc_top] = cdst->mc_pg[cdst->mc_top]; + m3->mc_ki[csrc->mc_top] = cdst->mc_ki[cdst->mc_top]; + } + } + } + + /* Update the parent separators. + */ + if (csrc->mc_ki[csrc->mc_top] == 0) { + if (csrc->mc_ki[csrc->mc_top-1] != 0) { + if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) { + key.mv_data = LEAF2KEY(csrc->mc_pg[csrc->mc_top], 0, key.mv_size); + } else { + srcnode = NODEPTR(csrc->mc_pg[csrc->mc_top], 0); + key.mv_size = NODEKSZ(srcnode); + key.mv_data = NODEKEY(srcnode); + } + DPRINTF(("update separator for source page %"Z"u to [%s]", + csrc->mc_pg[csrc->mc_top]->mp_pgno, DKEY(&key))); + mdb_cursor_copy(csrc, &mn); + mn.mc_snum--; + mn.mc_top--; + if ((rc = mdb_update_key(&mn, &key)) != MDB_SUCCESS) + return rc; + } + if (IS_BRANCH(csrc->mc_pg[csrc->mc_top])) { + MDB_val nullkey; + indx_t ix = csrc->mc_ki[csrc->mc_top]; + nullkey.mv_size = 0; + csrc->mc_ki[csrc->mc_top] = 0; + rc = mdb_update_key(csrc, &nullkey); + csrc->mc_ki[csrc->mc_top] = ix; + mdb_cassert(csrc, rc == MDB_SUCCESS); + } + } + + if (cdst->mc_ki[cdst->mc_top] == 0) { + if (cdst->mc_ki[cdst->mc_top-1] != 0) { + if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) { + key.mv_data = LEAF2KEY(cdst->mc_pg[cdst->mc_top], 0, key.mv_size); + } else { + srcnode = NODEPTR(cdst->mc_pg[cdst->mc_top], 0); + key.mv_size = NODEKSZ(srcnode); + key.mv_data = NODEKEY(srcnode); + } + DPRINTF(("update separator for destination page %"Z"u to [%s]", + cdst->mc_pg[cdst->mc_top]->mp_pgno, DKEY(&key))); + mdb_cursor_copy(cdst, &mn); + mn.mc_snum--; + mn.mc_top--; + if ((rc = mdb_update_key(&mn, &key)) != MDB_SUCCESS) + return rc; + } + if (IS_BRANCH(cdst->mc_pg[cdst->mc_top])) { + MDB_val nullkey; + indx_t ix = cdst->mc_ki[cdst->mc_top]; + nullkey.mv_size = 0; + cdst->mc_ki[cdst->mc_top] = 0; + rc = mdb_update_key(cdst, &nullkey); + cdst->mc_ki[cdst->mc_top] = ix; + mdb_cassert(csrc, rc == MDB_SUCCESS); + } + } + + return MDB_SUCCESS; +} + +/** Merge one page into another. + * The nodes from the page pointed to by \b csrc will + * be copied to the page pointed to by \b cdst and then + * the \b csrc page will be freed. + * @param[in] csrc Cursor pointing to the source page. + * @param[in] cdst Cursor pointing to the destination page. + * @return 0 on success, non-zero on failure. + */ +static int +mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst) +{ + MDB_page *psrc, *pdst; + MDB_node *srcnode; + MDB_val key, data; + unsigned nkeys; + int rc; + indx_t i, j; + + psrc = csrc->mc_pg[csrc->mc_top]; + pdst = cdst->mc_pg[cdst->mc_top]; + + DPRINTF(("merging page %"Z"u into %"Z"u", psrc->mp_pgno, pdst->mp_pgno)); + + mdb_cassert(csrc, csrc->mc_snum > 1); /* can't merge root page */ + mdb_cassert(csrc, cdst->mc_snum > 1); + + /* Mark dst as dirty. */ + if ((rc = mdb_page_touch(cdst))) + return rc; + + /* Move all nodes from src to dst. + */ + j = nkeys = NUMKEYS(pdst); + if (IS_LEAF2(psrc)) { + key.mv_size = csrc->mc_db->md_pad; + key.mv_data = METADATA(psrc); + for (i = 0; i < NUMKEYS(psrc); i++, j++) { + rc = mdb_node_add(cdst, j, &key, NULL, 0, 0); + if (rc != MDB_SUCCESS) + return rc; + key.mv_data = (char *)key.mv_data + key.mv_size; + } + } else { + for (i = 0; i < NUMKEYS(psrc); i++, j++) { + srcnode = NODEPTR(psrc, i); + if (i == 0 && IS_BRANCH(psrc)) { + MDB_cursor mn; + MDB_node *s2; + mdb_cursor_copy(csrc, &mn); + /* must find the lowest key below src */ + rc = mdb_page_search_lowest(&mn); + if (rc) + return rc; + if (IS_LEAF2(mn.mc_pg[mn.mc_top])) { + key.mv_size = mn.mc_db->md_pad; + key.mv_data = LEAF2KEY(mn.mc_pg[mn.mc_top], 0, key.mv_size); + } else { + s2 = NODEPTR(mn.mc_pg[mn.mc_top], 0); + key.mv_size = NODEKSZ(s2); + key.mv_data = NODEKEY(s2); + } + } else { + key.mv_size = srcnode->mn_ksize; + key.mv_data = NODEKEY(srcnode); + } + + data.mv_size = NODEDSZ(srcnode); + data.mv_data = NODEDATA(srcnode); + rc = mdb_node_add(cdst, j, &key, &data, NODEPGNO(srcnode), srcnode->mn_flags); + if (rc != MDB_SUCCESS) + return rc; + } + } + + DPRINTF(("dst page %"Z"u now has %u keys (%.1f%% filled)", + pdst->mp_pgno, NUMKEYS(pdst), + (float)PAGEFILL(cdst->mc_txn->mt_env, pdst) / 10)); + + /* Unlink the src page from parent and add to free list. + */ + csrc->mc_top--; + mdb_node_del(csrc, 0); + if (csrc->mc_ki[csrc->mc_top] == 0) { + key.mv_size = 0; + rc = mdb_update_key(csrc, &key); + if (rc) { + csrc->mc_top++; + return rc; + } + } + csrc->mc_top++; + + psrc = csrc->mc_pg[csrc->mc_top]; + /* If not operating on FreeDB, allow this page to be reused + * in this txn. Otherwise just add to free list. + */ + rc = mdb_page_loose(csrc, psrc); + if (rc) + return rc; + if (IS_LEAF(psrc)) + csrc->mc_db->md_leaf_pages--; + else + csrc->mc_db->md_branch_pages--; + { + /* Adjust other cursors pointing to mp */ + MDB_cursor *m2, *m3; + MDB_dbi dbi = csrc->mc_dbi; + + for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { + if (csrc->mc_flags & C_SUB) + m3 = &m2->mc_xcursor->mx_cursor; + else + m3 = m2; + if (m3 == csrc) continue; + if (m3->mc_snum < csrc->mc_snum) continue; + if (m3->mc_pg[csrc->mc_top] == psrc) { + m3->mc_pg[csrc->mc_top] = pdst; + m3->mc_ki[csrc->mc_top] += nkeys; + } + } + } + { + unsigned int snum = cdst->mc_snum; + uint16_t depth = cdst->mc_db->md_depth; + mdb_cursor_pop(cdst); + rc = mdb_rebalance(cdst); + /* Did the tree shrink? */ + if (depth > cdst->mc_db->md_depth) + snum--; + cdst->mc_snum = snum; + cdst->mc_top = snum-1; + } + return rc; +} + +/** Copy the contents of a cursor. + * @param[in] csrc The cursor to copy from. + * @param[out] cdst The cursor to copy to. + */ +static void +mdb_cursor_copy(const MDB_cursor *csrc, MDB_cursor *cdst) +{ + unsigned int i; + + cdst->mc_txn = csrc->mc_txn; + cdst->mc_dbi = csrc->mc_dbi; + cdst->mc_db = csrc->mc_db; + cdst->mc_dbx = csrc->mc_dbx; + cdst->mc_snum = csrc->mc_snum; + cdst->mc_top = csrc->mc_top; + cdst->mc_flags = csrc->mc_flags; + + for (i=0; i<csrc->mc_snum; i++) { + cdst->mc_pg[i] = csrc->mc_pg[i]; + cdst->mc_ki[i] = csrc->mc_ki[i]; + } +} + +/** Rebalance the tree after a delete operation. + * @param[in] mc Cursor pointing to the page where rebalancing + * should begin. + * @return 0 on success, non-zero on failure. + */ +static int +mdb_rebalance(MDB_cursor *mc) +{ + MDB_node *node; + int rc; + unsigned int ptop, minkeys; + MDB_cursor mn; + indx_t oldki; + + minkeys = 1 + (IS_BRANCH(mc->mc_pg[mc->mc_top])); + DPRINTF(("rebalancing %s page %"Z"u (has %u keys, %.1f%% full)", + IS_LEAF(mc->mc_pg[mc->mc_top]) ? "leaf" : "branch", + mdb_dbg_pgno(mc->mc_pg[mc->mc_top]), NUMKEYS(mc->mc_pg[mc->mc_top]), + (float)PAGEFILL(mc->mc_txn->mt_env, mc->mc_pg[mc->mc_top]) / 10)); + + if (PAGEFILL(mc->mc_txn->mt_env, mc->mc_pg[mc->mc_top]) >= FILL_THRESHOLD && + NUMKEYS(mc->mc_pg[mc->mc_top]) >= minkeys) { + DPRINTF(("no need to rebalance page %"Z"u, above fill threshold", + mdb_dbg_pgno(mc->mc_pg[mc->mc_top]))); + return MDB_SUCCESS; + } + + if (mc->mc_snum < 2) { + MDB_page *mp = mc->mc_pg[0]; + if (IS_SUBP(mp)) { + DPUTS("Can't rebalance a subpage, ignoring"); + return MDB_SUCCESS; + } + if (NUMKEYS(mp) == 0) { + DPUTS("tree is completely empty"); + mc->mc_db->md_root = P_INVALID; + mc->mc_db->md_depth = 0; + mc->mc_db->md_leaf_pages = 0; + rc = mdb_midl_append(&mc->mc_txn->mt_free_pgs, mp->mp_pgno); + if (rc) + return rc; + /* Adjust cursors pointing to mp */ + mc->mc_snum = 0; + mc->mc_top = 0; + mc->mc_flags &= ~C_INITIALIZED; + { + MDB_cursor *m2, *m3; + MDB_dbi dbi = mc->mc_dbi; + + for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { + if (mc->mc_flags & C_SUB) + m3 = &m2->mc_xcursor->mx_cursor; + else + m3 = m2; + if (m3->mc_snum < mc->mc_snum) continue; + if (m3->mc_pg[0] == mp) { + m3->mc_snum = 0; + m3->mc_top = 0; + m3->mc_flags &= ~C_INITIALIZED; + } + } + } + } else if (IS_BRANCH(mp) && NUMKEYS(mp) == 1) { + int i; + DPUTS("collapsing root page!"); + rc = mdb_midl_append(&mc->mc_txn->mt_free_pgs, mp->mp_pgno); + if (rc) + return rc; + mc->mc_db->md_root = NODEPGNO(NODEPTR(mp, 0)); + rc = mdb_page_get(mc->mc_txn,mc->mc_db->md_root,&mc->mc_pg[0],NULL); + if (rc) + return rc; + mc->mc_db->md_depth--; + mc->mc_db->md_branch_pages--; + mc->mc_ki[0] = mc->mc_ki[1]; + for (i = 1; i<mc->mc_db->md_depth; i++) { + mc->mc_pg[i] = mc->mc_pg[i+1]; + mc->mc_ki[i] = mc->mc_ki[i+1]; + } + { + /* Adjust other cursors pointing to mp */ + MDB_cursor *m2, *m3; + MDB_dbi dbi = mc->mc_dbi; + + for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { + if (mc->mc_flags & C_SUB) + m3 = &m2->mc_xcursor->mx_cursor; + else + m3 = m2; + if (m3 == mc || m3->mc_snum < mc->mc_snum) continue; + if (m3->mc_pg[0] == mp) { + m3->mc_snum--; + m3->mc_top--; + for (i=0; i<m3->mc_snum; i++) { + m3->mc_pg[i] = m3->mc_pg[i+1]; + m3->mc_ki[i] = m3->mc_ki[i+1]; + } + } + } + } + } else + DPUTS("root page doesn't need rebalancing"); + return MDB_SUCCESS; + } + + /* The parent (branch page) must have at least 2 pointers, + * otherwise the tree is invalid. + */ + ptop = mc->mc_top-1; + mdb_cassert(mc, NUMKEYS(mc->mc_pg[ptop]) > 1); + + /* Leaf page fill factor is below the threshold. + * Try to move keys from left or right neighbor, or + * merge with a neighbor page. + */ + + /* Find neighbors. + */ + mdb_cursor_copy(mc, &mn); + mn.mc_xcursor = NULL; + + oldki = mc->mc_ki[mc->mc_top]; + if (mc->mc_ki[ptop] == 0) { + /* We're the leftmost leaf in our parent. + */ + DPUTS("reading right neighbor"); + mn.mc_ki[ptop]++; + node = NODEPTR(mc->mc_pg[ptop], mn.mc_ki[ptop]); + rc = mdb_page_get(mc->mc_txn,NODEPGNO(node),&mn.mc_pg[mn.mc_top],NULL); + if (rc) + return rc; + mn.mc_ki[mn.mc_top] = 0; + mc->mc_ki[mc->mc_top] = NUMKEYS(mc->mc_pg[mc->mc_top]); + } else { + /* There is at least one neighbor to the left. + */ + DPUTS("reading left neighbor"); + mn.mc_ki[ptop]--; + node = NODEPTR(mc->mc_pg[ptop], mn.mc_ki[ptop]); + rc = mdb_page_get(mc->mc_txn,NODEPGNO(node),&mn.mc_pg[mn.mc_top],NULL); + if (rc) + return rc; + mn.mc_ki[mn.mc_top] = NUMKEYS(mn.mc_pg[mn.mc_top]) - 1; + mc->mc_ki[mc->mc_top] = 0; + } + + DPRINTF(("found neighbor page %"Z"u (%u keys, %.1f%% full)", + mn.mc_pg[mn.mc_top]->mp_pgno, NUMKEYS(mn.mc_pg[mn.mc_top]), + (float)PAGEFILL(mc->mc_txn->mt_env, mn.mc_pg[mn.mc_top]) / 10)); + + /* If the neighbor page is above threshold and has enough keys, + * move one key from it. Otherwise we should try to merge them. + * (A branch page must never have less than 2 keys.) + */ + minkeys = 1 + (IS_BRANCH(mn.mc_pg[mn.mc_top])); + if (PAGEFILL(mc->mc_txn->mt_env, mn.mc_pg[mn.mc_top]) >= FILL_THRESHOLD && NUMKEYS(mn.mc_pg[mn.mc_top]) > minkeys) { + rc = mdb_node_move(&mn, mc); + if (mc->mc_ki[ptop]) { + oldki++; + } + } else { + if (mc->mc_ki[ptop] == 0) { + rc = mdb_page_merge(&mn, mc); + } else { + oldki += NUMKEYS(mn.mc_pg[mn.mc_top]); + mn.mc_ki[mn.mc_top] += mc->mc_ki[mn.mc_top] + 1; + rc = mdb_page_merge(mc, &mn); + mdb_cursor_copy(&mn, mc); + } + mc->mc_flags &= ~C_EOF; + } + mc->mc_ki[mc->mc_top] = oldki; + return rc; +} + +/** Complete a delete operation started by #mdb_cursor_del(). */ +static int +mdb_cursor_del0(MDB_cursor *mc) +{ + int rc; + MDB_page *mp; + indx_t ki; + unsigned int nkeys; + + ki = mc->mc_ki[mc->mc_top]; + mdb_node_del(mc, mc->mc_db->md_pad); + mc->mc_db->md_entries--; + rc = mdb_rebalance(mc); + + if (rc == MDB_SUCCESS) { + MDB_cursor *m2, *m3; + MDB_dbi dbi = mc->mc_dbi; + + mp = mc->mc_pg[mc->mc_top]; + nkeys = NUMKEYS(mp); + + /* if mc points past last node in page, find next sibling */ + if (mc->mc_ki[mc->mc_top] >= nkeys) { + rc = mdb_cursor_sibling(mc, 1); + if (rc == MDB_NOTFOUND) { + mc->mc_flags |= C_EOF; + rc = MDB_SUCCESS; + } + } + + /* Adjust other cursors pointing to mp */ + for (m2 = mc->mc_txn->mt_cursors[dbi]; !rc && m2; m2=m2->mc_next) { + m3 = (mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2; + if (! (m2->mc_flags & m3->mc_flags & C_INITIALIZED)) + continue; + if (m3 == mc || m3->mc_snum < mc->mc_snum) + continue; + if (m3->mc_pg[mc->mc_top] == mp) { + if (m3->mc_ki[mc->mc_top] >= ki) { + m3->mc_flags |= C_DEL; + if (m3->mc_ki[mc->mc_top] > ki) + m3->mc_ki[mc->mc_top]--; + else if (mc->mc_db->md_flags & MDB_DUPSORT) + m3->mc_xcursor->mx_cursor.mc_flags |= C_EOF; + } + if (m3->mc_ki[mc->mc_top] >= nkeys) { + rc = mdb_cursor_sibling(m3, 1); + if (rc == MDB_NOTFOUND) { + m3->mc_flags |= C_EOF; + rc = MDB_SUCCESS; + } + } + } + } + mc->mc_flags |= C_DEL; + } + + if (rc) + mc->mc_txn->mt_flags |= MDB_TXN_ERROR; + return rc; +} + +int +mdb_del(MDB_txn *txn, MDB_dbi dbi, + MDB_val *key, MDB_val *data) +{ + if (!key || dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi)) + return EINVAL; + + if (txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_ERROR)) + return (txn->mt_flags & MDB_TXN_RDONLY) ? EACCES : MDB_BAD_TXN; + + if (!F_ISSET(txn->mt_dbs[dbi].md_flags, MDB_DUPSORT)) { + /* must ignore any data */ + data = NULL; + } + + return mdb_del0(txn, dbi, key, data, 0); +} + +static int +mdb_del0(MDB_txn *txn, MDB_dbi dbi, + MDB_val *key, MDB_val *data, unsigned flags) +{ + MDB_cursor mc; + MDB_xcursor mx; + MDB_cursor_op op; + MDB_val rdata, *xdata; + int rc, exact = 0; + DKBUF; + + DPRINTF(("====> delete db %u key [%s]", dbi, DKEY(key))); + + mdb_cursor_init(&mc, txn, dbi, &mx); + + if (data) { + op = MDB_GET_BOTH; + rdata = *data; + xdata = &rdata; + } else { + op = MDB_SET; + xdata = NULL; + flags |= MDB_NODUPDATA; + } + rc = mdb_cursor_set(&mc, key, xdata, op, &exact); + if (rc == 0) { + /* let mdb_page_split know about this cursor if needed: + * delete will trigger a rebalance; if it needs to move + * a node from one page to another, it will have to + * update the parent's separator key(s). If the new sepkey + * is larger than the current one, the parent page may + * run out of space, triggering a split. We need this + * cursor to be consistent until the end of the rebalance. + */ + mc.mc_flags |= C_UNTRACK; + mc.mc_next = txn->mt_cursors[dbi]; + txn->mt_cursors[dbi] = &mc; + rc = mdb_cursor_del(&mc, flags); + txn->mt_cursors[dbi] = mc.mc_next; + } + return rc; +} + +/** Split a page and insert a new node. + * @param[in,out] mc Cursor pointing to the page and desired insertion index. + * The cursor will be updated to point to the actual page and index where + * the node got inserted after the split. + * @param[in] newkey The key for the newly inserted node. + * @param[in] newdata The data for the newly inserted node. + * @param[in] newpgno The page number, if the new node is a branch node. + * @param[in] nflags The #NODE_ADD_FLAGS for the new node. + * @return 0 on success, non-zero on failure. + */ +static int +mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno, + unsigned int nflags) +{ + unsigned int flags; + int rc = MDB_SUCCESS, new_root = 0, did_split = 0; + indx_t newindx; + pgno_t pgno = 0; + int i, j, split_indx, nkeys, pmax; + MDB_env *env = mc->mc_txn->mt_env; + MDB_node *node; + MDB_val sepkey, rkey, xdata, *rdata = &xdata; + MDB_page *copy = NULL; + MDB_page *mp, *rp, *pp; + int ptop; + MDB_cursor mn; + DKBUF; + + mp = mc->mc_pg[mc->mc_top]; + newindx = mc->mc_ki[mc->mc_top]; + nkeys = NUMKEYS(mp); + + DPRINTF(("-----> splitting %s page %"Z"u and adding [%s] at index %i/%i", + IS_LEAF(mp) ? "leaf" : "branch", mp->mp_pgno, + DKEY(newkey), mc->mc_ki[mc->mc_top], nkeys)); + + /* Create a right sibling. */ + if ((rc = mdb_page_new(mc, mp->mp_flags, 1, &rp))) + return rc; + DPRINTF(("new right sibling: page %"Z"u", rp->mp_pgno)); + + if (mc->mc_snum < 2) { + if ((rc = mdb_page_new(mc, P_BRANCH, 1, &pp))) + goto done; + /* shift current top to make room for new parent */ + mc->mc_pg[1] = mc->mc_pg[0]; + mc->mc_ki[1] = mc->mc_ki[0]; + mc->mc_pg[0] = pp; + mc->mc_ki[0] = 0; + mc->mc_db->md_root = pp->mp_pgno; + DPRINTF(("root split! new root = %"Z"u", pp->mp_pgno)); + mc->mc_db->md_depth++; + new_root = 1; + + /* Add left (implicit) pointer. */ + if ((rc = mdb_node_add(mc, 0, NULL, NULL, mp->mp_pgno, 0)) != MDB_SUCCESS) { + /* undo the pre-push */ + mc->mc_pg[0] = mc->mc_pg[1]; + mc->mc_ki[0] = mc->mc_ki[1]; + mc->mc_db->md_root = mp->mp_pgno; + mc->mc_db->md_depth--; + goto done; + } + mc->mc_snum = 2; + mc->mc_top = 1; + ptop = 0; + } else { + ptop = mc->mc_top-1; + DPRINTF(("parent branch page is %"Z"u", mc->mc_pg[ptop]->mp_pgno)); + } + + mc->mc_flags |= C_SPLITTING; + mdb_cursor_copy(mc, &mn); + mn.mc_pg[mn.mc_top] = rp; + mn.mc_ki[ptop] = mc->mc_ki[ptop]+1; + + if (nflags & MDB_APPEND) { + mn.mc_ki[mn.mc_top] = 0; + sepkey = *newkey; + split_indx = newindx; + nkeys = 0; + } else { + + split_indx = (nkeys+1) / 2; + + if (IS_LEAF2(rp)) { + char *split, *ins; + int x; + unsigned int lsize, rsize, ksize; + /* Move half of the keys to the right sibling */ + x = mc->mc_ki[mc->mc_top] - split_indx; + ksize = mc->mc_db->md_pad; + split = LEAF2KEY(mp, split_indx, ksize); + rsize = (nkeys - split_indx) * ksize; + lsize = (nkeys - split_indx) * sizeof(indx_t); + mp->mp_lower -= lsize; + rp->mp_lower += lsize; + mp->mp_upper += rsize - lsize; + rp->mp_upper -= rsize - lsize; + sepkey.mv_size = ksize; + if (newindx == split_indx) { + sepkey.mv_data = newkey->mv_data; + } else { + sepkey.mv_data = split; + } + if (x<0) { + ins = LEAF2KEY(mp, mc->mc_ki[mc->mc_top], ksize); + memcpy(rp->mp_ptrs, split, rsize); + sepkey.mv_data = rp->mp_ptrs; + memmove(ins+ksize, ins, (split_indx - mc->mc_ki[mc->mc_top]) * ksize); + memcpy(ins, newkey->mv_data, ksize); + mp->mp_lower += sizeof(indx_t); + mp->mp_upper -= ksize - sizeof(indx_t); + } else { + if (x) + memcpy(rp->mp_ptrs, split, x * ksize); + ins = LEAF2KEY(rp, x, ksize); + memcpy(ins, newkey->mv_data, ksize); + memcpy(ins+ksize, split + x * ksize, rsize - x * ksize); + rp->mp_lower += sizeof(indx_t); + rp->mp_upper -= ksize - sizeof(indx_t); + mc->mc_ki[mc->mc_top] = x; + mc->mc_pg[mc->mc_top] = rp; + } + } else { + int psize, nsize, k; + /* Maximum free space in an empty page */ + pmax = env->me_psize - PAGEHDRSZ; + if (IS_LEAF(mp)) + nsize = mdb_leaf_size(env, newkey, newdata); + else + nsize = mdb_branch_size(env, newkey); + nsize = EVEN(nsize); + + /* grab a page to hold a temporary copy */ + copy = mdb_page_malloc(mc->mc_txn, 1); + if (copy == NULL) { + rc = ENOMEM; + goto done; + } + copy->mp_pgno = mp->mp_pgno; + copy->mp_flags = mp->mp_flags; + copy->mp_lower = (PAGEHDRSZ-PAGEBASE); + copy->mp_upper = env->me_psize - PAGEBASE; + + /* prepare to insert */ + for (i=0, j=0; i<nkeys; i++) { + if (i == newindx) { + copy->mp_ptrs[j++] = 0; + } + copy->mp_ptrs[j++] = mp->mp_ptrs[i]; + } + + /* When items are relatively large the split point needs + * to be checked, because being off-by-one will make the + * difference between success or failure in mdb_node_add. + * + * It's also relevant if a page happens to be laid out + * such that one half of its nodes are all "small" and + * the other half of its nodes are "large." If the new + * item is also "large" and falls on the half with + * "large" nodes, it also may not fit. + * + * As a final tweak, if the new item goes on the last + * spot on the page (and thus, onto the new page), bias + * the split so the new page is emptier than the old page. + * This yields better packing during sequential inserts. + */ + if (nkeys < 20 || nsize > pmax/16 || newindx >= nkeys) { + /* Find split point */ + psize = 0; + if (newindx <= split_indx || newindx >= nkeys) { + i = 0; j = 1; + k = newindx >= nkeys ? nkeys : split_indx+2; + } else { + i = nkeys; j = -1; + k = split_indx-1; + } + for (; i!=k; i+=j) { + if (i == newindx) { + psize += nsize; + node = NULL; + } else { + node = (MDB_node *)((char *)mp + copy->mp_ptrs[i] + PAGEBASE); + psize += NODESIZE + NODEKSZ(node) + sizeof(indx_t); + if (IS_LEAF(mp)) { + if (F_ISSET(node->mn_flags, F_BIGDATA)) + psize += sizeof(pgno_t); + else + psize += NODEDSZ(node); + } + psize = EVEN(psize); + } + if (psize > pmax || i == k-j) { + split_indx = i + (j<0); + break; + } + } + } + if (split_indx == newindx) { + sepkey.mv_size = newkey->mv_size; + sepkey.mv_data = newkey->mv_data; + } else { + node = (MDB_node *)((char *)mp + copy->mp_ptrs[split_indx] + PAGEBASE); + sepkey.mv_size = node->mn_ksize; + sepkey.mv_data = NODEKEY(node); + } + } + } + + DPRINTF(("separator is %d [%s]", split_indx, DKEY(&sepkey))); + + /* Copy separator key to the parent. + */ + if (SIZELEFT(mn.mc_pg[ptop]) < mdb_branch_size(env, &sepkey)) { + mn.mc_snum--; + mn.mc_top--; + did_split = 1; + rc = mdb_page_split(&mn, &sepkey, NULL, rp->mp_pgno, 0); + if (rc) + goto done; + + /* root split? */ + if (mn.mc_snum == mc->mc_snum) { + mc->mc_pg[mc->mc_snum] = mc->mc_pg[mc->mc_top]; + mc->mc_ki[mc->mc_snum] = mc->mc_ki[mc->mc_top]; + mc->mc_pg[mc->mc_top] = mc->mc_pg[ptop]; + mc->mc_ki[mc->mc_top] = mc->mc_ki[ptop]; + mc->mc_snum++; + mc->mc_top++; + ptop++; + } + /* Right page might now have changed parent. + * Check if left page also changed parent. + */ + if (mn.mc_pg[ptop] != mc->mc_pg[ptop] && + mc->mc_ki[ptop] >= NUMKEYS(mc->mc_pg[ptop])) { + for (i=0; i<ptop; i++) { + mc->mc_pg[i] = mn.mc_pg[i]; + mc->mc_ki[i] = mn.mc_ki[i]; + } + mc->mc_pg[ptop] = mn.mc_pg[ptop]; + if (mn.mc_ki[ptop]) { + mc->mc_ki[ptop] = mn.mc_ki[ptop] - 1; + } else { + /* find right page's left sibling */ + mc->mc_ki[ptop] = mn.mc_ki[ptop]; + mdb_cursor_sibling(mc, 0); + } + } + } else { + mn.mc_top--; + rc = mdb_node_add(&mn, mn.mc_ki[ptop], &sepkey, NULL, rp->mp_pgno, 0); + mn.mc_top++; + } + mc->mc_flags ^= C_SPLITTING; + if (rc != MDB_SUCCESS) { + goto done; + } + if (nflags & MDB_APPEND) { + mc->mc_pg[mc->mc_top] = rp; + mc->mc_ki[mc->mc_top] = 0; + rc = mdb_node_add(mc, 0, newkey, newdata, newpgno, nflags); + if (rc) + goto done; + for (i=0; i<mc->mc_top; i++) + mc->mc_ki[i] = mn.mc_ki[i]; + } else if (!IS_LEAF2(mp)) { + /* Move nodes */ + mc->mc_pg[mc->mc_top] = rp; + i = split_indx; + j = 0; + do { + if (i == newindx) { + rkey.mv_data = newkey->mv_data; + rkey.mv_size = newkey->mv_size; + if (IS_LEAF(mp)) { + rdata = newdata; + } else + pgno = newpgno; + flags = nflags; + /* Update index for the new key. */ + mc->mc_ki[mc->mc_top] = j; + } else { + node = (MDB_node *)((char *)mp + copy->mp_ptrs[i] + PAGEBASE); + rkey.mv_data = NODEKEY(node); + rkey.mv_size = node->mn_ksize; + if (IS_LEAF(mp)) { + xdata.mv_data = NODEDATA(node); + xdata.mv_size = NODEDSZ(node); + rdata = &xdata; + } else + pgno = NODEPGNO(node); + flags = node->mn_flags; + } + + if (!IS_LEAF(mp) && j == 0) { + /* First branch index doesn't need key data. */ + rkey.mv_size = 0; + } + + rc = mdb_node_add(mc, j, &rkey, rdata, pgno, flags); + if (rc) + goto done; + if (i == nkeys) { + i = 0; + j = 0; + mc->mc_pg[mc->mc_top] = copy; + } else { + i++; + j++; + } + } while (i != split_indx); + + nkeys = NUMKEYS(copy); + for (i=0; i<nkeys; i++) + mp->mp_ptrs[i] = copy->mp_ptrs[i]; + mp->mp_lower = copy->mp_lower; + mp->mp_upper = copy->mp_upper; + memcpy(NODEPTR(mp, nkeys-1), NODEPTR(copy, nkeys-1), + env->me_psize - copy->mp_upper - PAGEBASE); + + /* reset back to original page */ + if (newindx < split_indx) { + mc->mc_pg[mc->mc_top] = mp; + if (nflags & MDB_RESERVE) { + node = NODEPTR(mp, mc->mc_ki[mc->mc_top]); + if (!(node->mn_flags & F_BIGDATA)) + newdata->mv_data = NODEDATA(node); + } + } else { + mc->mc_pg[mc->mc_top] = rp; + mc->mc_ki[ptop]++; + /* Make sure mc_ki is still valid. + */ + if (mn.mc_pg[ptop] != mc->mc_pg[ptop] && + mc->mc_ki[ptop] >= NUMKEYS(mc->mc_pg[ptop])) { + for (i=0; i<=ptop; i++) { + mc->mc_pg[i] = mn.mc_pg[i]; + mc->mc_ki[i] = mn.mc_ki[i]; + } + } + } + } + + { + /* Adjust other cursors pointing to mp */ + MDB_cursor *m2, *m3; + MDB_dbi dbi = mc->mc_dbi; + int fixup = NUMKEYS(mp); + + for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { + if (mc->mc_flags & C_SUB) + m3 = &m2->mc_xcursor->mx_cursor; + else + m3 = m2; + if (m3 == mc) + continue; + if (!(m2->mc_flags & m3->mc_flags & C_INITIALIZED)) + continue; + if (m3->mc_flags & C_SPLITTING) + continue; + if (new_root) { + int k; + /* root split */ + for (k=m3->mc_top; k>=0; k--) { + m3->mc_ki[k+1] = m3->mc_ki[k]; + m3->mc_pg[k+1] = m3->mc_pg[k]; + } + if (m3->mc_ki[0] >= split_indx) { + m3->mc_ki[0] = 1; + } else { + m3->mc_ki[0] = 0; + } + m3->mc_pg[0] = mc->mc_pg[0]; + m3->mc_snum++; + m3->mc_top++; + } + if (m3->mc_top >= mc->mc_top && m3->mc_pg[mc->mc_top] == mp) { + if (m3->mc_ki[mc->mc_top] >= newindx && !(nflags & MDB_SPLIT_REPLACE)) + m3->mc_ki[mc->mc_top]++; + if (m3->mc_ki[mc->mc_top] >= fixup) { + m3->mc_pg[mc->mc_top] = rp; + m3->mc_ki[mc->mc_top] -= fixup; + m3->mc_ki[ptop] = mn.mc_ki[ptop]; + } + } else if (!did_split && m3->mc_top >= ptop && m3->mc_pg[ptop] == mc->mc_pg[ptop] && + m3->mc_ki[ptop] >= mc->mc_ki[ptop]) { + m3->mc_ki[ptop]++; + } + } + } + DPRINTF(("mp left: %d, rp left: %d", SIZELEFT(mp), SIZELEFT(rp))); + +done: + if (copy) /* tmp page */ + mdb_page_free(env, copy); + if (rc) + mc->mc_txn->mt_flags |= MDB_TXN_ERROR; + return rc; +} + +int +mdb_put(MDB_txn *txn, MDB_dbi dbi, + MDB_val *key, MDB_val *data, unsigned int flags) +{ + MDB_cursor mc; + MDB_xcursor mx; + + if (!key || !data || dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi)) + return EINVAL; + + if ((flags & (MDB_NOOVERWRITE|MDB_NODUPDATA|MDB_RESERVE|MDB_APPEND|MDB_APPENDDUP)) != flags) + return EINVAL; + + mdb_cursor_init(&mc, txn, dbi, &mx); + return mdb_cursor_put(&mc, key, data, flags); +} + +#ifndef MDB_WBUF +#define MDB_WBUF (1024*1024) +#endif + + /** State needed for a compacting copy. */ +typedef struct mdb_copy { + pthread_mutex_t mc_mutex; + pthread_cond_t mc_cond; + char *mc_wbuf[2]; + char *mc_over[2]; + MDB_env *mc_env; + MDB_txn *mc_txn; + int mc_wlen[2]; + int mc_olen[2]; + pgno_t mc_next_pgno; + HANDLE mc_fd; + int mc_status; + volatile int mc_new; + int mc_toggle; + +} mdb_copy; + + /** Dedicated writer thread for compacting copy. */ +static THREAD_RET ESECT +mdb_env_copythr(void *arg) +{ + mdb_copy *my = arg; + char *ptr; + int toggle = 0, wsize, rc; +#ifdef _WIN32 + DWORD len; +#define DO_WRITE(rc, fd, ptr, w2, len) rc = WriteFile(fd, ptr, w2, &len, NULL) +#else + int len; +#define DO_WRITE(rc, fd, ptr, w2, len) len = write(fd, ptr, w2); rc = (len >= 0) +#endif + + pthread_mutex_lock(&my->mc_mutex); + my->mc_new = 0; + pthread_cond_signal(&my->mc_cond); + for(;;) { + while (!my->mc_new) + pthread_cond_wait(&my->mc_cond, &my->mc_mutex); + if (my->mc_new < 0) { + my->mc_new = 0; + break; + } + my->mc_new = 0; + wsize = my->mc_wlen[toggle]; + ptr = my->mc_wbuf[toggle]; +again: + while (wsize > 0) { + DO_WRITE(rc, my->mc_fd, ptr, wsize, len); + if (!rc) { + rc = ErrCode(); + break; + } else if (len > 0) { + rc = MDB_SUCCESS; + ptr += len; + wsize -= len; + continue; + } else { + rc = EIO; + break; + } + } + if (rc) { + my->mc_status = rc; + break; + } + /* If there's an overflow page tail, write it too */ + if (my->mc_olen[toggle]) { + wsize = my->mc_olen[toggle]; + ptr = my->mc_over[toggle]; + my->mc_olen[toggle] = 0; + goto again; + } + my->mc_wlen[toggle] = 0; + toggle ^= 1; + pthread_cond_signal(&my->mc_cond); + } + pthread_cond_signal(&my->mc_cond); + pthread_mutex_unlock(&my->mc_mutex); + return (THREAD_RET)0; +#undef DO_WRITE +} + + /** Tell the writer thread there's a buffer ready to write */ +static int ESECT +mdb_env_cthr_toggle(mdb_copy *my, int st) +{ + int toggle = my->mc_toggle ^ 1; + pthread_mutex_lock(&my->mc_mutex); + if (my->mc_status) { + pthread_mutex_unlock(&my->mc_mutex); + return my->mc_status; + } + while (my->mc_new == 1) + pthread_cond_wait(&my->mc_cond, &my->mc_mutex); + my->mc_new = st; + my->mc_toggle = toggle; + pthread_cond_signal(&my->mc_cond); + pthread_mutex_unlock(&my->mc_mutex); + return 0; +} + + /** Depth-first tree traversal for compacting copy. */ +static int ESECT +mdb_env_cwalk(mdb_copy *my, pgno_t *pg, int flags) +{ + MDB_cursor mc; + MDB_txn *txn = my->mc_txn; + MDB_node *ni; + MDB_page *mo, *mp, *leaf; + char *buf, *ptr; + int rc, toggle; + unsigned int i; + + /* Empty DB, nothing to do */ + if (*pg == P_INVALID) + return MDB_SUCCESS; + + mc.mc_snum = 1; + mc.mc_top = 0; + mc.mc_txn = txn; + + rc = mdb_page_get(my->mc_txn, *pg, &mc.mc_pg[0], NULL); + if (rc) + return rc; + rc = mdb_page_search_root(&mc, NULL, MDB_PS_FIRST); + if (rc) + return rc; + + /* Make cursor pages writable */ + buf = ptr = malloc(my->mc_env->me_psize * mc.mc_snum); + if (buf == NULL) + return ENOMEM; + + for (i=0; i<mc.mc_top; i++) { + mdb_page_copy((MDB_page *)ptr, mc.mc_pg[i], my->mc_env->me_psize); + mc.mc_pg[i] = (MDB_page *)ptr; + ptr += my->mc_env->me_psize; + } + + /* This is writable space for a leaf page. Usually not needed. */ + leaf = (MDB_page *)ptr; + + toggle = my->mc_toggle; + while (mc.mc_snum > 0) { + unsigned n; + mp = mc.mc_pg[mc.mc_top]; + n = NUMKEYS(mp); + + if (IS_LEAF(mp)) { + if (!IS_LEAF2(mp) && !(flags & F_DUPDATA)) { + for (i=0; i<n; i++) { + ni = NODEPTR(mp, i); + if (ni->mn_flags & F_BIGDATA) { + MDB_page *omp; + pgno_t pg; + + /* Need writable leaf */ + if (mp != leaf) { + mc.mc_pg[mc.mc_top] = leaf; + mdb_page_copy(leaf, mp, my->mc_env->me_psize); + mp = leaf; + ni = NODEPTR(mp, i); + } + + memcpy(&pg, NODEDATA(ni), sizeof(pg)); + rc = mdb_page_get(txn, pg, &omp, NULL); + if (rc) + goto done; + if (my->mc_wlen[toggle] >= MDB_WBUF) { + rc = mdb_env_cthr_toggle(my, 1); + if (rc) + goto done; + toggle = my->mc_toggle; + } + mo = (MDB_page *)(my->mc_wbuf[toggle] + my->mc_wlen[toggle]); + memcpy(mo, omp, my->mc_env->me_psize); + mo->mp_pgno = my->mc_next_pgno; + my->mc_next_pgno += omp->mp_pages; + my->mc_wlen[toggle] += my->mc_env->me_psize; + if (omp->mp_pages > 1) { + my->mc_olen[toggle] = my->mc_env->me_psize * (omp->mp_pages - 1); + my->mc_over[toggle] = (char *)omp + my->mc_env->me_psize; + rc = mdb_env_cthr_toggle(my, 1); + if (rc) + goto done; + toggle = my->mc_toggle; + } + memcpy(NODEDATA(ni), &mo->mp_pgno, sizeof(pgno_t)); + } else if (ni->mn_flags & F_SUBDATA) { + MDB_db db; + + /* Need writable leaf */ + if (mp != leaf) { + mc.mc_pg[mc.mc_top] = leaf; + mdb_page_copy(leaf, mp, my->mc_env->me_psize); + mp = leaf; + ni = NODEPTR(mp, i); + } + + memcpy(&db, NODEDATA(ni), sizeof(db)); + my->mc_toggle = toggle; + rc = mdb_env_cwalk(my, &db.md_root, ni->mn_flags & F_DUPDATA); + if (rc) + goto done; + toggle = my->mc_toggle; + memcpy(NODEDATA(ni), &db, sizeof(db)); + } + } + } + } else { + mc.mc_ki[mc.mc_top]++; + if (mc.mc_ki[mc.mc_top] < n) { + pgno_t pg; +again: + ni = NODEPTR(mp, mc.mc_ki[mc.mc_top]); + pg = NODEPGNO(ni); + rc = mdb_page_get(txn, pg, &mp, NULL); + if (rc) + goto done; + mc.mc_top++; + mc.mc_snum++; + mc.mc_ki[mc.mc_top] = 0; + if (IS_BRANCH(mp)) { + /* Whenever we advance to a sibling branch page, + * we must proceed all the way down to its first leaf. + */ + mdb_page_copy(mc.mc_pg[mc.mc_top], mp, my->mc_env->me_psize); + goto again; + } else + mc.mc_pg[mc.mc_top] = mp; + continue; + } + } + if (my->mc_wlen[toggle] >= MDB_WBUF) { + rc = mdb_env_cthr_toggle(my, 1); + if (rc) + goto done; + toggle = my->mc_toggle; + } + mo = (MDB_page *)(my->mc_wbuf[toggle] + my->mc_wlen[toggle]); + mdb_page_copy(mo, mp, my->mc_env->me_psize); + mo->mp_pgno = my->mc_next_pgno++; + my->mc_wlen[toggle] += my->mc_env->me_psize; + if (mc.mc_top) { + /* Update parent if there is one */ + ni = NODEPTR(mc.mc_pg[mc.mc_top-1], mc.mc_ki[mc.mc_top-1]); + SETPGNO(ni, mo->mp_pgno); + mdb_cursor_pop(&mc); + } else { + /* Otherwise we're done */ + *pg = mo->mp_pgno; + break; + } + } +done: + free(buf); + return rc; +} + + /** Copy environment with compaction. */ +static int ESECT +mdb_env_copyfd1(MDB_env *env, HANDLE fd) +{ + MDB_meta *mm; + MDB_page *mp; + mdb_copy my; + MDB_txn *txn = NULL; + pthread_t thr; + int rc; + +#ifdef _WIN32 + my.mc_mutex = CreateMutex(NULL, FALSE, NULL); + my.mc_cond = CreateEvent(NULL, FALSE, FALSE, NULL); + my.mc_wbuf[0] = _aligned_malloc(MDB_WBUF*2, env->me_os_psize); + if (my.mc_wbuf[0] == NULL) + return errno; +#else + pthread_mutex_init(&my.mc_mutex, NULL); + pthread_cond_init(&my.mc_cond, NULL); +#ifdef HAVE_MEMALIGN + my.mc_wbuf[0] = memalign(env->me_os_psize, MDB_WBUF*2); + if (my.mc_wbuf[0] == NULL) + return errno; +#else + rc = posix_memalign((void **)&my.mc_wbuf[0], env->me_os_psize, MDB_WBUF*2); + if (rc) + return rc; +#endif +#endif + memset(my.mc_wbuf[0], 0, MDB_WBUF*2); + my.mc_wbuf[1] = my.mc_wbuf[0] + MDB_WBUF; + my.mc_wlen[0] = 0; + my.mc_wlen[1] = 0; + my.mc_olen[0] = 0; + my.mc_olen[1] = 0; + my.mc_next_pgno = 2; + my.mc_status = 0; + my.mc_new = 1; + my.mc_toggle = 0; + my.mc_env = env; + my.mc_fd = fd; + THREAD_CREATE(thr, mdb_env_copythr, &my); + + rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn); + if (rc) + return rc; + + mp = (MDB_page *)my.mc_wbuf[0]; + memset(mp, 0, 2*env->me_psize); + mp->mp_pgno = 0; + mp->mp_flags = P_META; + mm = (MDB_meta *)METADATA(mp); + mdb_env_init_meta0(env, mm); + mm->mm_address = env->me_metas[0]->mm_address; + + mp = (MDB_page *)(my.mc_wbuf[0] + env->me_psize); + mp->mp_pgno = 1; + mp->mp_flags = P_META; + *(MDB_meta *)METADATA(mp) = *mm; + mm = (MDB_meta *)METADATA(mp); + + /* Count the number of free pages, subtract from lastpg to find + * number of active pages + */ + { + MDB_ID freecount = 0; + MDB_cursor mc; + MDB_val key, data; + mdb_cursor_init(&mc, txn, FREE_DBI, NULL); + while ((rc = mdb_cursor_get(&mc, &key, &data, MDB_NEXT)) == 0) + freecount += *(MDB_ID *)data.mv_data; + freecount += txn->mt_dbs[0].md_branch_pages + + txn->mt_dbs[0].md_leaf_pages + + txn->mt_dbs[0].md_overflow_pages; + + /* Set metapage 1 */ + mm->mm_last_pg = txn->mt_next_pgno - freecount - 1; + mm->mm_dbs[1] = txn->mt_dbs[1]; + mm->mm_dbs[1].md_root = mm->mm_last_pg; + mm->mm_txnid = 1; + } + my.mc_wlen[0] = env->me_psize * 2; + my.mc_txn = txn; + pthread_mutex_lock(&my.mc_mutex); + while(my.mc_new) + pthread_cond_wait(&my.mc_cond, &my.mc_mutex); + pthread_mutex_unlock(&my.mc_mutex); + rc = mdb_env_cwalk(&my, &txn->mt_dbs[1].md_root, 0); + if (rc == MDB_SUCCESS && my.mc_wlen[my.mc_toggle]) + rc = mdb_env_cthr_toggle(&my, 1); + mdb_env_cthr_toggle(&my, -1); + pthread_mutex_lock(&my.mc_mutex); + while(my.mc_new) + pthread_cond_wait(&my.mc_cond, &my.mc_mutex); + pthread_mutex_unlock(&my.mc_mutex); + THREAD_FINISH(thr); + + mdb_txn_abort(txn); +#ifdef _WIN32 + CloseHandle(my.mc_cond); + CloseHandle(my.mc_mutex); + _aligned_free(my.mc_wbuf[0]); +#else + pthread_cond_destroy(&my.mc_cond); + pthread_mutex_destroy(&my.mc_mutex); + free(my.mc_wbuf[0]); +#endif + return rc; +} + + /** Copy environment as-is. */ +static int ESECT +mdb_env_copyfd0(MDB_env *env, HANDLE fd) +{ + MDB_txn *txn = NULL; + int rc; + size_t wsize; + char *ptr; +#ifdef _WIN32 + DWORD len, w2; +#define DO_WRITE(rc, fd, ptr, w2, len) rc = WriteFile(fd, ptr, w2, &len, NULL) +#else + ssize_t len; + size_t w2; +#define DO_WRITE(rc, fd, ptr, w2, len) len = write(fd, ptr, w2); rc = (len >= 0) +#endif + + /* Do the lock/unlock of the reader mutex before starting the + * write txn. Otherwise other read txns could block writers. + */ + rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn); + if (rc) + return rc; + + if (env->me_txns) { + /* We must start the actual read txn after blocking writers */ + mdb_txn_reset0(txn, "reset-stage1"); + + /* Temporarily block writers until we snapshot the meta pages */ + LOCK_MUTEX_W(env); + + rc = mdb_txn_renew0(txn); + if (rc) { + UNLOCK_MUTEX_W(env); + goto leave; + } + } + + wsize = env->me_psize * 2; + ptr = env->me_map; + w2 = wsize; + while (w2 > 0) { + DO_WRITE(rc, fd, ptr, w2, len); + if (!rc) { + rc = ErrCode(); + break; + } else if (len > 0) { + rc = MDB_SUCCESS; + ptr += len; + w2 -= len; + continue; + } else { + /* Non-blocking or async handles are not supported */ + rc = EIO; + break; + } + } + if (env->me_txns) + UNLOCK_MUTEX_W(env); + + if (rc) + goto leave; + + w2 = txn->mt_next_pgno * env->me_psize; +#ifdef WIN32 + { + LARGE_INTEGER fsize; + GetFileSizeEx(env->me_fd, &fsize); + if (w2 > fsize.QuadPart) + w2 = fsize.QuadPart; + } +#else + { + struct stat st; + fstat(env->me_fd, &st); + if (w2 > (size_t)st.st_size) + w2 = st.st_size; + } +#endif + wsize = w2 - wsize; + while (wsize > 0) { + if (wsize > MAX_WRITE) + w2 = MAX_WRITE; + else + w2 = wsize; + DO_WRITE(rc, fd, ptr, w2, len); + if (!rc) { + rc = ErrCode(); + break; + } else if (len > 0) { + rc = MDB_SUCCESS; + ptr += len; + wsize -= len; + continue; + } else { + rc = EIO; + break; + } + } + +leave: + mdb_txn_abort(txn); + return rc; +} + +int ESECT +mdb_env_copyfd2(MDB_env *env, HANDLE fd, unsigned int flags) +{ + if (flags & MDB_CP_COMPACT) + return mdb_env_copyfd1(env, fd); + else + return mdb_env_copyfd0(env, fd); +} + +int ESECT +mdb_env_copyfd(MDB_env *env, HANDLE fd) +{ + return mdb_env_copyfd2(env, fd, 0); +} + +int ESECT +mdb_env_copy2(MDB_env *env, const char *path, unsigned int flags) +{ + int rc, len; + char *lpath; + HANDLE newfd = INVALID_HANDLE_VALUE; + + if (env->me_flags & MDB_NOSUBDIR) { + lpath = (char *)path; + } else { + len = strlen(path); + len += sizeof(DATANAME); + lpath = malloc(len); + if (!lpath) + return ENOMEM; + sprintf(lpath, "%s" DATANAME, path); + } + + /* The destination path must exist, but the destination file must not. + * We don't want the OS to cache the writes, since the source data is + * already in the OS cache. + */ +#ifdef _WIN32 + newfd = CreateFile(lpath, GENERIC_WRITE, 0, NULL, CREATE_NEW, + FILE_FLAG_NO_BUFFERING|FILE_FLAG_WRITE_THROUGH, NULL); +#else + newfd = open(lpath, O_WRONLY|O_CREAT|O_EXCL, 0666); +#endif + if (newfd == INVALID_HANDLE_VALUE) { + rc = ErrCode(); + goto leave; + } + + if (env->me_psize >= env->me_os_psize) { +#ifdef O_DIRECT + /* Set O_DIRECT if the file system supports it */ + if ((rc = fcntl(newfd, F_GETFL)) != -1) + (void) fcntl(newfd, F_SETFL, rc | O_DIRECT); +#endif +#ifdef F_NOCACHE /* __APPLE__ */ + rc = fcntl(newfd, F_NOCACHE, 1); + if (rc) { + rc = ErrCode(); + goto leave; + } +#endif + } + + rc = mdb_env_copyfd2(env, newfd, flags); + +leave: + if (!(env->me_flags & MDB_NOSUBDIR)) + free(lpath); + if (newfd != INVALID_HANDLE_VALUE) + if (close(newfd) < 0 && rc == MDB_SUCCESS) + rc = ErrCode(); + + return rc; +} + +int ESECT +mdb_env_copy(MDB_env *env, const char *path) +{ + return mdb_env_copy2(env, path, 0); +} + +int ESECT +mdb_env_set_flags(MDB_env *env, unsigned int flag, int onoff) +{ + if ((flag & CHANGEABLE) != flag) + return EINVAL; + if (onoff) + env->me_flags |= flag; + else + env->me_flags &= ~flag; + return MDB_SUCCESS; +} + +int ESECT +mdb_env_get_flags(MDB_env *env, unsigned int *arg) +{ + if (!env || !arg) + return EINVAL; + + *arg = env->me_flags; + return MDB_SUCCESS; +} + +int ESECT +mdb_env_set_userctx(MDB_env *env, void *ctx) +{ + if (!env) + return EINVAL; + env->me_userctx = ctx; + return MDB_SUCCESS; +} + +void * ESECT +mdb_env_get_userctx(MDB_env *env) +{ + return env ? env->me_userctx : NULL; +} + +int ESECT +mdb_env_set_assert(MDB_env *env, MDB_assert_func *func) +{ + if (!env) + return EINVAL; +#ifndef NDEBUG + env->me_assert_func = func; +#endif + return MDB_SUCCESS; +} + +int ESECT +mdb_env_get_path(MDB_env *env, const char **arg) +{ + if (!env || !arg) + return EINVAL; + + *arg = env->me_path; + return MDB_SUCCESS; +} + +int ESECT +mdb_env_get_fd(MDB_env *env, mdb_filehandle_t *arg) +{ + if (!env || !arg) + return EINVAL; + + *arg = env->me_fd; + return MDB_SUCCESS; +} + +/** Common code for #mdb_stat() and #mdb_env_stat(). + * @param[in] env the environment to operate in. + * @param[in] db the #MDB_db record containing the stats to return. + * @param[out] arg the address of an #MDB_stat structure to receive the stats. + * @return 0, this function always succeeds. + */ +static int ESECT +mdb_stat0(MDB_env *env, MDB_db *db, MDB_stat *arg) +{ + arg->ms_psize = env->me_psize; + arg->ms_depth = db->md_depth; + arg->ms_branch_pages = db->md_branch_pages; + arg->ms_leaf_pages = db->md_leaf_pages; + arg->ms_overflow_pages = db->md_overflow_pages; + arg->ms_entries = db->md_entries; + + return MDB_SUCCESS; +} + +int ESECT +mdb_env_stat(MDB_env *env, MDB_stat *arg) +{ + int toggle; + + if (env == NULL || arg == NULL) + return EINVAL; + + toggle = mdb_env_pick_meta(env); + + return mdb_stat0(env, &env->me_metas[toggle]->mm_dbs[MAIN_DBI], arg); +} + +int ESECT +mdb_env_info(MDB_env *env, MDB_envinfo *arg) +{ + int toggle; + + if (env == NULL || arg == NULL) + return EINVAL; + + toggle = mdb_env_pick_meta(env); + arg->me_mapaddr = env->me_metas[toggle]->mm_address; + arg->me_mapsize = env->me_mapsize; + arg->me_maxreaders = env->me_maxreaders; + + /* me_numreaders may be zero if this process never used any readers. Use + * the shared numreader count if it exists. + */ + arg->me_numreaders = env->me_txns ? env->me_txns->mti_numreaders : env->me_numreaders; + + arg->me_last_pgno = env->me_metas[toggle]->mm_last_pg; + arg->me_last_txnid = env->me_metas[toggle]->mm_txnid; + return MDB_SUCCESS; +} + +/** Set the default comparison functions for a database. + * Called immediately after a database is opened to set the defaults. + * The user can then override them with #mdb_set_compare() or + * #mdb_set_dupsort(). + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + */ +static void +mdb_default_cmp(MDB_txn *txn, MDB_dbi dbi) +{ + uint16_t f = txn->mt_dbs[dbi].md_flags; + + txn->mt_dbxs[dbi].md_cmp = + (f & MDB_REVERSEKEY) ? mdb_cmp_memnr : + (f & MDB_INTEGERKEY) ? mdb_cmp_cint : mdb_cmp_memn; + + txn->mt_dbxs[dbi].md_dcmp = + !(f & MDB_DUPSORT) ? 0 : + ((f & MDB_INTEGERDUP) + ? ((f & MDB_DUPFIXED) ? mdb_cmp_int : mdb_cmp_cint) + : ((f & MDB_REVERSEDUP) ? mdb_cmp_memnr : mdb_cmp_memn)); +} + +int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *dbi) +{ + MDB_val key, data; + MDB_dbi i; + MDB_cursor mc; + MDB_db dummy; + int rc, dbflag, exact; + unsigned int unused = 0, seq; + size_t len; + + if (txn->mt_dbxs[FREE_DBI].md_cmp == NULL) { + mdb_default_cmp(txn, FREE_DBI); + } + + if ((flags & VALID_FLAGS) != flags) + return EINVAL; + if (txn->mt_flags & MDB_TXN_ERROR) + return MDB_BAD_TXN; + + /* main DB? */ + if (!name) { + *dbi = MAIN_DBI; + if (flags & PERSISTENT_FLAGS) { + uint16_t f2 = flags & PERSISTENT_FLAGS; + /* make sure flag changes get committed */ + if ((txn->mt_dbs[MAIN_DBI].md_flags | f2) != txn->mt_dbs[MAIN_DBI].md_flags) { + txn->mt_dbs[MAIN_DBI].md_flags |= f2; + txn->mt_flags |= MDB_TXN_DIRTY; + } + } + mdb_default_cmp(txn, MAIN_DBI); + return MDB_SUCCESS; + } + + if (txn->mt_dbxs[MAIN_DBI].md_cmp == NULL) { + mdb_default_cmp(txn, MAIN_DBI); + } + + /* Is the DB already open? */ + len = strlen(name); + for (i=2; i<txn->mt_numdbs; i++) { + if (!txn->mt_dbxs[i].md_name.mv_size) { + /* Remember this free slot */ + if (!unused) unused = i; + continue; + } + if (len == txn->mt_dbxs[i].md_name.mv_size && + !strncmp(name, txn->mt_dbxs[i].md_name.mv_data, len)) { + *dbi = i; + return MDB_SUCCESS; + } + } + + /* If no free slot and max hit, fail */ + if (!unused && txn->mt_numdbs >= txn->mt_env->me_maxdbs) + return MDB_DBS_FULL; + + /* Cannot mix named databases with some mainDB flags */ + if (txn->mt_dbs[MAIN_DBI].md_flags & (MDB_DUPSORT|MDB_INTEGERKEY)) + return (flags & MDB_CREATE) ? MDB_INCOMPATIBLE : MDB_NOTFOUND; + + /* Find the DB info */ + dbflag = DB_NEW|DB_VALID; + exact = 0; + key.mv_size = len; + key.mv_data = (void *)name; + mdb_cursor_init(&mc, txn, MAIN_DBI, NULL); + rc = mdb_cursor_set(&mc, &key, &data, MDB_SET, &exact); + if (rc == MDB_SUCCESS) { + /* make sure this is actually a DB */ + MDB_node *node = NODEPTR(mc.mc_pg[mc.mc_top], mc.mc_ki[mc.mc_top]); + if (!(node->mn_flags & F_SUBDATA)) + return MDB_INCOMPATIBLE; + } else if (rc == MDB_NOTFOUND && (flags & MDB_CREATE)) { + /* Create if requested */ + data.mv_size = sizeof(MDB_db); + data.mv_data = &dummy; + memset(&dummy, 0, sizeof(dummy)); + dummy.md_root = P_INVALID; + dummy.md_flags = flags & PERSISTENT_FLAGS; + rc = mdb_cursor_put(&mc, &key, &data, F_SUBDATA); + dbflag |= DB_DIRTY; + } + + /* OK, got info, add to table */ + if (rc == MDB_SUCCESS) { + unsigned int slot = unused ? unused : txn->mt_numdbs; + txn->mt_dbxs[slot].md_name.mv_data = strdup(name); + txn->mt_dbxs[slot].md_name.mv_size = len; + txn->mt_dbxs[slot].md_rel = NULL; + txn->mt_dbflags[slot] = dbflag; + /* txn-> and env-> are the same in read txns, use + * tmp variable to avoid undefined assignment + */ + seq = ++txn->mt_env->me_dbiseqs[slot]; + txn->mt_dbiseqs[slot] = seq; + + memcpy(&txn->mt_dbs[slot], data.mv_data, sizeof(MDB_db)); + *dbi = slot; + mdb_default_cmp(txn, slot); + if (!unused) { + txn->mt_numdbs++; + } + } + + return rc; +} + +int mdb_stat(MDB_txn *txn, MDB_dbi dbi, MDB_stat *arg) +{ + if (!arg || !TXN_DBI_EXIST(txn, dbi)) + return EINVAL; + + if (txn->mt_flags & MDB_TXN_ERROR) + return MDB_BAD_TXN; + + if (txn->mt_dbflags[dbi] & DB_STALE) { + MDB_cursor mc; + MDB_xcursor mx; + /* Stale, must read the DB's root. cursor_init does it for us. */ + mdb_cursor_init(&mc, txn, dbi, &mx); + } + return mdb_stat0(txn->mt_env, &txn->mt_dbs[dbi], arg); +} + +void mdb_dbi_close(MDB_env *env, MDB_dbi dbi) +{ + char *ptr; + if (dbi <= MAIN_DBI || dbi >= env->me_maxdbs) + return; + ptr = env->me_dbxs[dbi].md_name.mv_data; + /* If there was no name, this was already closed */ + if (ptr) { + env->me_dbxs[dbi].md_name.mv_data = NULL; + env->me_dbxs[dbi].md_name.mv_size = 0; + env->me_dbflags[dbi] = 0; + env->me_dbiseqs[dbi]++; + free(ptr); + } +} + +int mdb_dbi_flags(MDB_txn *txn, MDB_dbi dbi, unsigned int *flags) +{ + /* We could return the flags for the FREE_DBI too but what's the point? */ + if (dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi)) + return EINVAL; + *flags = txn->mt_dbs[dbi].md_flags & PERSISTENT_FLAGS; + return MDB_SUCCESS; +} + +/** Add all the DB's pages to the free list. + * @param[in] mc Cursor on the DB to free. + * @param[in] subs non-Zero to check for sub-DBs in this DB. + * @return 0 on success, non-zero on failure. + */ +static int +mdb_drop0(MDB_cursor *mc, int subs) +{ + int rc; + + rc = mdb_page_search(mc, NULL, MDB_PS_FIRST); + if (rc == MDB_SUCCESS) { + MDB_txn *txn = mc->mc_txn; + MDB_node *ni; + MDB_cursor mx; + unsigned int i; + + /* LEAF2 pages have no nodes, cannot have sub-DBs */ + if (IS_LEAF2(mc->mc_pg[mc->mc_top])) + mdb_cursor_pop(mc); + + mdb_cursor_copy(mc, &mx); + while (mc->mc_snum > 0) { + MDB_page *mp = mc->mc_pg[mc->mc_top]; + unsigned n = NUMKEYS(mp); + if (IS_LEAF(mp)) { + for (i=0; i<n; i++) { + ni = NODEPTR(mp, i); + if (ni->mn_flags & F_BIGDATA) { + MDB_page *omp; + pgno_t pg; + memcpy(&pg, NODEDATA(ni), sizeof(pg)); + rc = mdb_page_get(txn, pg, &omp, NULL); + if (rc != 0) + goto done; + mdb_cassert(mc, IS_OVERFLOW(omp)); + rc = mdb_midl_append_range(&txn->mt_free_pgs, + pg, omp->mp_pages); + if (rc) + goto done; + } else if (subs && (ni->mn_flags & F_SUBDATA)) { + mdb_xcursor_init1(mc, ni); + rc = mdb_drop0(&mc->mc_xcursor->mx_cursor, 0); + if (rc) + goto done; + } + } + } else { + if ((rc = mdb_midl_need(&txn->mt_free_pgs, n)) != 0) + goto done; + for (i=0; i<n; i++) { + pgno_t pg; + ni = NODEPTR(mp, i); + pg = NODEPGNO(ni); + /* free it */ + mdb_midl_xappend(txn->mt_free_pgs, pg); + } + } + if (!mc->mc_top) + break; + mc->mc_ki[mc->mc_top] = i; + rc = mdb_cursor_sibling(mc, 1); + if (rc) { + if (rc != MDB_NOTFOUND) + goto done; + /* no more siblings, go back to beginning + * of previous level. + */ + mdb_cursor_pop(mc); + mc->mc_ki[0] = 0; + for (i=1; i<mc->mc_snum; i++) { + mc->mc_ki[i] = 0; + mc->mc_pg[i] = mx.mc_pg[i]; + } + } + } + /* free it */ + rc = mdb_midl_append(&txn->mt_free_pgs, mc->mc_db->md_root); +done: + if (rc) + txn->mt_flags |= MDB_TXN_ERROR; + } else if (rc == MDB_NOTFOUND) { + rc = MDB_SUCCESS; + } + return rc; +} + +int mdb_drop(MDB_txn *txn, MDB_dbi dbi, int del) +{ + MDB_cursor *mc, *m2; + int rc; + + if ((unsigned)del > 1 || dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi)) + return EINVAL; + + if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) + return EACCES; + + if (dbi > MAIN_DBI && TXN_DBI_CHANGED(txn, dbi)) + return MDB_BAD_DBI; + + rc = mdb_cursor_open(txn, dbi, &mc); + if (rc) + return rc; + + rc = mdb_drop0(mc, mc->mc_db->md_flags & MDB_DUPSORT); + /* Invalidate the dropped DB's cursors */ + for (m2 = txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) + m2->mc_flags &= ~(C_INITIALIZED|C_EOF); + if (rc) + goto leave; + + /* Can't delete the main DB */ + if (del && dbi > MAIN_DBI) { + rc = mdb_del0(txn, MAIN_DBI, &mc->mc_dbx->md_name, NULL, 0); + if (!rc) { + txn->mt_dbflags[dbi] = DB_STALE; + mdb_dbi_close(txn->mt_env, dbi); + } else { + txn->mt_flags |= MDB_TXN_ERROR; + } + } else { + /* reset the DB record, mark it dirty */ + txn->mt_dbflags[dbi] |= DB_DIRTY; + txn->mt_dbs[dbi].md_depth = 0; + txn->mt_dbs[dbi].md_branch_pages = 0; + txn->mt_dbs[dbi].md_leaf_pages = 0; + txn->mt_dbs[dbi].md_overflow_pages = 0; + txn->mt_dbs[dbi].md_entries = 0; + txn->mt_dbs[dbi].md_root = P_INVALID; + + txn->mt_flags |= MDB_TXN_DIRTY; + } +leave: + mdb_cursor_close(mc); + return rc; +} + +int mdb_set_compare(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp) +{ + if (dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi)) + return EINVAL; + + txn->mt_dbxs[dbi].md_cmp = cmp; + return MDB_SUCCESS; +} + +int mdb_set_dupsort(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp) +{ + if (dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi)) + return EINVAL; + + txn->mt_dbxs[dbi].md_dcmp = cmp; + return MDB_SUCCESS; +} + +int mdb_set_relfunc(MDB_txn *txn, MDB_dbi dbi, MDB_rel_func *rel) +{ + if (dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi)) + return EINVAL; + + txn->mt_dbxs[dbi].md_rel = rel; + return MDB_SUCCESS; +} + +int mdb_set_relctx(MDB_txn *txn, MDB_dbi dbi, void *ctx) +{ + if (dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi)) + return EINVAL; + + txn->mt_dbxs[dbi].md_relctx = ctx; + return MDB_SUCCESS; +} + +int ESECT +mdb_env_get_maxkeysize(MDB_env *env) +{ + return ENV_MAXKEY(env); +} + +int ESECT +mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx) +{ + unsigned int i, rdrs; + MDB_reader *mr; + char buf[64]; + int rc = 0, first = 1; + + if (!env || !func) + return -1; + if (!env->me_txns) { + return func("(no reader locks)\n", ctx); + } + rdrs = env->me_txns->mti_numreaders; + mr = env->me_txns->mti_readers; + for (i=0; i<rdrs; i++) { + if (mr[i].mr_pid) { + txnid_t txnid = mr[i].mr_txnid; + sprintf(buf, txnid == (txnid_t)-1 ? + "%10d %"Z"x -\n" : "%10d %"Z"x %"Z"u\n", + (int)mr[i].mr_pid, (size_t)mr[i].mr_tid, txnid); + if (first) { + first = 0; + rc = func(" pid thread txnid\n", ctx); + if (rc < 0) + break; + } + rc = func(buf, ctx); + if (rc < 0) + break; + } + } + if (first) { + rc = func("(no active readers)\n", ctx); + } + return rc; +} + +/** Insert pid into list if not already present. + * return -1 if already present. + */ +static int ESECT +mdb_pid_insert(MDB_PID_T *ids, MDB_PID_T pid) +{ + /* binary search of pid in list */ + unsigned base = 0; + unsigned cursor = 1; + int val = 0; + unsigned n = ids[0]; + + while( 0 < n ) { + unsigned pivot = n >> 1; + cursor = base + pivot + 1; + val = pid - ids[cursor]; + + if( val < 0 ) { + n = pivot; + + } else if ( val > 0 ) { + base = cursor; + n -= pivot + 1; + + } else { + /* found, so it's a duplicate */ + return -1; + } + } + + if( val > 0 ) { + ++cursor; + } + ids[0]++; + for (n = ids[0]; n > cursor; n--) + ids[n] = ids[n-1]; + ids[n] = pid; + return 0; +} + +int ESECT +mdb_reader_check(MDB_env *env, int *dead) +{ + unsigned int i, j, rdrs; + MDB_reader *mr; + MDB_PID_T *pids, pid; + int count = 0; + + if (!env) + return EINVAL; + if (dead) + *dead = 0; + if (!env->me_txns) + return MDB_SUCCESS; + rdrs = env->me_txns->mti_numreaders; + pids = malloc((rdrs+1) * sizeof(MDB_PID_T)); + if (!pids) + return ENOMEM; + pids[0] = 0; + mr = env->me_txns->mti_readers; + for (i=0; i<rdrs; i++) { + if (mr[i].mr_pid && mr[i].mr_pid != env->me_pid) { + pid = mr[i].mr_pid; + if (mdb_pid_insert(pids, pid) == 0) { + if (!mdb_reader_pid(env, Pidcheck, pid)) { + LOCK_MUTEX_R(env); + /* Recheck, a new process may have reused pid */ + if (!mdb_reader_pid(env, Pidcheck, pid)) { + for (j=i; j<rdrs; j++) + if (mr[j].mr_pid == pid) { + DPRINTF(("clear stale reader pid %u txn %"Z"d", + (unsigned) pid, mr[j].mr_txnid)); + mr[j].mr_pid = 0; + count++; + } + } + UNLOCK_MUTEX_R(env); + } + } + } + } + free(pids); + if (dead) + *dead = count; + return MDB_SUCCESS; +} +/** @} */ diff --git a/external/db_drivers/liblmdb32/mdb_copy.1 b/external/db_drivers/liblmdb32/mdb_copy.1 new file mode 100644 index 000000000..094b26056 --- /dev/null +++ b/external/db_drivers/liblmdb32/mdb_copy.1 @@ -0,0 +1,54 @@ +.TH MDB_COPY 1 "2014/06/20" "LMDB 0.9.14" +.\" Copyright 2012-2014 Howard Chu, Symas Corp. All Rights Reserved. +.\" Copying restrictions apply. See COPYRIGHT/LICENSE. +.SH NAME +mdb_copy \- LMDB environment copy tool +.SH SYNOPSIS +.B mdb_copy +[\c +.BR \-V ] +[\c +.BR \-c ] +[\c +.BR \-n ] +.B srcpath +[\c +.BR dstpath ] +.SH DESCRIPTION +The +.B mdb_copy +utility copies an LMDB environment. The environment can +be copied regardless of whether it is currently in use. +No lockfile is created, since it gets recreated at need. + +If +.I dstpath +is specified it must be the path of an empty directory +for storing the backup. Otherwise, the backup will be +written to stdout. + +.SH OPTIONS +.TP +.BR \-V +Write the library version number to the standard output, and exit. +.TP +.BR \-c +Compact while copying. Only current data pages will be copied; freed +or unused pages will be omitted from the copy. This option will +slow down the backup process as it is more CPU-intensive. +.TP +.BR \-n +Open LDMB environment(s) which do not use subdirectories. + +.SH DIAGNOSTICS +Exit status is zero if no errors occur. +Errors result in a non-zero exit status and +a diagnostic message being written to standard error. +.SH CAVEATS +This utility can trigger significant file size growth if run +in parallel with write transactions, because pages which they +free during copying cannot be reused until the copy is done. +.SH "SEE ALSO" +.BR mdb_stat (1) +.SH AUTHOR +Howard Chu of Symas Corporation <http://www.symas.com> diff --git a/external/db_drivers/liblmdb32/mdb_copy.c b/external/db_drivers/liblmdb32/mdb_copy.c new file mode 100644 index 000000000..e7f965c03 --- /dev/null +++ b/external/db_drivers/liblmdb32/mdb_copy.c @@ -0,0 +1,82 @@ +/* mdb_copy.c - memory-mapped database backup tool */ +/* + * Copyright 2012 Howard Chu, Symas Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>. + */ +#ifdef _WIN32 +#include <windows.h> +#define MDB_STDOUT GetStdHandle(STD_OUTPUT_HANDLE) +#else +#define MDB_STDOUT 1 +#endif +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include "lmdb.h" + +static void +sighandle(int sig) +{ +} + +int main(int argc,char * argv[]) +{ + int rc; + MDB_env *env; + const char *progname = argv[0], *act; + unsigned flags = MDB_RDONLY; + unsigned cpflags = 0; + + for (; argc > 1 && argv[1][0] == '-'; argc--, argv++) { + if (argv[1][1] == 'n' && argv[1][2] == '\0') + flags |= MDB_NOSUBDIR; + else if (argv[1][1] == 'c' && argv[1][2] == '\0') + cpflags |= MDB_CP_COMPACT; + else if (argv[1][1] == 'V' && argv[1][2] == '\0') { + printf("%s\n", MDB_VERSION_STRING); + exit(0); + } else + argc = 0; + } + + if (argc<2 || argc>3) { + fprintf(stderr, "usage: %s [-V] [-c] [-n] srcpath [dstpath]\n", progname); + exit(EXIT_FAILURE); + } + +#ifdef SIGPIPE + signal(SIGPIPE, sighandle); +#endif +#ifdef SIGHUP + signal(SIGHUP, sighandle); +#endif + signal(SIGINT, sighandle); + signal(SIGTERM, sighandle); + + act = "opening environment"; + rc = mdb_env_create(&env); + if (rc == MDB_SUCCESS) { + rc = mdb_env_open(env, argv[1], flags, 0664); + } + if (rc == MDB_SUCCESS) { + act = "copying"; + if (argc == 2) + rc = mdb_env_copyfd2(env, MDB_STDOUT, cpflags); + else + rc = mdb_env_copy2(env, argv[2], cpflags); + } + if (rc) + fprintf(stderr, "%s: %s failed, error %d (%s)\n", + progname, act, rc, mdb_strerror(rc)); + mdb_env_close(env); + + return rc ? EXIT_FAILURE : EXIT_SUCCESS; +} diff --git a/external/db_drivers/liblmdb32/mdb_dump.1 b/external/db_drivers/liblmdb32/mdb_dump.1 new file mode 100644 index 000000000..6fcc93081 --- /dev/null +++ b/external/db_drivers/liblmdb32/mdb_dump.1 @@ -0,0 +1,75 @@ +.TH MDB_DUMP 1 "2014/06/20" "LMDB 0.9.14" +.\" Copyright 2014 Howard Chu, Symas Corp. All Rights Reserved. +.\" Copying restrictions apply. See COPYRIGHT/LICENSE. +.SH NAME +mdb_dump \- LMDB environment export tool +.SH SYNOPSIS +.B mdb_dump +.BR \ envpath +[\c +.BR \-V ] +[\c +.BI \-f \ file\fR] +[\c +.BR \-l ] +[\c +.BR \-n ] +[\c +.BR \-p ] +[\c +.BR \-a \ | +.BI \-s \ subdb\fR] +.SH DESCRIPTION +The +.B mdb_dump +utility reads a database and writes its contents to the +standard output using a portable flat-text format +understood by the +.BR mdb_load (1) +utility. +.SH OPTIONS +.TP +.BR \-V +Write the library version number to the standard output, and exit. +.TP +.BR \-f \ file +Write to the specified file instead of to the standard output. +.TP +.BR \-l +List the databases stored in the environment. Just the +names will be listed, no data will be output. +.TP +.BR \-n +Dump an LMDB database which does not use subdirectories. +.TP +.BR \-p +If characters in either the key or data items are printing characters (as +defined by isprint(3)), output them directly. This option permits users to +use standard text editors and tools to modify the contents of databases. + +Note: different systems may have different notions about what characters +are considered printing characters, and databases dumped in this manner may +be less portable to external systems. +.TP +.BR \-a +Dump all of the subdatabases in the environment. +.TP +.BR \-s \ subdb +Dump a specific subdatabase. If no database is specified, only the main database is dumped. +.SH DIAGNOSTICS +Exit status is zero if no errors occur. +Errors result in a non-zero exit status and +a diagnostic message being written to standard error. + +Dumping and reloading databases that use user-defined comparison functions +will result in new databases that use the default comparison functions. +\fBIn this case it is quite likely that the reloaded database will be +damaged beyond repair permitting neither record storage nor retrieval.\fP + +The only available workaround is to modify the source for the +.BR mdb_load (1) +utility to load the database using the correct comparison functions. +.SH "SEE ALSO" +.BR mdb_load (1) +.SH AUTHOR +Howard Chu of Symas Corporation <http://www.symas.com> diff --git a/external/db_drivers/liblmdb32/mdb_dump.c b/external/db_drivers/liblmdb32/mdb_dump.c new file mode 100644 index 000000000..0eb85fd20 --- /dev/null +++ b/external/db_drivers/liblmdb32/mdb_dump.c @@ -0,0 +1,317 @@ +/* mdb_dump.c - memory-mapped database dump tool */ +/* + * Copyright 2011-2014 Howard Chu, Symas Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>. + */ +#include <stdio.h> +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <unistd.h> +#include <signal.h> +#include "lmdb.h" + +#ifdef _WIN32 +#define Z "I" +#else +#define Z "z" +#endif + +#define PRINT 1 +static int mode; + +typedef struct flagbit { + int bit; + char *name; +} flagbit; + +flagbit dbflags[] = { + { MDB_REVERSEKEY, "reversekey" }, + { MDB_DUPSORT, "dupsort" }, + { MDB_INTEGERKEY, "integerkey" }, + { MDB_DUPFIXED, "dupfixed" }, + { MDB_INTEGERDUP, "integerdup" }, + { MDB_REVERSEDUP, "reversedup" }, + { 0, NULL } +}; + +static volatile sig_atomic_t gotsig; + +static void dumpsig( int sig ) +{ + gotsig=1; +} + +static const char hexc[] = "0123456789abcdef"; + +static void hex(unsigned char c) +{ + putchar(hexc[c >> 4]); + putchar(hexc[c & 0xf]); +} + +static void text(MDB_val *v) +{ + unsigned char *c, *end; + + putchar(' '); + c = v->mv_data; + end = c + v->mv_size; + while (c < end) { + if (isprint(*c)) { + putchar(*c); + } else { + putchar('\\'); + hex(*c); + } + c++; + } + putchar('\n'); +} + +static void byte(MDB_val *v) +{ + unsigned char *c, *end; + + putchar(' '); + c = v->mv_data; + end = c + v->mv_size; + while (c < end) { + hex(*c++); + } + putchar('\n'); +} + +/* Dump in BDB-compatible format */ +static int dumpit(MDB_txn *txn, MDB_dbi dbi, char *name) +{ + MDB_cursor *mc; + MDB_stat ms; + MDB_val key, data; + MDB_envinfo info; + unsigned int flags; + int rc, i; + + rc = mdb_dbi_flags(txn, dbi, &flags); + if (rc) return rc; + + rc = mdb_stat(txn, dbi, &ms); + if (rc) return rc; + + rc = mdb_env_info(mdb_txn_env(txn), &info); + if (rc) return rc; + + printf("VERSION=3\n"); + printf("format=%s\n", mode & PRINT ? "print" : "bytevalue"); + if (name) + printf("database=%s\n", name); + printf("type=btree\n"); + printf("mapsize=%" Z "u\n", info.me_mapsize); + if (info.me_mapaddr) + printf("mapaddr=%p\n", info.me_mapaddr); + printf("maxreaders=%u\n", info.me_maxreaders); + + if (flags & MDB_DUPSORT) + printf("duplicates=1\n"); + + for (i=0; dbflags[i].bit; i++) + if (flags & dbflags[i].bit) + printf("%s=1\n", dbflags[i].name); + + printf("db_pagesize=%d\n", ms.ms_psize); + printf("HEADER=END\n"); + + rc = mdb_cursor_open(txn, dbi, &mc); + if (rc) return rc; + + while ((rc = mdb_cursor_get(mc, &key, &data, MDB_NEXT) == MDB_SUCCESS)) { + if (gotsig) { + rc = EINTR; + break; + } + if (mode & PRINT) { + text(&key); + text(&data); + } else { + byte(&key); + byte(&data); + } + } + printf("DATA=END\n"); + if (rc == MDB_NOTFOUND) + rc = MDB_SUCCESS; + + return rc; +} + +static void usage(char *prog) +{ + fprintf(stderr, "usage: %s dbpath [-V] [-f output] [-l] [-n] [-p] [-a|-s subdb]\n", prog); + exit(EXIT_FAILURE); +} + +int main(int argc, char *argv[]) +{ + int i, rc; + MDB_env *env; + MDB_txn *txn; + MDB_dbi dbi; + char *prog = argv[0]; + char *envname; + char *subname = NULL; + int alldbs = 0, envflags = 0, list = 0; + + if (argc < 2) { + usage(prog); + } + + /* -a: dump main DB and all subDBs + * -s: dump only the named subDB + * -n: use NOSUBDIR flag on env_open + * -p: use printable characters + * -f: write to file instead of stdout + * -V: print version and exit + * (default) dump only the main DB + */ + while ((i = getopt(argc, argv, "af:lnps:V")) != EOF) { + switch(i) { + case 'V': + printf("%s\n", MDB_VERSION_STRING); + exit(0); + break; + case 'l': + list = 1; + /*FALLTHROUGH*/; + case 'a': + if (subname) + usage(prog); + alldbs++; + break; + case 'f': + if (freopen(optarg, "w", stdout) == NULL) { + fprintf(stderr, "%s: %s: reopen: %s\n", + prog, optarg, strerror(errno)); + exit(EXIT_FAILURE); + } + break; + case 'n': + envflags |= MDB_NOSUBDIR; + break; + case 'p': + mode |= PRINT; + break; + case 's': + if (alldbs) + usage(prog); + subname = optarg; + break; + default: + usage(prog); + } + } + + if (optind != argc - 1) + usage(prog); + +#ifdef SIGPIPE + signal(SIGPIPE, dumpsig); +#endif +#ifdef SIGHUP + signal(SIGHUP, dumpsig); +#endif + signal(SIGINT, dumpsig); + signal(SIGTERM, dumpsig); + + envname = argv[optind]; + rc = mdb_env_create(&env); + if (rc) { + fprintf(stderr, "mdb_env_create failed, error %d %s\n", rc, mdb_strerror(rc)); + return EXIT_FAILURE; + } + + if (alldbs || subname) { + mdb_env_set_maxdbs(env, 2); + } + + rc = mdb_env_open(env, envname, envflags | MDB_RDONLY, 0664); + if (rc) { + fprintf(stderr, "mdb_env_open failed, error %d %s\n", rc, mdb_strerror(rc)); + goto env_close; + } + + rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn); + if (rc) { + fprintf(stderr, "mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc)); + goto env_close; + } + + rc = mdb_open(txn, subname, 0, &dbi); + if (rc) { + fprintf(stderr, "mdb_open failed, error %d %s\n", rc, mdb_strerror(rc)); + goto txn_abort; + } + + if (alldbs) { + MDB_cursor *cursor; + MDB_val key; + int count = 0; + + rc = mdb_cursor_open(txn, dbi, &cursor); + if (rc) { + fprintf(stderr, "mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc)); + goto txn_abort; + } + while ((rc = mdb_cursor_get(cursor, &key, NULL, MDB_NEXT_NODUP)) == 0) { + char *str; + MDB_dbi db2; + if (memchr(key.mv_data, '\0', key.mv_size)) + continue; + count++; + str = malloc(key.mv_size+1); + memcpy(str, key.mv_data, key.mv_size); + str[key.mv_size] = '\0'; + rc = mdb_open(txn, str, 0, &db2); + if (rc == MDB_SUCCESS) { + if (list) { + printf("%s\n", str); + list++; + } else { + rc = dumpit(txn, db2, str); + if (rc) + break; + } + mdb_close(env, db2); + } + free(str); + if (rc) continue; + } + mdb_cursor_close(cursor); + if (!count) { + fprintf(stderr, "%s: %s does not contain multiple databases\n", prog, envname); + rc = MDB_NOTFOUND; + } else if (rc == MDB_NOTFOUND) { + rc = MDB_SUCCESS; + } + } else { + rc = dumpit(txn, dbi, subname); + } + if (rc && rc != MDB_NOTFOUND) + fprintf(stderr, "%s: %s: %s\n", prog, envname, mdb_strerror(rc)); + + mdb_close(env, dbi); +txn_abort: + mdb_txn_abort(txn); +env_close: + mdb_env_close(env); + + return rc ? EXIT_FAILURE : EXIT_SUCCESS; +} diff --git a/external/db_drivers/liblmdb32/mdb_load.1 b/external/db_drivers/liblmdb32/mdb_load.1 new file mode 100644 index 000000000..728024010 --- /dev/null +++ b/external/db_drivers/liblmdb32/mdb_load.1 @@ -0,0 +1,77 @@ +.TH MDB_LOAD 1 "2014/06/20" "LMDB 0.9.14" +.\" Copyright 2014 Howard Chu, Symas Corp. All Rights Reserved. +.\" Copying restrictions apply. See COPYRIGHT/LICENSE. +.SH NAME +mdb_load \- LMDB environment import tool +.SH SYNOPSIS +.B mdb_load +.BR \ envpath +[\c +.BR \-V ] +[\c +.BI \-f \ file\fR] +[\c +.BR \-n ] +[\c +.BI \-s \ subdb\fR] +[\c +.BR \-N ] +[\c +.BR \-T ] +.SH DESCRIPTION +The +.B mdb_load +utility reads from the standard input and loads it into the +LMDB environment +.BR envpath . + +The input to +.B mdb_load +must be in the output format specified by the +.BR mdb_dump (1) +utility or as specified by the +.B -T +option below. +.SH OPTIONS +.TP +.BR \-V +Write the library version number to the standard output, and exit. +.TP +.BR \-f \ file +Read from the specified file instead of from the standard input. +.TP +.BR \-n +Load an LMDB database which does not use subdirectories. +.TP +.BR \-s \ subdb +Load a specific subdatabase. If no database is specified, data is loaded into the main database. +.TP +.BR \-N +Don't overwrite existing records when loading into an already existing database; just skip them. +.TP +.BR \-T +Load data from simple text files. The input must be paired lines of text, where the first +line of the pair is the key item, and the second line of the pair is its corresponding +data item. + +A simple escape mechanism, where newline and backslash (\\) characters are special, is +applied to the text input. Newline characters are interpreted as record separators. +Backslash characters in the text will be interpreted in one of two ways: If the backslash +character precedes another backslash character, the pair will be interpreted as a literal +backslash. If the backslash character precedes any other character, the two characters +following the backslash will be interpreted as a hexadecimal specification of a single +character; for example, \\0a is a newline character in the ASCII character set. + +For this reason, any backslash or newline characters that naturally occur in the text +input must be escaped to avoid misinterpretation by +.BR mdb_load . + +.SH DIAGNOSTICS +Exit status is zero if no errors occur. +Errors result in a non-zero exit status and +a diagnostic message being written to standard error. + +.SH "SEE ALSO" +.BR mdb_dump (1) +.SH AUTHOR +Howard Chu of Symas Corporation <http://www.symas.com> diff --git a/external/db_drivers/liblmdb32/mdb_load.c b/external/db_drivers/liblmdb32/mdb_load.c new file mode 100644 index 000000000..e0d95e13c --- /dev/null +++ b/external/db_drivers/liblmdb32/mdb_load.c @@ -0,0 +1,452 @@ +/* mdb_load.c - memory-mapped database load tool */ +/* + * Copyright 2011-2014 Howard Chu, Symas Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>. + */ +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <ctype.h> +#include <unistd.h> +#include "lmdb.h" + +#define PRINT 1 +#define NOHDR 2 +static int mode; + +static char *subname = NULL; + +static size_t lineno; +static int version; + +static int flags; + +static char *prog; + +static int Eof; + +static MDB_envinfo info; + +static MDB_val kbuf, dbuf; + +#ifdef _WIN32 +#define Z "I" +#else +#define Z "z" +#endif + +#define STRLENOF(s) (sizeof(s)-1) + +typedef struct flagbit { + int bit; + char *name; + int len; +} flagbit; + +#define S(s) s, STRLENOF(s) + +flagbit dbflags[] = { + { MDB_REVERSEKEY, S("reversekey") }, + { MDB_DUPSORT, S("dupsort") }, + { MDB_INTEGERKEY, S("integerkey") }, + { MDB_DUPFIXED, S("dupfixed") }, + { MDB_INTEGERDUP, S("integerdup") }, + { MDB_REVERSEDUP, S("reversedup") }, + { 0, NULL, 0 } +}; + +static void readhdr(void) +{ + char *ptr; + + while (fgets(dbuf.mv_data, dbuf.mv_size, stdin) != NULL) { + lineno++; + if (!strncmp(dbuf.mv_data, "VERSION=", STRLENOF("VERSION="))) { + version=atoi((char *)dbuf.mv_data+STRLENOF("VERSION=")); + if (version > 3) { + fprintf(stderr, "%s: line %" Z "d: unsupported VERSION %d\n", + prog, lineno, version); + exit(EXIT_FAILURE); + } + } else if (!strncmp(dbuf.mv_data, "HEADER=END", STRLENOF("HEADER=END"))) { + break; + } else if (!strncmp(dbuf.mv_data, "format=", STRLENOF("format="))) { + if (!strncmp((char *)dbuf.mv_data+STRLENOF("FORMAT="), "print", STRLENOF("print"))) + mode |= PRINT; + else if (strncmp((char *)dbuf.mv_data+STRLENOF("FORMAT="), "bytevalue", STRLENOF("bytevalue"))) { + fprintf(stderr, "%s: line %" Z "d: unsupported FORMAT %s\n", + prog, lineno, (char *)dbuf.mv_data+STRLENOF("FORMAT=")); + exit(EXIT_FAILURE); + } + } else if (!strncmp(dbuf.mv_data, "database=", STRLENOF("database="))) { + ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size); + if (ptr) *ptr = '\0'; + if (subname) free(subname); + subname = strdup((char *)dbuf.mv_data+STRLENOF("database=")); + } else if (!strncmp(dbuf.mv_data, "type=", STRLENOF("type="))) { + if (strncmp((char *)dbuf.mv_data+STRLENOF("type="), "btree", STRLENOF("btree"))) { + fprintf(stderr, "%s: line %" Z "d: unsupported type %s\n", + prog, lineno, (char *)dbuf.mv_data+STRLENOF("type=")); + exit(EXIT_FAILURE); + } + } else if (!strncmp(dbuf.mv_data, "mapaddr=", STRLENOF("mapaddr="))) { + int i; + ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size); + if (ptr) *ptr = '\0'; + i = sscanf((char *)dbuf.mv_data+STRLENOF("mapaddr="), "%p", &info.me_mapaddr); + if (i != 1) { + fprintf(stderr, "%s: line %" Z "d: invalid mapaddr %s\n", + prog, lineno, (char *)dbuf.mv_data+STRLENOF("mapaddr=")); + exit(EXIT_FAILURE); + } + } else if (!strncmp(dbuf.mv_data, "mapsize=", STRLENOF("mapsize="))) { + int i; + ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size); + if (ptr) *ptr = '\0'; + i = sscanf((char *)dbuf.mv_data+STRLENOF("mapsize="), "%" Z "u", &info.me_mapsize); + if (i != 1) { + fprintf(stderr, "%s: line %" Z "d: invalid mapsize %s\n", + prog, lineno, (char *)dbuf.mv_data+STRLENOF("mapsize=")); + exit(EXIT_FAILURE); + } + } else if (!strncmp(dbuf.mv_data, "maxreaders=", STRLENOF("maxreaders="))) { + int i; + ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size); + if (ptr) *ptr = '\0'; + i = sscanf((char *)dbuf.mv_data+STRLENOF("maxreaders="), "%u", &info.me_maxreaders); + if (i != 1) { + fprintf(stderr, "%s: line %" Z "d: invalid maxreaders %s\n", + prog, lineno, (char *)dbuf.mv_data+STRLENOF("maxreaders=")); + exit(EXIT_FAILURE); + } + } else { + int i; + for (i=0; dbflags[i].bit; i++) { + if (!strncmp(dbuf.mv_data, dbflags[i].name, dbflags[i].len) && + ((char *)dbuf.mv_data)[dbflags[i].len] == '=') { + flags |= dbflags[i].bit; + break; + } + } + if (!dbflags[i].bit) { + ptr = memchr(dbuf.mv_data, '=', dbuf.mv_size); + if (!ptr) { + fprintf(stderr, "%s: line %" Z "d: unexpected format\n", + prog, lineno); + exit(EXIT_FAILURE); + } else { + *ptr = '\0'; + fprintf(stderr, "%s: line %" Z "d: unrecognized keyword ignored: %s\n", + prog, lineno, (char *)dbuf.mv_data); + } + } + } + } +} + +static void badend(void) +{ + fprintf(stderr, "%s: line %" Z "d: unexpected end of input\n", + prog, lineno); +} + +static int unhex(unsigned char *c2) +{ + int x, c; + x = *c2++ & 0x4f; + if (x & 0x40) + x -= 55; + c = x << 4; + x = *c2 & 0x4f; + if (x & 0x40) + x -= 55; + c |= x; + return c; +} + +static int readline(MDB_val *out, MDB_val *buf) +{ + unsigned char *c1, *c2, *end; + size_t len; + int c; + + if (!(mode & NOHDR)) { + c = fgetc(stdin); + if (c == EOF) { + Eof = 1; + return EOF; + } + if (c != ' ') { + lineno++; + if (fgets(buf->mv_data, buf->mv_size, stdin) == NULL) { +badend: + Eof = 1; + badend(); + return EOF; + } + if (c == 'D' && !strncmp(buf->mv_data, "ATA=END", STRLENOF("ATA=END"))) + return EOF; + goto badend; + } + } + if (fgets(buf->mv_data, buf->mv_size, stdin) == NULL) { + Eof = 1; + return EOF; + } + lineno++; + + c1 = buf->mv_data; + len = strlen((char *)c1); + + /* Is buffer too short? */ + while (c1[len-1] != '\n') { + buf->mv_data = realloc(buf->mv_data, buf->mv_size*2); + if (!buf->mv_data) { + Eof = 1; + fprintf(stderr, "%s: line %" Z "d: out of memory, line too long\n", + prog, lineno); + return EOF; + } + c1 = buf->mv_data; + c1 += buf->mv_size; + if (fgets((char *)c1, buf->mv_size, stdin) == NULL) { + Eof = 1; + badend(); + return EOF; + } + buf->mv_size *= 2; + len = strlen((char *)c1); + } + c1 = c2 = buf->mv_data; + len = strlen((char *)c1); + c1[--len] = '\0'; + end = c1 + len; + + if (mode & PRINT) { + while (c2 < end) { + if (*c2 == '\\') { + if (c2[1] == '\\') { + c1++; c2 += 2; + } else { + if (c2+3 > end || !isxdigit(c2[1]) || !isxdigit(c2[2])) { + Eof = 1; + badend(); + return EOF; + } + *c1++ = unhex(++c2); + c2 += 2; + } + } else { + c1++; c2++; + } + } + } else { + /* odd length not allowed */ + if (len & 1) { + Eof = 1; + badend(); + return EOF; + } + while (c2 < end) { + if (!isxdigit(*c2) || !isxdigit(c2[1])) { + Eof = 1; + badend(); + return EOF; + } + *c1++ = unhex(c2); + c2 += 2; + } + } + c2 = out->mv_data = buf->mv_data; + out->mv_size = c1 - c2; + + return 0; +} + +static void usage(void) +{ + fprintf(stderr, "usage: %s dbpath [-V] [-f input] [-n] [-s name] [-N] [-T]\n", prog); + exit(EXIT_FAILURE); +} + +int main(int argc, char *argv[]) +{ + int i, rc; + MDB_env *env; + MDB_txn *txn; + MDB_cursor *mc; + MDB_dbi dbi; + char *envname; + int envflags = 0, putflags = 0; + int dohdr = 0; + + prog = argv[0]; + + if (argc < 2) { + usage(); + } + + /* -f: load file instead of stdin + * -n: use NOSUBDIR flag on env_open + * -s: load into named subDB + * -N: use NOOVERWRITE on puts + * -T: read plaintext + * -V: print version and exit + */ + while ((i = getopt(argc, argv, "f:ns:NTV")) != EOF) { + switch(i) { + case 'V': + printf("%s\n", MDB_VERSION_STRING); + exit(0); + break; + case 'f': + if (freopen(optarg, "r", stdin) == NULL) { + fprintf(stderr, "%s: %s: reopen: %s\n", + prog, optarg, strerror(errno)); + exit(EXIT_FAILURE); + } + break; + case 'n': + envflags |= MDB_NOSUBDIR; + break; + case 's': + subname = strdup(optarg); + break; + case 'N': + putflags = MDB_NOOVERWRITE|MDB_NODUPDATA; + break; + case 'T': + mode |= NOHDR; + break; + default: + usage(); + } + } + + if (optind != argc - 1) + usage(); + + dbuf.mv_size = 4096; + dbuf.mv_data = malloc(dbuf.mv_size); + + if (!(mode & NOHDR)) + readhdr(); + + envname = argv[optind]; + rc = mdb_env_create(&env); + if (rc) { + fprintf(stderr, "mdb_env_create failed, error %d %s\n", rc, mdb_strerror(rc)); + return EXIT_FAILURE; + } + + mdb_env_set_maxdbs(env, 2); + + if (info.me_maxreaders) + mdb_env_set_maxreaders(env, info.me_maxreaders); + + if (info.me_mapsize) + mdb_env_set_mapsize(env, info.me_mapsize); + + if (info.me_mapaddr) + envflags |= MDB_FIXEDMAP; + + rc = mdb_env_open(env, envname, envflags, 0664); + if (rc) { + fprintf(stderr, "mdb_env_open failed, error %d %s\n", rc, mdb_strerror(rc)); + goto env_close; + } + + kbuf.mv_size = mdb_env_get_maxkeysize(env) * 2 + 2; + kbuf.mv_data = malloc(kbuf.mv_size); + + while(!Eof) { + MDB_val key, data; + int batch = 0; + flags = 0; + + if (!dohdr) { + dohdr = 1; + } else if (!(mode & NOHDR)) + readhdr(); + + rc = mdb_txn_begin(env, NULL, 0, &txn); + if (rc) { + fprintf(stderr, "mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc)); + goto env_close; + } + + rc = mdb_open(txn, subname, flags|MDB_CREATE, &dbi); + if (rc) { + fprintf(stderr, "mdb_open failed, error %d %s\n", rc, mdb_strerror(rc)); + goto txn_abort; + } + + rc = mdb_cursor_open(txn, dbi, &mc); + if (rc) { + fprintf(stderr, "mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc)); + goto txn_abort; + } + + while(1) { + rc = readline(&key, &kbuf); + if (rc == EOF) + break; + if (rc) + goto txn_abort; + + rc = readline(&data, &dbuf); + if (rc) + goto txn_abort; + + rc = mdb_cursor_put(mc, &key, &data, putflags); + if (rc == MDB_KEYEXIST && putflags) + continue; + if (rc) + goto txn_abort; + batch++; + if (batch == 100) { + rc = mdb_txn_commit(txn); + if (rc) { + fprintf(stderr, "%s: line %" Z "d: txn_commit: %s\n", + prog, lineno, mdb_strerror(rc)); + goto env_close; + } + rc = mdb_txn_begin(env, NULL, 0, &txn); + if (rc) { + fprintf(stderr, "mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc)); + goto env_close; + } + rc = mdb_cursor_open(txn, dbi, &mc); + if (rc) { + fprintf(stderr, "mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc)); + goto txn_abort; + } + batch = 0; + } + } + rc = mdb_txn_commit(txn); + txn = NULL; + if (rc) { + fprintf(stderr, "%s: line %" Z "d: txn_commit: %s\n", + prog, lineno, mdb_strerror(rc)); + goto env_close; + } + mdb_dbi_close(env, dbi); + } + +txn_abort: + mdb_txn_abort(txn); +env_close: + mdb_env_close(env); + + return rc ? EXIT_FAILURE : EXIT_SUCCESS; +} diff --git a/external/db_drivers/liblmdb32/mdb_stat.1 b/external/db_drivers/liblmdb32/mdb_stat.1 new file mode 100644 index 000000000..3d8d461d9 --- /dev/null +++ b/external/db_drivers/liblmdb32/mdb_stat.1 @@ -0,0 +1,64 @@ +.TH MDB_STAT 1 "2014/06/20" "LMDB 0.9.14" +.\" Copyright 2012-2014 Howard Chu, Symas Corp. All Rights Reserved. +.\" Copying restrictions apply. See COPYRIGHT/LICENSE. +.SH NAME +mdb_stat \- LMDB environment status tool +.SH SYNOPSIS +.B mdb_stat +.BR \ envpath +[\c +.BR \-V ] +[\c +.BR \-e ] +[\c +.BR \-f [ f [ f ]]] +[\c +.BR \-n ] +[\c +.BR \-r [ r ]] +[\c +.BR \-a \ | +.BI \-s \ subdb\fR] +.SH DESCRIPTION +The +.B mdb_stat +utility displays the status of an LMDB environment. +.SH OPTIONS +.TP +.BR \-V +Write the library version number to the standard output, and exit. +.TP +.BR \-e +Display information about the database environment. +.TP +.BR \-f +Display information about the environment freelist. +If \fB\-ff\fP is given, summarize each freelist entry. +If \fB\-fff\fP is given, display the full list of page IDs in the freelist. +.TP +.BR \-n +Display the status of an LMDB database which does not use subdirectories. +.TP +.BR \-r +Display information about the environment reader table. +Shows the process ID, thread ID, and transaction ID for each active +reader slot. The process ID and transaction ID are in decimal, the +thread ID is in hexadecimal. The transaction ID is displayed as "-" +if the reader does not currently have a read transaction open. +If \fB\-rr\fP is given, check for stale entries in the reader +table and clear them. The reader table will be printed again +after the check is performed. +.TP +.BR \-a +Display the status of all of the subdatabases in the environment. +.TP +.BR \-s \ subdb +Display the status of a specific subdatabase. +.SH DIAGNOSTICS +Exit status is zero if no errors occur. +Errors result in a non-zero exit status and +a diagnostic message being written to standard error. +.SH "SEE ALSO" +.BR mdb_copy (1) +.SH AUTHOR +Howard Chu of Symas Corporation <http://www.symas.com> diff --git a/external/db_drivers/liblmdb32/mdb_stat.c b/external/db_drivers/liblmdb32/mdb_stat.c new file mode 100644 index 000000000..1e9229296 --- /dev/null +++ b/external/db_drivers/liblmdb32/mdb_stat.c @@ -0,0 +1,263 @@ +/* mdb_stat.c - memory-mapped database status tool */ +/* + * Copyright 2011-2013 Howard Chu, Symas Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>. + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include "lmdb.h" + +#ifdef _WIN32 +#define Z "I" +#else +#define Z "z" +#endif + +static void prstat(MDB_stat *ms) +{ +#if 0 + printf(" Page size: %u\n", ms->ms_psize); +#endif + printf(" Tree depth: %u\n", ms->ms_depth); + printf(" Branch pages: %"Z"u\n", ms->ms_branch_pages); + printf(" Leaf pages: %"Z"u\n", ms->ms_leaf_pages); + printf(" Overflow pages: %"Z"u\n", ms->ms_overflow_pages); + printf(" Entries: %"Z"u\n", ms->ms_entries); +} + +static void usage(char *prog) +{ + fprintf(stderr, "usage: %s dbpath [-V] [-n] [-e] [-r[r]] [-f[f[f]]] [-a|-s subdb]\n", prog); + exit(EXIT_FAILURE); +} + +int main(int argc, char *argv[]) +{ + int i, rc; + MDB_env *env; + MDB_txn *txn; + MDB_dbi dbi; + MDB_stat mst; + MDB_envinfo mei; + char *prog = argv[0]; + char *envname; + char *subname = NULL; + int alldbs = 0, envinfo = 0, envflags = 0, freinfo = 0, rdrinfo = 0; + + if (argc < 2) { + usage(prog); + } + + /* -a: print stat of main DB and all subDBs + * -s: print stat of only the named subDB + * -e: print env info + * -f: print freelist info + * -r: print reader info + * -n: use NOSUBDIR flag on env_open + * -V: print version and exit + * (default) print stat of only the main DB + */ + while ((i = getopt(argc, argv, "Vaefnrs:")) != EOF) { + switch(i) { + case 'V': + printf("%s\n", MDB_VERSION_STRING); + exit(0); + break; + case 'a': + if (subname) + usage(prog); + alldbs++; + break; + case 'e': + envinfo++; + break; + case 'f': + freinfo++; + break; + case 'n': + envflags |= MDB_NOSUBDIR; + break; + case 'r': + rdrinfo++; + break; + case 's': + if (alldbs) + usage(prog); + subname = optarg; + break; + default: + usage(prog); + } + } + + if (optind != argc - 1) + usage(prog); + + envname = argv[optind]; + rc = mdb_env_create(&env); + if (rc) { + fprintf(stderr, "mdb_env_create failed, error %d %s\n", rc, mdb_strerror(rc)); + return EXIT_FAILURE; + } + + if (alldbs || subname) { + mdb_env_set_maxdbs(env, 4); + } + + rc = mdb_env_open(env, envname, envflags | MDB_RDONLY, 0664); + if (rc) { + fprintf(stderr, "mdb_env_open failed, error %d %s\n", rc, mdb_strerror(rc)); + goto env_close; + } + + if (envinfo) { + (void)mdb_env_stat(env, &mst); + (void)mdb_env_info(env, &mei); + printf("Environment Info\n"); + printf(" Map address: %p\n", mei.me_mapaddr); + printf(" Map size: %"Z"u\n", mei.me_mapsize); + printf(" Page size: %u\n", mst.ms_psize); + printf(" Max pages: %"Z"u\n", mei.me_mapsize / mst.ms_psize); + printf(" Number of pages used: %"Z"u\n", mei.me_last_pgno+1); + printf(" Last transaction ID: %"Z"u\n", mei.me_last_txnid); + printf(" Max readers: %u\n", mei.me_maxreaders); + printf(" Number of readers used: %u\n", mei.me_numreaders); + } + + if (rdrinfo) { + printf("Reader Table Status\n"); + rc = mdb_reader_list(env, (MDB_msg_func *)fputs, stdout); + if (rdrinfo > 1) { + int dead; + mdb_reader_check(env, &dead); + printf(" %d stale readers cleared.\n", dead); + rc = mdb_reader_list(env, (MDB_msg_func *)fputs, stdout); + } + if (!(subname || alldbs || freinfo)) + goto env_close; + } + + rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn); + if (rc) { + fprintf(stderr, "mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc)); + goto env_close; + } + + if (freinfo) { + MDB_cursor *cursor; + MDB_val key, data; + size_t pages = 0, *iptr; + + printf("Freelist Status\n"); + dbi = 0; + rc = mdb_cursor_open(txn, dbi, &cursor); + if (rc) { + fprintf(stderr, "mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc)); + goto txn_abort; + } + rc = mdb_stat(txn, dbi, &mst); + if (rc) { + fprintf(stderr, "mdb_stat failed, error %d %s\n", rc, mdb_strerror(rc)); + goto txn_abort; + } + prstat(&mst); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { + iptr = data.mv_data; + pages += *iptr; + if (freinfo > 1) { + char *bad = ""; + size_t pg, prev; + ssize_t i, j, span = 0; + j = *iptr++; + for (i = j, prev = 1; --i >= 0; ) { + pg = iptr[i]; + if (pg <= prev) + bad = " [bad sequence]"; + prev = pg; + pg += span; + for (; i >= span && iptr[i-span] == pg; span++, pg++) ; + } + printf(" Transaction %"Z"u, %"Z"d pages, maxspan %"Z"d%s\n", + *(size_t *)key.mv_data, j, span, bad); + if (freinfo > 2) { + for (--j; j >= 0; ) { + pg = iptr[j]; + for (span=1; --j >= 0 && iptr[j] == pg+span; span++) ; + printf(span>1 ? " %9"Z"u[%"Z"d]\n" : " %9"Z"u\n", + pg, span); + } + } + } + } + mdb_cursor_close(cursor); + printf(" Free pages: %"Z"u\n", pages); + } + + rc = mdb_open(txn, subname, 0, &dbi); + if (rc) { + fprintf(stderr, "mdb_open failed, error %d %s\n", rc, mdb_strerror(rc)); + goto txn_abort; + } + + rc = mdb_stat(txn, dbi, &mst); + if (rc) { + fprintf(stderr, "mdb_stat failed, error %d %s\n", rc, mdb_strerror(rc)); + goto txn_abort; + } + printf("Status of %s\n", subname ? subname : "Main DB"); + prstat(&mst); + + if (alldbs) { + MDB_cursor *cursor; + MDB_val key; + + rc = mdb_cursor_open(txn, dbi, &cursor); + if (rc) { + fprintf(stderr, "mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc)); + goto txn_abort; + } + while ((rc = mdb_cursor_get(cursor, &key, NULL, MDB_NEXT_NODUP)) == 0) { + char *str; + MDB_dbi db2; + if (memchr(key.mv_data, '\0', key.mv_size)) + continue; + str = malloc(key.mv_size+1); + memcpy(str, key.mv_data, key.mv_size); + str[key.mv_size] = '\0'; + rc = mdb_open(txn, str, 0, &db2); + if (rc == MDB_SUCCESS) + printf("Status of %s\n", str); + free(str); + if (rc) continue; + rc = mdb_stat(txn, db2, &mst); + if (rc) { + fprintf(stderr, "mdb_stat failed, error %d %s\n", rc, mdb_strerror(rc)); + goto txn_abort; + } + prstat(&mst); + mdb_close(env, db2); + } + mdb_cursor_close(cursor); + } + + if (rc == MDB_NOTFOUND) + rc = MDB_SUCCESS; + + mdb_close(env, dbi); +txn_abort: + mdb_txn_abort(txn); +env_close: + mdb_env_close(env); + + return rc ? EXIT_FAILURE : EXIT_SUCCESS; +} diff --git a/external/db_drivers/liblmdb32/midl.c b/external/db_drivers/liblmdb32/midl.c new file mode 100644 index 000000000..88a3aff10 --- /dev/null +++ b/external/db_drivers/liblmdb32/midl.c @@ -0,0 +1,360 @@ +/** @file midl.c + * @brief ldap bdb back-end ID List functions */ +/* $OpenLDAP$ */ +/* This work is part of OpenLDAP Software <http://www.openldap.org/>. + * + * Copyright 2000-2014 The OpenLDAP Foundation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>. + */ + +#include <limits.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <sys/types.h> +#include "midl.h" + +/** @defgroup internal LMDB Internals + * @{ + */ +/** @defgroup idls ID List Management + * @{ + */ +#define CMP(x,y) ( (x) < (y) ? -1 : (x) > (y) ) + +unsigned mdb_midl_search( MDB_IDL ids, MDB_ID id ) +{ + /* + * binary search of id in ids + * if found, returns position of id + * if not found, returns first position greater than id + */ + unsigned base = 0; + unsigned cursor = 1; + int val = 0; + unsigned n = ids[0]; + + while( 0 < n ) { + unsigned pivot = n >> 1; + cursor = base + pivot + 1; + val = CMP( ids[cursor], id ); + + if( val < 0 ) { + n = pivot; + + } else if ( val > 0 ) { + base = cursor; + n -= pivot + 1; + + } else { + return cursor; + } + } + + if( val > 0 ) { + ++cursor; + } + return cursor; +} + +#if 0 /* superseded by append/sort */ +int mdb_midl_insert( MDB_IDL ids, MDB_ID id ) +{ + unsigned x, i; + + x = mdb_midl_search( ids, id ); + assert( x > 0 ); + + if( x < 1 ) { + /* internal error */ + return -2; + } + + if ( x <= ids[0] && ids[x] == id ) { + /* duplicate */ + assert(0); + return -1; + } + + if ( ++ids[0] >= MDB_IDL_DB_MAX ) { + /* no room */ + --ids[0]; + return -2; + + } else { + /* insert id */ + for (i=ids[0]; i>x; i--) + ids[i] = ids[i-1]; + ids[x] = id; + } + + return 0; +} +#endif + +MDB_IDL mdb_midl_alloc(int num) +{ + MDB_IDL ids = malloc((num+2) * sizeof(MDB_ID)); + if (ids) { + *ids++ = num; + *ids = 0; + } + return ids; +} + +void mdb_midl_free(MDB_IDL ids) +{ + if (ids) + free(ids-1); +} + +int mdb_midl_shrink( MDB_IDL *idp ) +{ + MDB_IDL ids = *idp; + if (*(--ids) > MDB_IDL_UM_MAX && + (ids = realloc(ids, (MDB_IDL_UM_MAX+1) * sizeof(MDB_ID)))) + { + *ids++ = MDB_IDL_UM_MAX; + *idp = ids; + return 1; + } + return 0; +} + +static int mdb_midl_grow( MDB_IDL *idp, int num ) +{ + MDB_IDL idn = *idp-1; + /* grow it */ + idn = realloc(idn, (*idn + num + 2) * sizeof(MDB_ID)); + if (!idn) + return ENOMEM; + *idn++ += num; + *idp = idn; + return 0; +} + +int mdb_midl_need( MDB_IDL *idp, unsigned num ) +{ + MDB_IDL ids = *idp; + num += ids[0]; + if (num > ids[-1]) { + num = (num + num/4 + (256 + 2)) & -256; + if (!(ids = realloc(ids-1, num * sizeof(MDB_ID)))) + return ENOMEM; + *ids++ = num - 2; + *idp = ids; + } + return 0; +} + +int mdb_midl_append( MDB_IDL *idp, MDB_ID id ) +{ + MDB_IDL ids = *idp; + /* Too big? */ + if (ids[0] >= ids[-1]) { + if (mdb_midl_grow(idp, MDB_IDL_UM_MAX)) + return ENOMEM; + ids = *idp; + } + ids[0]++; + ids[ids[0]] = id; + return 0; +} + +int mdb_midl_append_list( MDB_IDL *idp, MDB_IDL app ) +{ + MDB_IDL ids = *idp; + /* Too big? */ + if (ids[0] + app[0] >= ids[-1]) { + if (mdb_midl_grow(idp, app[0])) + return ENOMEM; + ids = *idp; + } + memcpy(&ids[ids[0]+1], &app[1], app[0] * sizeof(MDB_ID)); + ids[0] += app[0]; + return 0; +} + +int mdb_midl_append_range( MDB_IDL *idp, MDB_ID id, unsigned n ) +{ + MDB_ID *ids = *idp, len = ids[0]; + /* Too big? */ + if (len + n > ids[-1]) { + if (mdb_midl_grow(idp, n | MDB_IDL_UM_MAX)) + return ENOMEM; + ids = *idp; + } + ids[0] = len + n; + ids += len; + while (n) + ids[n--] = id++; + return 0; +} + +void mdb_midl_xmerge( MDB_IDL idl, MDB_IDL merge ) +{ + MDB_ID old_id, merge_id, i = merge[0], j = idl[0], k = i+j, total = k; + idl[0] = (MDB_ID)-1; /* delimiter for idl scan below */ + old_id = idl[j]; + while (i) { + merge_id = merge[i--]; + for (; old_id < merge_id; old_id = idl[--j]) + idl[k--] = old_id; + idl[k--] = merge_id; + } + idl[0] = total; +} + +/* Quicksort + Insertion sort for small arrays */ + +#define SMALL 8 +#define MIDL_SWAP(a,b) { itmp=(a); (a)=(b); (b)=itmp; } + +void +mdb_midl_sort( MDB_IDL ids ) +{ + /* Max possible depth of int-indexed tree * 2 items/level */ + int istack[sizeof(int)*CHAR_BIT * 2]; + int i,j,k,l,ir,jstack; + MDB_ID a, itmp; + + ir = (int)ids[0]; + l = 1; + jstack = 0; + for(;;) { + if (ir - l < SMALL) { /* Insertion sort */ + for (j=l+1;j<=ir;j++) { + a = ids[j]; + for (i=j-1;i>=1;i--) { + if (ids[i] >= a) break; + ids[i+1] = ids[i]; + } + ids[i+1] = a; + } + if (jstack == 0) break; + ir = istack[jstack--]; + l = istack[jstack--]; + } else { + k = (l + ir) >> 1; /* Choose median of left, center, right */ + MIDL_SWAP(ids[k], ids[l+1]); + if (ids[l] < ids[ir]) { + MIDL_SWAP(ids[l], ids[ir]); + } + if (ids[l+1] < ids[ir]) { + MIDL_SWAP(ids[l+1], ids[ir]); + } + if (ids[l] < ids[l+1]) { + MIDL_SWAP(ids[l], ids[l+1]); + } + i = l+1; + j = ir; + a = ids[l+1]; + for(;;) { + do i++; while(ids[i] > a); + do j--; while(ids[j] < a); + if (j < i) break; + MIDL_SWAP(ids[i],ids[j]); + } + ids[l+1] = ids[j]; + ids[j] = a; + jstack += 2; + if (ir-i+1 >= j-l) { + istack[jstack] = ir; + istack[jstack-1] = i; + ir = j-1; + } else { + istack[jstack] = j-1; + istack[jstack-1] = l; + l = i; + } + } + } +} + +unsigned mdb_mid2l_search( MDB_ID2L ids, MDB_ID id ) +{ + /* + * binary search of id in ids + * if found, returns position of id + * if not found, returns first position greater than id + */ + unsigned base = 0; + unsigned cursor = 1; + int val = 0; + unsigned n = (unsigned)ids[0].mid; + + while( 0 < n ) { + unsigned pivot = n >> 1; + cursor = base + pivot + 1; + val = CMP( id, ids[cursor].mid ); + + if( val < 0 ) { + n = pivot; + + } else if ( val > 0 ) { + base = cursor; + n -= pivot + 1; + + } else { + return cursor; + } + } + + if( val > 0 ) { + ++cursor; + } + return cursor; +} + +int mdb_mid2l_insert( MDB_ID2L ids, MDB_ID2 *id ) +{ + unsigned x, i; + + x = mdb_mid2l_search( ids, id->mid ); + + if( x < 1 ) { + /* internal error */ + return -2; + } + + if ( x <= ids[0].mid && ids[x].mid == id->mid ) { + /* duplicate */ + return -1; + } + + if ( ids[0].mid >= MDB_IDL_UM_MAX ) { + /* too big */ + return -2; + + } else { + /* insert id */ + ids[0].mid++; + for (i=(unsigned)ids[0].mid; i>x; i--) + ids[i] = ids[i-1]; + ids[x] = *id; + } + + return 0; +} + +int mdb_mid2l_append( MDB_ID2L ids, MDB_ID2 *id ) +{ + /* Too big? */ + if (ids[0].mid >= MDB_IDL_UM_MAX) { + return -2; + } + ids[0].mid++; + ids[ids[0].mid] = *id; + return 0; +} + +/** @} */ +/** @} */ diff --git a/external/db_drivers/liblmdb32/midl.h b/external/db_drivers/liblmdb32/midl.h new file mode 100644 index 000000000..f23d7d3f3 --- /dev/null +++ b/external/db_drivers/liblmdb32/midl.h @@ -0,0 +1,190 @@ +/** @file midl.h + * @brief LMDB ID List header file. + * + * This file was originally part of back-bdb but has been + * modified for use in libmdb. Most of the macros defined + * in this file are unused, just left over from the original. + * + * This file is only used internally in libmdb and its definitions + * are not exposed publicly. + */ +/* $OpenLDAP$ */ +/* This work is part of OpenLDAP Software <http://www.openldap.org/>. + * + * Copyright 2000-2014 The OpenLDAP Foundation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>. + */ + +#ifndef _MDB_MIDL_H_ +#define _MDB_MIDL_H_ + +#include <stddef.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** @defgroup internal LMDB Internals + * @{ + */ + +/** @defgroup idls ID List Management + * @{ + */ + /** A generic unsigned ID number. These were entryIDs in back-bdb. + * Preferably it should have the same size as a pointer. + */ +typedef size_t MDB_ID; + + /** An IDL is an ID List, a sorted array of IDs. The first + * element of the array is a counter for how many actual + * IDs are in the list. In the original back-bdb code, IDLs are + * sorted in ascending order. For libmdb IDLs are sorted in + * descending order. + */ +typedef MDB_ID *MDB_IDL; + +/* IDL sizes - likely should be even bigger + * limiting factors: sizeof(ID), thread stack size + */ +#ifdef VL32 +#define MDB_IDL_LOGN 10 /* DB_SIZE is 2^10, UM_SIZE is 2^11 */ +#else +#define MDB_IDL_LOGN 16 /* DB_SIZE is 2^16, UM_SIZE is 2^17 */ +#endif +#define MDB_IDL_DB_SIZE (1<<MDB_IDL_LOGN) +#define MDB_IDL_UM_SIZE (1<<(MDB_IDL_LOGN+1)) + +#define MDB_IDL_DB_MAX (MDB_IDL_DB_SIZE-1) +#define MDB_IDL_UM_MAX (MDB_IDL_UM_SIZE-1) + +#define MDB_IDL_SIZEOF(ids) (((ids)[0]+1) * sizeof(MDB_ID)) +#define MDB_IDL_IS_ZERO(ids) ( (ids)[0] == 0 ) +#define MDB_IDL_CPY( dst, src ) (memcpy( dst, src, MDB_IDL_SIZEOF( src ) )) +#define MDB_IDL_FIRST( ids ) ( (ids)[1] ) +#define MDB_IDL_LAST( ids ) ( (ids)[(ids)[0]] ) + + /** Current max length of an #mdb_midl_alloc()ed IDL */ +#define MDB_IDL_ALLOCLEN( ids ) ( (ids)[-1] ) + + /** Append ID to IDL. The IDL must be big enough. */ +#define mdb_midl_xappend(idl, id) do { \ + MDB_ID *xidl = (idl), xlen = ++(xidl[0]); \ + xidl[xlen] = (id); \ + } while (0) + + /** Search for an ID in an IDL. + * @param[in] ids The IDL to search. + * @param[in] id The ID to search for. + * @return The index of the first ID greater than or equal to \b id. + */ +unsigned mdb_midl_search( MDB_IDL ids, MDB_ID id ); + + /** Allocate an IDL. + * Allocates memory for an IDL of the given size. + * @return IDL on success, NULL on failure. + */ +MDB_IDL mdb_midl_alloc(int num); + + /** Free an IDL. + * @param[in] ids The IDL to free. + */ +void mdb_midl_free(MDB_IDL ids); + + /** Shrink an IDL. + * Return the IDL to the default size if it has grown larger. + * @param[in,out] idp Address of the IDL to shrink. + * @return 0 on no change, non-zero if shrunk. + */ +int mdb_midl_shrink(MDB_IDL *idp); + + /** Make room for num additional elements in an IDL. + * @param[in,out] idp Address of the IDL. + * @param[in] num Number of elements to make room for. + * @return 0 on success, ENOMEM on failure. + */ +int mdb_midl_need(MDB_IDL *idp, unsigned num); + + /** Append an ID onto an IDL. + * @param[in,out] idp Address of the IDL to append to. + * @param[in] id The ID to append. + * @return 0 on success, ENOMEM if the IDL is too large. + */ +int mdb_midl_append( MDB_IDL *idp, MDB_ID id ); + + /** Append an IDL onto an IDL. + * @param[in,out] idp Address of the IDL to append to. + * @param[in] app The IDL to append. + * @return 0 on success, ENOMEM if the IDL is too large. + */ +int mdb_midl_append_list( MDB_IDL *idp, MDB_IDL app ); + + /** Append an ID range onto an IDL. + * @param[in,out] idp Address of the IDL to append to. + * @param[in] id The lowest ID to append. + * @param[in] n Number of IDs to append. + * @return 0 on success, ENOMEM if the IDL is too large. + */ +int mdb_midl_append_range( MDB_IDL *idp, MDB_ID id, unsigned n ); + + /** Merge an IDL onto an IDL. The destination IDL must be big enough. + * @param[in] idl The IDL to merge into. + * @param[in] merge The IDL to merge. + */ +void mdb_midl_xmerge( MDB_IDL idl, MDB_IDL merge ); + + /** Sort an IDL. + * @param[in,out] ids The IDL to sort. + */ +void mdb_midl_sort( MDB_IDL ids ); + + /** An ID2 is an ID/pointer pair. + */ +typedef struct MDB_ID2 { + MDB_ID mid; /**< The ID */ + void *mptr; /**< The pointer */ +} MDB_ID2; + + /** An ID2L is an ID2 List, a sorted array of ID2s. + * The first element's \b mid member is a count of how many actual + * elements are in the array. The \b mptr member of the first element is unused. + * The array is sorted in ascending order by \b mid. + */ +typedef MDB_ID2 *MDB_ID2L; + + /** Search for an ID in an ID2L. + * @param[in] ids The ID2L to search. + * @param[in] id The ID to search for. + * @return The index of the first ID2 whose \b mid member is greater than or equal to \b id. + */ +unsigned mdb_mid2l_search( MDB_ID2L ids, MDB_ID id ); + + + /** Insert an ID2 into a ID2L. + * @param[in,out] ids The ID2L to insert into. + * @param[in] id The ID2 to insert. + * @return 0 on success, -1 if the ID was already present in the ID2L. + */ +int mdb_mid2l_insert( MDB_ID2L ids, MDB_ID2 *id ); + + /** Append an ID2 into a ID2L. + * @param[in,out] ids The ID2L to append into. + * @param[in] id The ID2 to append. + * @return 0 on success, -2 if the ID2L is too big. + */ +int mdb_mid2l_append( MDB_ID2L ids, MDB_ID2 *id ); + +/** @} */ +/** @} */ +#ifdef __cplusplus +} +#endif +#endif /* _MDB_MIDL_H_ */ diff --git a/external/db_drivers/liblmdb32/mtest.c b/external/db_drivers/liblmdb32/mtest.c new file mode 100644 index 000000000..2d4e5e832 --- /dev/null +++ b/external/db_drivers/liblmdb32/mtest.c @@ -0,0 +1,173 @@ +/* mtest.c - memory-mapped database tester/toy */ +/* + * Copyright 2011-2014 Howard Chu, Symas Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>. + */ +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include "lmdb.h" + +#define E(expr) CHECK((rc = (expr)) == MDB_SUCCESS, #expr) +#define RES(err, expr) ((rc = expr) == (err) || (CHECK(!rc, #expr), 0)) +#define CHECK(test, msg) ((test) ? (void)0 : ((void)fprintf(stderr, \ + "%s:%d: %s: %s\n", __FILE__, __LINE__, msg, mdb_strerror(rc)), abort())) + +int main(int argc,char * argv[]) +{ + int i = 0, j = 0, rc; + MDB_env *env; + MDB_dbi dbi; + MDB_val key, data; + MDB_txn *txn; + MDB_stat mst; + MDB_cursor *cursor, *cur2; + MDB_cursor_op op; + int count; + int *values; + char sval[32] = ""; + + srand(time(NULL)); + + count = (rand()%384) + 64; + values = (int *)malloc(count*sizeof(int)); + + for(i = 0;i<count;i++) { + values[i] = rand()%1024; + } + + E(mdb_env_create(&env)); + E(mdb_env_set_mapsize(env, 10485760)); + E(mdb_env_open(env, "./testdb", 0 /* MDB_FIXEDMAP |MDB_NOSYNC*/, 0664)); + E(mdb_txn_begin(env, NULL, 0, &txn)); + E(mdb_open(txn, NULL, 0, &dbi)); + + key.mv_size = sizeof(int); + key.mv_data = sval; + data.mv_size = sizeof(sval); + data.mv_data = sval; + + printf("Adding %d values\n", count); + for (i=0;i<count;i++) { + sprintf(sval, "%03x %d foo bar", values[i], values[i]); + if (RES(MDB_KEYEXIST, mdb_put(txn, dbi, &key, &data, MDB_NOOVERWRITE))) { + j++; + data.mv_size = sizeof(sval); + data.mv_data = sval; + } + } + if (j) printf("%d duplicates skipped\n", j); + E(mdb_txn_commit(txn)); + E(mdb_env_stat(env, &mst)); + + E(mdb_txn_begin(env, NULL, 1, &txn)); + E(mdb_cursor_open(txn, dbi, &cursor)); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { + printf("key: %p %.*s, data: %p %.*s\n", + key.mv_data, (int) key.mv_size, (char *) key.mv_data, + data.mv_data, (int) data.mv_size, (char *) data.mv_data); + } + CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get"); + mdb_cursor_close(cursor); + mdb_txn_abort(txn); + + j=0; + key.mv_data = sval; + for (i= count - 1; i > -1; i-= (rand()%5)) { + j++; + txn=NULL; + E(mdb_txn_begin(env, NULL, 0, &txn)); + sprintf(sval, "%03x ", values[i]); + if (RES(MDB_NOTFOUND, mdb_del(txn, dbi, &key, NULL))) { + j--; + mdb_txn_abort(txn); + } else { + E(mdb_txn_commit(txn)); + } + } + free(values); + printf("Deleted %d values\n", j); + + E(mdb_env_stat(env, &mst)); + E(mdb_txn_begin(env, NULL, 1, &txn)); + E(mdb_cursor_open(txn, dbi, &cursor)); + printf("Cursor next\n"); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + } + CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get"); + printf("Cursor last\n"); + E(mdb_cursor_get(cursor, &key, &data, MDB_LAST)); + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + printf("Cursor prev\n"); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_PREV)) == 0) { + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + } + CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get"); + printf("Cursor last/prev\n"); + E(mdb_cursor_get(cursor, &key, &data, MDB_LAST)); + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + E(mdb_cursor_get(cursor, &key, &data, MDB_PREV)); + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + + mdb_txn_abort(txn); + + printf("Deleting with cursor\n"); + E(mdb_txn_begin(env, NULL, 0, &txn)); + E(mdb_cursor_open(txn, dbi, &cur2)); + for (i=0; i<50; i++) { + if (RES(MDB_NOTFOUND, mdb_cursor_get(cur2, &key, &data, MDB_NEXT))) + break; + printf("key: %p %.*s, data: %p %.*s\n", + key.mv_data, (int) key.mv_size, (char *) key.mv_data, + data.mv_data, (int) data.mv_size, (char *) data.mv_data); + E(mdb_del(txn, dbi, &key, NULL)); + } + + printf("Restarting cursor in txn\n"); + for (op=MDB_FIRST, i=0; i<=32; op=MDB_NEXT, i++) { + if (RES(MDB_NOTFOUND, mdb_cursor_get(cur2, &key, &data, op))) + break; + printf("key: %p %.*s, data: %p %.*s\n", + key.mv_data, (int) key.mv_size, (char *) key.mv_data, + data.mv_data, (int) data.mv_size, (char *) data.mv_data); + } + mdb_cursor_close(cur2); + E(mdb_txn_commit(txn)); + + printf("Restarting cursor outside txn\n"); + E(mdb_txn_begin(env, NULL, 0, &txn)); + E(mdb_cursor_open(txn, dbi, &cursor)); + for (op=MDB_FIRST, i=0; i<=32; op=MDB_NEXT, i++) { + if (RES(MDB_NOTFOUND, mdb_cursor_get(cursor, &key, &data, op))) + break; + printf("key: %p %.*s, data: %p %.*s\n", + key.mv_data, (int) key.mv_size, (char *) key.mv_data, + data.mv_data, (int) data.mv_size, (char *) data.mv_data); + } + mdb_cursor_close(cursor); + mdb_close(env, dbi); + + mdb_txn_abort(txn); + mdb_env_close(env); + + return 0; +} diff --git a/external/db_drivers/liblmdb32/mtest2.c b/external/db_drivers/liblmdb32/mtest2.c new file mode 100644 index 000000000..f1a3dbd6c --- /dev/null +++ b/external/db_drivers/liblmdb32/mtest2.c @@ -0,0 +1,123 @@ +/* mtest2.c - memory-mapped database tester/toy */ +/* + * Copyright 2011-2014 Howard Chu, Symas Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>. + */ + +/* Just like mtest.c, but using a subDB instead of the main DB */ + +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include "lmdb.h" + +#define E(expr) CHECK((rc = (expr)) == MDB_SUCCESS, #expr) +#define RES(err, expr) ((rc = expr) == (err) || (CHECK(!rc, #expr), 0)) +#define CHECK(test, msg) ((test) ? (void)0 : ((void)fprintf(stderr, \ + "%s:%d: %s: %s\n", __FILE__, __LINE__, msg, mdb_strerror(rc)), abort())) + +int main(int argc,char * argv[]) +{ + int i = 0, j = 0, rc; + MDB_env *env; + MDB_dbi dbi; + MDB_val key, data; + MDB_txn *txn; + MDB_stat mst; + MDB_cursor *cursor; + int count; + int *values; + char sval[32] = ""; + + srand(time(NULL)); + + count = (rand()%384) + 64; + values = (int *)malloc(count*sizeof(int)); + + for(i = 0;i<count;i++) { + values[i] = rand()%1024; + } + + E(mdb_env_create(&env)); + E(mdb_env_set_mapsize(env, 10485760)); + E(mdb_env_set_maxdbs(env, 4)); + E(mdb_env_open(env, "./testdb", MDB_FIXEDMAP|MDB_NOSYNC, 0664)); + E(mdb_txn_begin(env, NULL, 0, &txn)); + E(mdb_open(txn, "id1", MDB_CREATE, &dbi)); + + key.mv_size = sizeof(int); + key.mv_data = sval; + data.mv_size = sizeof(sval); + data.mv_data = sval; + + printf("Adding %d values\n", count); + for (i=0;i<count;i++) { + sprintf(sval, "%03x %d foo bar", values[i], values[i]); + if (RES(MDB_KEYEXIST, mdb_put(txn, dbi, &key, &data, MDB_NOOVERWRITE))) + j++; + } + if (j) printf("%d duplicates skipped\n", j); + E(mdb_txn_commit(txn)); + E(mdb_env_stat(env, &mst)); + + E(mdb_txn_begin(env, NULL, 1, &txn)); + E(mdb_cursor_open(txn, dbi, &cursor)); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { + printf("key: %p %.*s, data: %p %.*s\n", + key.mv_data, (int) key.mv_size, (char *) key.mv_data, + data.mv_data, (int) data.mv_size, (char *) data.mv_data); + } + CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get"); + mdb_cursor_close(cursor); + mdb_txn_abort(txn); + + j=0; + key.mv_data = sval; + for (i= count - 1; i > -1; i-= (rand()%5)) { + j++; + txn=NULL; + E(mdb_txn_begin(env, NULL, 0, &txn)); + sprintf(sval, "%03x ", values[i]); + if (RES(MDB_NOTFOUND, mdb_del(txn, dbi, &key, NULL))) { + j--; + mdb_txn_abort(txn); + } else { + E(mdb_txn_commit(txn)); + } + } + free(values); + printf("Deleted %d values\n", j); + + E(mdb_env_stat(env, &mst)); + E(mdb_txn_begin(env, NULL, 1, &txn)); + E(mdb_cursor_open(txn, dbi, &cursor)); + printf("Cursor next\n"); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + } + CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get"); + printf("Cursor prev\n"); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_PREV)) == 0) { + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + } + CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get"); + mdb_cursor_close(cursor); + mdb_close(env, dbi); + + mdb_txn_abort(txn); + mdb_env_close(env); + + return 0; +} diff --git a/external/db_drivers/liblmdb32/mtest3.c b/external/db_drivers/liblmdb32/mtest3.c new file mode 100644 index 000000000..f705c52de --- /dev/null +++ b/external/db_drivers/liblmdb32/mtest3.c @@ -0,0 +1,133 @@ +/* mtest3.c - memory-mapped database tester/toy */ +/* + * Copyright 2011-2014 Howard Chu, Symas Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>. + */ + +/* Tests for sorted duplicate DBs */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include "lmdb.h" + +#define E(expr) CHECK((rc = (expr)) == MDB_SUCCESS, #expr) +#define RES(err, expr) ((rc = expr) == (err) || (CHECK(!rc, #expr), 0)) +#define CHECK(test, msg) ((test) ? (void)0 : ((void)fprintf(stderr, \ + "%s:%d: %s: %s\n", __FILE__, __LINE__, msg, mdb_strerror(rc)), abort())) + +int main(int argc,char * argv[]) +{ + int i = 0, j = 0, rc; + MDB_env *env; + MDB_dbi dbi; + MDB_val key, data; + MDB_txn *txn; + MDB_stat mst; + MDB_cursor *cursor; + int count; + int *values; + char sval[32]; + char kval[sizeof(int)]; + + srand(time(NULL)); + + memset(sval, 0, sizeof(sval)); + + count = (rand()%384) + 64; + values = (int *)malloc(count*sizeof(int)); + + for(i = 0;i<count;i++) { + values[i] = rand()%1024; + } + + E(mdb_env_create(&env)); + E(mdb_env_set_mapsize(env, 10485760)); + E(mdb_env_set_maxdbs(env, 4)); + E(mdb_env_open(env, "./testdb", MDB_FIXEDMAP|MDB_NOSYNC, 0664)); + E(mdb_txn_begin(env, NULL, 0, &txn)); + E(mdb_open(txn, "id2", MDB_CREATE|MDB_DUPSORT, &dbi)); + + key.mv_size = sizeof(int); + key.mv_data = kval; + data.mv_size = sizeof(sval); + data.mv_data = sval; + + printf("Adding %d values\n", count); + for (i=0;i<count;i++) { + if (!(i & 0x0f)) + sprintf(kval, "%03x", values[i]); + sprintf(sval, "%03x %d foo bar", values[i], values[i]); + if (RES(MDB_KEYEXIST, mdb_put(txn, dbi, &key, &data, MDB_NODUPDATA))) + j++; + } + if (j) printf("%d duplicates skipped\n", j); + E(mdb_txn_commit(txn)); + E(mdb_env_stat(env, &mst)); + + E(mdb_txn_begin(env, NULL, 1, &txn)); + E(mdb_cursor_open(txn, dbi, &cursor)); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { + printf("key: %p %.*s, data: %p %.*s\n", + key.mv_data, (int) key.mv_size, (char *) key.mv_data, + data.mv_data, (int) data.mv_size, (char *) data.mv_data); + } + CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get"); + mdb_cursor_close(cursor); + mdb_txn_abort(txn); + + j=0; + + for (i= count - 1; i > -1; i-= (rand()%5)) { + j++; + txn=NULL; + E(mdb_txn_begin(env, NULL, 0, &txn)); + sprintf(kval, "%03x", values[i & ~0x0f]); + sprintf(sval, "%03x %d foo bar", values[i], values[i]); + key.mv_size = sizeof(int); + key.mv_data = kval; + data.mv_size = sizeof(sval); + data.mv_data = sval; + if (RES(MDB_NOTFOUND, mdb_del(txn, dbi, &key, &data))) { + j--; + mdb_txn_abort(txn); + } else { + E(mdb_txn_commit(txn)); + } + } + free(values); + printf("Deleted %d values\n", j); + + E(mdb_env_stat(env, &mst)); + E(mdb_txn_begin(env, NULL, 1, &txn)); + E(mdb_cursor_open(txn, dbi, &cursor)); + printf("Cursor next\n"); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + } + CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get"); + printf("Cursor prev\n"); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_PREV)) == 0) { + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + } + CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get"); + mdb_cursor_close(cursor); + mdb_close(env, dbi); + + mdb_txn_abort(txn); + mdb_env_close(env); + + return 0; +} diff --git a/external/db_drivers/liblmdb32/mtest4.c b/external/db_drivers/liblmdb32/mtest4.c new file mode 100644 index 000000000..da5a95304 --- /dev/null +++ b/external/db_drivers/liblmdb32/mtest4.c @@ -0,0 +1,168 @@ +/* mtest4.c - memory-mapped database tester/toy */ +/* + * Copyright 2011-2014 Howard Chu, Symas Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>. + */ + +/* Tests for sorted duplicate DBs with fixed-size keys */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include "lmdb.h" + +#define E(expr) CHECK((rc = (expr)) == MDB_SUCCESS, #expr) +#define RES(err, expr) ((rc = expr) == (err) || (CHECK(!rc, #expr), 0)) +#define CHECK(test, msg) ((test) ? (void)0 : ((void)fprintf(stderr, \ + "%s:%d: %s: %s\n", __FILE__, __LINE__, msg, mdb_strerror(rc)), abort())) + +int main(int argc,char * argv[]) +{ + int i = 0, j = 0, rc; + MDB_env *env; + MDB_dbi dbi; + MDB_val key, data; + MDB_txn *txn; + MDB_stat mst; + MDB_cursor *cursor; + int count; + int *values; + char sval[8]; + char kval[sizeof(int)]; + + memset(sval, 0, sizeof(sval)); + + count = 510; + values = (int *)malloc(count*sizeof(int)); + + for(i = 0;i<count;i++) { + values[i] = i*5; + } + + E(mdb_env_create(&env)); + E(mdb_env_set_mapsize(env, 10485760)); + E(mdb_env_set_maxdbs(env, 4)); + E(mdb_env_open(env, "./testdb", MDB_FIXEDMAP|MDB_NOSYNC, 0664)); + E(mdb_txn_begin(env, NULL, 0, &txn)); + E(mdb_open(txn, "id4", MDB_CREATE|MDB_DUPSORT|MDB_DUPFIXED, &dbi)); + + key.mv_size = sizeof(int); + key.mv_data = kval; + data.mv_size = sizeof(sval); + data.mv_data = sval; + + printf("Adding %d values\n", count); + strcpy(kval, "001"); + for (i=0;i<count;i++) { + sprintf(sval, "%07x", values[i]); + if (RES(MDB_KEYEXIST, mdb_put(txn, dbi, &key, &data, MDB_NODUPDATA))) + j++; + } + if (j) printf("%d duplicates skipped\n", j); + E(mdb_txn_commit(txn)); + E(mdb_env_stat(env, &mst)); + + /* there should be one full page of dups now. + */ + E(mdb_txn_begin(env, NULL, 1, &txn)); + E(mdb_cursor_open(txn, dbi, &cursor)); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { + printf("key: %p %.*s, data: %p %.*s\n", + key.mv_data, (int) key.mv_size, (char *) key.mv_data, + data.mv_data, (int) data.mv_size, (char *) data.mv_data); + } + CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get"); + mdb_cursor_close(cursor); + mdb_txn_abort(txn); + + /* test all 3 branches of split code: + * 1: new key in lower half + * 2: new key at split point + * 3: new key in upper half + */ + + key.mv_size = sizeof(int); + key.mv_data = kval; + data.mv_size = sizeof(sval); + data.mv_data = sval; + + sprintf(sval, "%07x", values[3]+1); + E(mdb_txn_begin(env, NULL, 0, &txn)); + (void)RES(MDB_KEYEXIST, mdb_put(txn, dbi, &key, &data, MDB_NODUPDATA)); + mdb_txn_abort(txn); + + sprintf(sval, "%07x", values[255]+1); + E(mdb_txn_begin(env, NULL, 0, &txn)); + (void)RES(MDB_KEYEXIST, mdb_put(txn, dbi, &key, &data, MDB_NODUPDATA)); + mdb_txn_abort(txn); + + sprintf(sval, "%07x", values[500]+1); + E(mdb_txn_begin(env, NULL, 0, &txn)); + (void)RES(MDB_KEYEXIST, mdb_put(txn, dbi, &key, &data, MDB_NODUPDATA)); + E(mdb_txn_commit(txn)); + + /* Try MDB_NEXT_MULTIPLE */ + E(mdb_txn_begin(env, NULL, 0, &txn)); + E(mdb_cursor_open(txn, dbi, &cursor)); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT_MULTIPLE)) == 0) { + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + } + CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get"); + mdb_cursor_close(cursor); + mdb_txn_abort(txn); + j=0; + + for (i= count - 1; i > -1; i-= (rand()%3)) { + j++; + txn=NULL; + E(mdb_txn_begin(env, NULL, 0, &txn)); + sprintf(sval, "%07x", values[i]); + key.mv_size = sizeof(int); + key.mv_data = kval; + data.mv_size = sizeof(sval); + data.mv_data = sval; + if (RES(MDB_NOTFOUND, mdb_del(txn, dbi, &key, &data))) { + j--; + mdb_txn_abort(txn); + } else { + E(mdb_txn_commit(txn)); + } + } + free(values); + printf("Deleted %d values\n", j); + + E(mdb_env_stat(env, &mst)); + E(mdb_txn_begin(env, NULL, 1, &txn)); + E(mdb_cursor_open(txn, dbi, &cursor)); + printf("Cursor next\n"); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + } + CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get"); + printf("Cursor prev\n"); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_PREV)) == 0) { + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + } + CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get"); + mdb_cursor_close(cursor); + mdb_close(env, dbi); + + mdb_txn_abort(txn); + mdb_env_close(env); + + return 0; +} diff --git a/external/db_drivers/liblmdb32/mtest5.c b/external/db_drivers/liblmdb32/mtest5.c new file mode 100644 index 000000000..39a8c728a --- /dev/null +++ b/external/db_drivers/liblmdb32/mtest5.c @@ -0,0 +1,135 @@ +/* mtest5.c - memory-mapped database tester/toy */ +/* + * Copyright 2011-2014 Howard Chu, Symas Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>. + */ + +/* Tests for sorted duplicate DBs using cursor_put */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include "lmdb.h" + +#define E(expr) CHECK((rc = (expr)) == MDB_SUCCESS, #expr) +#define RES(err, expr) ((rc = expr) == (err) || (CHECK(!rc, #expr), 0)) +#define CHECK(test, msg) ((test) ? (void)0 : ((void)fprintf(stderr, \ + "%s:%d: %s: %s\n", __FILE__, __LINE__, msg, mdb_strerror(rc)), abort())) + +int main(int argc,char * argv[]) +{ + int i = 0, j = 0, rc; + MDB_env *env; + MDB_dbi dbi; + MDB_val key, data; + MDB_txn *txn; + MDB_stat mst; + MDB_cursor *cursor; + int count; + int *values; + char sval[32]; + char kval[sizeof(int)]; + + srand(time(NULL)); + + memset(sval, 0, sizeof(sval)); + + count = (rand()%384) + 64; + values = (int *)malloc(count*sizeof(int)); + + for(i = 0;i<count;i++) { + values[i] = rand()%1024; + } + + E(mdb_env_create(&env)); + E(mdb_env_set_mapsize(env, 10485760)); + E(mdb_env_set_maxdbs(env, 4)); + E(mdb_env_open(env, "./testdb", MDB_FIXEDMAP|MDB_NOSYNC, 0664)); + E(mdb_txn_begin(env, NULL, 0, &txn)); + E(mdb_open(txn, "id2", MDB_CREATE|MDB_DUPSORT, &dbi)); + E(mdb_cursor_open(txn, dbi, &cursor)); + + key.mv_size = sizeof(int); + key.mv_data = kval; + data.mv_size = sizeof(sval); + data.mv_data = sval; + + printf("Adding %d values\n", count); + for (i=0;i<count;i++) { + if (!(i & 0x0f)) + sprintf(kval, "%03x", values[i]); + sprintf(sval, "%03x %d foo bar", values[i], values[i]); + if (RES(MDB_KEYEXIST, mdb_cursor_put(cursor, &key, &data, MDB_NODUPDATA))) + j++; + } + if (j) printf("%d duplicates skipped\n", j); + mdb_cursor_close(cursor); + E(mdb_txn_commit(txn)); + E(mdb_env_stat(env, &mst)); + + E(mdb_txn_begin(env, NULL, 1, &txn)); + E(mdb_cursor_open(txn, dbi, &cursor)); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { + printf("key: %p %.*s, data: %p %.*s\n", + key.mv_data, (int) key.mv_size, (char *) key.mv_data, + data.mv_data, (int) data.mv_size, (char *) data.mv_data); + } + CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get"); + mdb_cursor_close(cursor); + mdb_txn_abort(txn); + + j=0; + + for (i= count - 1; i > -1; i-= (rand()%5)) { + j++; + txn=NULL; + E(mdb_txn_begin(env, NULL, 0, &txn)); + sprintf(kval, "%03x", values[i & ~0x0f]); + sprintf(sval, "%03x %d foo bar", values[i], values[i]); + key.mv_size = sizeof(int); + key.mv_data = kval; + data.mv_size = sizeof(sval); + data.mv_data = sval; + if (RES(MDB_NOTFOUND, mdb_del(txn, dbi, &key, &data))) { + j--; + mdb_txn_abort(txn); + } else { + E(mdb_txn_commit(txn)); + } + } + free(values); + printf("Deleted %d values\n", j); + + E(mdb_env_stat(env, &mst)); + E(mdb_txn_begin(env, NULL, 1, &txn)); + E(mdb_cursor_open(txn, dbi, &cursor)); + printf("Cursor next\n"); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + } + CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get"); + printf("Cursor prev\n"); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_PREV)) == 0) { + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + } + CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get"); + mdb_cursor_close(cursor); + mdb_close(env, dbi); + + mdb_txn_abort(txn); + mdb_env_close(env); + + return 0; +} diff --git a/external/db_drivers/liblmdb32/mtest6.c b/external/db_drivers/liblmdb32/mtest6.c new file mode 100644 index 000000000..07d575893 --- /dev/null +++ b/external/db_drivers/liblmdb32/mtest6.c @@ -0,0 +1,137 @@ +/* mtest6.c - memory-mapped database tester/toy */ +/* + * Copyright 2011-2014 Howard Chu, Symas Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>. + */ + +/* Tests for DB splits and merges */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include "lmdb.h" + +#define E(expr) CHECK((rc = (expr)) == MDB_SUCCESS, #expr) +#define RES(err, expr) ((rc = expr) == (err) || (CHECK(!rc, #expr), 0)) +#define CHECK(test, msg) ((test) ? (void)0 : ((void)fprintf(stderr, \ + "%s:%d: %s: %s\n", __FILE__, __LINE__, msg, mdb_strerror(rc)), abort())) + +char dkbuf[1024]; + +int main(int argc,char * argv[]) +{ + int i = 0, j = 0, rc; + MDB_env *env; + MDB_dbi dbi; + MDB_val key, data; + MDB_txn *txn; + MDB_stat mst; + MDB_cursor *cursor; + int count; + int *values; + long kval; + char *sval; + + srand(time(NULL)); + + E(mdb_env_create(&env)); + E(mdb_env_set_mapsize(env, 10485760)); + E(mdb_env_set_maxdbs(env, 4)); + E(mdb_env_open(env, "./testdb", MDB_FIXEDMAP|MDB_NOSYNC, 0664)); + E(mdb_txn_begin(env, NULL, 0, &txn)); + E(mdb_open(txn, "id6", MDB_CREATE|MDB_INTEGERKEY, &dbi)); + E(mdb_cursor_open(txn, dbi, &cursor)); + E(mdb_stat(txn, dbi, &mst)); + + sval = calloc(1, mst.ms_psize / 4); + key.mv_size = sizeof(long); + key.mv_data = &kval; + data.mv_size = mst.ms_psize / 4 - 30; + data.mv_data = sval; + + printf("Adding 12 values, should yield 3 splits\n"); + for (i=0;i<12;i++) { + kval = i*5; + sprintf(sval, "%08x", kval); + (void)RES(MDB_KEYEXIST, mdb_cursor_put(cursor, &key, &data, MDB_NOOVERWRITE)); + } + printf("Adding 12 more values, should yield 3 splits\n"); + for (i=0;i<12;i++) { + kval = i*5+4; + sprintf(sval, "%08x", kval); + (void)RES(MDB_KEYEXIST, mdb_cursor_put(cursor, &key, &data, MDB_NOOVERWRITE)); + } + printf("Adding 12 more values, should yield 3 splits\n"); + for (i=0;i<12;i++) { + kval = i*5+1; + sprintf(sval, "%08x", kval); + (void)RES(MDB_KEYEXIST, mdb_cursor_put(cursor, &key, &data, MDB_NOOVERWRITE)); + } + E(mdb_cursor_get(cursor, &key, &data, MDB_FIRST)); + + do { + printf("key: %p %s, data: %p %.*s\n", + key.mv_data, mdb_dkey(&key, dkbuf), + data.mv_data, (int) data.mv_size, (char *) data.mv_data); + } while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0); + CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get"); + mdb_cursor_close(cursor); + mdb_txn_commit(txn); + +#if 0 + j=0; + + for (i= count - 1; i > -1; i-= (rand()%5)) { + j++; + txn=NULL; + E(mdb_txn_begin(env, NULL, 0, &txn)); + sprintf(kval, "%03x", values[i & ~0x0f]); + sprintf(sval, "%03x %d foo bar", values[i], values[i]); + key.mv_size = sizeof(int); + key.mv_data = kval; + data.mv_size = sizeof(sval); + data.mv_data = sval; + if (RES(MDB_NOTFOUND, mdb_del(txn, dbi, &key, &data))) { + j--; + mdb_txn_abort(txn); + } else { + E(mdb_txn_commit(txn)); + } + } + free(values); + printf("Deleted %d values\n", j); + + E(mdb_env_stat(env, &mst)); + E(mdb_txn_begin(env, NULL, 1, &txn)); + E(mdb_cursor_open(txn, dbi, &cursor)); + printf("Cursor next\n"); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + } + CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get"); + printf("Cursor prev\n"); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_PREV)) == 0) { + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + } + CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get"); + mdb_cursor_close(cursor); + mdb_close(env, dbi); + + mdb_txn_abort(txn); +#endif + mdb_env_close(env); + + return 0; +} diff --git a/external/db_drivers/liblmdb32/sample-bdb.txt b/external/db_drivers/liblmdb32/sample-bdb.txt new file mode 100644 index 000000000..6a959bd68 --- /dev/null +++ b/external/db_drivers/liblmdb32/sample-bdb.txt @@ -0,0 +1,73 @@ +/* sample-bdb.txt - BerkeleyDB toy/sample + * + * Do a line-by-line comparison of this and sample-mdb.txt + */ +/* + * Copyright 2012 Howard Chu, Symas Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>. + */ +#include <stdio.h> +#include <string.h> +#include <db.h> + +int main(int argc,char * argv[]) +{ + int rc; + DB_ENV *env; + DB *dbi; + DBT key, data; + DB_TXN *txn; + DBC *cursor; + char sval[32], kval[32]; + + /* Note: Most error checking omitted for simplicity */ + +#define FLAGS (DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_INIT_MPOOL|DB_CREATE|DB_THREAD) + rc = db_env_create(&env, 0); + rc = env->open(env, "./testdb", FLAGS, 0664); + rc = db_create(&dbi, env, 0); + rc = env->txn_begin(env, NULL, &txn, 0); + rc = dbi->open(dbi, txn, "test.bdb", NULL, DB_BTREE, DB_CREATE, 0664); + + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + key.size = sizeof(int); + key.data = sval; + data.size = sizeof(sval); + data.data = sval; + + sprintf(sval, "%03x %d foo bar", 32, 3141592); + rc = dbi->put(dbi, txn, &key, &data, 0); + rc = txn->commit(txn, 0); + if (rc) { + fprintf(stderr, "txn->commit: (%d) %s\n", rc, db_strerror(rc)); + goto leave; + } + rc = env->txn_begin(env, NULL, &txn, 0); + rc = dbi->cursor(dbi, txn, &cursor, 0); + key.flags = DB_DBT_USERMEM; + key.data = kval; + key.ulen = sizeof(kval); + data.flags = DB_DBT_USERMEM; + data.data = sval; + data.ulen = sizeof(sval); + while ((rc = cursor->c_get(cursor, &key, &data, DB_NEXT)) == 0) { + printf("key: %p %.*s, data: %p %.*s\n", + key.data, (int) key.size, (char *) key.data, + data.data, (int) data.size, (char *) data.data); + } + rc = cursor->c_close(cursor); + rc = txn->abort(txn); +leave: + rc = dbi->close(dbi, 0); + rc = env->close(env, 0); + return rc; +} diff --git a/external/db_drivers/liblmdb32/sample-mdb.txt b/external/db_drivers/liblmdb32/sample-mdb.txt new file mode 100644 index 000000000..a233ec5dd --- /dev/null +++ b/external/db_drivers/liblmdb32/sample-mdb.txt @@ -0,0 +1,62 @@ +/* sample-mdb.txt - MDB toy/sample + * + * Do a line-by-line comparison of this and sample-bdb.txt + */ +/* + * Copyright 2012 Howard Chu, Symas Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>. + */ +#include <stdio.h> +#include "lmdb.h" + +int main(int argc,char * argv[]) +{ + int rc; + MDB_env *env; + MDB_dbi dbi; + MDB_val key, data; + MDB_txn *txn; + MDB_cursor *cursor; + char sval[32]; + + /* Note: Most error checking omitted for simplicity */ + + rc = mdb_env_create(&env); + rc = mdb_env_open(env, "./testdb", 0, 0664); + rc = mdb_txn_begin(env, NULL, 0, &txn); + rc = mdb_open(txn, NULL, 0, &dbi); + + key.mv_size = sizeof(int); + key.mv_data = sval; + data.mv_size = sizeof(sval); + data.mv_data = sval; + + sprintf(sval, "%03x %d foo bar", 32, 3141592); + rc = mdb_put(txn, dbi, &key, &data, 0); + rc = mdb_txn_commit(txn); + if (rc) { + fprintf(stderr, "mdb_txn_commit: (%d) %s\n", rc, mdb_strerror(rc)); + goto leave; + } + rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn); + rc = mdb_cursor_open(txn, dbi, &cursor); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { + printf("key: %p %.*s, data: %p %.*s\n", + key.mv_data, (int) key.mv_size, (char *) key.mv_data, + data.mv_data, (int) data.mv_size, (char *) data.mv_data); + } + mdb_cursor_close(cursor); + mdb_txn_abort(txn); +leave: + mdb_close(env, dbi); + mdb_env_close(env); + return 0; +} diff --git a/external/db_drivers/liblmdb32/tooltag b/external/db_drivers/liblmdb32/tooltag new file mode 100644 index 000000000..229bf16ba --- /dev/null +++ b/external/db_drivers/liblmdb32/tooltag @@ -0,0 +1,22 @@ +<tagfile> + <compound kind="page"> + <name>mdb_copy_1</name> + <title>mdb_copy - environment copy tool</title> + <filename>mdb_copy.1</filename> + </compound> + <compound kind="page"> + <name>mdb_dump_1</name> + <title>mdb_dump - environment export tool</title> + <filename>mdb_dump.1</filename> + </compound> + <compound kind="page"> + <name>mdb_load_1</name> + <title>mdb_load - environment import tool</title> + <filename>mdb_load.1</filename> + </compound> + <compound kind="page"> + <name>mdb_stat_1</name> + <title>mdb_stat - environment status tool</title> + <filename>mdb_stat.1</filename> + </compound> +</tagfile> diff --git a/external/db_drivers/liblmdb64/.gitignore b/external/db_drivers/liblmdb64/.gitignore new file mode 100644 index 000000000..d5102a87c --- /dev/null +++ b/external/db_drivers/liblmdb64/.gitignore @@ -0,0 +1,23 @@ +mtest +mtest[23456] +testdb +mdb_copy +mdb_stat +mdb_dump +mdb_load +*.lo +*.[ao] +*.so +*.exe +*[~#] +*.bak +*.orig +*.rej +*.gcov +*.gcda +*.gcno +core +core.* +valgrind.* +man/ +html/ diff --git a/external/db_drivers/liblmdb64/CHANGES b/external/db_drivers/liblmdb64/CHANGES new file mode 100644 index 000000000..34049ba7a --- /dev/null +++ b/external/db_drivers/liblmdb64/CHANGES @@ -0,0 +1,112 @@ +LMDB 0.9 Change Log + +LMDB 0.9.14 Release (2014/09/20) + Fix to support 64K page size (ITS#7713) + Fix to persist decreased as well as increased mapsizes (ITS#7789) + Fix cursor bug when deleting last node of a DUPSORT key + Fix mdb_env_info to return FIXEDMAP address + Fix ambiguous error code from writing to closed DBI (ITS#7825) + Fix mdb_copy copying past end of file (ITS#7886) + Fix cursor bugs from page_merge/rebalance + Fix to dirty fewer pages in deletes (mdb_page_loose()) + Fix mdb_dbi_open creating subDBs (ITS#7917) + Fix mdb_cursor_get(_DUP) with single value (ITS#7913) + Fix Windows compat issues in mtests (ITS#7879) + Add compacting variant of mdb_copy + Add BigEndian integer key compare code + Add mdb_dump/mdb_load utilities + +LMDB 0.9.13 Release (2014/06/18) + Fix mdb_page_alloc unlimited overflow page search + Documentation + Re-fix MDB_CURRENT doc (ITS#7793) + Fix MDB_GET_MULTIPLE/MDB_NEXT_MULTIPLE doc + +LMDB 0.9.12 Release (2014/06/13) + Fix MDB_GET_BOTH regression (ITS#7875,#7681) + Fix MDB_MULTIPLE writing multiple keys (ITS#7834) + Fix mdb_rebalance (ITS#7829) + Fix mdb_page_split (ITS#7815) + Fix md_entries count (ITS#7861,#7828,#7793) + Fix MDB_CURRENT (ITS#7793) + Fix possible crash on Windows DLL detach + Misc code cleanup + Documentation + mdb_cursor_put: cursor moves on error (ITS#7771) + + +LMDB 0.9.11 Release (2014/01/15) + Add mdb_env_set_assert() (ITS#7775) + Fix: invalidate txn on page allocation errors (ITS#7377) + Fix xcursor tracking in mdb_cursor_del0() (ITS#7771) + Fix corruption from deletes (ITS#7756) + Fix Windows/MSVC build issues + Raise safe limit of max MDB_MAXKEYSIZE + Misc code cleanup + Documentation + Remove spurious note about non-overlapping flags (ITS#7665) + +LMDB 0.9.10 Release (2013/11/12) + Add MDB_NOMEMINIT option + Fix mdb_page_split() again (ITS#7589) + Fix MDB_NORDAHEAD definition (ITS#7734) + Fix mdb_cursor_del() positioning (ITS#7733) + Partial fix for larger page sizes (ITS#7713) + Fix Windows64/MSVC build issues + +LMDB 0.9.9 Release (2013/10/24) + Add mdb_env_get_fd() + Add MDB_NORDAHEAD option + Add MDB_NOLOCK option + Avoid wasting space in mdb_page_split() (ITS#7589) + Fix mdb_page_merge() cursor fixup (ITS#7722) + Fix mdb_cursor_del() on last delete (ITS#7718) + Fix adding WRITEMAP on existing env (ITS#7715) + Fix nested txns (ITS#7515) + Fix mdb_env_copy() O_DIRECT bug (ITS#7682) + Fix mdb_cursor_set(SET_RANGE) return code (ITS#7681) + Fix mdb_rebalance() cursor fixup (ITS#7701) + Misc code cleanup + Documentation + Note that by default, readers need write access + + +LMDB 0.9.8 Release (2013/09/09) + Allow mdb_env_set_mapsize() on an open environment + Fix mdb_dbi_flags() (ITS#7672) + Fix mdb_page_unspill() in nested txns + Fix mdb_cursor_get(CURRENT|NEXT) after a delete + Fix mdb_cursor_get(DUP) to always return key (ITS#7671) + Fix mdb_cursor_del() to always advance to next item (ITS#7670) + Fix mdb_cursor_set(SET_RANGE) for tree with single page (ITS#7681) + Fix mdb_env_copy() retry open if O_DIRECT fails (ITS#7682) + Tweak mdb_page_spill() to be less aggressive + Documentation + Update caveats since mdb_reader_check() added in 0.9.7 + +LMDB 0.9.7 Release (2013/08/17) + Don't leave stale lockfile on failed RDONLY open (ITS#7664) + Fix mdb_page_split() ref beyond cursor depth + Fix read txn data race (ITS#7635) + Fix mdb_rebalance (ITS#7536, #7538) + Fix mdb_drop() (ITS#7561) + Misc DEBUG macro fixes + Add MDB_NOTLS envflag + Add mdb_env_copyfd() + Add mdb_txn_env() (ITS#7660) + Add mdb_dbi_flags() (ITS#7661) + Add mdb_env_get_maxkeysize() + Add mdb_env_reader_list()/mdb_env_reader_check() + Add mdb_page_spill/unspill, remove hard txn size limit + Use shorter names for semaphores (ITS#7615) + Build + Fix install target (ITS#7656) + Documentation + Misc updates for cursors, DB handles, data lifetime + +LMDB 0.9.6 Release (2013/02/25) + Many fixes/enhancements + +LMDB 0.9.5 Release (2012/11/30) + Renamed from libmdb to liblmdb + Many fixes/enhancements diff --git a/external/db_drivers/liblmdb64/CMakeLists.txt b/external/db_drivers/liblmdb64/CMakeLists.txt new file mode 100644 index 000000000..62b800135 --- /dev/null +++ b/external/db_drivers/liblmdb64/CMakeLists.txt @@ -0,0 +1,39 @@ +# Copyright (c) 2014-2015, The Monero Project +# +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, are +# permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this list of +# conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, this list +# of conditions and the following disclaimer in the documentation and/or other +# materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its contributors may be +# used to endorse or promote products derived from this software without specific +# prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +# THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF +# THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +set (lmdb_sources +mdb.c +midl.c) + +include_directories("${CMAKE_CURRENT_SOURCE_DIR}") + +add_library(lmdb + ${lmdb_sources}) +target_link_libraries(lmdb + LINK_PRIVATE + ${CMAKE_THREAD_LIBS_INIT}) diff --git a/external/db_drivers/liblmdb64/COPYRIGHT b/external/db_drivers/liblmdb64/COPYRIGHT new file mode 100644 index 000000000..e0eb484c7 --- /dev/null +++ b/external/db_drivers/liblmdb64/COPYRIGHT @@ -0,0 +1,20 @@ +Copyright 2011-2014 Howard Chu, Symas Corp. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted only as authorized by the OpenLDAP +Public License. + +A copy of this license is available in the file LICENSE in the +top-level directory of the distribution or, alternatively, at +<http://www.OpenLDAP.org/license.html>. + +OpenLDAP is a registered trademark of the OpenLDAP Foundation. + +Individual files and/or contributed packages may be copyright by +other parties and/or subject to additional restrictions. + +This work also contains materials derived from public sources. + +Additional information about OpenLDAP can be obtained at +<http://www.openldap.org/>. diff --git a/external/db_drivers/liblmdb64/Doxyfile b/external/db_drivers/liblmdb64/Doxyfile new file mode 100644 index 000000000..92d17b09e --- /dev/null +++ b/external/db_drivers/liblmdb64/Doxyfile @@ -0,0 +1,1631 @@ +# Doxyfile 1.7.1 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project +# +# All text after a hash (#) is considered a comment and will be ignored +# The format is: +# TAG = value [value, ...] +# For lists items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (" ") + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# This tag specifies the encoding used for all characters in the config file +# that follow. The default is UTF-8 which is also the encoding used for all +# text before the first occurrence of this tag. Doxygen uses libiconv (or the +# iconv built into libc) for the transcoding. See +# http://www.gnu.org/software/libiconv for the list of possible encodings. + +DOXYFILE_ENCODING = UTF-8 + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded +# by quotes) that should identify the project. + +PROJECT_NAME = LMDB + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. +# This could be handy for archiving the generated documentation or +# if some version control system is used. + +PROJECT_NUMBER = + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) +# base path where the generated documentation will be put. +# If a relative path is entered, it will be relative to the location +# where doxygen was started. If left blank the current directory will be used. + +OUTPUT_DIRECTORY = + +# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create +# 4096 sub-directories (in 2 levels) under the output directory of each output +# format and will distribute the generated files over these directories. +# Enabling this option can be useful when feeding doxygen a huge amount of +# source files, where putting all generated files in the same directory would +# otherwise cause performance problems for the file system. + +CREATE_SUBDIRS = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. +# The default language is English, other supported languages are: +# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, +# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German, +# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English +# messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, +# Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrilic, Slovak, +# Slovene, Spanish, Swedish, Ukrainian, and Vietnamese. + +OUTPUT_LANGUAGE = English + +# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will +# include brief member descriptions after the members that are listed in +# the file and class documentation (similar to JavaDoc). +# Set to NO to disable this. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend +# the brief description of a member or function before the detailed description. +# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. + +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator +# that is used to form the text in various listings. Each string +# in this list, if found as the leading text of the brief description, will be +# stripped from the text and the result after processing the whole list, is +# used as the annotated text. Otherwise, the brief description is used as-is. +# If left blank, the following values are used ("$name" is automatically +# replaced with the name of the entity): "The $name class" "The $name widget" +# "The $name file" "is" "provides" "specifies" "contains" +# "represents" "a" "an" "the" + +ABBREVIATE_BRIEF = + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# Doxygen will generate a detailed section even if there is only a brief +# description. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. + +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full +# path before files name in the file list and in the header files. If set +# to NO the shortest path that makes the file name unique will be used. + +FULL_PATH_NAMES = YES + +# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag +# can be used to strip a user-defined part of the path. Stripping is +# only done if one of the specified strings matches the left-hand part of +# the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the +# path to strip. + +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of +# the path mentioned in the documentation of a class, which tells +# the reader which header file to include in order to use a class. +# If left blank only the name of the header file containing the class +# definition is used. Otherwise one should specify the include paths that +# are normally passed to the compiler using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter +# (but less readable) file names. This can be useful is your file systems +# doesn't support long names like on DOS, Mac, or CD-ROM. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen +# will interpret the first line (until the first dot) of a JavaDoc-style +# comment as the brief description. If set to NO, the JavaDoc +# comments will behave just like regular Qt-style comments +# (thus requiring an explicit @brief command for a brief description.) + +JAVADOC_AUTOBRIEF = NO + +# If the QT_AUTOBRIEF tag is set to YES then Doxygen will +# interpret the first line (until the first dot) of a Qt-style +# comment as the brief description. If set to NO, the comments +# will behave just like regular Qt-style comments (thus requiring +# an explicit \brief command for a brief description.) + +QT_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen +# treat a multi-line C++ special comment block (i.e. a block of //! or /// +# comments) as a brief description. This used to be the default behaviour. +# The new default is to treat a multi-line C++ comment block as a detailed +# description. Set this tag to YES if you prefer the old behaviour instead. + +MULTILINE_CPP_IS_BRIEF = NO + +# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented +# member inherits the documentation from any documented member that it +# re-implements. + +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce +# a new page for each member. If set to NO, the documentation of a member will +# be part of the file/class/namespace that contains it. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. +# Doxygen uses this value to replace tabs by spaces in code fragments. + +TAB_SIZE = 4 + +# This tag can be used to specify a number of aliases that acts +# as commands in the documentation. An alias has the form "name=value". +# For example adding "sideeffect=\par Side Effects:\n" will allow you to +# put the command \sideeffect (or @sideeffect) in the documentation, which +# will result in a user-defined paragraph with heading "Side Effects:". +# You can put \n's in the value part of an alias to insert newlines. + +ALIASES = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C +# sources only. Doxygen will then generate output that is more tailored for C. +# For instance, some of the names that are used will be different. The list +# of all members will be omitted, etc. + +OPTIMIZE_OUTPUT_FOR_C = YES + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java +# sources only. Doxygen will then generate output that is more tailored for +# Java. For instance, namespaces will be presented as packages, qualified +# scopes will look different, etc. + +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran +# sources only. Doxygen will then generate output that is more tailored for +# Fortran. + +OPTIMIZE_FOR_FORTRAN = NO + +# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL +# sources. Doxygen will then generate output that is tailored for +# VHDL. + +OPTIMIZE_OUTPUT_VHDL = NO + +# Doxygen selects the parser to use depending on the extension of the files it +# parses. With this tag you can assign which parser to use for a given extension. +# Doxygen has a built-in mapping, but you can override or extend it using this +# tag. The format is ext=language, where ext is a file extension, and language +# is one of the parsers supported by doxygen: IDL, Java, Javascript, CSharp, C, +# C++, D, PHP, Objective-C, Python, Fortran, VHDL, C, C++. For instance to make +# doxygen treat .inc files as Fortran files (default is PHP), and .f files as C +# (default is Fortran), use: inc=Fortran f=C. Note that for custom extensions +# you also need to set FILE_PATTERNS otherwise the files are not read by doxygen. + +EXTENSION_MAPPING = + +# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want +# to include (a tag file for) the STL sources as input, then you should +# set this tag to YES in order to let doxygen match functions declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); v.s. +# func(std::string) {}). This also make the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. + +BUILTIN_STL_SUPPORT = NO + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support. + +CPP_CLI_SUPPORT = NO + +# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. +# Doxygen will parse them like normal C++ but will assume all classes use public +# instead of private inheritance when no explicit protection keyword is present. + +SIP_SUPPORT = NO + +# For Microsoft's IDL there are propget and propput attributes to indicate getter +# and setter methods for a property. Setting this option to YES (the default) +# will make doxygen to replace the get and set methods by a property in the +# documentation. This will only work if the methods are indeed getting or +# setting a simple type. If this is not the case, or you want to show the +# methods anyway, you should set this option to NO. + +IDL_PROPERTY_SUPPORT = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES, then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. + +DISTRIBUTE_GROUP_DOC = NO + +# Set the SUBGROUPING tag to YES (the default) to allow class member groups of +# the same type (for instance a group of public functions) to be put as a +# subgroup of that type (e.g. under the Public Functions section). Set it to +# NO to prevent subgrouping. Alternatively, this can be done per class using +# the \nosubgrouping command. + +SUBGROUPING = YES + +INLINE_GROUPED_CLASSES = YES +# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum +# is documented as struct, union, or enum with the name of the typedef. So +# typedef struct TypeS {} TypeT, will appear in the documentation as a struct +# with name TypeT. When disabled the typedef will appear as a member of a file, +# namespace, or class. And the struct will be named TypeS. This can typically +# be useful for C code in case the coding convention dictates that all compound +# types are typedef'ed and only the typedef is referenced, never the tag name. + +TYPEDEF_HIDES_STRUCT = YES + +# The SYMBOL_CACHE_SIZE determines the size of the internal cache use to +# determine which symbols to keep in memory and which to flush to disk. +# When the cache is full, less often used symbols will be written to disk. +# For small to medium size projects (<1000 input files) the default value is +# probably good enough. For larger projects a too small cache size can cause +# doxygen to be busy swapping symbols to and from disk most of the time +# causing a significant performance penality. +# If the system has enough physical memory increasing the cache will improve the +# performance by keeping more symbols in memory. Note that the value works on +# a logarithmic scale so increasing the size by one will rougly double the +# memory usage. The cache size is given by this formula: +# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0, +# corresponding to a cache size of 2^16 = 65536 symbols + +SYMBOL_CACHE_SIZE = 0 + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in +# documentation are documented, even if no documentation was available. +# Private class members and static file members will be hidden unless +# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES + +EXTRACT_ALL = NO + +# If the EXTRACT_PRIVATE tag is set to YES all private members of a class +# will be included in the documentation. + +EXTRACT_PRIVATE = NO + +# If the EXTRACT_STATIC tag is set to YES all static members of a file +# will be included in the documentation. + +EXTRACT_STATIC = YES + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) +# defined locally in source files will be included in the documentation. +# If set to NO only classes defined in header files are included. + +EXTRACT_LOCAL_CLASSES = YES + +# This flag is only useful for Objective-C code. When set to YES local +# methods, which are defined in the implementation section but not in +# the interface are included in the documentation. +# If set to NO (the default) only methods in the interface are included. + +EXTRACT_LOCAL_METHODS = NO + +# If this flag is set to YES, the members of anonymous namespaces will be +# extracted and appear in the documentation as a namespace called +# 'anonymous_namespace{file}', where file will be replaced with the base +# name of the file that contains the anonymous namespace. By default +# anonymous namespace are hidden. + +EXTRACT_ANON_NSPACES = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all +# undocumented members of documented classes, files or namespaces. +# If set to NO (the default) these members will be included in the +# various overviews, but no documentation section is generated. +# This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. +# If set to NO (the default) these classes will be included in the various +# overviews. This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all +# friend (class|struct|union) declarations. +# If set to NO (the default) these declarations will be included in the +# documentation. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any +# documentation blocks found inside the body of a function. +# If set to NO (the default) these blocks will be appended to the +# function's detailed documentation block. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation +# that is typed after a \internal command is included. If the tag is set +# to NO (the default) then the documentation will be excluded. +# Set it to YES to include the internal documentation. + +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate +# file names in lower-case letters. If set to YES upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. + +CASE_SENSE_NAMES = YES + +# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen +# will show members with their full class and namespace scopes in the +# documentation. If set to YES the scope will be hidden. + +HIDE_SCOPE_NAMES = NO + +# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen +# will put a list of the files that are included by a file in the documentation +# of that file. + +SHOW_INCLUDE_FILES = YES + +# If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen +# will list include files with double quotes in the documentation +# rather than with sharp brackets. + +FORCE_LOCAL_INCLUDES = NO + +# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] +# is inserted in the documentation for inline members. + +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen +# will sort the (detailed) documentation of file and class members +# alphabetically by member name. If set to NO the members will appear in +# declaration order. + +SORT_MEMBER_DOCS = NO + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the +# brief documentation of file, namespace and class members alphabetically +# by member name. If set to NO (the default) the members will appear in +# declaration order. + +SORT_BRIEF_DOCS = NO + +# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen +# will sort the (brief and detailed) documentation of class members so that +# constructors and destructors are listed first. If set to NO (the default) +# the constructors will appear in the respective orders defined by +# SORT_MEMBER_DOCS and SORT_BRIEF_DOCS. +# This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO +# and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO. + +SORT_MEMBERS_CTORS_1ST = NO + +# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the +# hierarchy of group names into alphabetical order. If set to NO (the default) +# the group names will appear in their defined order. + +SORT_GROUP_NAMES = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be +# sorted by fully-qualified names, including namespaces. If set to +# NO (the default), the class list will be sorted only by class name, +# not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the +# alphabetical list. + +SORT_BY_SCOPE_NAME = NO + +# The GENERATE_TODOLIST tag can be used to enable (YES) or +# disable (NO) the todo list. This list is created by putting \todo +# commands in the documentation. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable (YES) or +# disable (NO) the test list. This list is created by putting \test +# commands in the documentation. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable (YES) or +# disable (NO) the bug list. This list is created by putting \bug +# commands in the documentation. + +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or +# disable (NO) the deprecated list. This list is created by putting +# \deprecated commands in the documentation. + +GENERATE_DEPRECATEDLIST= YES + +# The ENABLED_SECTIONS tag can be used to enable conditional +# documentation sections, marked by \if sectionname ... \endif. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines +# the initial value of a variable or define consists of for it to appear in +# the documentation. If the initializer consists of more lines than specified +# here it will be hidden. Use a value of 0 to hide initializers completely. +# The appearance of the initializer of individual variables and defines in the +# documentation can be controlled using \showinitializer or \hideinitializer +# command in the documentation regardless of this setting. + +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated +# at the bottom of the documentation of classes and structs. If set to YES the +# list will mention the files that were used to generate the documentation. + +SHOW_USED_FILES = YES + +# If the sources in your project are distributed over multiple directories +# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy +# in the documentation. The default is NO. + +SHOW_DIRECTORIES = NO + +# Set the SHOW_FILES tag to NO to disable the generation of the Files page. +# This will remove the Files entry from the Quick Index and from the +# Folder Tree View (if specified). The default is YES. + +SHOW_FILES = YES + +# Set the SHOW_NAMESPACES tag to NO to disable the generation of the +# Namespaces page. +# This will remove the Namespaces entry from the Quick Index +# and from the Folder Tree View (if specified). The default is YES. + +SHOW_NAMESPACES = YES + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from +# the version control system). Doxygen will invoke the program by executing (via +# popen()) the command <command> <input-file>, where <command> is the value of +# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file +# provided by doxygen. Whatever the program writes to standard output +# is used as the file version. See the manual for examples. + +FILE_VERSION_FILTER = + +# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed +# by doxygen. The layout file controls the global structure of the generated +# output files in an output format independent way. The create the layout file +# that represents doxygen's defaults, run doxygen with the -l option. +# You can optionally specify a file name after the option, if omitted +# DoxygenLayout.xml will be used as the name of the layout file. + +LAYOUT_FILE = + +#--------------------------------------------------------------------------- +# configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated +# by doxygen. Possible values are YES and NO. If left blank NO is used. + +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated by doxygen. Possible values are YES and NO. If left blank +# NO is used. + +WARNINGS = YES + +# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings +# for undocumented members. If EXTRACT_ALL is set to YES then this flag will +# automatically be disabled. + +WARN_IF_UNDOCUMENTED = YES + +# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some +# parameters in a documented function, or documenting parameters that +# don't exist or using markup commands wrongly. + +WARN_IF_DOC_ERROR = YES + +# This WARN_NO_PARAMDOC option can be abled to get warnings for +# functions that are documented, but have no documentation for their parameters +# or return value. If set to NO (the default) doxygen will only warn about +# wrong or incomplete parameter documentation, but not about the absence of +# documentation. + +WARN_NO_PARAMDOC = NO + +# The WARN_FORMAT tag determines the format of the warning messages that +# doxygen can produce. The string should contain the $file, $line, and $text +# tags, which will be replaced by the file and line number from which the +# warning originated and the warning text. Optionally the format may contain +# $version, which will be replaced by the version of the file (if it could +# be obtained via FILE_VERSION_FILTER) + +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning +# and error messages should be written. If left blank the output is written +# to stderr. + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag can be used to specify the files and/or directories that contain +# documented source files. You may enter file names like "myfile.cpp" or +# directories like "/usr/src/myproject". Separate the files or directories +# with spaces. + +INPUT = lmdb.h midl.h mdb.c midl.c + +# This tag can be used to specify the character encoding of the source files +# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is +# also the default input encoding. Doxygen uses libiconv (or the iconv built +# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for +# the list of possible encodings. + +INPUT_ENCODING = UTF-8 + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank the following patterns are tested: +# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx +# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90 + +FILE_PATTERNS = + +# The RECURSIVE tag can be used to turn specify whether or not subdirectories +# should be searched for input files as well. Possible values are YES and NO. +# If left blank NO is used. + +RECURSIVE = NO + +# The EXCLUDE tag can be used to specify files and/or directories that should +# excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. + +EXCLUDE = + +# The EXCLUDE_SYMLINKS tag can be used select whether or not files or +# directories that are symbolic links (a Unix filesystem feature) are excluded +# from the input. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. Note that the wildcards are matched +# against the file with absolute path, so to exclude all test directories +# for example use the pattern */test/* + +EXCLUDE_PATTERNS = + +# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names +# (namespaces, classes, functions, etc.) that should be excluded from the +# output. The symbol name can be a fully qualified name, a word, or if the +# wildcard * is used, a substring. Examples: ANamespace, AClass, +# AClass::ANamespace, ANamespace::*Test + +EXCLUDE_SYMBOLS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or +# directories that contain example code fragments that are included (see +# the \include command). + +EXAMPLE_PATH = + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank all files are included. + +EXAMPLE_PATTERNS = + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude +# commands irrespective of the value of the RECURSIVE tag. +# Possible values are YES and NO. If left blank NO is used. + +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or +# directories that contain image that are included in the documentation (see +# the \image command). + +IMAGE_PATH = + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command <filter> <input-file>, where <filter> +# is the value of the INPUT_FILTER tag, and <input-file> is the name of an +# input file. Doxygen will then use the output that the filter program writes +# to standard output. +# If FILTER_PATTERNS is specified, this tag will be +# ignored. + +INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis. +# Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. +# The filters are a list of the form: +# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further +# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER +# is applied to all files. + +FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER) will be used to filter the input files when producing source +# files to browse (i.e. when SOURCE_BROWSER is set to YES). + +FILTER_SOURCE_FILES = NO + +#--------------------------------------------------------------------------- +# configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will +# be generated. Documented entities will be cross-referenced with these sources. +# Note: To get rid of all source code in the generated output, make sure also +# VERBATIM_HEADERS is set to NO. + +SOURCE_BROWSER = NO + +# Setting the INLINE_SOURCES tag to YES will include the body +# of functions and classes directly in the documentation. + +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct +# doxygen to hide any special comment blocks from generated source code +# fragments. Normal C and C++ comments will always remain visible. + +STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES +# then for each documented function all documented +# functions referencing it will be listed. + +REFERENCED_BY_RELATION = NO + +# If the REFERENCES_RELATION tag is set to YES +# then for each documented function all documented entities +# called/used by that function will be listed. + +REFERENCES_RELATION = NO + +# If the REFERENCES_LINK_SOURCE tag is set to YES (the default) +# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from +# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will +# link to the source code. +# Otherwise they will link to the documentation. + +REFERENCES_LINK_SOURCE = YES + +# If the USE_HTAGS tag is set to YES then the references to source code +# will point to the HTML generated by the htags(1) tool instead of doxygen +# built-in source browser. The htags tool is part of GNU's global source +# tagging system (see http://www.gnu.org/software/global/global.html). You +# will need version 4.8.6 or higher. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen +# will generate a verbatim copy of the header file for each class for +# which an include is specified. Set to NO to disable this. + +VERBATIM_HEADERS = YES + +#--------------------------------------------------------------------------- +# configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index +# of all compounds will be generated. Enable this if the project +# contains a lot of classes, structs, unions or interfaces. + +ALPHABETICAL_INDEX = YES + +# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then +# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns +# in which this list will be split (can be a number in the range [1..20]) + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all +# classes will be put under the same header in the alphabetical index. +# The IGNORE_PREFIX tag can be used to specify one or more prefixes that +# should be ignored while generating the index headers. + +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES (the default) Doxygen will +# generate HTML output. + +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `html' will be used as the default path. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for +# each generated HTML page (for example: .htm,.php,.asp). If it is left blank +# doxygen will generate files with .html extension. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a personal HTML header for +# each generated HTML page. If it is left blank doxygen will generate a +# standard header. + +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a personal HTML footer for +# each generated HTML page. If it is left blank doxygen will generate a +# standard footer. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading +# style sheet that is used by each HTML page. It can be used to +# fine-tune the look of the HTML output. If the tag is left blank doxygen +# will generate a default style sheet. Note that doxygen will try to copy +# the style sheet file to the HTML output directory, so don't put your own +# stylesheet in the HTML output directory as well, or it will be erased! + +HTML_STYLESHEET = + +# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. +# Doxygen will adjust the colors in the stylesheet and background images +# according to this color. Hue is specified as an angle on a colorwheel, +# see http://en.wikipedia.org/wiki/Hue for more information. +# For instance the value 0 represents red, 60 is yellow, 120 is green, +# 180 is cyan, 240 is blue, 300 purple, and 360 is red again. +# The allowed range is 0 to 359. + +HTML_COLORSTYLE_HUE = 220 + +# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of +# the colors in the HTML output. For a value of 0 the output will use +# grayscales only. A value of 255 will produce the most vivid colors. + +HTML_COLORSTYLE_SAT = 100 + +# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to +# the luminance component of the colors in the HTML output. Values below +# 100 gradually make the output lighter, whereas values above 100 make +# the output darker. The value divided by 100 is the actual gamma applied, +# so 80 represents a gamma of 0.8, The value 220 represents a gamma of 2.2, +# and 100 does not change the gamma. + +HTML_COLORSTYLE_GAMMA = 80 + +# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML +# page will contain the date and time when the page was generated. Setting +# this to NO can help when comparing the output of multiple runs. + +HTML_TIMESTAMP = YES + +# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, +# files or namespaces will be aligned in HTML using tables. If set to +# NO a bullet list will be used. + +HTML_ALIGN_MEMBERS = YES + +# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML +# documentation will contain sections that can be hidden and shown after the +# page has loaded. For this to work a browser that supports +# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox +# Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari). + +HTML_DYNAMIC_SECTIONS = NO + +# If the GENERATE_DOCSET tag is set to YES, additional index files +# will be generated that can be used as input for Apple's Xcode 3 +# integrated development environment, introduced with OSX 10.5 (Leopard). +# To create a documentation set, doxygen will generate a Makefile in the +# HTML output directory. Running make will produce the docset in that +# directory and running "make install" will install the docset in +# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find +# it at startup. +# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html +# for more information. + +GENERATE_DOCSET = NO + +# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the +# feed. A documentation feed provides an umbrella under which multiple +# documentation sets from a single provider (such as a company or product suite) +# can be grouped. + +DOCSET_FEEDNAME = "Doxygen generated docs" + +# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that +# should uniquely identify the documentation set bundle. This should be a +# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen +# will append .docset to the name. + +DOCSET_BUNDLE_ID = org.doxygen.Project + +# When GENERATE_PUBLISHER_ID tag specifies a string that should uniquely identify +# the documentation publisher. This should be a reverse domain-name style +# string, e.g. com.mycompany.MyDocSet.documentation. + +DOCSET_PUBLISHER_ID = org.doxygen.Publisher + +# The GENERATE_PUBLISHER_NAME tag identifies the documentation publisher. + +DOCSET_PUBLISHER_NAME = Publisher + +# If the GENERATE_HTMLHELP tag is set to YES, additional index files +# will be generated that can be used as input for tools like the +# Microsoft HTML help workshop to generate a compiled HTML help file (.chm) +# of the generated HTML documentation. + +GENERATE_HTMLHELP = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can +# be used to specify the file name of the resulting .chm file. You +# can add a path in front of the file if the result should not be +# written to the html output directory. + +CHM_FILE = + +# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can +# be used to specify the location (absolute path including file name) of +# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run +# the HTML help compiler on the generated index.hhp. + +HHC_LOCATION = + +# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag +# controls if a separate .chi index file is generated (YES) or that +# it should be included in the master .chm file (NO). + +GENERATE_CHI = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING +# is used to encode HtmlHelp index (hhk), content (hhc) and project file +# content. + +CHM_INDEX_ENCODING = + +# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag +# controls whether a binary table of contents is generated (YES) or a +# normal table of contents (NO) in the .chm file. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members +# to the contents of the HTML help documentation and to the tree view. + +TOC_EXPAND = NO + +# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and +# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated +# that can be used as input for Qt's qhelpgenerator to generate a +# Qt Compressed Help (.qch) of the generated HTML documentation. + +GENERATE_QHP = NO + +# If the QHG_LOCATION tag is specified, the QCH_FILE tag can +# be used to specify the file name of the resulting .qch file. +# The path specified is relative to the HTML output folder. + +QCH_FILE = + +# The QHP_NAMESPACE tag specifies the namespace to use when generating +# Qt Help Project output. For more information please see +# http://doc.trolltech.com/qthelpproject.html#namespace + +QHP_NAMESPACE = org.doxygen.Project + +# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating +# Qt Help Project output. For more information please see +# http://doc.trolltech.com/qthelpproject.html#virtual-folders + +QHP_VIRTUAL_FOLDER = doc + +# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to +# add. For more information please see +# http://doc.trolltech.com/qthelpproject.html#custom-filters + +QHP_CUST_FILTER_NAME = + +# The QHP_CUST_FILT_ATTRS tag specifies the list of the attributes of the +# custom filter to add. For more information please see +# <a href="http://doc.trolltech.com/qthelpproject.html#custom-filters"> +# Qt Help Project / Custom Filters</a>. + +QHP_CUST_FILTER_ATTRS = + +# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this +# project's +# filter section matches. +# <a href="http://doc.trolltech.com/qthelpproject.html#filter-attributes"> +# Qt Help Project / Filter Attributes</a>. + +QHP_SECT_FILTER_ATTRS = + +# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can +# be used to specify the location of Qt's qhelpgenerator. +# If non-empty doxygen will try to run qhelpgenerator on the generated +# .qhp file. + +QHG_LOCATION = + +# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files +# will be generated, which together with the HTML files, form an Eclipse help +# plugin. To install this plugin and make it available under the help contents +# menu in Eclipse, the contents of the directory containing the HTML and XML +# files needs to be copied into the plugins directory of eclipse. The name of +# the directory within the plugins directory should be the same as +# the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before +# the help appears. + +GENERATE_ECLIPSEHELP = NO + +# A unique identifier for the eclipse help plugin. When installing the plugin +# the directory name containing the HTML and XML files should also have +# this name. + +ECLIPSE_DOC_ID = org.doxygen.Project + +# The DISABLE_INDEX tag can be used to turn on/off the condensed index at +# top of each HTML page. The value NO (the default) enables the index and +# the value YES disables it. + +DISABLE_INDEX = NO + +# This tag can be used to set the number of enum values (range [1..20]) +# that doxygen will group on one line in the generated HTML documentation. + +ENUM_VALUES_PER_LINE = 4 + +# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index +# structure should be generated to display hierarchical information. +# If the tag value is set to YES, a side panel will be generated +# containing a tree-like index structure (just like the one that +# is generated for HTML Help). For this to work a browser that supports +# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser). +# Windows users are probably better off using the HTML help feature. + +GENERATE_TREEVIEW = NO + +# By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories, +# and Class Hierarchy pages using a tree view instead of an ordered list. + +USE_INLINE_TREES = NO + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be +# used to set the initial width (in pixels) of the frame in which the tree +# is shown. + +TREEVIEW_WIDTH = 250 + +# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open +# links to external symbols imported via tag files in a separate window. + +EXT_LINKS_IN_WINDOW = NO + +# Use this tag to change the font size of Latex formulas included +# as images in the HTML documentation. The default is 10. Note that +# when you change the font size after a successful doxygen run you need +# to manually remove any form_*.png images from the HTML output directory +# to force them to be regenerated. + +FORMULA_FONTSIZE = 10 + +# Use the FORMULA_TRANPARENT tag to determine whether or not the images +# generated for formulas are transparent PNGs. Transparent PNGs are +# not supported properly for IE 6.0, but are supported on all modern browsers. +# Note that when changing this option you need to delete any form_*.png files +# in the HTML output before the changes have effect. + +FORMULA_TRANSPARENT = YES + +# When the SEARCHENGINE tag is enabled doxygen will generate a search box +# for the HTML output. The underlying search engine uses javascript +# and DHTML and should work on any modern browser. Note that when using +# HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets +# (GENERATE_DOCSET) there is already a search function so this one should +# typically be disabled. For large projects the javascript based search engine +# can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution. + +SEARCHENGINE = YES + +# When the SERVER_BASED_SEARCH tag is enabled the search engine will be +# implemented using a PHP enabled web server instead of at the web client +# using Javascript. Doxygen will generate the search PHP script and index +# file to put on the web server. The advantage of the server +# based approach is that it scales better to large projects and allows +# full text search. The disadvances is that it is more difficult to setup +# and does not have live searching capabilities. + +SERVER_BASED_SEARCH = NO + +#--------------------------------------------------------------------------- +# configuration options related to the LaTeX output +#--------------------------------------------------------------------------- + +# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will +# generate Latex output. + +GENERATE_LATEX = NO + +# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `latex' will be used as the default path. + +LATEX_OUTPUT = latex + +# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be +# invoked. If left blank `latex' will be used as the default command name. +# Note that when enabling USE_PDFLATEX this option is only used for +# generating bitmaps for formulas in the HTML output, but not in the +# Makefile that is written to the output directory. + +LATEX_CMD_NAME = latex + +# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to +# generate index for LaTeX. If left blank `makeindex' will be used as the +# default command name. + +MAKEINDEX_CMD_NAME = makeindex + +# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact +# LaTeX documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_LATEX = NO + +# The PAPER_TYPE tag can be used to set the paper type that is used +# by the printer. Possible values are: a4, a4wide, letter, legal and +# executive. If left blank a4wide will be used. + +PAPER_TYPE = a4wide + +# The EXTRA_PACKAGES tag can be to specify one or more names of LaTeX +# packages that should be included in the LaTeX output. + +EXTRA_PACKAGES = + +# The LATEX_HEADER tag can be used to specify a personal LaTeX header for +# the generated latex document. The header should contain everything until +# the first chapter. If it is left blank doxygen will generate a +# standard header. Notice: only use this tag if you know what you are doing! + +LATEX_HEADER = + +# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated +# is prepared for conversion to pdf (using ps2pdf). The pdf file will +# contain links (just like the HTML output) instead of page references +# This makes the output suitable for online browsing using a pdf viewer. + +PDF_HYPERLINKS = YES + +# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of +# plain latex in the generated Makefile. Set this option to YES to get a +# higher quality PDF documentation. + +USE_PDFLATEX = YES + +# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. +# command to the generated LaTeX files. This will instruct LaTeX to keep +# running if errors occur, instead of asking the user for help. +# This option is also used when generating formulas in HTML. + +LATEX_BATCHMODE = NO + +# If LATEX_HIDE_INDICES is set to YES then doxygen will not +# include the index chapters (such as File Index, Compound Index, etc.) +# in the output. + +LATEX_HIDE_INDICES = NO + +# If LATEX_SOURCE_CODE is set to YES then doxygen will include +# source code with syntax highlighting in the LaTeX output. +# Note that which sources are shown also depends on other settings +# such as SOURCE_BROWSER. + +LATEX_SOURCE_CODE = NO + +#--------------------------------------------------------------------------- +# configuration options related to the RTF output +#--------------------------------------------------------------------------- + +# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output +# The RTF output is optimized for Word 97 and may not look very pretty with +# other RTF readers or editors. + +GENERATE_RTF = NO + +# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `rtf' will be used as the default path. + +RTF_OUTPUT = rtf + +# If the COMPACT_RTF tag is set to YES Doxygen generates more compact +# RTF documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_RTF = NO + +# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated +# will contain hyperlink fields. The RTF file will +# contain links (just like the HTML output) instead of page references. +# This makes the output suitable for online browsing using WORD or other +# programs which support those fields. +# Note: wordpad (write) and others do not support links. + +RTF_HYPERLINKS = NO + +# Load stylesheet definitions from file. Syntax is similar to doxygen's +# config file, i.e. a series of assignments. You only have to provide +# replacements, missing definitions are set to their default value. + +RTF_STYLESHEET_FILE = + +# Set optional variables used in the generation of an rtf document. +# Syntax is similar to doxygen's config file. + +RTF_EXTENSIONS_FILE = + +#--------------------------------------------------------------------------- +# configuration options related to the man page output +#--------------------------------------------------------------------------- + +# If the GENERATE_MAN tag is set to YES (the default) Doxygen will +# generate man pages + +GENERATE_MAN = YES + +# The MAN_OUTPUT tag is used to specify where the man pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `man' will be used as the default path. + +MAN_OUTPUT = man + +# The MAN_EXTENSION tag determines the extension that is added to +# the generated man pages (default is the subroutine's section .3) + +MAN_EXTENSION = .3 + +# If the MAN_LINKS tag is set to YES and Doxygen generates man output, +# then it will generate one additional man file for each entity +# documented in the real man page(s). These additional files +# only source the real man page, but without them the man command +# would be unable to find the correct page. The default is NO. + +MAN_LINKS = NO + +#--------------------------------------------------------------------------- +# configuration options related to the XML output +#--------------------------------------------------------------------------- + +# If the GENERATE_XML tag is set to YES Doxygen will +# generate an XML file that captures the structure of +# the code including all documentation. + +GENERATE_XML = NO + +# The XML_OUTPUT tag is used to specify where the XML pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `xml' will be used as the default path. + +XML_OUTPUT = xml + +# The XML_SCHEMA tag can be used to specify an XML schema, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_SCHEMA = + +# The XML_DTD tag can be used to specify an XML DTD, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_DTD = + +# If the XML_PROGRAMLISTING tag is set to YES Doxygen will +# dump the program listings (including syntax highlighting +# and cross-referencing information) to the XML output. Note that +# enabling this will significantly increase the size of the XML output. + +XML_PROGRAMLISTING = YES + +#--------------------------------------------------------------------------- +# configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- + +# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will +# generate an AutoGen Definitions (see autogen.sf.net) file +# that captures the structure of the code including all +# documentation. Note that this feature is still experimental +# and incomplete at the moment. + +GENERATE_AUTOGEN_DEF = NO + +#--------------------------------------------------------------------------- +# configuration options related to the Perl module output +#--------------------------------------------------------------------------- + +# If the GENERATE_PERLMOD tag is set to YES Doxygen will +# generate a Perl module file that captures the structure of +# the code including all documentation. Note that this +# feature is still experimental and incomplete at the +# moment. + +GENERATE_PERLMOD = NO + +# If the PERLMOD_LATEX tag is set to YES Doxygen will generate +# the necessary Makefile rules, Perl scripts and LaTeX code to be able +# to generate PDF and DVI output from the Perl module output. + +PERLMOD_LATEX = NO + +# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be +# nicely formatted so it can be parsed by a human reader. +# This is useful +# if you want to understand what is going on. +# On the other hand, if this +# tag is set to NO the size of the Perl module output will be much smaller +# and Perl will parse it just the same. + +PERLMOD_PRETTY = YES + +# The names of the make variables in the generated doxyrules.make file +# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. +# This is useful so different doxyrules.make files included by the same +# Makefile don't overwrite each other's variables. + +PERLMOD_MAKEVAR_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- + +# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will +# evaluate all C-preprocessor directives found in the sources and include +# files. + +ENABLE_PREPROCESSING = YES + +# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro +# names in the source code. If set to NO (the default) only conditional +# compilation will be performed. Macro expansion can be done in a controlled +# way by setting EXPAND_ONLY_PREDEF to YES. + +MACRO_EXPANSION = NO + +# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES +# then the macro expansion is limited to the macros specified with the +# PREDEFINED and EXPAND_AS_DEFINED tags. + +EXPAND_ONLY_PREDEF = NO + +# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files +# in the INCLUDE_PATH (see below) will be search if a #include is found. + +SEARCH_INCLUDES = YES + +# The INCLUDE_PATH tag can be used to specify one or more directories that +# contain include files that are not input files but should be processed by +# the preprocessor. + +INCLUDE_PATH = + +# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard +# patterns (like *.h and *.hpp) to filter out the header-files in the +# directories. If left blank, the patterns specified with FILE_PATTERNS will +# be used. + +INCLUDE_FILE_PATTERNS = + +# The PREDEFINED tag can be used to specify one or more macro names that +# are defined before the preprocessor is started (similar to the -D option of +# gcc). The argument of the tag is a list of macros of the form: name +# or name=definition (no spaces). If the definition and the = are +# omitted =1 is assumed. To prevent a macro definition from being +# undefined via #undef or recursively expanded use the := operator +# instead of the = operator. + +PREDEFINED = DEBUG=2 __GNUC__=1 + +# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then +# this tag can be used to specify a list of macro names that should be expanded. +# The macro definition that is found in the sources will be used. +# Use the PREDEFINED tag if you want to use a different macro definition. + +EXPAND_AS_DEFINED = + +# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then +# doxygen's preprocessor will remove all function-like macros that are alone +# on a line, have an all uppercase name, and do not end with a semicolon. Such +# function macros are typically used for boiler-plate code, and will confuse +# the parser if not removed. + +SKIP_FUNCTION_MACROS = YES + +#--------------------------------------------------------------------------- +# Configuration::additions related to external references +#--------------------------------------------------------------------------- + +# The TAGFILES option can be used to specify one or more tagfiles. +# Optionally an initial location of the external documentation +# can be added for each tagfile. The format of a tag file without +# this location is as follows: +# +# TAGFILES = file1 file2 ... +# Adding location for the tag files is done as follows: +# +# TAGFILES = file1=loc1 "file2 = loc2" ... +# where "loc1" and "loc2" can be relative or absolute paths or +# URLs. If a location is present for each tag, the installdox tool +# does not have to be run to correct the links. +# Note that each tag file must have a unique name +# (where the name does NOT include the path) +# If a tag file is not located in the directory in which doxygen +# is run, you must also specify the path to the tagfile here. + +TAGFILES = tooltag=./man1 + +# When a file name is specified after GENERATE_TAGFILE, doxygen will create +# a tag file that is based on the input files it reads. + +GENERATE_TAGFILE = + +# If the ALLEXTERNALS tag is set to YES all external classes will be listed +# in the class index. If set to NO only the inherited external classes +# will be listed. + +ALLEXTERNALS = NO + +# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed +# in the modules index. If set to NO, only the current project's groups will +# be listed. + +EXTERNAL_GROUPS = YES + +# The PERL_PATH should be the absolute path and name of the perl script +# interpreter (i.e. the result of `which perl'). + +PERL_PATH = /usr/bin/perl + +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- + +# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will +# generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base +# or super classes. Setting the tag to NO turns the diagrams off. Note that +# this option is superseded by the HAVE_DOT option below. This is only a +# fallback. It is recommended to install and use dot, since it yields more +# powerful graphs. + +CLASS_DIAGRAMS = YES + +# You can define message sequence charts within doxygen comments using the \msc +# command. Doxygen will then run the mscgen tool (see +# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the +# documentation. The MSCGEN_PATH tag allows you to specify the directory where +# the mscgen tool resides. If left empty the tool is assumed to be found in the +# default search path. + +MSCGEN_PATH = + +# If set to YES, the inheritance and collaboration graphs will hide +# inheritance and usage relations if the target is undocumented +# or is not a class. + +HIDE_UNDOC_RELATIONS = YES + +# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is +# available from the path. This tool is part of Graphviz, a graph visualization +# toolkit from AT&T and Lucent Bell Labs. The other options in this section +# have no effect if this option is set to NO (the default) + +HAVE_DOT = NO + +# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is +# allowed to run in parallel. When set to 0 (the default) doxygen will +# base this on the number of processors available in the system. You can set it +# explicitly to a value larger than 0 to get control over the balance +# between CPU load and processing speed. + +DOT_NUM_THREADS = 0 + +# By default doxygen will write a font called FreeSans.ttf to the output +# directory and reference it in all dot files that doxygen generates. This +# font does not include all possible unicode characters however, so when you need +# these (or just want a differently looking font) you can specify the font name +# using DOT_FONTNAME. You need need to make sure dot is able to find the font, +# which can be done by putting it in a standard location or by setting the +# DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory +# containing the font. + +DOT_FONTNAME = FreeSans.ttf + +# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs. +# The default size is 10pt. + +DOT_FONTSIZE = 10 + +# By default doxygen will tell dot to use the output directory to look for the +# FreeSans.ttf font (which doxygen will put there itself). If you specify a +# different font using DOT_FONTNAME you can set the path where dot +# can find it using this tag. + +DOT_FONTPATH = + +# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect inheritance relations. Setting this tag to YES will force the +# the CLASS_DIAGRAMS tag to NO. + +CLASS_GRAPH = YES + +# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect implementation dependencies (inheritance, containment, and +# class references variables) of the class with other documented classes. + +COLLABORATION_GRAPH = YES + +# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for groups, showing the direct groups dependencies + +GROUP_GRAPHS = YES + +# If the UML_LOOK tag is set to YES doxygen will generate inheritance and +# collaboration diagrams in a style similar to the OMG's Unified Modeling +# Language. + +UML_LOOK = NO + +# If set to YES, the inheritance and collaboration graphs will show the +# relations between templates and their instances. + +TEMPLATE_RELATIONS = NO + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT +# tags are set to YES then doxygen will generate a graph for each documented +# file showing the direct and indirect include dependencies of the file with +# other documented files. + +INCLUDE_GRAPH = YES + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and +# HAVE_DOT tags are set to YES then doxygen will generate a graph for each +# documented header file showing the documented files that directly or +# indirectly include this file. + +INCLUDED_BY_GRAPH = YES + +# If the CALL_GRAPH and HAVE_DOT options are set to YES then +# doxygen will generate a call dependency graph for every global function +# or class method. Note that enabling this option will significantly increase +# the time of a run. So in most cases it will be better to enable call graphs +# for selected functions only using the \callgraph command. + +CALL_GRAPH = NO + +# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then +# doxygen will generate a caller dependency graph for every global function +# or class method. Note that enabling this option will significantly increase +# the time of a run. So in most cases it will be better to enable caller +# graphs for selected functions only using the \callergraph command. + +CALLER_GRAPH = NO + +# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen +# will graphical hierarchy of all classes instead of a textual one. + +GRAPHICAL_HIERARCHY = YES + +# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES +# then doxygen will show the dependencies a directory has on other directories +# in a graphical way. The dependency relations are determined by the #include +# relations between the files in the directories. + +DIRECTORY_GRAPH = YES + +# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images +# generated by dot. Possible values are png, jpg, or gif +# If left blank png will be used. + +DOT_IMAGE_FORMAT = png + +# The tag DOT_PATH can be used to specify the path where the dot tool can be +# found. If left blank, it is assumed the dot tool can be found in the path. + +DOT_PATH = + +# The DOTFILE_DIRS tag can be used to specify one or more directories that +# contain dot files that are included in the documentation (see the +# \dotfile command). + +DOTFILE_DIRS = + +# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of +# nodes that will be shown in the graph. If the number of nodes in a graph +# becomes larger than this value, doxygen will truncate the graph, which is +# visualized by representing a node as a red box. Note that doxygen if the +# number of direct children of the root node in a graph is already larger than +# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note +# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. + +DOT_GRAPH_MAX_NODES = 50 + +# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the +# graphs generated by dot. A depth value of 3 means that only nodes reachable +# from the root by following a path via at most 3 edges will be shown. Nodes +# that lay further from the root node will be omitted. Note that setting this +# option to 1 or 2 may greatly reduce the computation time needed for large +# code bases. Also note that the size of a graph can be further restricted by +# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. + +MAX_DOT_GRAPH_DEPTH = 0 + +# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent +# background. This is disabled by default, because dot on Windows does not +# seem to support this out of the box. Warning: Depending on the platform used, +# enabling this option may lead to badly anti-aliased labels on the edges of +# a graph (i.e. they become hard to read). + +DOT_TRANSPARENT = NO + +# Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output +# files in one run (i.e. multiple -o and -T options on the command line). This +# makes dot run faster, but since only newer versions of dot (>1.8.10) +# support this, this feature is disabled by default. + +DOT_MULTI_TARGETS = YES + +# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will +# generate a legend page explaining the meaning of the various boxes and +# arrows in the dot generated graphs. + +GENERATE_LEGEND = YES + +# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will +# remove the intermediate dot files that are used to generate +# the various graphs. + +DOT_CLEANUP = YES diff --git a/external/db_drivers/liblmdb64/LICENSE b/external/db_drivers/liblmdb64/LICENSE new file mode 100644 index 000000000..05ad7571e --- /dev/null +++ b/external/db_drivers/liblmdb64/LICENSE @@ -0,0 +1,47 @@ +The OpenLDAP Public License + Version 2.8, 17 August 2003 + +Redistribution and use of this software and associated documentation +("Software"), with or without modification, are permitted provided +that the following conditions are met: + +1. Redistributions in source form must retain copyright statements + and notices, + +2. Redistributions in binary form must reproduce applicable copyright + statements and notices, this list of conditions, and the following + disclaimer in the documentation and/or other materials provided + with the distribution, and + +3. Redistributions must contain a verbatim copy of this document. + +The OpenLDAP Foundation may revise this license from time to time. +Each revision is distinguished by a version number. You may use +this Software under terms of this license revision or under the +terms of any subsequent revision of the license. + +THIS SOFTWARE IS PROVIDED BY THE OPENLDAP FOUNDATION AND ITS +CONTRIBUTORS ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, +INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY +AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +SHALL THE OPENLDAP FOUNDATION, ITS CONTRIBUTORS, OR THE AUTHOR(S) +OR OWNER(S) OF THE SOFTWARE BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +The names of the authors and copyright holders must not be used in +advertising or otherwise to promote the sale, use or other dealing +in this Software without specific, written prior permission. Title +to copyright in this Software shall at all times remain with copyright +holders. + +OpenLDAP is a registered trademark of the OpenLDAP Foundation. + +Copyright 1999-2003 The OpenLDAP Foundation, Redwood City, +California, USA. All Rights Reserved. Permission to copy and +distribute verbatim copies of this document is granted. diff --git a/external/db_drivers/liblmdb64/Makefile b/external/db_drivers/liblmdb64/Makefile new file mode 100644 index 000000000..2d0983eff --- /dev/null +++ b/external/db_drivers/liblmdb64/Makefile @@ -0,0 +1,104 @@ +# Makefile for liblmdb (Lightning memory-mapped database library). + +######################################################################## +# Configuration. The compiler options must enable threaded compilation. +# +# Preprocessor macros (for CPPFLAGS) of interest... +# Note that the defaults should already be correct for most +# platforms; you should not need to change any of these. +# Read their descriptions in mdb.c if you do: +# +# - MDB_USE_POSIX_SEM +# - MDB_DSYNC +# - MDB_FDATASYNC +# - MDB_FDATASYNC_WORKS +# - MDB_USE_PWRITEV +# +# There may be other macros in mdb.c of interest. You should +# read mdb.c before changing any of them. +# +CC = gcc +W = -W -Wall -Wno-unused-parameter -Wbad-function-cast -Wuninitialized +THREADS = -pthread +OPT = -O2 -g +CFLAGS = $(THREADS) $(OPT) $(W) $(XCFLAGS) +LDLIBS = +SOLIBS = +prefix = /usr/local + +######################################################################## + +IHDRS = lmdb.h +ILIBS = liblmdb.a liblmdb.so +IPROGS = mdb_stat mdb_copy mdb_dump mdb_load +IDOCS = mdb_stat.1 mdb_copy.1 mdb_dump.1 mdb_load.1 +PROGS = $(IPROGS) mtest mtest2 mtest3 mtest4 mtest5 +all: $(ILIBS) $(PROGS) + +install: $(ILIBS) $(IPROGS) $(IHDRS) + for f in $(IPROGS); do cp $$f $(DESTDIR)$(prefix)/bin; done + for f in $(ILIBS); do cp $$f $(DESTDIR)$(prefix)/lib; done + for f in $(IHDRS); do cp $$f $(DESTDIR)$(prefix)/include; done + for f in $(IDOCS); do cp $$f $(DESTDIR)$(prefix)/man/man1; done + +clean: + rm -rf $(PROGS) *.[ao] *.[ls]o *~ testdb + +test: all + rm -rf testdb && mkdir testdb + ./mtest && ./mdb_stat testdb + +liblmdb.a: mdb.o midl.o + ar rs $@ mdb.o midl.o + +liblmdb.so: mdb.lo midl.lo +# $(CC) $(LDFLAGS) -pthread -shared -Wl,-Bsymbolic -o $@ mdb.o midl.o $(SOLIBS) + $(CC) $(LDFLAGS) -pthread -shared -o $@ mdb.lo midl.lo $(SOLIBS) + +mdb_stat: mdb_stat.o liblmdb.a +mdb_copy: mdb_copy.o liblmdb.a +mdb_dump: mdb_dump.o liblmdb.a +mdb_load: mdb_load.o liblmdb.a +mtest: mtest.o liblmdb.a +mtest2: mtest2.o liblmdb.a +mtest3: mtest3.o liblmdb.a +mtest4: mtest4.o liblmdb.a +mtest5: mtest5.o liblmdb.a +mtest6: mtest6.o liblmdb.a + +mdb.o: mdb.c lmdb.h midl.h + $(CC) $(CFLAGS) $(CPPFLAGS) -c mdb.c + +midl.o: midl.c midl.h + $(CC) $(CFLAGS) $(CPPFLAGS) -c midl.c + +mdb.lo: mdb.c lmdb.h midl.h + $(CC) $(CFLAGS) -fPIC $(CPPFLAGS) -c mdb.c -o $@ + +midl.lo: midl.c midl.h + $(CC) $(CFLAGS) -fPIC $(CPPFLAGS) -c midl.c -o $@ + +%: %.o + $(CC) $(CFLAGS) $(LDFLAGS) $^ $(LDLIBS) -o $@ + +%.o: %.c lmdb.h + $(CC) $(CFLAGS) $(CPPFLAGS) -c $< + +COV_FLAGS=-fprofile-arcs -ftest-coverage +COV_OBJS=xmdb.o xmidl.o + +coverage: xmtest + for i in mtest*.c [0-9]*.c; do j=`basename \$$i .c`; $(MAKE) $$j.o; \ + gcc -o x$$j $$j.o $(COV_OBJS) -pthread $(COV_FLAGS); \ + rm -rf testdb; mkdir testdb; ./x$$j; done + gcov xmdb.c + gcov xmidl.c + +xmtest: mtest.o xmdb.o xmidl.o + gcc -o xmtest mtest.o xmdb.o xmidl.o -pthread $(COV_FLAGS) + +xmdb.o: mdb.c lmdb.h midl.h + $(CC) $(CFLAGS) -fPIC $(CPPFLAGS) -O0 $(COV_FLAGS) -c mdb.c -o $@ + +xmidl.o: midl.c midl.h + $(CC) $(CFLAGS) -fPIC $(CPPFLAGS) -O0 $(COV_FLAGS) -c midl.c -o $@ diff --git a/external/db_drivers/liblmdb64/lmdb.h b/external/db_drivers/liblmdb64/lmdb.h new file mode 100644 index 000000000..ebfbc5dc8 --- /dev/null +++ b/external/db_drivers/liblmdb64/lmdb.h @@ -0,0 +1,1573 @@ +/** @file lmdb.h + * @brief Lightning memory-mapped database library + * + * @mainpage Lightning Memory-Mapped Database Manager (LMDB) + * + * @section intro_sec Introduction + * LMDB is a Btree-based database management library modeled loosely on the + * BerkeleyDB API, but much simplified. The entire database is exposed + * in a memory map, and all data fetches return data directly + * from the mapped memory, so no malloc's or memcpy's occur during + * data fetches. As such, the library is extremely simple because it + * requires no page caching layer of its own, and it is extremely high + * performance and memory-efficient. It is also fully transactional with + * full ACID semantics, and when the memory map is read-only, the + * database integrity cannot be corrupted by stray pointer writes from + * application code. + * + * The library is fully thread-aware and supports concurrent read/write + * access from multiple processes and threads. Data pages use a copy-on- + * write strategy so no active data pages are ever overwritten, which + * also provides resistance to corruption and eliminates the need of any + * special recovery procedures after a system crash. Writes are fully + * serialized; only one write transaction may be active at a time, which + * guarantees that writers can never deadlock. The database structure is + * multi-versioned so readers run with no locks; writers cannot block + * readers, and readers don't block writers. + * + * Unlike other well-known database mechanisms which use either write-ahead + * transaction logs or append-only data writes, LMDB requires no maintenance + * during operation. Both write-ahead loggers and append-only databases + * require periodic checkpointing and/or compaction of their log or database + * files otherwise they grow without bound. LMDB tracks free pages within + * the database and re-uses them for new write operations, so the database + * size does not grow without bound in normal use. + * + * The memory map can be used as a read-only or read-write map. It is + * read-only by default as this provides total immunity to corruption. + * Using read-write mode offers much higher write performance, but adds + * the possibility for stray application writes thru pointers to silently + * corrupt the database. Of course if your application code is known to + * be bug-free (...) then this is not an issue. + * + * @section caveats_sec Caveats + * Troubleshooting the lock file, plus semaphores on BSD systems: + * + * - A broken lockfile can cause sync issues. + * Stale reader transactions left behind by an aborted program + * cause further writes to grow the database quickly, and + * stale locks can block further operation. + * + * Fix: Check for stale readers periodically, using the + * #mdb_reader_check function or the \ref mdb_stat_1 "mdb_stat" tool. + * Stale writers will be cleared automatically on most systems: + * - Windows - automatic + * - BSD, systems using SysV semaphores - automatic + * - Linux, systems using POSIX mutexes with Robust option - automatic + * Otherwise just make all programs using the database close it; + * the lockfile is always reset on first open of the environment. + * + * - On BSD systems or others configured with MDB_USE_SYSV_SEM, + * startup can fail due to semaphores owned by another userid. + * + * Fix: Open and close the database as the user which owns the + * semaphores (likely last user) or as root, while no other + * process is using the database. + * + * Restrictions/caveats (in addition to those listed for some functions): + * + * - Only the database owner should normally use the database on + * BSD systems or when otherwise configured with MDB_USE_POSIX_SEM. + * Multiple users can cause startup to fail later, as noted above. + * + * - There is normally no pure read-only mode, since readers need write + * access to locks and lock file. Exceptions: On read-only filesystems + * or with the #MDB_NOLOCK flag described under #mdb_env_open(). + * + * - By default, in versions before 0.9.10, unused portions of the data + * file might receive garbage data from memory freed by other code. + * (This does not happen when using the #MDB_WRITEMAP flag.) As of + * 0.9.10 the default behavior is to initialize such memory before + * writing to the data file. Since there may be a slight performance + * cost due to this initialization, applications may disable it using + * the #MDB_NOMEMINIT flag. Applications handling sensitive data + * which must not be written should not use this flag. This flag is + * irrelevant when using #MDB_WRITEMAP. + * + * - A thread can only use one transaction at a time, plus any child + * transactions. Each transaction belongs to one thread. See below. + * The #MDB_NOTLS flag changes this for read-only transactions. + * + * - Use an MDB_env* in the process which opened it, without fork()ing. + * + * - Do not have open an LMDB database twice in the same process at + * the same time. Not even from a plain open() call - close()ing it + * breaks flock() advisory locking. + * + * - Avoid long-lived transactions. Read transactions prevent + * reuse of pages freed by newer write transactions, thus the + * database can grow quickly. Write transactions prevent + * other write transactions, since writes are serialized. + * + * - Avoid suspending a process with active transactions. These + * would then be "long-lived" as above. Also read transactions + * suspended when writers commit could sometimes see wrong data. + * + * ...when several processes can use a database concurrently: + * + * - Avoid aborting a process with an active transaction. + * The transaction becomes "long-lived" as above until a check + * for stale readers is performed or the lockfile is reset, + * since the process may not remove it from the lockfile. + * + * This does not apply to write transactions if the system clears + * stale writers, see above. + * + * - If you do that anyway, do a periodic check for stale readers. Or + * close the environment once in a while, so the lockfile can get reset. + * + * - Do not use LMDB databases on remote filesystems, even between + * processes on the same host. This breaks flock() on some OSes, + * possibly memory map sync, and certainly sync between programs + * on different hosts. + * + * - Opening a database can fail if another process is opening or + * closing it at exactly the same time. + * + * @author Howard Chu, Symas Corporation. + * + * @copyright Copyright 2011-2014 Howard Chu, Symas Corp. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>. + * + * @par Derived From: + * This code is derived from btree.c written by Martin Hedenfalk. + * + * Copyright (c) 2009, 2010 Martin Hedenfalk <martin@bzero.se> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifndef _LMDB_H_ +#define _LMDB_H_ + +#include <sys/types.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** Unix permissions for creating files, or dummy definition for Windows */ +#ifdef _MSC_VER +typedef int mdb_mode_t; +#else +typedef mode_t mdb_mode_t; +#endif + +/** An abstraction for a file handle. + * On POSIX systems file handles are small integers. On Windows + * they're opaque pointers. + */ +#ifdef _WIN32 +typedef void *mdb_filehandle_t; +#else +typedef int mdb_filehandle_t; +#endif + +/** @defgroup mdb LMDB API + * @{ + * @brief OpenLDAP Lightning Memory-Mapped Database Manager + */ +/** @defgroup Version Version Macros + * @{ + */ +/** Library major version */ +#define MDB_VERSION_MAJOR 0 +/** Library minor version */ +#define MDB_VERSION_MINOR 9 +/** Library patch version */ +#define MDB_VERSION_PATCH 14 + +/** Combine args a,b,c into a single integer for easy version comparisons */ +#define MDB_VERINT(a,b,c) (((a) << 24) | ((b) << 16) | (c)) + +/** The full library version as a single integer */ +#define MDB_VERSION_FULL \ + MDB_VERINT(MDB_VERSION_MAJOR,MDB_VERSION_MINOR,MDB_VERSION_PATCH) + +/** The release date of this library version */ +#define MDB_VERSION_DATE "September 20, 2014" + +/** A stringifier for the version info */ +#define MDB_VERSTR(a,b,c,d) "LMDB " #a "." #b "." #c ": (" d ")" + +/** A helper for the stringifier macro */ +#define MDB_VERFOO(a,b,c,d) MDB_VERSTR(a,b,c,d) + +/** The full library version as a C string */ +#define MDB_VERSION_STRING \ + MDB_VERFOO(MDB_VERSION_MAJOR,MDB_VERSION_MINOR,MDB_VERSION_PATCH,MDB_VERSION_DATE) +/** @} */ + +/** @brief Opaque structure for a database environment. + * + * A DB environment supports multiple databases, all residing in the same + * shared-memory map. + */ +typedef struct MDB_env MDB_env; + +/** @brief Opaque structure for a transaction handle. + * + * All database operations require a transaction handle. Transactions may be + * read-only or read-write. + */ +typedef struct MDB_txn MDB_txn; + +/** @brief A handle for an individual database in the DB environment. */ +typedef unsigned int MDB_dbi; + +/** @brief Opaque structure for navigating through a database */ +typedef struct MDB_cursor MDB_cursor; + +/** @brief Generic structure used for passing keys and data in and out + * of the database. + * + * Values returned from the database are valid only until a subsequent + * update operation, or the end of the transaction. Do not modify or + * free them, they commonly point into the database itself. + * + * Key sizes must be between 1 and #mdb_env_get_maxkeysize() inclusive. + * The same applies to data sizes in databases with the #MDB_DUPSORT flag. + * Other data items can in theory be from 0 to 0xffffffff bytes long. + */ +typedef struct MDB_val { + size_t mv_size; /**< size of the data item */ + void *mv_data; /**< address of the data item */ +} MDB_val; + +/** @brief A callback function used to compare two keys in a database */ +typedef int (MDB_cmp_func)(const MDB_val *a, const MDB_val *b); + +/** @brief A callback function used to relocate a position-dependent data item + * in a fixed-address database. + * + * The \b newptr gives the item's desired address in + * the memory map, and \b oldptr gives its previous address. The item's actual + * data resides at the address in \b item. This callback is expected to walk + * through the fields of the record in \b item and modify any + * values based at the \b oldptr address to be relative to the \b newptr address. + * @param[in,out] item The item that is to be relocated. + * @param[in] oldptr The previous address. + * @param[in] newptr The new address to relocate to. + * @param[in] relctx An application-provided context, set by #mdb_set_relctx(). + * @todo This feature is currently unimplemented. + */ +typedef void (MDB_rel_func)(MDB_val *item, void *oldptr, void *newptr, void *relctx); + +/** @defgroup mdb_env Environment Flags + * @{ + */ + /** mmap at a fixed address (experimental) */ +#define MDB_FIXEDMAP 0x01 + /** no environment directory */ +#define MDB_NOSUBDIR 0x4000 + /** don't fsync after commit */ +#define MDB_NOSYNC 0x10000 + /** read only */ +#define MDB_RDONLY 0x20000 + /** don't fsync metapage after commit */ +#define MDB_NOMETASYNC 0x40000 + /** use writable mmap */ +#define MDB_WRITEMAP 0x80000 + /** use asynchronous msync when #MDB_WRITEMAP is used */ +#define MDB_MAPASYNC 0x100000 + /** tie reader locktable slots to #MDB_txn objects instead of to threads */ +#define MDB_NOTLS 0x200000 + /** don't do any locking, caller must manage their own locks */ +#define MDB_NOLOCK 0x400000 + /** don't do readahead (no effect on Windows) */ +#define MDB_NORDAHEAD 0x800000 + /** don't initialize malloc'd memory before writing to datafile */ +#define MDB_NOMEMINIT 0x1000000 +/** @} */ + +/** @defgroup mdb_dbi_open Database Flags + * @{ + */ + /** use reverse string keys */ +#define MDB_REVERSEKEY 0x02 + /** use sorted duplicates */ +#define MDB_DUPSORT 0x04 + /** numeric keys in native byte order. + * The keys must all be of the same size. */ +#define MDB_INTEGERKEY 0x08 + /** with #MDB_DUPSORT, sorted dup items have fixed size */ +#define MDB_DUPFIXED 0x10 + /** with #MDB_DUPSORT, dups are numeric in native byte order */ +#define MDB_INTEGERDUP 0x20 + /** with #MDB_DUPSORT, use reverse string dups */ +#define MDB_REVERSEDUP 0x40 + /** create DB if not already existing */ +#define MDB_CREATE 0x40000 +/** @} */ + +/** @defgroup mdb_put Write Flags + * @{ + */ +/** For put: Don't write if the key already exists. */ +#define MDB_NOOVERWRITE 0x10 +/** Only for #MDB_DUPSORT<br> + * For put: don't write if the key and data pair already exist.<br> + * For mdb_cursor_del: remove all duplicate data items. + */ +#define MDB_NODUPDATA 0x20 +/** For mdb_cursor_put: overwrite the current key/data pair */ +#define MDB_CURRENT 0x40 +/** For put: Just reserve space for data, don't copy it. Return a + * pointer to the reserved space. + */ +#define MDB_RESERVE 0x10000 +/** Data is being appended, don't split full pages. */ +#define MDB_APPEND 0x20000 +/** Duplicate data is being appended, don't split full pages. */ +#define MDB_APPENDDUP 0x40000 +/** Store multiple data items in one call. Only for #MDB_DUPFIXED. */ +#define MDB_MULTIPLE 0x80000 +/* @} */ + +/** @defgroup mdb_copy Copy Flags + * @{ + */ +/** Compacting copy: Omit free space from copy, and renumber all + * pages sequentially. + */ +#define MDB_CP_COMPACT 0x01 +/* @} */ + +/** @brief Cursor Get operations. + * + * This is the set of all operations for retrieving data + * using a cursor. + */ +typedef enum MDB_cursor_op { + MDB_FIRST, /**< Position at first key/data item */ + MDB_FIRST_DUP, /**< Position at first data item of current key. + Only for #MDB_DUPSORT */ + MDB_GET_BOTH, /**< Position at key/data pair. Only for #MDB_DUPSORT */ + MDB_GET_BOTH_RANGE, /**< position at key, nearest data. Only for #MDB_DUPSORT */ + MDB_GET_CURRENT, /**< Return key/data at current cursor position */ + MDB_GET_MULTIPLE, /**< Return key and up to a page of duplicate data items + from current cursor position. Move cursor to prepare + for #MDB_NEXT_MULTIPLE. Only for #MDB_DUPFIXED */ + MDB_LAST, /**< Position at last key/data item */ + MDB_LAST_DUP, /**< Position at last data item of current key. + Only for #MDB_DUPSORT */ + MDB_NEXT, /**< Position at next data item */ + MDB_NEXT_DUP, /**< Position at next data item of current key. + Only for #MDB_DUPSORT */ + MDB_NEXT_MULTIPLE, /**< Return key and up to a page of duplicate data items + from next cursor position. Move cursor to prepare + for #MDB_NEXT_MULTIPLE. Only for #MDB_DUPFIXED */ + MDB_NEXT_NODUP, /**< Position at first data item of next key */ + MDB_PREV, /**< Position at previous data item */ + MDB_PREV_DUP, /**< Position at previous data item of current key. + Only for #MDB_DUPSORT */ + MDB_PREV_NODUP, /**< Position at last data item of previous key */ + MDB_SET, /**< Position at specified key */ + MDB_SET_KEY, /**< Position at specified key, return key + data */ + MDB_SET_RANGE /**< Position at first key greater than or equal to specified key. */ +} MDB_cursor_op; + +/** @defgroup errors Return Codes + * + * BerkeleyDB uses -30800 to -30999, we'll go under them + * @{ + */ + /** Successful result */ +#define MDB_SUCCESS 0 + /** key/data pair already exists */ +#define MDB_KEYEXIST (-30799) + /** key/data pair not found (EOF) */ +#define MDB_NOTFOUND (-30798) + /** Requested page not found - this usually indicates corruption */ +#define MDB_PAGE_NOTFOUND (-30797) + /** Located page was wrong type */ +#define MDB_CORRUPTED (-30796) + /** Update of meta page failed or environment had fatal error */ +#define MDB_PANIC (-30795) + /** Environment version mismatch */ +#define MDB_VERSION_MISMATCH (-30794) + /** File is not a valid LMDB file */ +#define MDB_INVALID (-30793) + /** Environment mapsize reached */ +#define MDB_MAP_FULL (-30792) + /** Environment maxdbs reached */ +#define MDB_DBS_FULL (-30791) + /** Environment maxreaders reached */ +#define MDB_READERS_FULL (-30790) + /** Too many TLS keys in use - Windows only */ +#define MDB_TLS_FULL (-30789) + /** Txn has too many dirty pages */ +#define MDB_TXN_FULL (-30788) + /** Cursor stack too deep - internal error */ +#define MDB_CURSOR_FULL (-30787) + /** Page has not enough space - internal error */ +#define MDB_PAGE_FULL (-30786) + /** Database contents grew beyond environment mapsize */ +#define MDB_MAP_RESIZED (-30785) + /** MDB_INCOMPATIBLE: Operation and DB incompatible, or DB flags changed */ +#define MDB_INCOMPATIBLE (-30784) + /** Invalid reuse of reader locktable slot */ +#define MDB_BAD_RSLOT (-30783) + /** Transaction cannot recover - it must be aborted */ +#define MDB_BAD_TXN (-30782) + /** Unsupported size of key/DB name/data, or wrong DUPFIXED size */ +#define MDB_BAD_VALSIZE (-30781) + /** The specified DBI was changed unexpectedly */ +#define MDB_BAD_DBI (-30780) + /** The last defined error code */ +#define MDB_LAST_ERRCODE MDB_BAD_DBI +/** @} */ + +/** @brief Statistics for a database in the environment */ +typedef struct MDB_stat { + unsigned int ms_psize; /**< Size of a database page. + This is currently the same for all databases. */ + unsigned int ms_depth; /**< Depth (height) of the B-tree */ + size_t ms_branch_pages; /**< Number of internal (non-leaf) pages */ + size_t ms_leaf_pages; /**< Number of leaf pages */ + size_t ms_overflow_pages; /**< Number of overflow pages */ + size_t ms_entries; /**< Number of data items */ +} MDB_stat; + +/** @brief Information about the environment */ +typedef struct MDB_envinfo { + void *me_mapaddr; /**< Address of map, if fixed */ + size_t me_mapsize; /**< Size of the data memory map */ + size_t me_last_pgno; /**< ID of the last used page */ + size_t me_last_txnid; /**< ID of the last committed transaction */ + unsigned int me_maxreaders; /**< max reader slots in the environment */ + unsigned int me_numreaders; /**< max reader slots used in the environment */ +} MDB_envinfo; + + /** @brief Return the LMDB library version information. + * + * @param[out] major if non-NULL, the library major version number is copied here + * @param[out] minor if non-NULL, the library minor version number is copied here + * @param[out] patch if non-NULL, the library patch version number is copied here + * @retval "version string" The library version as a string + */ +char *mdb_version(int *major, int *minor, int *patch); + + /** @brief Return a string describing a given error code. + * + * This function is a superset of the ANSI C X3.159-1989 (ANSI C) strerror(3) + * function. If the error code is greater than or equal to 0, then the string + * returned by the system function strerror(3) is returned. If the error code + * is less than 0, an error string corresponding to the LMDB library error is + * returned. See @ref errors for a list of LMDB-specific error codes. + * @param[in] err The error code + * @retval "error message" The description of the error + */ +char *mdb_strerror(int err); + + /** @brief Create an LMDB environment handle. + * + * This function allocates memory for a #MDB_env structure. To release + * the allocated memory and discard the handle, call #mdb_env_close(). + * Before the handle may be used, it must be opened using #mdb_env_open(). + * Various other options may also need to be set before opening the handle, + * e.g. #mdb_env_set_mapsize(), #mdb_env_set_maxreaders(), #mdb_env_set_maxdbs(), + * depending on usage requirements. + * @param[out] env The address where the new handle will be stored + * @return A non-zero error value on failure and 0 on success. + */ +int mdb_env_create(MDB_env **env); + + /** @brief Open an environment handle. + * + * If this function fails, #mdb_env_close() must be called to discard the #MDB_env handle. + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[in] path The directory in which the database files reside. This + * directory must already exist and be writable. + * @param[in] flags Special options for this environment. This parameter + * must be set to 0 or by bitwise OR'ing together one or more of the + * values described here. + * Flags set by mdb_env_set_flags() are also used. + * <ul> + * <li>#MDB_FIXEDMAP + * use a fixed address for the mmap region. This flag must be specified + * when creating the environment, and is stored persistently in the environment. + * If successful, the memory map will always reside at the same virtual address + * and pointers used to reference data items in the database will be constant + * across multiple invocations. This option may not always work, depending on + * how the operating system has allocated memory to shared libraries and other uses. + * The feature is highly experimental. + * <li>#MDB_NOSUBDIR + * By default, LMDB creates its environment in a directory whose + * pathname is given in \b path, and creates its data and lock files + * under that directory. With this option, \b path is used as-is for + * the database main data file. The database lock file is the \b path + * with "-lock" appended. + * <li>#MDB_RDONLY + * Open the environment in read-only mode. No write operations will be + * allowed. LMDB will still modify the lock file - except on read-only + * filesystems, where LMDB does not use locks. + * <li>#MDB_WRITEMAP + * Use a writeable memory map unless MDB_RDONLY is set. This is faster + * and uses fewer mallocs, but loses protection from application bugs + * like wild pointer writes and other bad updates into the database. + * Incompatible with nested transactions. + * Do not mix processes with and without MDB_WRITEMAP on the same + * environment. This can defeat durability (#mdb_env_sync etc). + * <li>#MDB_NOMETASYNC + * Flush system buffers to disk only once per transaction, omit the + * metadata flush. Defer that until the system flushes files to disk, + * or next non-MDB_RDONLY commit or #mdb_env_sync(). This optimization + * maintains database integrity, but a system crash may undo the last + * committed transaction. I.e. it preserves the ACI (atomicity, + * consistency, isolation) but not D (durability) database property. + * This flag may be changed at any time using #mdb_env_set_flags(). + * <li>#MDB_NOSYNC + * Don't flush system buffers to disk when committing a transaction. + * This optimization means a system crash can corrupt the database or + * lose the last transactions if buffers are not yet flushed to disk. + * The risk is governed by how often the system flushes dirty buffers + * to disk and how often #mdb_env_sync() is called. However, if the + * filesystem preserves write order and the #MDB_WRITEMAP flag is not + * used, transactions exhibit ACI (atomicity, consistency, isolation) + * properties and only lose D (durability). I.e. database integrity + * is maintained, but a system crash may undo the final transactions. + * Note that (#MDB_NOSYNC | #MDB_WRITEMAP) leaves the system with no + * hint for when to write transactions to disk, unless #mdb_env_sync() + * is called. (#MDB_MAPASYNC | #MDB_WRITEMAP) may be preferable. + * This flag may be changed at any time using #mdb_env_set_flags(). + * <li>#MDB_MAPASYNC + * When using #MDB_WRITEMAP, use asynchronous flushes to disk. + * As with #MDB_NOSYNC, a system crash can then corrupt the + * database or lose the last transactions. Calling #mdb_env_sync() + * ensures on-disk database integrity until next commit. + * This flag may be changed at any time using #mdb_env_set_flags(). + * <li>#MDB_NOTLS + * Don't use Thread-Local Storage. Tie reader locktable slots to + * #MDB_txn objects instead of to threads. I.e. #mdb_txn_reset() keeps + * the slot reseved for the #MDB_txn object. A thread may use parallel + * read-only transactions. A read-only transaction may span threads if + * the user synchronizes its use. Applications that multiplex many + * user threads over individual OS threads need this option. Such an + * application must also serialize the write transactions in an OS + * thread, since LMDB's write locking is unaware of the user threads. + * <li>#MDB_NOLOCK + * Don't do any locking. If concurrent access is anticipated, the + * caller must manage all concurrency itself. For proper operation + * the caller must enforce single-writer semantics, and must ensure + * that no readers are using old transactions while a writer is + * active. The simplest approach is to use an exclusive lock so that + * no readers may be active at all when a writer begins. + * <li>#MDB_NORDAHEAD + * Turn off readahead. Most operating systems perform readahead on + * read requests by default. This option turns it off if the OS + * supports it. Turning it off may help random read performance + * when the DB is larger than RAM and system RAM is full. + * The option is not implemented on Windows. + * <li>#MDB_NOMEMINIT + * Don't initialize malloc'd memory before writing to unused spaces + * in the data file. By default, memory for pages written to the data + * file is obtained using malloc. While these pages may be reused in + * subsequent transactions, freshly malloc'd pages will be initialized + * to zeroes before use. This avoids persisting leftover data from other + * code (that used the heap and subsequently freed the memory) into the + * data file. Note that many other system libraries may allocate + * and free memory from the heap for arbitrary uses. E.g., stdio may + * use the heap for file I/O buffers. This initialization step has a + * modest performance cost so some applications may want to disable + * it using this flag. This option can be a problem for applications + * which handle sensitive data like passwords, and it makes memory + * checkers like Valgrind noisy. This flag is not needed with #MDB_WRITEMAP, + * which writes directly to the mmap instead of using malloc for pages. The + * initialization is also skipped if #MDB_RESERVE is used; the + * caller is expected to overwrite all of the memory that was + * reserved in that case. + * This flag may be changed at any time using #mdb_env_set_flags(). + * </ul> + * @param[in] mode The UNIX permissions to set on created files. This parameter + * is ignored on Windows. + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>#MDB_VERSION_MISMATCH - the version of the LMDB library doesn't match the + * version that created the database environment. + * <li>#MDB_INVALID - the environment file headers are corrupted. + * <li>ENOENT - the directory specified by the path parameter doesn't exist. + * <li>EACCES - the user didn't have permission to access the environment files. + * <li>EAGAIN - the environment was locked by another process. + * </ul> + */ +int mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode); + + /** @brief Copy an LMDB environment to the specified path. + * + * This function may be used to make a backup of an existing environment. + * No lockfile is created, since it gets recreated at need. + * @note This call can trigger significant file size growth if run in + * parallel with write transactions, because it employs a read-only + * transaction. See long-lived transactions under @ref caveats_sec. + * @param[in] env An environment handle returned by #mdb_env_create(). It + * must have already been opened successfully. + * @param[in] path The directory in which the copy will reside. This + * directory must already exist and be writable but must otherwise be + * empty. + * @return A non-zero error value on failure and 0 on success. + */ +int mdb_env_copy(MDB_env *env, const char *path); + + /** @brief Copy an LMDB environment to the specified file descriptor. + * + * This function may be used to make a backup of an existing environment. + * No lockfile is created, since it gets recreated at need. + * @note This call can trigger significant file size growth if run in + * parallel with write transactions, because it employs a read-only + * transaction. See long-lived transactions under @ref caveats_sec. + * @param[in] env An environment handle returned by #mdb_env_create(). It + * must have already been opened successfully. + * @param[in] fd The filedescriptor to write the copy to. It must + * have already been opened for Write access. + * @return A non-zero error value on failure and 0 on success. + */ +int mdb_env_copyfd(MDB_env *env, mdb_filehandle_t fd); + + /** @brief Copy an LMDB environment to the specified path, with options. + * + * This function may be used to make a backup of an existing environment. + * No lockfile is created, since it gets recreated at need. + * @note This call can trigger significant file size growth if run in + * parallel with write transactions, because it employs a read-only + * transaction. See long-lived transactions under @ref caveats_sec. + * @param[in] env An environment handle returned by #mdb_env_create(). It + * must have already been opened successfully. + * @param[in] path The directory in which the copy will reside. This + * directory must already exist and be writable but must otherwise be + * empty. + * @param[in] flags Special options for this operation. This parameter + * must be set to 0 or by bitwise OR'ing together one or more of the + * values described here. + * <ul> + * <li>#MDB_CP_COMPACT - Perform compaction while copying: omit free + * pages and sequentially renumber all pages in output. This option + * consumes more CPU and runs more slowly than the default. + * </ul> + * @return A non-zero error value on failure and 0 on success. + */ +int mdb_env_copy2(MDB_env *env, const char *path, unsigned int flags); + + /** @brief Copy an LMDB environment to the specified file descriptor, + * with options. + * + * This function may be used to make a backup of an existing environment. + * No lockfile is created, since it gets recreated at need. See + * #mdb_env_copy2() for further details. + * @note This call can trigger significant file size growth if run in + * parallel with write transactions, because it employs a read-only + * transaction. See long-lived transactions under @ref caveats_sec. + * @param[in] env An environment handle returned by #mdb_env_create(). It + * must have already been opened successfully. + * @param[in] fd The filedescriptor to write the copy to. It must + * have already been opened for Write access. + * @param[in] flags Special options for this operation. + * See #mdb_env_copy2() for options. + * @return A non-zero error value on failure and 0 on success. + */ +int mdb_env_copyfd2(MDB_env *env, mdb_filehandle_t fd, unsigned int flags); + + /** @brief Return statistics about the LMDB environment. + * + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[out] stat The address of an #MDB_stat structure + * where the statistics will be copied + */ +int mdb_env_stat(MDB_env *env, MDB_stat *stat); + + /** @brief Return information about the LMDB environment. + * + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[out] stat The address of an #MDB_envinfo structure + * where the information will be copied + */ +int mdb_env_info(MDB_env *env, MDB_envinfo *stat); + + /** @brief Flush the data buffers to disk. + * + * Data is always written to disk when #mdb_txn_commit() is called, + * but the operating system may keep it buffered. LMDB always flushes + * the OS buffers upon commit as well, unless the environment was + * opened with #MDB_NOSYNC or in part #MDB_NOMETASYNC. This call is + * not valid if the environment was opened with #MDB_RDONLY. + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[in] force If non-zero, force a synchronous flush. Otherwise + * if the environment has the #MDB_NOSYNC flag set the flushes + * will be omitted, and with #MDB_MAPASYNC they will be asynchronous. + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>EACCES - the environment is read-only. + * <li>EINVAL - an invalid parameter was specified. + * <li>EIO - an error occurred during synchronization. + * </ul> + */ +int mdb_env_sync(MDB_env *env, int force); + + /** @brief Close the environment and release the memory map. + * + * Only a single thread may call this function. All transactions, databases, + * and cursors must already be closed before calling this function. Attempts to + * use any such handles after calling this function will cause a SIGSEGV. + * The environment handle will be freed and must not be used again after this call. + * @param[in] env An environment handle returned by #mdb_env_create() + */ +void mdb_env_close(MDB_env *env); + + /** @brief Set environment flags. + * + * This may be used to set some flags in addition to those from + * #mdb_env_open(), or to unset these flags. If several threads + * change the flags at the same time, the result is undefined. + * Most flags cannot be changed after #mdb_env_open(). + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[in] flags The flags to change, bitwise OR'ed together + * @param[in] onoff A non-zero value sets the flags, zero clears them. + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>EINVAL - an invalid parameter was specified. + * </ul> + */ +int mdb_env_set_flags(MDB_env *env, unsigned int flags, int onoff); + + /** @brief Get environment flags. + * + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[out] flags The address of an integer to store the flags + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>EINVAL - an invalid parameter was specified. + * </ul> + */ +int mdb_env_get_flags(MDB_env *env, unsigned int *flags); + + /** @brief Return the path that was used in #mdb_env_open(). + * + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[out] path Address of a string pointer to contain the path. This + * is the actual string in the environment, not a copy. It should not be + * altered in any way. + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>EINVAL - an invalid parameter was specified. + * </ul> + */ +int mdb_env_get_path(MDB_env *env, const char **path); + + /** @brief Return the filedescriptor for the given environment. + * + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[out] fd Address of a mdb_filehandle_t to contain the descriptor. + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>EINVAL - an invalid parameter was specified. + * </ul> + */ +int mdb_env_get_fd(MDB_env *env, mdb_filehandle_t *fd); + + /** @brief Set the size of the memory map to use for this environment. + * + * The size should be a multiple of the OS page size. The default is + * 10485760 bytes. The size of the memory map is also the maximum size + * of the database. The value should be chosen as large as possible, + * to accommodate future growth of the database. + * This function should be called after #mdb_env_create() and before #mdb_env_open(). + * It may be called at later times if no transactions are active in + * this process. Note that the library does not check for this condition, + * the caller must ensure it explicitly. + * + * The new size takes effect immediately for the current process but + * will not be persisted to any others until a write transaction has been + * committed by the current process. Also, only mapsize increases are + * persisted into the environment. + * + * If the mapsize is increased by another process, and data has grown + * beyond the range of the current mapsize, #mdb_txn_begin() will + * return #MDB_MAP_RESIZED. This function may be called with a size + * of zero to adopt the new size. + * + * Any attempt to set a size smaller than the space already consumed + * by the environment will be silently changed to the current size of the used space. + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[in] size The size in bytes + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>EINVAL - an invalid parameter was specified, or the environment has + * an active write transaction. + * </ul> + */ +int mdb_env_set_mapsize(MDB_env *env, size_t size); + + /** @brief Set the maximum number of threads/reader slots for the environment. + * + * This defines the number of slots in the lock table that is used to track readers in the + * the environment. The default is 126. + * Starting a read-only transaction normally ties a lock table slot to the + * current thread until the environment closes or the thread exits. If + * MDB_NOTLS is in use, #mdb_txn_begin() instead ties the slot to the + * MDB_txn object until it or the #MDB_env object is destroyed. + * This function may only be called after #mdb_env_create() and before #mdb_env_open(). + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[in] readers The maximum number of reader lock table slots + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>EINVAL - an invalid parameter was specified, or the environment is already open. + * </ul> + */ +int mdb_env_set_maxreaders(MDB_env *env, unsigned int readers); + + /** @brief Get the maximum number of threads/reader slots for the environment. + * + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[out] readers Address of an integer to store the number of readers + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>EINVAL - an invalid parameter was specified. + * </ul> + */ +int mdb_env_get_maxreaders(MDB_env *env, unsigned int *readers); + + /** @brief Set the maximum number of named databases for the environment. + * + * This function is only needed if multiple databases will be used in the + * environment. Simpler applications that use the environment as a single + * unnamed database can ignore this option. + * This function may only be called after #mdb_env_create() and before #mdb_env_open(). + * + * Currently a moderate number of slots are cheap but a huge number gets + * expensive: 7-120 words per transaction, and every #mdb_dbi_open() + * does a linear search of the opened slots. + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[in] dbs The maximum number of databases + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>EINVAL - an invalid parameter was specified, or the environment is already open. + * </ul> + */ +int mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs); + + /** @brief Get the maximum size of keys and #MDB_DUPSORT data we can write. + * + * Depends on the compile-time constant #MDB_MAXKEYSIZE. Default 511. + * See @ref MDB_val. + * @param[in] env An environment handle returned by #mdb_env_create() + * @return The maximum size of a key we can write + */ +int mdb_env_get_maxkeysize(MDB_env *env); + + /** @brief Set application information associated with the #MDB_env. + * + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[in] ctx An arbitrary pointer for whatever the application needs. + * @return A non-zero error value on failure and 0 on success. + */ +int mdb_env_set_userctx(MDB_env *env, void *ctx); + + /** @brief Get the application information associated with the #MDB_env. + * + * @param[in] env An environment handle returned by #mdb_env_create() + * @return The pointer set by #mdb_env_set_userctx(). + */ +void *mdb_env_get_userctx(MDB_env *env); + + /** @brief A callback function for most LMDB assert() failures, + * called before printing the message and aborting. + * + * @param[in] env An environment handle returned by #mdb_env_create(). + * @param[in] msg The assertion message, not including newline. + */ +typedef void MDB_assert_func(MDB_env *env, const char *msg); + + /** Set or reset the assert() callback of the environment. + * Disabled if liblmdb is buillt with NDEBUG. + * @note This hack should become obsolete as lmdb's error handling matures. + * @param[in] env An environment handle returned by #mdb_env_create(). + * @param[in] func An #MDB_assert_func function, or 0. + * @return A non-zero error value on failure and 0 on success. + */ +int mdb_env_set_assert(MDB_env *env, MDB_assert_func *func); + + /** @brief Create a transaction for use with the environment. + * + * The transaction handle may be discarded using #mdb_txn_abort() or #mdb_txn_commit(). + * @note A transaction and its cursors must only be used by a single + * thread, and a thread may only have a single transaction at a time. + * If #MDB_NOTLS is in use, this does not apply to read-only transactions. + * @note Cursors may not span transactions. + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[in] parent If this parameter is non-NULL, the new transaction + * will be a nested transaction, with the transaction indicated by \b parent + * as its parent. Transactions may be nested to any level. A parent + * transaction and its cursors may not issue any other operations than + * mdb_txn_commit and mdb_txn_abort while it has active child transactions. + * @param[in] flags Special options for this transaction. This parameter + * must be set to 0 or by bitwise OR'ing together one or more of the + * values described here. + * <ul> + * <li>#MDB_RDONLY + * This transaction will not perform any write operations. + * </ul> + * @param[out] txn Address where the new #MDB_txn handle will be stored + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>#MDB_PANIC - a fatal error occurred earlier and the environment + * must be shut down. + * <li>#MDB_MAP_RESIZED - another process wrote data beyond this MDB_env's + * mapsize and this environment's map must be resized as well. + * See #mdb_env_set_mapsize(). + * <li>#MDB_READERS_FULL - a read-only transaction was requested and + * the reader lock table is full. See #mdb_env_set_maxreaders(). + * <li>ENOMEM - out of memory. + * </ul> + */ +int mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **txn); + + /** @brief Returns the transaction's #MDB_env + * + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + */ +MDB_env *mdb_txn_env(MDB_txn *txn); + + /** @brief Return the transaction's ID. + * + * This returns the identifier associated with this transaction. For a + * read-only transaction, this corresponds to the snapshot being read; + * concurrent readers will frequently have the same transaction ID. + * + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @return A transaction ID, valid if input is an active transaction. + */ +size_t mdb_txn_id(MDB_txn *txn); + + /** @brief Commit all the operations of a transaction into the database. + * + * The transaction handle is freed. It and its cursors must not be used + * again after this call, except with #mdb_cursor_renew(). + * @note Earlier documentation incorrectly said all cursors would be freed. + * Only write-transactions free cursors. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>EINVAL - an invalid parameter was specified. + * <li>ENOSPC - no more disk space. + * <li>EIO - a low-level I/O error occurred while writing. + * <li>ENOMEM - out of memory. + * </ul> + */ +int mdb_txn_commit(MDB_txn *txn); + + /** @brief Abandon all the operations of the transaction instead of saving them. + * + * The transaction handle is freed. It and its cursors must not be used + * again after this call, except with #mdb_cursor_renew(). + * @note Earlier documentation incorrectly said all cursors would be freed. + * Only write-transactions free cursors. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + */ +void mdb_txn_abort(MDB_txn *txn); + + /** @brief Reset a read-only transaction. + * + * Abort the transaction like #mdb_txn_abort(), but keep the transaction + * handle. #mdb_txn_renew() may reuse the handle. This saves allocation + * overhead if the process will start a new read-only transaction soon, + * and also locking overhead if #MDB_NOTLS is in use. The reader table + * lock is released, but the table slot stays tied to its thread or + * #MDB_txn. Use mdb_txn_abort() to discard a reset handle, and to free + * its lock table slot if MDB_NOTLS is in use. + * Cursors opened within the transaction must not be used + * again after this call, except with #mdb_cursor_renew(). + * Reader locks generally don't interfere with writers, but they keep old + * versions of database pages allocated. Thus they prevent the old pages + * from being reused when writers commit new data, and so under heavy load + * the database size may grow much more rapidly than otherwise. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + */ +void mdb_txn_reset(MDB_txn *txn); + + /** @brief Renew a read-only transaction. + * + * This acquires a new reader lock for a transaction handle that had been + * released by #mdb_txn_reset(). It must be called before a reset transaction + * may be used again. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>#MDB_PANIC - a fatal error occurred earlier and the environment + * must be shut down. + * <li>EINVAL - an invalid parameter was specified. + * </ul> + */ +int mdb_txn_renew(MDB_txn *txn); + +/** Compat with version <= 0.9.4, avoid clash with libmdb from MDB Tools project */ +#define mdb_open(txn,name,flags,dbi) mdb_dbi_open(txn,name,flags,dbi) +/** Compat with version <= 0.9.4, avoid clash with libmdb from MDB Tools project */ +#define mdb_close(env,dbi) mdb_dbi_close(env,dbi) + + /** @brief Open a database in the environment. + * + * A database handle denotes the name and parameters of a database, + * independently of whether such a database exists. + * The database handle may be discarded by calling #mdb_dbi_close(). + * The old database handle is returned if the database was already open. + * The handle may only be closed once. + * The database handle will be private to the current transaction until + * the transaction is successfully committed. If the transaction is + * aborted the handle will be closed automatically. + * After a successful commit the + * handle will reside in the shared environment, and may be used + * by other transactions. This function must not be called from + * multiple concurrent transactions in the same process. A transaction + * that uses this function must finish (either commit or abort) before + * any other transaction in the process may use this function. + * + * To use named databases (with name != NULL), #mdb_env_set_maxdbs() + * must be called before opening the environment. Database names + * are kept as keys in the unnamed database. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] name The name of the database to open. If only a single + * database is needed in the environment, this value may be NULL. + * @param[in] flags Special options for this database. This parameter + * must be set to 0 or by bitwise OR'ing together one or more of the + * values described here. + * <ul> + * <li>#MDB_REVERSEKEY + * Keys are strings to be compared in reverse order, from the end + * of the strings to the beginning. By default, Keys are treated as strings and + * compared from beginning to end. + * <li>#MDB_DUPSORT + * Duplicate keys may be used in the database. (Or, from another perspective, + * keys may have multiple data items, stored in sorted order.) By default + * keys must be unique and may have only a single data item. + * <li>#MDB_INTEGERKEY + * Keys are binary integers in native byte order. Setting this option + * requires all keys to be the same size, typically sizeof(int) + * or sizeof(size_t). + * <li>#MDB_DUPFIXED + * This flag may only be used in combination with #MDB_DUPSORT. This option + * tells the library that the data items for this database are all the same + * size, which allows further optimizations in storage and retrieval. When + * all data items are the same size, the #MDB_GET_MULTIPLE and #MDB_NEXT_MULTIPLE + * cursor operations may be used to retrieve multiple items at once. + * <li>#MDB_INTEGERDUP + * This option specifies that duplicate data items are also integers, and + * should be sorted as such. + * <li>#MDB_REVERSEDUP + * This option specifies that duplicate data items should be compared as + * strings in reverse order. + * <li>#MDB_CREATE + * Create the named database if it doesn't exist. This option is not + * allowed in a read-only transaction or a read-only environment. + * </ul> + * @param[out] dbi Address where the new #MDB_dbi handle will be stored + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>#MDB_NOTFOUND - the specified database doesn't exist in the environment + * and #MDB_CREATE was not specified. + * <li>#MDB_DBS_FULL - too many databases have been opened. See #mdb_env_set_maxdbs(). + * </ul> + */ +int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *dbi); + + /** @brief Retrieve statistics for a database. + * + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[out] stat The address of an #MDB_stat structure + * where the statistics will be copied + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>EINVAL - an invalid parameter was specified. + * </ul> + */ +int mdb_stat(MDB_txn *txn, MDB_dbi dbi, MDB_stat *stat); + + /** @brief Retrieve the DB flags for a database handle. + * + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[out] flags Address where the flags will be returned. + * @return A non-zero error value on failure and 0 on success. + */ +int mdb_dbi_flags(MDB_txn *txn, MDB_dbi dbi, unsigned int *flags); + + /** @brief Close a database handle. Normally unnecessary. Use with care: + * + * This call is not mutex protected. Handles should only be closed by + * a single thread, and only if no other threads are going to reference + * the database handle or one of its cursors any further. Do not close + * a handle if an existing transaction has modified its database. + * Doing so can cause misbehavior from database corruption to errors + * like MDB_BAD_VALSIZE (since the DB name is gone). + * + * Closing a database handle is not necessary, but lets #mdb_dbi_open() + * reuse the handle value. Usually it's better to set a bigger + * #mdb_env_set_maxdbs(), unless that value would be large. + * + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + */ +void mdb_dbi_close(MDB_env *env, MDB_dbi dbi); + + /** @brief Empty or delete+close a database. + * + * See #mdb_dbi_close() for restrictions about closing the DB handle. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[in] del 0 to empty the DB, 1 to delete it from the + * environment and close the DB handle. + * @return A non-zero error value on failure and 0 on success. + */ +int mdb_drop(MDB_txn *txn, MDB_dbi dbi, int del); + + /** @brief Set a custom key comparison function for a database. + * + * The comparison function is called whenever it is necessary to compare a + * key specified by the application with a key currently stored in the database. + * If no comparison function is specified, and no special key flags were specified + * with #mdb_dbi_open(), the keys are compared lexically, with shorter keys collating + * before longer keys. + * @warning This function must be called before any data access functions are used, + * otherwise data corruption may occur. The same comparison function must be used by every + * program accessing the database, every time the database is used. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[in] cmp A #MDB_cmp_func function + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>EINVAL - an invalid parameter was specified. + * </ul> + */ +int mdb_set_compare(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp); + + /** @brief Set a custom data comparison function for a #MDB_DUPSORT database. + * + * This comparison function is called whenever it is necessary to compare a data + * item specified by the application with a data item currently stored in the database. + * This function only takes effect if the database was opened with the #MDB_DUPSORT + * flag. + * If no comparison function is specified, and no special key flags were specified + * with #mdb_dbi_open(), the data items are compared lexically, with shorter items collating + * before longer items. + * @warning This function must be called before any data access functions are used, + * otherwise data corruption may occur. The same comparison function must be used by every + * program accessing the database, every time the database is used. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[in] cmp A #MDB_cmp_func function + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>EINVAL - an invalid parameter was specified. + * </ul> + */ +int mdb_set_dupsort(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp); + + /** @brief Set a relocation function for a #MDB_FIXEDMAP database. + * + * @todo The relocation function is called whenever it is necessary to move the data + * of an item to a different position in the database (e.g. through tree + * balancing operations, shifts as a result of adds or deletes, etc.). It is + * intended to allow address/position-dependent data items to be stored in + * a database in an environment opened with the #MDB_FIXEDMAP option. + * Currently the relocation feature is unimplemented and setting + * this function has no effect. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[in] rel A #MDB_rel_func function + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>EINVAL - an invalid parameter was specified. + * </ul> + */ +int mdb_set_relfunc(MDB_txn *txn, MDB_dbi dbi, MDB_rel_func *rel); + + /** @brief Set a context pointer for a #MDB_FIXEDMAP database's relocation function. + * + * See #mdb_set_relfunc and #MDB_rel_func for more details. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[in] ctx An arbitrary pointer for whatever the application needs. + * It will be passed to the callback function set by #mdb_set_relfunc + * as its \b relctx parameter whenever the callback is invoked. + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>EINVAL - an invalid parameter was specified. + * </ul> + */ +int mdb_set_relctx(MDB_txn *txn, MDB_dbi dbi, void *ctx); + + /** @brief Get items from a database. + * + * This function retrieves key/data pairs from the database. The address + * and length of the data associated with the specified \b key are returned + * in the structure to which \b data refers. + * If the database supports duplicate keys (#MDB_DUPSORT) then the + * first data item for the key will be returned. Retrieval of other + * items requires the use of #mdb_cursor_get(). + * + * @note The memory pointed to by the returned values is owned by the + * database. The caller need not dispose of the memory, and may not + * modify it in any way. For values returned in a read-only transaction + * any modification attempts will cause a SIGSEGV. + * @note Values returned from the database are valid only until a + * subsequent update operation, or the end of the transaction. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[in] key The key to search for in the database + * @param[out] data The data corresponding to the key + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>#MDB_NOTFOUND - the key was not in the database. + * <li>EINVAL - an invalid parameter was specified. + * </ul> + */ +int mdb_get(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data); + + /** @brief Store items into a database. + * + * This function stores key/data pairs in the database. The default behavior + * is to enter the new key/data pair, replacing any previously existing key + * if duplicates are disallowed, or adding a duplicate data item if + * duplicates are allowed (#MDB_DUPSORT). + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[in] key The key to store in the database + * @param[in,out] data The data to store + * @param[in] flags Special options for this operation. This parameter + * must be set to 0 or by bitwise OR'ing together one or more of the + * values described here. + * <ul> + * <li>#MDB_NODUPDATA - enter the new key/data pair only if it does not + * already appear in the database. This flag may only be specified + * if the database was opened with #MDB_DUPSORT. The function will + * return #MDB_KEYEXIST if the key/data pair already appears in the + * database. + * <li>#MDB_NOOVERWRITE - enter the new key/data pair only if the key + * does not already appear in the database. The function will return + * #MDB_KEYEXIST if the key already appears in the database, even if + * the database supports duplicates (#MDB_DUPSORT). The \b data + * parameter will be set to point to the existing item. + * <li>#MDB_RESERVE - reserve space for data of the given size, but + * don't copy the given data. Instead, return a pointer to the + * reserved space, which the caller can fill in later - before + * the next update operation or the transaction ends. This saves + * an extra memcpy if the data is being generated later. + * LMDB does nothing else with this memory, the caller is expected + * to modify all of the space requested. + * <li>#MDB_APPEND - append the given key/data pair to the end of the + * database. This option allows fast bulk loading when keys are + * already known to be in the correct order. Loading unsorted keys + * with this flag will cause a #MDB_KEYEXIST error. + * <li>#MDB_APPENDDUP - as above, but for sorted dup data. + * </ul> + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>#MDB_MAP_FULL - the database is full, see #mdb_env_set_mapsize(). + * <li>#MDB_TXN_FULL - the transaction has too many dirty pages. + * <li>EACCES - an attempt was made to write in a read-only transaction. + * <li>EINVAL - an invalid parameter was specified. + * </ul> + */ +int mdb_put(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data, + unsigned int flags); + + /** @brief Delete items from a database. + * + * This function removes key/data pairs from the database. + * If the database does not support sorted duplicate data items + * (#MDB_DUPSORT) the data parameter is ignored. + * If the database supports sorted duplicates and the data parameter + * is NULL, all of the duplicate data items for the key will be + * deleted. Otherwise, if the data parameter is non-NULL + * only the matching data item will be deleted. + * This function will return #MDB_NOTFOUND if the specified key/data + * pair is not in the database. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[in] key The key to delete from the database + * @param[in] data The data to delete + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>EACCES - an attempt was made to write in a read-only transaction. + * <li>EINVAL - an invalid parameter was specified. + * </ul> + */ +int mdb_del(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data); + + /** @brief Create a cursor handle. + * + * A cursor is associated with a specific transaction and database. + * A cursor cannot be used when its database handle is closed. Nor + * when its transaction has ended, except with #mdb_cursor_renew(). + * It can be discarded with #mdb_cursor_close(). + * A cursor in a write-transaction can be closed before its transaction + * ends, and will otherwise be closed when its transaction ends. + * A cursor in a read-only transaction must be closed explicitly, before + * or after its transaction ends. It can be reused with + * #mdb_cursor_renew() before finally closing it. + * @note Earlier documentation said that cursors in every transaction + * were closed when the transaction committed or aborted. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[out] cursor Address where the new #MDB_cursor handle will be stored + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>EINVAL - an invalid parameter was specified. + * </ul> + */ +int mdb_cursor_open(MDB_txn *txn, MDB_dbi dbi, MDB_cursor **cursor); + + /** @brief Close a cursor handle. + * + * The cursor handle will be freed and must not be used again after this call. + * Its transaction must still be live if it is a write-transaction. + * @param[in] cursor A cursor handle returned by #mdb_cursor_open() + */ +void mdb_cursor_close(MDB_cursor *cursor); + + /** @brief Renew a cursor handle. + * + * A cursor is associated with a specific transaction and database. + * Cursors that are only used in read-only + * transactions may be re-used, to avoid unnecessary malloc/free overhead. + * The cursor may be associated with a new read-only transaction, and + * referencing the same database handle as it was created with. + * This may be done whether the previous transaction is live or dead. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] cursor A cursor handle returned by #mdb_cursor_open() + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>EINVAL - an invalid parameter was specified. + * </ul> + */ +int mdb_cursor_renew(MDB_txn *txn, MDB_cursor *cursor); + + /** @brief Return the cursor's transaction handle. + * + * @param[in] cursor A cursor handle returned by #mdb_cursor_open() + */ +MDB_txn *mdb_cursor_txn(MDB_cursor *cursor); + + /** @brief Return the cursor's database handle. + * + * @param[in] cursor A cursor handle returned by #mdb_cursor_open() + */ +MDB_dbi mdb_cursor_dbi(MDB_cursor *cursor); + + /** @brief Retrieve by cursor. + * + * This function retrieves key/data pairs from the database. The address and length + * of the key are returned in the object to which \b key refers (except for the + * case of the #MDB_SET option, in which the \b key object is unchanged), and + * the address and length of the data are returned in the object to which \b data + * refers. + * See #mdb_get() for restrictions on using the output values. + * @param[in] cursor A cursor handle returned by #mdb_cursor_open() + * @param[in,out] key The key for a retrieved item + * @param[in,out] data The data of a retrieved item + * @param[in] op A cursor operation #MDB_cursor_op + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>#MDB_NOTFOUND - no matching key found. + * <li>EINVAL - an invalid parameter was specified. + * </ul> + */ +int mdb_cursor_get(MDB_cursor *cursor, MDB_val *key, MDB_val *data, + MDB_cursor_op op); + + /** @brief Store by cursor. + * + * This function stores key/data pairs into the database. + * The cursor is positioned at the new item, or on failure usually near it. + * @note Earlier documentation incorrectly said errors would leave the + * state of the cursor unchanged. + * @param[in] cursor A cursor handle returned by #mdb_cursor_open() + * @param[in] key The key operated on. + * @param[in] data The data operated on. + * @param[in] flags Options for this operation. This parameter + * must be set to 0 or one of the values described here. + * <ul> + * <li>#MDB_CURRENT - replace the item at the current cursor position. + * The \b key parameter must still be provided, and must match it. + * If using sorted duplicates (#MDB_DUPSORT) the data item must still + * sort into the same place. This is intended to be used when the + * new data is the same size as the old. Otherwise it will simply + * perform a delete of the old record followed by an insert. + * <li>#MDB_NODUPDATA - enter the new key/data pair only if it does not + * already appear in the database. This flag may only be specified + * if the database was opened with #MDB_DUPSORT. The function will + * return #MDB_KEYEXIST if the key/data pair already appears in the + * database. + * <li>#MDB_NOOVERWRITE - enter the new key/data pair only if the key + * does not already appear in the database. The function will return + * #MDB_KEYEXIST if the key already appears in the database, even if + * the database supports duplicates (#MDB_DUPSORT). + * <li>#MDB_RESERVE - reserve space for data of the given size, but + * don't copy the given data. Instead, return a pointer to the + * reserved space, which the caller can fill in later. This saves + * an extra memcpy if the data is being generated later. + * <li>#MDB_APPEND - append the given key/data pair to the end of the + * database. No key comparisons are performed. This option allows + * fast bulk loading when keys are already known to be in the + * correct order. Loading unsorted keys with this flag will cause + * data corruption. + * <li>#MDB_APPENDDUP - as above, but for sorted dup data. + * <li>#MDB_MULTIPLE - store multiple contiguous data elements in a + * single request. This flag may only be specified if the database + * was opened with #MDB_DUPFIXED. The \b data argument must be an + * array of two MDB_vals. The mv_size of the first MDB_val must be + * the size of a single data element. The mv_data of the first MDB_val + * must point to the beginning of the array of contiguous data elements. + * The mv_size of the second MDB_val must be the count of the number + * of data elements to store. On return this field will be set to + * the count of the number of elements actually written. The mv_data + * of the second MDB_val is unused. + * </ul> + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>#MDB_MAP_FULL - the database is full, see #mdb_env_set_mapsize(). + * <li>#MDB_TXN_FULL - the transaction has too many dirty pages. + * <li>EACCES - an attempt was made to modify a read-only database. + * <li>EINVAL - an invalid parameter was specified. + * </ul> + */ +int mdb_cursor_put(MDB_cursor *cursor, MDB_val *key, MDB_val *data, + unsigned int flags); + + /** @brief Delete current key/data pair + * + * This function deletes the key/data pair to which the cursor refers. + * @param[in] cursor A cursor handle returned by #mdb_cursor_open() + * @param[in] flags Options for this operation. This parameter + * must be set to 0 or one of the values described here. + * <ul> + * <li>#MDB_NODUPDATA - delete all of the data items for the current key. + * This flag may only be specified if the database was opened with #MDB_DUPSORT. + * </ul> + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>EACCES - an attempt was made to modify a read-only database. + * <li>EINVAL - an invalid parameter was specified. + * </ul> + */ +int mdb_cursor_del(MDB_cursor *cursor, unsigned int flags); + + /** @brief Return count of duplicates for current key. + * + * This call is only valid on databases that support sorted duplicate + * data items #MDB_DUPSORT. + * @param[in] cursor A cursor handle returned by #mdb_cursor_open() + * @param[out] countp Address where the count will be stored + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * <ul> + * <li>EINVAL - cursor is not initialized, or an invalid parameter was specified. + * </ul> + */ +int mdb_cursor_count(MDB_cursor *cursor, size_t *countp); + + /** @brief Compare two data items according to a particular database. + * + * This returns a comparison as if the two data items were keys in the + * specified database. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[in] a The first item to compare + * @param[in] b The second item to compare + * @return < 0 if a < b, 0 if a == b, > 0 if a > b + */ +int mdb_cmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b); + + /** @brief Compare two data items according to a particular database. + * + * This returns a comparison as if the two items were data items of + * the specified database. The database must have the #MDB_DUPSORT flag. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[in] a The first item to compare + * @param[in] b The second item to compare + * @return < 0 if a < b, 0 if a == b, > 0 if a > b + */ +int mdb_dcmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b); + + /** @brief A callback function used to print a message from the library. + * + * @param[in] msg The string to be printed. + * @param[in] ctx An arbitrary context pointer for the callback. + * @return < 0 on failure, >= 0 on success. + */ +typedef int (MDB_msg_func)(const char *msg, void *ctx); + + /** @brief Dump the entries in the reader lock table. + * + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[in] func A #MDB_msg_func function + * @param[in] ctx Anything the message function needs + * @return < 0 on failure, >= 0 on success. + */ +int mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx); + + /** @brief Check for stale entries in the reader lock table. + * + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[out] dead Number of stale slots that were cleared + * @return 0 on success, non-zero on failure. + */ +int mdb_reader_check(MDB_env *env, int *dead); +/** @} */ + +#ifdef __cplusplus +} +#endif +/** @page tools LMDB Command Line Tools + The following describes the command line tools that are available for LMDB. + \li \ref mdb_copy_1 + \li \ref mdb_dump_1 + \li \ref mdb_load_1 + \li \ref mdb_stat_1 +*/ + +#endif /* _LMDB_H_ */ diff --git a/external/db_drivers/liblmdb64/mdb.c b/external/db_drivers/liblmdb64/mdb.c new file mode 100644 index 000000000..c4dc26903 --- /dev/null +++ b/external/db_drivers/liblmdb64/mdb.c @@ -0,0 +1,9665 @@ +/** @file mdb.c + * @brief Lightning memory-mapped database library + * + * A Btree-based database management library modeled loosely on the + * BerkeleyDB API, but much simplified. + */ +/* + * Copyright 2011-2014 Howard Chu, Symas Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>. + * + * This code is derived from btree.c written by Martin Hedenfalk. + * + * Copyright (c) 2009, 2010 Martin Hedenfalk <martin@bzero.se> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE 1 +#endif +#ifdef _WIN32 +#include <malloc.h> +#include <windows.h> +/** getpid() returns int; MinGW defines pid_t but MinGW64 typedefs it + * as int64 which is wrong. MSVC doesn't define it at all, so just + * don't use it. + */ +#define MDB_PID_T int +#define MDB_THR_T DWORD +#include <sys/types.h> +#include <sys/stat.h> +#ifdef __GNUC__ +# include <sys/param.h> +#else +# define LITTLE_ENDIAN 1234 +# define BIG_ENDIAN 4321 +# define BYTE_ORDER LITTLE_ENDIAN +# ifndef SSIZE_MAX +# define SSIZE_MAX INT_MAX +# endif +#endif +#else +#include <sys/types.h> +#include <sys/stat.h> +#define MDB_PID_T pid_t +#define MDB_THR_T pthread_t +#include <sys/param.h> +#include <sys/uio.h> +#include <sys/mman.h> +#ifdef HAVE_SYS_FILE_H +#include <sys/file.h> +#endif +#include <fcntl.h> +#endif + +#if defined(__mips) && defined(__linux) +/* MIPS has cache coherency issues, requires explicit cache control */ +#include <asm/cachectl.h> +extern int cacheflush(char *addr, int nbytes, int cache); +#define CACHEFLUSH(addr, bytes, cache) cacheflush(addr, bytes, cache) +#else +#define CACHEFLUSH(addr, bytes, cache) +#endif + +#if defined(__linux) && !defined(MDB_FDATASYNC_WORKS) +/** fdatasync is broken on ext3/ext4fs on older kernels, see + * description in #mdb_env_open2 comments. You can safely + * define MDB_FDATASYNC_WORKS if this code will only be run + * on kernels 3.6 and newer. + */ +#define BROKEN_FDATASYNC +#endif + +#include <errno.h> +#include <limits.h> +#include <stddef.h> +#include <inttypes.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> + +#if defined(__sun) || defined(ANDROID) +/* Most platforms have posix_memalign, older may only have memalign */ +#define HAVE_MEMALIGN 1 +#include <malloc.h> +#endif + +#if !(defined(BYTE_ORDER) || defined(__BYTE_ORDER)) +#include <netinet/in.h> +#include <resolv.h> /* defines BYTE_ORDER on HPUX and Solaris */ +#endif + +#if defined(__APPLE__) || defined (BSD) +# define MDB_USE_SYSV_SEM 1 +# define MDB_FDATASYNC fsync +#elif defined(ANDROID) +# define MDB_FDATASYNC fsync +#endif + +#ifndef _WIN32 +#include <pthread.h> +#ifdef MDB_USE_SYSV_SEM +#include <sys/ipc.h> +#include <sys/sem.h> +#ifdef _SEM_SEMUN_UNDEFINED +union semun { + int val; + struct semid_ds *buf; + unsigned short *array; +}; +#endif /* _SEM_SEMUN_UNDEFINED */ +#endif /* MDB_USE_SYSV_SEM */ +#endif /* !_WIN32 */ + +#ifdef USE_VALGRIND +#include <valgrind/memcheck.h> +#define VGMEMP_CREATE(h,r,z) VALGRIND_CREATE_MEMPOOL(h,r,z) +#define VGMEMP_ALLOC(h,a,s) VALGRIND_MEMPOOL_ALLOC(h,a,s) +#define VGMEMP_FREE(h,a) VALGRIND_MEMPOOL_FREE(h,a) +#define VGMEMP_DESTROY(h) VALGRIND_DESTROY_MEMPOOL(h) +#define VGMEMP_DEFINED(a,s) VALGRIND_MAKE_MEM_DEFINED(a,s) +#else +#define VGMEMP_CREATE(h,r,z) +#define VGMEMP_ALLOC(h,a,s) +#define VGMEMP_FREE(h,a) +#define VGMEMP_DESTROY(h) +#define VGMEMP_DEFINED(a,s) +#endif + +#ifndef BYTE_ORDER +# if (defined(_LITTLE_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(_LITTLE_ENDIAN) && defined(_BIG_ENDIAN)) +/* Solaris just defines one or the other */ +# define LITTLE_ENDIAN 1234 +# define BIG_ENDIAN 4321 +# ifdef _LITTLE_ENDIAN +# define BYTE_ORDER LITTLE_ENDIAN +# else +# define BYTE_ORDER BIG_ENDIAN +# endif +# else +# define BYTE_ORDER __BYTE_ORDER +# endif +#endif + +#ifndef LITTLE_ENDIAN +#define LITTLE_ENDIAN __LITTLE_ENDIAN +#endif +#ifndef BIG_ENDIAN +#define BIG_ENDIAN __BIG_ENDIAN +#endif + +#if defined(__i386) || defined(__x86_64) || defined(_M_IX86) +#define MISALIGNED_OK 1 +#endif + +#include "lmdb.h" +#include "midl.h" + +#if (BYTE_ORDER == LITTLE_ENDIAN) == (BYTE_ORDER == BIG_ENDIAN) +# error "Unknown or unsupported endianness (BYTE_ORDER)" +#elif (-6 & 5) || CHAR_BIT != 8 || UINT_MAX < 0xffffffff || ULONG_MAX % 0xFFFF +# error "Two's complement, reasonably sized integer types, please" +#endif + +#ifdef __GNUC__ +/** Put infrequently used env functions in separate section */ +# ifdef __APPLE__ +# define ESECT __attribute__ ((section("__TEXT,text_env"))) +# else +# define ESECT __attribute__ ((section("text_env"))) +# endif +#else +#define ESECT +#endif + +/** @defgroup internal LMDB Internals + * @{ + */ +/** @defgroup compat Compatibility Macros + * A bunch of macros to minimize the amount of platform-specific ifdefs + * needed throughout the rest of the code. When the features this library + * needs are similar enough to POSIX to be hidden in a one-or-two line + * replacement, this macro approach is used. + * @{ + */ + + /** Features under development */ +#ifndef MDB_DEVEL +#define MDB_DEVEL 0 +#endif + +#if defined(_WIN32) || defined(MDB_USE_SYSV_SEM) || defined(EOWNERDEAD) +#define MDB_ROBUST_SUPPORTED 1 +#endif + + /** Wrapper around __func__, which is a C99 feature */ +#if __STDC_VERSION__ >= 199901L +# define mdb_func_ __func__ +#elif __GNUC__ >= 2 || _MSC_VER >= 1300 +# define mdb_func_ __FUNCTION__ +#else +/* If a debug message says <mdb_unknown>(), update the #if statements above */ +# define mdb_func_ "<mdb_unknown>" +#endif + +/* Internal error codes, not exposed outside liblmdb */ +#define MDB_NO_ROOT (MDB_LAST_ERRCODE + 10) +#ifdef _WIN32 +#define MDB_OWNERDEAD ((int) WAIT_ABANDONED) +#elif defined MDB_USE_SYSV_SEM +#define MDB_OWNERDEAD (MDB_LAST_ERRCODE + 11) +#else +#define MDB_OWNERDEAD EOWNERDEAD +#endif + +#ifdef _WIN32 +#define MDB_USE_HASH 1 +#define MDB_PIDLOCK 0 +#define THREAD_RET DWORD +#define pthread_t HANDLE +#define pthread_mutex_t HANDLE +#define pthread_cond_t HANDLE +typedef HANDLE mdb_mutex_t; +#define pthread_key_t DWORD +#define pthread_self() GetCurrentThreadId() +#define pthread_key_create(x,y) \ + ((*(x) = TlsAlloc()) == TLS_OUT_OF_INDEXES ? ErrCode() : 0) +#define pthread_key_delete(x) TlsFree(x) +#define pthread_getspecific(x) TlsGetValue(x) +#define pthread_setspecific(x,y) (TlsSetValue(x,y) ? 0 : ErrCode()) +#define pthread_mutex_unlock(x) ReleaseMutex(*x) +#define pthread_mutex_lock(x) WaitForSingleObject(*x, INFINITE) +#define pthread_cond_signal(x) SetEvent(*x) +#define pthread_cond_wait(cond,mutex) do{SignalObjectAndWait(*mutex, *cond, INFINITE, FALSE); WaitForSingleObject(*mutex, INFINITE);}while(0) +#define THREAD_CREATE(thr,start,arg) thr=CreateThread(NULL,0,start,arg,0,NULL) +#define THREAD_FINISH(thr) WaitForSingleObject(thr, INFINITE) +#define MDB_MUTEX(env, rw) ((env)->me_##rw##mutex) +#define LOCK_MUTEX0(mutex) WaitForSingleObject(mutex, INFINITE) +#define UNLOCK_MUTEX(mutex) ReleaseMutex(mutex) +#define mdb_mutex_consistent(mutex) 0 +#define getpid() GetCurrentProcessId() +#define MDB_FDATASYNC(fd) (!FlushFileBuffers(fd)) +#define MDB_MSYNC(addr,len,flags) (!FlushViewOfFile(addr,len)) +#define ErrCode() GetLastError() +#define GET_PAGESIZE(x) {SYSTEM_INFO si; GetSystemInfo(&si); (x) = si.dwPageSize;} +#define close(fd) (CloseHandle(fd) ? 0 : -1) +#define munmap(ptr,len) UnmapViewOfFile(ptr) +#ifdef PROCESS_QUERY_LIMITED_INFORMATION +#define MDB_PROCESS_QUERY_LIMITED_INFORMATION PROCESS_QUERY_LIMITED_INFORMATION +#else +#define MDB_PROCESS_QUERY_LIMITED_INFORMATION 0x1000 +#endif +#define Z "I" +#else +#define THREAD_RET void * +#define THREAD_CREATE(thr,start,arg) pthread_create(&thr,NULL,start,arg) +#define THREAD_FINISH(thr) pthread_join(thr,NULL) +#define Z "z" /**< printf format modifier for size_t */ + + /** For MDB_LOCK_FORMAT: True if readers take a pid lock in the lockfile */ +#define MDB_PIDLOCK 1 + +#ifdef MDB_USE_SYSV_SEM + +typedef struct mdb_mutex { + int semid; + int semnum; + int *locked; +} mdb_mutex_t; + +#define MDB_MUTEX(env, rw) (&(env)->me_##rw##mutex) +#define LOCK_MUTEX0(mutex) mdb_sem_wait(mutex) +#define UNLOCK_MUTEX(mutex) do { \ + struct sembuf sb = { 0, 1, SEM_UNDO }; \ + sb.sem_num = (mutex)->semnum; \ + *(mutex)->locked = 0; \ + semop((mutex)->semid, &sb, 1); \ +} while(0) + +static int +mdb_sem_wait(mdb_mutex_t *sem) +{ + int rc, *locked = sem->locked; + struct sembuf sb = { 0, -1, SEM_UNDO }; + sb.sem_num = sem->semnum; + do { + if (!semop(sem->semid, &sb, 1)) { + rc = *locked ? MDB_OWNERDEAD : MDB_SUCCESS; + *locked = 1; + break; + } + } while ((rc = errno) == EINTR); + return rc; +} + +#define mdb_mutex_consistent(mutex) 0 + +#else + /** Pointer/HANDLE type of shared mutex/semaphore. + */ +typedef pthread_mutex_t mdb_mutex_t; + /** Mutex for the reader table (rw = r) or write transaction (rw = w). + */ +#define MDB_MUTEX(env, rw) (&(env)->me_txns->mti_##rw##mutex) + /** Lock the reader or writer mutex. + * Returns 0 or a code to give #mdb_mutex_failed(), as in #LOCK_MUTEX(). + */ +#define LOCK_MUTEX0(mutex) pthread_mutex_lock(mutex) + /** Unlock the reader or writer mutex. + */ +#define UNLOCK_MUTEX(mutex) pthread_mutex_unlock(mutex) + /** Mark mutex-protected data as repaired, after death of previous owner. + */ +#define mdb_mutex_consistent(mutex) pthread_mutex_consistent(mutex) +#endif /* MDB_USE_SYSV_SEM */ + + /** Get the error code for the last failed system function. + */ +#define ErrCode() errno + + /** An abstraction for a file handle. + * On POSIX systems file handles are small integers. On Windows + * they're opaque pointers. + */ +#define HANDLE int + + /** A value for an invalid file handle. + * Mainly used to initialize file variables and signify that they are + * unused. + */ +#define INVALID_HANDLE_VALUE (-1) + + /** Get the size of a memory page for the system. + * This is the basic size that the platform's memory manager uses, and is + * fundamental to the use of memory-mapped files. + */ +#define GET_PAGESIZE(x) ((x) = sysconf(_SC_PAGE_SIZE)) +#endif + +#if defined(_WIN32) +#define MNAME_LEN 32 +#elif defined(MDB_USE_SYSV_SEM) +#define MNAME_LEN (sizeof(int)) +#else +#define MNAME_LEN (sizeof(pthread_mutex_t)) +#endif + +#ifdef MDB_USE_SYSV_SEM +#define SYSV_SEM_FLAG 1 /**< SysV sems in lockfile format */ +#else +#define SYSV_SEM_FLAG 0 +#endif + +/** @} */ + +#ifdef MDB_ROBUST_SUPPORTED + /** Lock mutex, handle any error, set rc = result. + * Return 0 on success, nonzero (not rc) on error. + */ +#define LOCK_MUTEX(rc, env, mutex) \ + (((rc) = LOCK_MUTEX0(mutex)) && \ + ((rc) = mdb_mutex_failed(env, mutex, rc))) +static int mdb_mutex_failed(MDB_env *env, mdb_mutex_t *mutex, int rc); +#else +#define LOCK_MUTEX(rc, env, mutex) ((rc) = LOCK_MUTEX0(mutex)) +#define mdb_mutex_failed(env, mutex, rc) (rc) +#endif + +#ifndef _WIN32 +/** A flag for opening a file and requesting synchronous data writes. + * This is only used when writing a meta page. It's not strictly needed; + * we could just do a normal write and then immediately perform a flush. + * But if this flag is available it saves us an extra system call. + * + * @note If O_DSYNC is undefined but exists in /usr/include, + * preferably set some compiler flag to get the definition. + * Otherwise compile with the less efficient -DMDB_DSYNC=O_SYNC. + */ +#ifndef MDB_DSYNC +# define MDB_DSYNC O_DSYNC +#endif +#endif + +/** Function for flushing the data of a file. Define this to fsync + * if fdatasync() is not supported. + */ +#ifndef MDB_FDATASYNC +# define MDB_FDATASYNC fdatasync +#endif + +#ifndef MDB_MSYNC +# define MDB_MSYNC(addr,len,flags) msync(addr,len,flags) +#endif + +#ifndef MS_SYNC +#define MS_SYNC 1 +#endif + +#ifndef MS_ASYNC +#define MS_ASYNC 0 +#endif + + /** A page number in the database. + * Note that 64 bit page numbers are overkill, since pages themselves + * already represent 12-13 bits of addressable memory, and the OS will + * always limit applications to a maximum of 63 bits of address space. + * + * @note In the #MDB_node structure, we only store 48 bits of this value, + * which thus limits us to only 60 bits of addressable data. + */ +typedef MDB_ID pgno_t; + + /** A transaction ID. + * See struct MDB_txn.mt_txnid for details. + */ +typedef MDB_ID txnid_t; + +/** @defgroup debug Debug Macros + * @{ + */ +#ifndef MDB_DEBUG + /** Enable debug output. Needs variable argument macros (a C99 feature). + * Set this to 1 for copious tracing. Set to 2 to add dumps of all IDLs + * read from and written to the database (used for free space management). + */ +#define MDB_DEBUG 0 +#endif + +#if MDB_DEBUG +static int mdb_debug; +static txnid_t mdb_debug_start; + + /** Print a debug message with printf formatting. + * Requires double parenthesis around 2 or more args. + */ +# define DPRINTF(args) ((void) ((mdb_debug) && DPRINTF0 args)) +# define DPRINTF0(fmt, ...) \ + fprintf(stderr, "%s:%d " fmt "\n", mdb_func_, __LINE__, __VA_ARGS__) +#else +# define DPRINTF(args) ((void) 0) +#endif + /** Print a debug string. + * The string is printed literally, with no format processing. + */ +#define DPUTS(arg) DPRINTF(("%s", arg)) + /** Debuging output value of a cursor DBI: Negative in a sub-cursor. */ +#define DDBI(mc) \ + (((mc)->mc_flags & C_SUB) ? -(int)(mc)->mc_dbi : (int)(mc)->mc_dbi) +/** @} */ + + /** @brief The maximum size of a database page. + * + * It is 32k or 64k, since value-PAGEBASE must fit in + * #MDB_page.%mp_upper. + * + * LMDB will use database pages < OS pages if needed. + * That causes more I/O in write transactions: The OS must + * know (read) the whole page before writing a partial page. + * + * Note that we don't currently support Huge pages. On Linux, + * regular data files cannot use Huge pages, and in general + * Huge pages aren't actually pageable. We rely on the OS + * demand-pager to read our data and page it out when memory + * pressure from other processes is high. So until OSs have + * actual paging support for Huge pages, they're not viable. + */ +#define MAX_PAGESIZE (PAGEBASE ? 0x10000 : 0x8000) + + /** The minimum number of keys required in a database page. + * Setting this to a larger value will place a smaller bound on the + * maximum size of a data item. Data items larger than this size will + * be pushed into overflow pages instead of being stored directly in + * the B-tree node. This value used to default to 4. With a page size + * of 4096 bytes that meant that any item larger than 1024 bytes would + * go into an overflow page. That also meant that on average 2-3KB of + * each overflow page was wasted space. The value cannot be lower than + * 2 because then there would no longer be a tree structure. With this + * value, items larger than 2KB will go into overflow pages, and on + * average only 1KB will be wasted. + */ +#define MDB_MINKEYS 2 + + /** A stamp that identifies a file as an LMDB file. + * There's nothing special about this value other than that it is easily + * recognizable, and it will reflect any byte order mismatches. + */ +#define MDB_MAGIC 0xBEEFC0DE + + /** The version number for a database's datafile format. */ +#define MDB_DATA_VERSION ((MDB_DEVEL) ? 999 : 1) + /** The version number for a database's lockfile format. */ +#define MDB_LOCK_VERSION ((MDB_DEVEL) ? 999 : 1) + + /** @brief The max size of a key we can write, or 0 for dynamic max. + * + * Define this as 0 to compute the max from the page size. 511 + * is default for backwards compat: liblmdb <= 0.9.10 can break + * when modifying a DB with keys/dupsort data bigger than its max. + * #MDB_DEVEL sets the default to 0. + * + * Data items in an #MDB_DUPSORT database are also limited to + * this size, since they're actually keys of a sub-DB. Keys and + * #MDB_DUPSORT data items must fit on a node in a regular page. + */ +#ifndef MDB_MAXKEYSIZE +#define MDB_MAXKEYSIZE ((MDB_DEVEL) ? 0 : 511) +#endif + + /** The maximum size of a key we can write to the environment. */ +#if MDB_MAXKEYSIZE +#define ENV_MAXKEY(env) (MDB_MAXKEYSIZE) +#else +#define ENV_MAXKEY(env) ((env)->me_maxkey) +#endif + + /** @brief The maximum size of a data item. + * + * We only store a 32 bit value for node sizes. + */ +#define MAXDATASIZE 0xffffffffUL + +#if MDB_DEBUG + /** Key size which fits in a #DKBUF. + * @ingroup debug + */ +#define DKBUF_MAXKEYSIZE ((MDB_MAXKEYSIZE) > 0 ? (MDB_MAXKEYSIZE) : 511) + /** A key buffer. + * @ingroup debug + * This is used for printing a hex dump of a key's contents. + */ +#define DKBUF char kbuf[DKBUF_MAXKEYSIZE*2+1] + /** Display a key in hex. + * @ingroup debug + * Invoke a function to display a key in hex. + */ +#define DKEY(x) mdb_dkey(x, kbuf) +#else +#define DKBUF +#define DKEY(x) 0 +#endif + + /** An invalid page number. + * Mainly used to denote an empty tree. + */ +#define P_INVALID (~(pgno_t)0) + + /** Test if the flags \b f are set in a flag word \b w. */ +#define F_ISSET(w, f) (((w) & (f)) == (f)) + + /** Round \b n up to an even number. */ +#define EVEN(n) (((n) + 1U) & -2) /* sign-extending -2 to match n+1U */ + + /** Used for offsets within a single page. + * Since memory pages are typically 4 or 8KB in size, 12-13 bits, + * this is plenty. + */ +typedef uint16_t indx_t; + + /** Default size of memory map. + * This is certainly too small for any actual applications. Apps should always set + * the size explicitly using #mdb_env_set_mapsize(). + */ +#define DEFAULT_MAPSIZE 1048576 + +/** @defgroup readers Reader Lock Table + * Readers don't acquire any locks for their data access. Instead, they + * simply record their transaction ID in the reader table. The reader + * mutex is needed just to find an empty slot in the reader table. The + * slot's address is saved in thread-specific data so that subsequent read + * transactions started by the same thread need no further locking to proceed. + * + * If #MDB_NOTLS is set, the slot address is not saved in thread-specific data. + * + * No reader table is used if the database is on a read-only filesystem, or + * if #MDB_NOLOCK is set. + * + * Since the database uses multi-version concurrency control, readers don't + * actually need any locking. This table is used to keep track of which + * readers are using data from which old transactions, so that we'll know + * when a particular old transaction is no longer in use. Old transactions + * that have discarded any data pages can then have those pages reclaimed + * for use by a later write transaction. + * + * The lock table is constructed such that reader slots are aligned with the + * processor's cache line size. Any slot is only ever used by one thread. + * This alignment guarantees that there will be no contention or cache + * thrashing as threads update their own slot info, and also eliminates + * any need for locking when accessing a slot. + * + * A writer thread will scan every slot in the table to determine the oldest + * outstanding reader transaction. Any freed pages older than this will be + * reclaimed by the writer. The writer doesn't use any locks when scanning + * this table. This means that there's no guarantee that the writer will + * see the most up-to-date reader info, but that's not required for correct + * operation - all we need is to know the upper bound on the oldest reader, + * we don't care at all about the newest reader. So the only consequence of + * reading stale information here is that old pages might hang around a + * while longer before being reclaimed. That's actually good anyway, because + * the longer we delay reclaiming old pages, the more likely it is that a + * string of contiguous pages can be found after coalescing old pages from + * many old transactions together. + * @{ + */ + /** Number of slots in the reader table. + * This value was chosen somewhat arbitrarily. 126 readers plus a + * couple mutexes fit exactly into 8KB on my development machine. + * Applications should set the table size using #mdb_env_set_maxreaders(). + */ +#define DEFAULT_READERS 126 + + /** The size of a CPU cache line in bytes. We want our lock structures + * aligned to this size to avoid false cache line sharing in the + * lock table. + * This value works for most CPUs. For Itanium this should be 128. + */ +#ifndef CACHELINE +#define CACHELINE 64 +#endif + + /** The information we store in a single slot of the reader table. + * In addition to a transaction ID, we also record the process and + * thread ID that owns a slot, so that we can detect stale information, + * e.g. threads or processes that went away without cleaning up. + * @note We currently don't check for stale records. We simply re-init + * the table when we know that we're the only process opening the + * lock file. + */ +typedef struct MDB_rxbody { + /** Current Transaction ID when this transaction began, or (txnid_t)-1. + * Multiple readers that start at the same time will probably have the + * same ID here. Again, it's not important to exclude them from + * anything; all we need to know is which version of the DB they + * started from so we can avoid overwriting any data used in that + * particular version. + */ + volatile txnid_t mrb_txnid; + /** The process ID of the process owning this reader txn. */ + volatile MDB_PID_T mrb_pid; + /** The thread ID of the thread owning this txn. */ + volatile MDB_THR_T mrb_tid; +} MDB_rxbody; + + /** The actual reader record, with cacheline padding. */ +typedef struct MDB_reader { + union { + MDB_rxbody mrx; + /** shorthand for mrb_txnid */ +#define mr_txnid mru.mrx.mrb_txnid +#define mr_pid mru.mrx.mrb_pid +#define mr_tid mru.mrx.mrb_tid + /** cache line alignment */ + char pad[(sizeof(MDB_rxbody)+CACHELINE-1) & ~(CACHELINE-1)]; + } mru; +} MDB_reader; + + /** The header for the reader table. + * The table resides in a memory-mapped file. (This is a different file + * than is used for the main database.) + * + * For POSIX the actual mutexes reside in the shared memory of this + * mapped file. On Windows, mutexes are named objects allocated by the + * kernel; we store the mutex names in this mapped file so that other + * processes can grab them. This same approach is also used on + * MacOSX/Darwin (using named semaphores) since MacOSX doesn't support + * process-shared POSIX mutexes. For these cases where a named object + * is used, the object name is derived from a 64 bit FNV hash of the + * environment pathname. As such, naming collisions are extremely + * unlikely. If a collision occurs, the results are unpredictable. + */ +typedef struct MDB_txbody { + /** Stamp identifying this as an LMDB file. It must be set + * to #MDB_MAGIC. */ + uint32_t mtb_magic; + /** Format of this lock file. Must be set to #MDB_LOCK_FORMAT. */ + uint32_t mtb_format; +#if defined(_WIN32) + char mtb_rmname[MNAME_LEN]; +#elif defined(MDB_USE_SYSV_SEM) + int mtb_semid; + int mtb_rlocked; +#else + /** Mutex protecting access to this table. + * This is the #MDB_MUTEX(env,r) reader table lock. + */ + pthread_mutex_t mtb_rmutex; +#endif + /** The ID of the last transaction committed to the database. + * This is recorded here only for convenience; the value can always + * be determined by reading the main database meta pages. + */ + volatile txnid_t mtb_txnid; + /** The number of slots that have been used in the reader table. + * This always records the maximum count, it is not decremented + * when readers release their slots. + */ + volatile unsigned mtb_numreaders; +} MDB_txbody; + + /** The actual reader table definition. */ +typedef struct MDB_txninfo { + union { + MDB_txbody mtb; +#define mti_magic mt1.mtb.mtb_magic +#define mti_format mt1.mtb.mtb_format +#define mti_rmutex mt1.mtb.mtb_rmutex +#define mti_rmname mt1.mtb.mtb_rmname +#define mti_txnid mt1.mtb.mtb_txnid +#define mti_numreaders mt1.mtb.mtb_numreaders +#ifdef MDB_USE_SYSV_SEM +#define mti_semid mt1.mtb.mtb_semid +#define mti_rlocked mt1.mtb.mtb_rlocked +#endif + char pad[(sizeof(MDB_txbody)+CACHELINE-1) & ~(CACHELINE-1)]; + } mt1; + union { +#if defined(_WIN32) + char mt2_wmname[MNAME_LEN]; +#define mti_wmname mt2.mt2_wmname +#elif defined MDB_USE_SYSV_SEM + int mt2_wlocked; +#define mti_wlocked mt2.mt2_wlocked +#else + pthread_mutex_t mt2_wmutex; +#define mti_wmutex mt2.mt2_wmutex +#endif + char pad[(MNAME_LEN+CACHELINE-1) & ~(CACHELINE-1)]; + } mt2; + MDB_reader mti_readers[1]; +} MDB_txninfo; + + /** Lockfile format signature: version, features and field layout */ +#define MDB_LOCK_FORMAT \ + ((uint32_t) \ + ((MDB_LOCK_VERSION) \ + /* Flags which describe functionality */ \ + + (SYSV_SEM_FLAG << 18) \ + + (((MDB_PIDLOCK) != 0) << 16))) +/** @} */ + +/** Common header for all page types. + * Overflow records occupy a number of contiguous pages with no + * headers on any page after the first. + */ +typedef struct MDB_page { +#define mp_pgno mp_p.p_pgno +#define mp_next mp_p.p_next + union { + pgno_t p_pgno; /**< page number */ + struct MDB_page *p_next; /**< for in-memory list of freed pages */ + } mp_p; + uint16_t mp_pad; +/** @defgroup mdb_page Page Flags + * @ingroup internal + * Flags for the page headers. + * @{ + */ +#define P_BRANCH 0x01 /**< branch page */ +#define P_LEAF 0x02 /**< leaf page */ +#define P_OVERFLOW 0x04 /**< overflow page */ +#define P_META 0x08 /**< meta page */ +#define P_DIRTY 0x10 /**< dirty page, also set for #P_SUBP pages */ +#define P_LEAF2 0x20 /**< for #MDB_DUPFIXED records */ +#define P_SUBP 0x40 /**< for #MDB_DUPSORT sub-pages */ +#define P_LOOSE 0x4000 /**< page was dirtied then freed, can be reused */ +#define P_KEEP 0x8000 /**< leave this page alone during spill */ +/** @} */ + uint16_t mp_flags; /**< @ref mdb_page */ +#define mp_lower mp_pb.pb.pb_lower +#define mp_upper mp_pb.pb.pb_upper +#define mp_pages mp_pb.pb_pages + union { + struct { + indx_t pb_lower; /**< lower bound of free space */ + indx_t pb_upper; /**< upper bound of free space */ + } pb; + uint32_t pb_pages; /**< number of overflow pages */ + } mp_pb; + indx_t mp_ptrs[1]; /**< dynamic size */ +} MDB_page; + + /** Size of the page header, excluding dynamic data at the end */ +#define PAGEHDRSZ ((unsigned) offsetof(MDB_page, mp_ptrs)) + + /** Address of first usable data byte in a page, after the header */ +#define METADATA(p) ((void *)((char *)(p) + PAGEHDRSZ)) + + /** ITS#7713, change PAGEBASE to handle 65536 byte pages */ +#define PAGEBASE ((MDB_DEVEL) ? PAGEHDRSZ : 0) + + /** Number of nodes on a page */ +#define NUMKEYS(p) (((p)->mp_lower - (PAGEHDRSZ-PAGEBASE)) >> 1) + + /** The amount of space remaining in the page */ +#define SIZELEFT(p) (indx_t)((p)->mp_upper - (p)->mp_lower) + + /** The percentage of space used in the page, in tenths of a percent. */ +#define PAGEFILL(env, p) (1000L * ((env)->me_psize - PAGEHDRSZ - SIZELEFT(p)) / \ + ((env)->me_psize - PAGEHDRSZ)) + /** The minimum page fill factor, in tenths of a percent. + * Pages emptier than this are candidates for merging. + */ +#define FILL_THRESHOLD 250 + + /** Test if a page is a leaf page */ +#define IS_LEAF(p) F_ISSET((p)->mp_flags, P_LEAF) + /** Test if a page is a LEAF2 page */ +#define IS_LEAF2(p) F_ISSET((p)->mp_flags, P_LEAF2) + /** Test if a page is a branch page */ +#define IS_BRANCH(p) F_ISSET((p)->mp_flags, P_BRANCH) + /** Test if a page is an overflow page */ +#define IS_OVERFLOW(p) F_ISSET((p)->mp_flags, P_OVERFLOW) + /** Test if a page is a sub page */ +#define IS_SUBP(p) F_ISSET((p)->mp_flags, P_SUBP) + + /** The number of overflow pages needed to store the given size. */ +#define OVPAGES(size, psize) ((PAGEHDRSZ-1 + (size)) / (psize) + 1) + + /** Link in #MDB_txn.%mt_loose_pgs list */ +#define NEXT_LOOSE_PAGE(p) (*(MDB_page **)((p) + 2)) + + /** Header for a single key/data pair within a page. + * Used in pages of type #P_BRANCH and #P_LEAF without #P_LEAF2. + * We guarantee 2-byte alignment for 'MDB_node's. + */ +typedef struct MDB_node { + /** lo and hi are used for data size on leaf nodes and for + * child pgno on branch nodes. On 64 bit platforms, flags + * is also used for pgno. (Branch nodes have no flags). + * They are in host byte order in case that lets some + * accesses be optimized into a 32-bit word access. + */ +#if BYTE_ORDER == LITTLE_ENDIAN + unsigned short mn_lo, mn_hi; /**< part of data size or pgno */ +#else + unsigned short mn_hi, mn_lo; +#endif +/** @defgroup mdb_node Node Flags + * @ingroup internal + * Flags for node headers. + * @{ + */ +#define F_BIGDATA 0x01 /**< data put on overflow page */ +#define F_SUBDATA 0x02 /**< data is a sub-database */ +#define F_DUPDATA 0x04 /**< data has duplicates */ + +/** valid flags for #mdb_node_add() */ +#define NODE_ADD_FLAGS (F_DUPDATA|F_SUBDATA|MDB_RESERVE|MDB_APPEND) + +/** @} */ + unsigned short mn_flags; /**< @ref mdb_node */ + unsigned short mn_ksize; /**< key size */ + char mn_data[1]; /**< key and data are appended here */ +} MDB_node; + + /** Size of the node header, excluding dynamic data at the end */ +#define NODESIZE offsetof(MDB_node, mn_data) + + /** Bit position of top word in page number, for shifting mn_flags */ +#define PGNO_TOPWORD ((pgno_t)-1 > 0xffffffffu ? 32 : 0) + + /** Size of a node in a branch page with a given key. + * This is just the node header plus the key, there is no data. + */ +#define INDXSIZE(k) (NODESIZE + ((k) == NULL ? 0 : (k)->mv_size)) + + /** Size of a node in a leaf page with a given key and data. + * This is node header plus key plus data size. + */ +#define LEAFSIZE(k, d) (NODESIZE + (k)->mv_size + (d)->mv_size) + + /** Address of node \b i in page \b p */ +#define NODEPTR(p, i) ((MDB_node *)((char *)(p) + (p)->mp_ptrs[i] + PAGEBASE)) + + /** Address of the key for the node */ +#define NODEKEY(node) (void *)((node)->mn_data) + + /** Address of the data for a node */ +#define NODEDATA(node) (void *)((char *)(node)->mn_data + (node)->mn_ksize) + + /** Get the page number pointed to by a branch node */ +#define NODEPGNO(node) \ + ((node)->mn_lo | ((pgno_t) (node)->mn_hi << 16) | \ + (PGNO_TOPWORD ? ((pgno_t) (node)->mn_flags << PGNO_TOPWORD) : 0)) + /** Set the page number in a branch node */ +#define SETPGNO(node,pgno) do { \ + (node)->mn_lo = (pgno) & 0xffff; (node)->mn_hi = (pgno) >> 16; \ + if (PGNO_TOPWORD) (node)->mn_flags = (pgno) >> PGNO_TOPWORD; } while(0) + + /** Get the size of the data in a leaf node */ +#define NODEDSZ(node) ((node)->mn_lo | ((unsigned)(node)->mn_hi << 16)) + /** Set the size of the data for a leaf node */ +#define SETDSZ(node,size) do { \ + (node)->mn_lo = (size) & 0xffff; (node)->mn_hi = (size) >> 16;} while(0) + /** The size of a key in a node */ +#define NODEKSZ(node) ((node)->mn_ksize) + + /** Copy a page number from src to dst */ +#ifdef MISALIGNED_OK +#define COPY_PGNO(dst,src) dst = src +#else +#if SIZE_MAX > 4294967295UL +#define COPY_PGNO(dst,src) do { \ + unsigned short *s, *d; \ + s = (unsigned short *)&(src); \ + d = (unsigned short *)&(dst); \ + *d++ = *s++; \ + *d++ = *s++; \ + *d++ = *s++; \ + *d = *s; \ +} while (0) +#else +#define COPY_PGNO(dst,src) do { \ + unsigned short *s, *d; \ + s = (unsigned short *)&(src); \ + d = (unsigned short *)&(dst); \ + *d++ = *s++; \ + *d = *s; \ +} while (0) +#endif +#endif + /** The address of a key in a LEAF2 page. + * LEAF2 pages are used for #MDB_DUPFIXED sorted-duplicate sub-DBs. + * There are no node headers, keys are stored contiguously. + */ +#define LEAF2KEY(p, i, ks) ((char *)(p) + PAGEHDRSZ + ((i)*(ks))) + + /** Set the \b node's key into \b keyptr, if requested. */ +#define MDB_GET_KEY(node, keyptr) { if ((keyptr) != NULL) { \ + (keyptr)->mv_size = NODEKSZ(node); (keyptr)->mv_data = NODEKEY(node); } } + + /** Set the \b node's key into \b key. */ +#define MDB_GET_KEY2(node, key) { key.mv_size = NODEKSZ(node); key.mv_data = NODEKEY(node); } + + /** Information about a single database in the environment. */ +typedef struct MDB_db { + uint32_t md_pad; /**< also ksize for LEAF2 pages */ + uint16_t md_flags; /**< @ref mdb_dbi_open */ + uint16_t md_depth; /**< depth of this tree */ + pgno_t md_branch_pages; /**< number of internal pages */ + pgno_t md_leaf_pages; /**< number of leaf pages */ + pgno_t md_overflow_pages; /**< number of overflow pages */ + size_t md_entries; /**< number of data items */ + pgno_t md_root; /**< the root page of this tree */ +} MDB_db; + + /** mdb_dbi_open flags */ +#define MDB_VALID 0x8000 /**< DB handle is valid, for me_dbflags */ +#define PERSISTENT_FLAGS (0xffff & ~(MDB_VALID)) +#define VALID_FLAGS (MDB_REVERSEKEY|MDB_DUPSORT|MDB_INTEGERKEY|MDB_DUPFIXED|\ + MDB_INTEGERDUP|MDB_REVERSEDUP|MDB_CREATE) + + /** Handle for the DB used to track free pages. */ +#define FREE_DBI 0 + /** Handle for the default DB. */ +#define MAIN_DBI 1 + + /** Meta page content. + * A meta page is the start point for accessing a database snapshot. + * Pages 0-1 are meta pages. Transaction N writes meta page #(N % 2). + */ +typedef struct MDB_meta { + /** Stamp identifying this as an LMDB file. It must be set + * to #MDB_MAGIC. */ + uint32_t mm_magic; + /** Version number of this file. Must be set to #MDB_DATA_VERSION. */ + uint32_t mm_version; + void *mm_address; /**< address for fixed mapping */ + size_t mm_mapsize; /**< size of mmap region */ + MDB_db mm_dbs[2]; /**< first is free space, 2nd is main db */ + /** The size of pages used in this DB */ +#define mm_psize mm_dbs[0].md_pad + /** Any persistent environment flags. @ref mdb_env */ +#define mm_flags mm_dbs[0].md_flags + pgno_t mm_last_pg; /**< last used page in file */ + volatile txnid_t mm_txnid; /**< txnid that committed this page */ +} MDB_meta; + + /** Buffer for a stack-allocated meta page. + * The members define size and alignment, and silence type + * aliasing warnings. They are not used directly; that could + * mean incorrectly using several union members in parallel. + */ +typedef union MDB_metabuf { + MDB_page mb_page; + struct { + char mm_pad[PAGEHDRSZ]; + MDB_meta mm_meta; + } mb_metabuf; +} MDB_metabuf; + + /** Auxiliary DB info. + * The information here is mostly static/read-only. There is + * only a single copy of this record in the environment. + */ +typedef struct MDB_dbx { + MDB_val md_name; /**< name of the database */ + MDB_cmp_func *md_cmp; /**< function for comparing keys */ + MDB_cmp_func *md_dcmp; /**< function for comparing data items */ + MDB_rel_func *md_rel; /**< user relocate function */ + void *md_relctx; /**< user-provided context for md_rel */ +} MDB_dbx; + + /** A database transaction. + * Every operation requires a transaction handle. + */ +struct MDB_txn { + MDB_txn *mt_parent; /**< parent of a nested txn */ + MDB_txn *mt_child; /**< nested txn under this txn */ + pgno_t mt_next_pgno; /**< next unallocated page */ + /** The ID of this transaction. IDs are integers incrementing from 1. + * Only committed write transactions increment the ID. If a transaction + * aborts, the ID may be re-used by the next writer. + */ + txnid_t mt_txnid; + MDB_env *mt_env; /**< the DB environment */ + /** The list of pages that became unused during this transaction. + */ + MDB_IDL mt_free_pgs; + /** The list of loose pages that became unused and may be reused + * in this transaction, linked through #NEXT_LOOSE_PAGE(page). + */ + MDB_page *mt_loose_pgs; + /* #Number of loose pages (#mt_loose_pgs) */ + int mt_loose_count; + /** The sorted list of dirty pages we temporarily wrote to disk + * because the dirty list was full. page numbers in here are + * shifted left by 1, deleted slots have the LSB set. + */ + MDB_IDL mt_spill_pgs; + union { + /** For write txns: Modified pages. Sorted when not MDB_WRITEMAP. */ + MDB_ID2L dirty_list; + /** For read txns: This thread/txn's reader table slot, or NULL. */ + MDB_reader *reader; + } mt_u; + /** Array of records for each DB known in the environment. */ + MDB_dbx *mt_dbxs; + /** Array of MDB_db records for each known DB */ + MDB_db *mt_dbs; + /** Array of sequence numbers for each DB handle */ + unsigned int *mt_dbiseqs; +/** @defgroup mt_dbflag Transaction DB Flags + * @ingroup internal + * @{ + */ +#define DB_DIRTY 0x01 /**< DB was modified or is DUPSORT data */ +#define DB_STALE 0x02 /**< Named-DB record is older than txnID */ +#define DB_NEW 0x04 /**< Named-DB handle opened in this txn */ +#define DB_VALID 0x08 /**< DB handle is valid, see also #MDB_VALID */ +/** @} */ + /** In write txns, array of cursors for each DB */ + MDB_cursor **mt_cursors; + /** Array of flags for each DB */ + unsigned char *mt_dbflags; + /** Number of DB records in use. This number only ever increments; + * we don't decrement it when individual DB handles are closed. + */ + MDB_dbi mt_numdbs; + +/** @defgroup mdb_txn Transaction Flags + * @ingroup internal + * @{ + */ +#define MDB_TXN_RDONLY 0x01 /**< read-only transaction */ +#define MDB_TXN_ERROR 0x02 /**< txn is unusable after an error */ +#define MDB_TXN_DIRTY 0x04 /**< must write, even if dirty list is empty */ +#define MDB_TXN_SPILLS 0x08 /**< txn or a parent has spilled pages */ +/** @} */ + unsigned int mt_flags; /**< @ref mdb_txn */ + /** #dirty_list room: Array size - \#dirty pages visible to this txn. + * Includes ancestor txns' dirty pages not hidden by other txns' + * dirty/spilled pages. Thus commit(nested txn) has room to merge + * dirty_list into mt_parent after freeing hidden mt_parent pages. + */ + unsigned int mt_dirty_room; +}; + +/** Enough space for 2^32 nodes with minimum of 2 keys per node. I.e., plenty. + * At 4 keys per node, enough for 2^64 nodes, so there's probably no need to + * raise this on a 64 bit machine. + */ +#define CURSOR_STACK 32 + +struct MDB_xcursor; + + /** Cursors are used for all DB operations. + * A cursor holds a path of (page pointer, key index) from the DB + * root to a position in the DB, plus other state. #MDB_DUPSORT + * cursors include an xcursor to the current data item. Write txns + * track their cursors and keep them up to date when data moves. + * Exception: An xcursor's pointer to a #P_SUBP page can be stale. + * (A node with #F_DUPDATA but no #F_SUBDATA contains a subpage). + */ +struct MDB_cursor { + /** Next cursor on this DB in this txn */ + MDB_cursor *mc_next; + /** Backup of the original cursor if this cursor is a shadow */ + MDB_cursor *mc_backup; + /** Context used for databases with #MDB_DUPSORT, otherwise NULL */ + struct MDB_xcursor *mc_xcursor; + /** The transaction that owns this cursor */ + MDB_txn *mc_txn; + /** The database handle this cursor operates on */ + MDB_dbi mc_dbi; + /** The database record for this cursor */ + MDB_db *mc_db; + /** The database auxiliary record for this cursor */ + MDB_dbx *mc_dbx; + /** The @ref mt_dbflag for this database */ + unsigned char *mc_dbflag; + unsigned short mc_snum; /**< number of pushed pages */ + unsigned short mc_top; /**< index of top page, normally mc_snum-1 */ +/** @defgroup mdb_cursor Cursor Flags + * @ingroup internal + * Cursor state flags. + * @{ + */ +#define C_INITIALIZED 0x01 /**< cursor has been initialized and is valid */ +#define C_EOF 0x02 /**< No more data */ +#define C_SUB 0x04 /**< Cursor is a sub-cursor */ +#define C_DEL 0x08 /**< last op was a cursor_del */ +#define C_SPLITTING 0x20 /**< Cursor is in page_split */ +#define C_UNTRACK 0x40 /**< Un-track cursor when closing */ +/** @} */ + unsigned int mc_flags; /**< @ref mdb_cursor */ + MDB_page *mc_pg[CURSOR_STACK]; /**< stack of pushed pages */ + indx_t mc_ki[CURSOR_STACK]; /**< stack of page indices */ +}; + + /** Context for sorted-dup records. + * We could have gone to a fully recursive design, with arbitrarily + * deep nesting of sub-databases. But for now we only handle these + * levels - main DB, optional sub-DB, sorted-duplicate DB. + */ +typedef struct MDB_xcursor { + /** A sub-cursor for traversing the Dup DB */ + MDB_cursor mx_cursor; + /** The database record for this Dup DB */ + MDB_db mx_db; + /** The auxiliary DB record for this Dup DB */ + MDB_dbx mx_dbx; + /** The @ref mt_dbflag for this Dup DB */ + unsigned char mx_dbflag; +} MDB_xcursor; + + /** State of FreeDB old pages, stored in the MDB_env */ +typedef struct MDB_pgstate { + pgno_t *mf_pghead; /**< Reclaimed freeDB pages, or NULL before use */ + txnid_t mf_pglast; /**< ID of last used record, or 0 if !mf_pghead */ +} MDB_pgstate; + + /** The database environment. */ +struct MDB_env { + HANDLE me_fd; /**< The main data file */ + HANDLE me_lfd; /**< The lock file */ + HANDLE me_mfd; /**< just for writing the meta pages */ + /** Failed to update the meta page. Probably an I/O error. */ +#define MDB_FATAL_ERROR 0x80000000U + /** Some fields are initialized. */ +#define MDB_ENV_ACTIVE 0x20000000U + /** me_txkey is set */ +#define MDB_ENV_TXKEY 0x10000000U + /** fdatasync is unreliable */ +#define MDB_FSYNCONLY 0x08000000U + uint32_t me_flags; /**< @ref mdb_env */ + unsigned int me_psize; /**< DB page size, inited from me_os_psize */ + unsigned int me_os_psize; /**< OS page size, from #GET_PAGESIZE */ + unsigned int me_maxreaders; /**< size of the reader table */ + unsigned int me_numreaders; /**< max numreaders set by this env */ + MDB_dbi me_numdbs; /**< number of DBs opened */ + MDB_dbi me_maxdbs; /**< size of the DB table */ + MDB_PID_T me_pid; /**< process ID of this env */ + char *me_path; /**< path to the DB files */ + char *me_map; /**< the memory map of the data file */ + MDB_txninfo *me_txns; /**< the memory map of the lock file or NULL */ + MDB_meta *me_metas[2]; /**< pointers to the two meta pages */ + void *me_pbuf; /**< scratch area for DUPSORT put() */ + MDB_txn *me_txn; /**< current write transaction */ + MDB_txn *me_txn0; /**< prealloc'd write transaction */ + size_t me_mapsize; /**< size of the data memory map */ + off_t me_size; /**< current file size */ + pgno_t me_maxpg; /**< me_mapsize / me_psize */ + MDB_dbx *me_dbxs; /**< array of static DB info */ + uint16_t *me_dbflags; /**< array of flags from MDB_db.md_flags */ + unsigned int *me_dbiseqs; /**< array of dbi sequence numbers */ + pthread_key_t me_txkey; /**< thread-key for readers */ + txnid_t me_pgoldest; /**< ID of oldest reader last time we looked */ + MDB_pgstate me_pgstate; /**< state of old pages from freeDB */ +# define me_pglast me_pgstate.mf_pglast +# define me_pghead me_pgstate.mf_pghead + MDB_page *me_dpages; /**< list of malloc'd blocks for re-use */ + /** IDL of pages that became unused in a write txn */ + MDB_IDL me_free_pgs; + /** ID2L of pages written during a write txn. Length MDB_IDL_UM_SIZE. */ + MDB_ID2L me_dirty_list; + /** Max number of freelist items that can fit in a single overflow page */ + int me_maxfree_1pg; + /** Max size of a node on a page */ + unsigned int me_nodemax; +#if !(MDB_MAXKEYSIZE) + unsigned int me_maxkey; /**< max size of a key */ +#endif + int me_live_reader; /**< have liveness lock in reader table */ +#ifdef _WIN32 + int me_pidquery; /**< Used in OpenProcess */ +#endif +#if defined(_WIN32) || defined(MDB_USE_SYSV_SEM) + /* Windows mutexes/SysV semaphores do not reside in shared mem */ + mdb_mutex_t me_rmutex; + mdb_mutex_t me_wmutex; +#endif + void *me_userctx; /**< User-settable context */ + MDB_assert_func *me_assert_func; /**< Callback for assertion failures */ +}; + + /** Nested transaction */ +typedef struct MDB_ntxn { + MDB_txn mnt_txn; /**< the transaction */ + MDB_pgstate mnt_pgstate; /**< parent transaction's saved freestate */ +} MDB_ntxn; + + /** max number of pages to commit in one writev() call */ +#define MDB_COMMIT_PAGES 64 +#if defined(IOV_MAX) && IOV_MAX < MDB_COMMIT_PAGES +#undef MDB_COMMIT_PAGES +#define MDB_COMMIT_PAGES IOV_MAX +#endif + + /** max bytes to write in one call */ +#define MAX_WRITE (0x80000000U >> (sizeof(ssize_t) == 4)) + + /** Check \b txn and \b dbi arguments to a function */ +#define TXN_DBI_EXIST(txn, dbi) \ + ((txn) && (dbi) < (txn)->mt_numdbs && ((txn)->mt_dbflags[dbi] & DB_VALID)) + + /** Check for misused \b dbi handles */ +#define TXN_DBI_CHANGED(txn, dbi) \ + ((txn)->mt_dbiseqs[dbi] != (txn)->mt_env->me_dbiseqs[dbi]) + +static int mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp); +static int mdb_page_new(MDB_cursor *mc, uint32_t flags, int num, MDB_page **mp); +static int mdb_page_touch(MDB_cursor *mc); + +static int mdb_page_get(MDB_txn *txn, pgno_t pgno, MDB_page **mp, int *lvl); +static int mdb_page_search_root(MDB_cursor *mc, + MDB_val *key, int modify); +#define MDB_PS_MODIFY 1 +#define MDB_PS_ROOTONLY 2 +#define MDB_PS_FIRST 4 +#define MDB_PS_LAST 8 +static int mdb_page_search(MDB_cursor *mc, + MDB_val *key, int flags); +static int mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst); + +#define MDB_SPLIT_REPLACE MDB_APPENDDUP /**< newkey is not new */ +static int mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, + pgno_t newpgno, unsigned int nflags); + +static int mdb_env_read_header(MDB_env *env, MDB_meta *meta); +static int mdb_env_pick_meta(const MDB_env *env); +static int mdb_env_write_meta(MDB_txn *txn); +#if !(defined(_WIN32) || defined(MDB_USE_SYSV_SEM)) /* Drop unused excl arg */ +# define mdb_env_close0(env, excl) mdb_env_close1(env) +#endif +static void mdb_env_close0(MDB_env *env, int excl); + +static MDB_node *mdb_node_search(MDB_cursor *mc, MDB_val *key, int *exactp); +static int mdb_node_add(MDB_cursor *mc, indx_t indx, + MDB_val *key, MDB_val *data, pgno_t pgno, unsigned int flags); +static void mdb_node_del(MDB_cursor *mc, int ksize); +static void mdb_node_shrink(MDB_page *mp, indx_t indx); +static int mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst); +static int mdb_node_read(MDB_txn *txn, MDB_node *leaf, MDB_val *data); +static size_t mdb_leaf_size(MDB_env *env, MDB_val *key, MDB_val *data); +static size_t mdb_branch_size(MDB_env *env, MDB_val *key); + +static int mdb_rebalance(MDB_cursor *mc); +static int mdb_update_key(MDB_cursor *mc, MDB_val *key); + +static void mdb_cursor_pop(MDB_cursor *mc); +static int mdb_cursor_push(MDB_cursor *mc, MDB_page *mp); + +static int mdb_cursor_del0(MDB_cursor *mc); +static int mdb_del0(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data, unsigned flags); +static int mdb_cursor_sibling(MDB_cursor *mc, int move_right); +static int mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op); +static int mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op); +static int mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op, + int *exactp); +static int mdb_cursor_first(MDB_cursor *mc, MDB_val *key, MDB_val *data); +static int mdb_cursor_last(MDB_cursor *mc, MDB_val *key, MDB_val *data); + +static void mdb_cursor_init(MDB_cursor *mc, MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx); +static void mdb_xcursor_init0(MDB_cursor *mc); +static void mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node); + +static int mdb_drop0(MDB_cursor *mc, int subs); +static void mdb_default_cmp(MDB_txn *txn, MDB_dbi dbi); +static int mdb_reader_check0(MDB_env *env, int rlocked, int *dead); + +/** @cond */ +static MDB_cmp_func mdb_cmp_memn, mdb_cmp_memnr, mdb_cmp_int, mdb_cmp_cint, mdb_cmp_long; +/** @endcond */ + +#ifdef _WIN32 +static SECURITY_DESCRIPTOR mdb_null_sd; +static SECURITY_ATTRIBUTES mdb_all_sa; +static int mdb_sec_inited; +#endif + +/** Return the library version info. */ +char * +mdb_version(int *major, int *minor, int *patch) +{ + if (major) *major = MDB_VERSION_MAJOR; + if (minor) *minor = MDB_VERSION_MINOR; + if (patch) *patch = MDB_VERSION_PATCH; + return MDB_VERSION_STRING; +} + +/** Table of descriptions for LMDB @ref errors */ +static char *const mdb_errstr[] = { + "MDB_KEYEXIST: Key/data pair already exists", + "MDB_NOTFOUND: No matching key/data pair found", + "MDB_PAGE_NOTFOUND: Requested page not found", + "MDB_CORRUPTED: Located page was wrong type", + "MDB_PANIC: Update of meta page failed or environment had fatal error", + "MDB_VERSION_MISMATCH: Database environment version mismatch", + "MDB_INVALID: File is not an LMDB file", + "MDB_MAP_FULL: Environment mapsize limit reached", + "MDB_DBS_FULL: Environment maxdbs limit reached", + "MDB_READERS_FULL: Environment maxreaders limit reached", + "MDB_TLS_FULL: Thread-local storage keys full - too many environments open", + "MDB_TXN_FULL: Transaction has too many dirty pages - transaction too big", + "MDB_CURSOR_FULL: Internal error - cursor stack limit reached", + "MDB_PAGE_FULL: Internal error - page has no more space", + "MDB_MAP_RESIZED: Database contents grew beyond environment mapsize", + "MDB_INCOMPATIBLE: Operation and DB incompatible, or DB flags changed", + "MDB_BAD_RSLOT: Invalid reuse of reader locktable slot", + "MDB_BAD_TXN: Transaction cannot recover - it must be aborted", + "MDB_BAD_VALSIZE: Unsupported size of key/DB name/data, or wrong DUPFIXED size", + "MDB_BAD_DBI: The specified DBI handle was closed/changed unexpectedly", +}; + +char * +mdb_strerror(int err) +{ +#ifdef _WIN32 + /** HACK: pad 4KB on stack over the buf. Return system msgs in buf. + * This works as long as no function between the call to mdb_strerror + * and the actual use of the message uses more than 4K of stack. + */ + char pad[4096]; + char buf[1024], *ptr = buf; +#endif + int i; + if (!err) + return ("Successful return: 0"); + + if (err >= MDB_KEYEXIST && err <= MDB_LAST_ERRCODE) { + i = err - MDB_KEYEXIST; + return mdb_errstr[i]; + } + +#ifdef _WIN32 + /* These are the C-runtime error codes we use. The comment indicates + * their numeric value, and the Win32 error they would correspond to + * if the error actually came from a Win32 API. A major mess, we should + * have used LMDB-specific error codes for everything. + */ + switch(err) { + case ENOENT: /* 2, FILE_NOT_FOUND */ + case EIO: /* 5, ACCESS_DENIED */ + case ENOMEM: /* 12, INVALID_ACCESS */ + case EACCES: /* 13, INVALID_DATA */ + case EBUSY: /* 16, CURRENT_DIRECTORY */ + case EINVAL: /* 22, BAD_COMMAND */ + case ENOSPC: /* 28, OUT_OF_PAPER */ + return strerror(err); + default: + ; + } + buf[0] = 0; + FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, err, 0, ptr, sizeof(buf), (va_list *)pad); + return ptr; +#else + return strerror(err); +#endif +} + +/** assert(3) variant in cursor context */ +#define mdb_cassert(mc, expr) mdb_assert0((mc)->mc_txn->mt_env, expr, #expr) +/** assert(3) variant in transaction context */ +#define mdb_tassert(mc, expr) mdb_assert0((txn)->mt_env, expr, #expr) +/** assert(3) variant in environment context */ +#define mdb_eassert(env, expr) mdb_assert0(env, expr, #expr) + +#ifndef NDEBUG +# define mdb_assert0(env, expr, expr_txt) ((expr) ? (void)0 : \ + mdb_assert_fail(env, expr_txt, mdb_func_, __FILE__, __LINE__)) + +static void +mdb_assert_fail(MDB_env *env, const char *expr_txt, + const char *func, const char *file, int line) +{ + char buf[400]; + sprintf(buf, "%.100s:%d: Assertion '%.200s' failed in %.40s()", + file, line, expr_txt, func); + if (env->me_assert_func) + env->me_assert_func(env, buf); + fprintf(stderr, "%s\n", buf); + abort(); +} +#else +# define mdb_assert0(env, expr, expr_txt) ((void) 0) +#endif /* NDEBUG */ + +#if MDB_DEBUG +/** Return the page number of \b mp which may be sub-page, for debug output */ +static pgno_t +mdb_dbg_pgno(MDB_page *mp) +{ + pgno_t ret; + COPY_PGNO(ret, mp->mp_pgno); + return ret; +} + +/** Display a key in hexadecimal and return the address of the result. + * @param[in] key the key to display + * @param[in] buf the buffer to write into. Should always be #DKBUF. + * @return The key in hexadecimal form. + */ +char * +mdb_dkey(MDB_val *key, char *buf) +{ + char *ptr = buf; + unsigned char *c = key->mv_data; + unsigned int i; + + if (!key) + return ""; + + if (key->mv_size > DKBUF_MAXKEYSIZE) + return "MDB_MAXKEYSIZE"; + /* may want to make this a dynamic check: if the key is mostly + * printable characters, print it as-is instead of converting to hex. + */ +#if 1 + buf[0] = '\0'; + for (i=0; i<key->mv_size; i++) + ptr += sprintf(ptr, "%02x", *c++); +#else + sprintf(buf, "%.*s", key->mv_size, key->mv_data); +#endif + return buf; +} + +static const char * +mdb_leafnode_type(MDB_node *n) +{ + static char *const tp[2][2] = {{"", ": DB"}, {": sub-page", ": sub-DB"}}; + return F_ISSET(n->mn_flags, F_BIGDATA) ? ": overflow page" : + tp[F_ISSET(n->mn_flags, F_DUPDATA)][F_ISSET(n->mn_flags, F_SUBDATA)]; +} + +/** Display all the keys in the page. */ +void +mdb_page_list(MDB_page *mp) +{ + pgno_t pgno = mdb_dbg_pgno(mp); + const char *type, *state = (mp->mp_flags & P_DIRTY) ? ", dirty" : ""; + MDB_node *node; + unsigned int i, nkeys, nsize, total = 0; + MDB_val key; + DKBUF; + + switch (mp->mp_flags & (P_BRANCH|P_LEAF|P_LEAF2|P_META|P_OVERFLOW|P_SUBP)) { + case P_BRANCH: type = "Branch page"; break; + case P_LEAF: type = "Leaf page"; break; + case P_LEAF|P_SUBP: type = "Sub-page"; break; + case P_LEAF|P_LEAF2: type = "LEAF2 page"; break; + case P_LEAF|P_LEAF2|P_SUBP: type = "LEAF2 sub-page"; break; + case P_OVERFLOW: + fprintf(stderr, "Overflow page %"Z"u pages %u%s\n", + pgno, mp->mp_pages, state); + return; + case P_META: + fprintf(stderr, "Meta-page %"Z"u txnid %"Z"u\n", + pgno, ((MDB_meta *)METADATA(mp))->mm_txnid); + return; + default: + fprintf(stderr, "Bad page %"Z"u flags 0x%u\n", pgno, mp->mp_flags); + return; + } + + nkeys = NUMKEYS(mp); + fprintf(stderr, "%s %"Z"u numkeys %d%s\n", type, pgno, nkeys, state); + + for (i=0; i<nkeys; i++) { + if (IS_LEAF2(mp)) { /* LEAF2 pages have no mp_ptrs[] or node headers */ + key.mv_size = nsize = mp->mp_pad; + key.mv_data = LEAF2KEY(mp, i, nsize); + total += nsize; + fprintf(stderr, "key %d: nsize %d, %s\n", i, nsize, DKEY(&key)); + continue; + } + node = NODEPTR(mp, i); + key.mv_size = node->mn_ksize; + key.mv_data = node->mn_data; + nsize = NODESIZE + key.mv_size; + if (IS_BRANCH(mp)) { + fprintf(stderr, "key %d: page %"Z"u, %s\n", i, NODEPGNO(node), + DKEY(&key)); + total += nsize; + } else { + if (F_ISSET(node->mn_flags, F_BIGDATA)) + nsize += sizeof(pgno_t); + else + nsize += NODEDSZ(node); + total += nsize; + nsize += sizeof(indx_t); + fprintf(stderr, "key %d: nsize %d, %s%s\n", + i, nsize, DKEY(&key), mdb_leafnode_type(node)); + } + total = EVEN(total); + } + fprintf(stderr, "Total: header %d + contents %d + unused %d\n", + IS_LEAF2(mp) ? PAGEHDRSZ : PAGEBASE + mp->mp_lower, total, SIZELEFT(mp)); +} + +void +mdb_cursor_chk(MDB_cursor *mc) +{ + unsigned int i; + MDB_node *node; + MDB_page *mp; + + if (!mc->mc_snum && !(mc->mc_flags & C_INITIALIZED)) return; + for (i=0; i<mc->mc_top; i++) { + mp = mc->mc_pg[i]; + node = NODEPTR(mp, mc->mc_ki[i]); + if (NODEPGNO(node) != mc->mc_pg[i+1]->mp_pgno) + printf("oops!\n"); + } + if (mc->mc_ki[i] >= NUMKEYS(mc->mc_pg[i])) + printf("ack!\n"); +} +#endif + +#if (MDB_DEBUG) > 2 +/** Count all the pages in each DB and in the freelist + * and make sure it matches the actual number of pages + * being used. + * All named DBs must be open for a correct count. + */ +static void mdb_audit(MDB_txn *txn) +{ + MDB_cursor mc; + MDB_val key, data; + MDB_ID freecount, count; + MDB_dbi i; + int rc; + + freecount = 0; + mdb_cursor_init(&mc, txn, FREE_DBI, NULL); + while ((rc = mdb_cursor_get(&mc, &key, &data, MDB_NEXT)) == 0) + freecount += *(MDB_ID *)data.mv_data; + mdb_tassert(txn, rc == MDB_NOTFOUND); + + count = 0; + for (i = 0; i<txn->mt_numdbs; i++) { + MDB_xcursor mx; + if (!(txn->mt_dbflags[i] & DB_VALID)) + continue; + mdb_cursor_init(&mc, txn, i, &mx); + if (txn->mt_dbs[i].md_root == P_INVALID) + continue; + count += txn->mt_dbs[i].md_branch_pages + + txn->mt_dbs[i].md_leaf_pages + + txn->mt_dbs[i].md_overflow_pages; + if (txn->mt_dbs[i].md_flags & MDB_DUPSORT) { + rc = mdb_page_search(&mc, NULL, MDB_PS_FIRST); + for (; rc == MDB_SUCCESS; rc = mdb_cursor_sibling(&mc, 1)) { + unsigned j; + MDB_page *mp; + mp = mc.mc_pg[mc.mc_top]; + for (j=0; j<NUMKEYS(mp); j++) { + MDB_node *leaf = NODEPTR(mp, j); + if (leaf->mn_flags & F_SUBDATA) { + MDB_db db; + memcpy(&db, NODEDATA(leaf), sizeof(db)); + count += db.md_branch_pages + db.md_leaf_pages + + db.md_overflow_pages; + } + } + } + mdb_tassert(txn, rc == MDB_NOTFOUND); + } + } + if (freecount + count + 2 /* metapages */ != txn->mt_next_pgno) { + fprintf(stderr, "audit: %lu freecount: %lu count: %lu total: %lu next_pgno: %lu\n", + txn->mt_txnid, freecount, count+2, freecount+count+2, txn->mt_next_pgno); + } +} +#endif + +int +mdb_cmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b) +{ + return txn->mt_dbxs[dbi].md_cmp(a, b); +} + +int +mdb_dcmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b) +{ + return txn->mt_dbxs[dbi].md_dcmp(a, b); +} + +/** Allocate memory for a page. + * Re-use old malloc'd pages first for singletons, otherwise just malloc. + */ +static MDB_page * +mdb_page_malloc(MDB_txn *txn, unsigned num) +{ + MDB_env *env = txn->mt_env; + MDB_page *ret = env->me_dpages; + size_t psize = env->me_psize, sz = psize, off; + /* For ! #MDB_NOMEMINIT, psize counts how much to init. + * For a single page alloc, we init everything after the page header. + * For multi-page, we init the final page; if the caller needed that + * many pages they will be filling in at least up to the last page. + */ + if (num == 1) { + if (ret) { + VGMEMP_ALLOC(env, ret, sz); + VGMEMP_DEFINED(ret, sizeof(ret->mp_next)); + env->me_dpages = ret->mp_next; + return ret; + } + psize -= off = PAGEHDRSZ; + } else { + sz *= num; + off = sz - psize; + } + if ((ret = malloc(sz)) != NULL) { + VGMEMP_ALLOC(env, ret, sz); + if (!(env->me_flags & MDB_NOMEMINIT)) { + memset((char *)ret + off, 0, psize); + ret->mp_pad = 0; + } + } else { + txn->mt_flags |= MDB_TXN_ERROR; + } + return ret; +} +/** Free a single page. + * Saves single pages to a list, for future reuse. + * (This is not used for multi-page overflow pages.) + */ +static void +mdb_page_free(MDB_env *env, MDB_page *mp) +{ + mp->mp_next = env->me_dpages; + VGMEMP_FREE(env, mp); + env->me_dpages = mp; +} + +/** Free a dirty page */ +static void +mdb_dpage_free(MDB_env *env, MDB_page *dp) +{ + if (!IS_OVERFLOW(dp) || dp->mp_pages == 1) { + mdb_page_free(env, dp); + } else { + /* large pages just get freed directly */ + VGMEMP_FREE(env, dp); + free(dp); + } +} + +/** Return all dirty pages to dpage list */ +static void +mdb_dlist_free(MDB_txn *txn) +{ + MDB_env *env = txn->mt_env; + MDB_ID2L dl = txn->mt_u.dirty_list; + unsigned i, n = dl[0].mid; + + for (i = 1; i <= n; i++) { + mdb_dpage_free(env, dl[i].mptr); + } + dl[0].mid = 0; +} + +/** Loosen or free a single page. + * Saves single pages to a list for future reuse + * in this same txn. It has been pulled from the freeDB + * and already resides on the dirty list, but has been + * deleted. Use these pages first before pulling again + * from the freeDB. + * + * If the page wasn't dirtied in this txn, just add it + * to this txn's free list. + */ +static int +mdb_page_loose(MDB_cursor *mc, MDB_page *mp) +{ + int loose = 0; + pgno_t pgno = mp->mp_pgno; + MDB_txn *txn = mc->mc_txn; + + if ((mp->mp_flags & P_DIRTY) && mc->mc_dbi != FREE_DBI) { + if (txn->mt_parent) { + MDB_ID2 *dl = txn->mt_u.dirty_list; + /* If txn has a parent, make sure the page is in our + * dirty list. + */ + if (dl[0].mid) { + unsigned x = mdb_mid2l_search(dl, pgno); + if (x <= dl[0].mid && dl[x].mid == pgno) { + if (mp != dl[x].mptr) { /* bad cursor? */ + mc->mc_flags &= ~(C_INITIALIZED|C_EOF); + txn->mt_flags |= MDB_TXN_ERROR; + return MDB_CORRUPTED; + } + /* ok, it's ours */ + loose = 1; + } + } + } else { + /* no parent txn, so it's just ours */ + loose = 1; + } + } + if (loose) { + DPRINTF(("loosen db %d page %"Z"u", DDBI(mc), + mp->mp_pgno)); + NEXT_LOOSE_PAGE(mp) = txn->mt_loose_pgs; + txn->mt_loose_pgs = mp; + txn->mt_loose_count++; + mp->mp_flags |= P_LOOSE; + } else { + int rc = mdb_midl_append(&txn->mt_free_pgs, pgno); + if (rc) + return rc; + } + + return MDB_SUCCESS; +} + +/** Set or clear P_KEEP in dirty, non-overflow, non-sub pages watched by txn. + * @param[in] mc A cursor handle for the current operation. + * @param[in] pflags Flags of the pages to update: + * P_DIRTY to set P_KEEP, P_DIRTY|P_KEEP to clear it. + * @param[in] all No shortcuts. Needed except after a full #mdb_page_flush(). + * @return 0 on success, non-zero on failure. + */ +static int +mdb_pages_xkeep(MDB_cursor *mc, unsigned pflags, int all) +{ + enum { Mask = P_SUBP|P_DIRTY|P_LOOSE|P_KEEP }; + MDB_txn *txn = mc->mc_txn; + MDB_cursor *m3; + MDB_xcursor *mx; + MDB_page *dp, *mp; + MDB_node *leaf; + unsigned i, j; + int rc = MDB_SUCCESS, level; + + /* Mark pages seen by cursors */ + if (mc->mc_flags & C_UNTRACK) + mc = NULL; /* will find mc in mt_cursors */ + for (i = txn->mt_numdbs;; mc = txn->mt_cursors[--i]) { + for (; mc; mc=mc->mc_next) { + if (!(mc->mc_flags & C_INITIALIZED)) + continue; + for (m3 = mc;; m3 = &mx->mx_cursor) { + mp = NULL; + for (j=0; j<m3->mc_snum; j++) { + mp = m3->mc_pg[j]; + if ((mp->mp_flags & Mask) == pflags) + mp->mp_flags ^= P_KEEP; + } + mx = m3->mc_xcursor; + /* Proceed to mx if it is at a sub-database */ + if (! (mx && (mx->mx_cursor.mc_flags & C_INITIALIZED))) + break; + if (! (mp && (mp->mp_flags & P_LEAF))) + break; + leaf = NODEPTR(mp, m3->mc_ki[j-1]); + if (!(leaf->mn_flags & F_SUBDATA)) + break; + } + } + if (i == 0) + break; + } + + if (all) { + /* Mark dirty root pages */ + for (i=0; i<txn->mt_numdbs; i++) { + if (txn->mt_dbflags[i] & DB_DIRTY) { + pgno_t pgno = txn->mt_dbs[i].md_root; + if (pgno == P_INVALID) + continue; + if ((rc = mdb_page_get(txn, pgno, &dp, &level)) != MDB_SUCCESS) + break; + if ((dp->mp_flags & Mask) == pflags && level <= 1) + dp->mp_flags ^= P_KEEP; + } + } + } + + return rc; +} + +static int mdb_page_flush(MDB_txn *txn, int keep); + +/** Spill pages from the dirty list back to disk. + * This is intended to prevent running into #MDB_TXN_FULL situations, + * but note that they may still occur in a few cases: + * 1) our estimate of the txn size could be too small. Currently this + * seems unlikely, except with a large number of #MDB_MULTIPLE items. + * 2) child txns may run out of space if their parents dirtied a + * lot of pages and never spilled them. TODO: we probably should do + * a preemptive spill during #mdb_txn_begin() of a child txn, if + * the parent's dirty_room is below a given threshold. + * + * Otherwise, if not using nested txns, it is expected that apps will + * not run into #MDB_TXN_FULL any more. The pages are flushed to disk + * the same way as for a txn commit, e.g. their P_DIRTY flag is cleared. + * If the txn never references them again, they can be left alone. + * If the txn only reads them, they can be used without any fuss. + * If the txn writes them again, they can be dirtied immediately without + * going thru all of the work of #mdb_page_touch(). Such references are + * handled by #mdb_page_unspill(). + * + * Also note, we never spill DB root pages, nor pages of active cursors, + * because we'll need these back again soon anyway. And in nested txns, + * we can't spill a page in a child txn if it was already spilled in a + * parent txn. That would alter the parent txns' data even though + * the child hasn't committed yet, and we'd have no way to undo it if + * the child aborted. + * + * @param[in] m0 cursor A cursor handle identifying the transaction and + * database for which we are checking space. + * @param[in] key For a put operation, the key being stored. + * @param[in] data For a put operation, the data being stored. + * @return 0 on success, non-zero on failure. + */ +static int +mdb_page_spill(MDB_cursor *m0, MDB_val *key, MDB_val *data) +{ + MDB_txn *txn = m0->mc_txn; + MDB_page *dp; + MDB_ID2L dl = txn->mt_u.dirty_list; + unsigned int i, j, need; + int rc; + + if (m0->mc_flags & C_SUB) + return MDB_SUCCESS; + + /* Estimate how much space this op will take */ + i = m0->mc_db->md_depth; + /* Named DBs also dirty the main DB */ + if (m0->mc_dbi > MAIN_DBI) + i += txn->mt_dbs[MAIN_DBI].md_depth; + /* For puts, roughly factor in the key+data size */ + if (key) + i += (LEAFSIZE(key, data) + txn->mt_env->me_psize) / txn->mt_env->me_psize; + i += i; /* double it for good measure */ + need = i; + + if (txn->mt_dirty_room > i) + return MDB_SUCCESS; + + if (!txn->mt_spill_pgs) { + txn->mt_spill_pgs = mdb_midl_alloc(MDB_IDL_UM_MAX); + if (!txn->mt_spill_pgs) + return ENOMEM; + } else { + /* purge deleted slots */ + MDB_IDL sl = txn->mt_spill_pgs; + unsigned int num = sl[0]; + j=0; + for (i=1; i<=num; i++) { + if (!(sl[i] & 1)) + sl[++j] = sl[i]; + } + sl[0] = j; + } + + /* Preserve pages which may soon be dirtied again */ + if ((rc = mdb_pages_xkeep(m0, P_DIRTY, 1)) != MDB_SUCCESS) + goto done; + + /* Less aggressive spill - we originally spilled the entire dirty list, + * with a few exceptions for cursor pages and DB root pages. But this + * turns out to be a lot of wasted effort because in a large txn many + * of those pages will need to be used again. So now we spill only 1/8th + * of the dirty pages. Testing revealed this to be a good tradeoff, + * better than 1/2, 1/4, or 1/10. + */ + if (need < MDB_IDL_UM_MAX / 8) + need = MDB_IDL_UM_MAX / 8; + + /* Save the page IDs of all the pages we're flushing */ + /* flush from the tail forward, this saves a lot of shifting later on. */ + for (i=dl[0].mid; i && need; i--) { + MDB_ID pn = dl[i].mid << 1; + dp = dl[i].mptr; + if (dp->mp_flags & (P_LOOSE|P_KEEP)) + continue; + /* Can't spill twice, make sure it's not already in a parent's + * spill list. + */ + if (txn->mt_parent) { + MDB_txn *tx2; + for (tx2 = txn->mt_parent; tx2; tx2 = tx2->mt_parent) { + if (tx2->mt_spill_pgs) { + j = mdb_midl_search(tx2->mt_spill_pgs, pn); + if (j <= tx2->mt_spill_pgs[0] && tx2->mt_spill_pgs[j] == pn) { + dp->mp_flags |= P_KEEP; + break; + } + } + } + if (tx2) + continue; + } + if ((rc = mdb_midl_append(&txn->mt_spill_pgs, pn))) + goto done; + need--; + } + mdb_midl_sort(txn->mt_spill_pgs); + + /* Flush the spilled part of dirty list */ + if ((rc = mdb_page_flush(txn, i)) != MDB_SUCCESS) + goto done; + + /* Reset any dirty pages we kept that page_flush didn't see */ + rc = mdb_pages_xkeep(m0, P_DIRTY|P_KEEP, i); + +done: + txn->mt_flags |= rc ? MDB_TXN_ERROR : MDB_TXN_SPILLS; + return rc; +} + +/** Find oldest txnid still referenced. Expects txn->mt_txnid > 0. */ +static txnid_t +mdb_find_oldest(MDB_txn *txn) +{ + int i; + txnid_t mr, oldest = txn->mt_txnid - 1; + if (txn->mt_env->me_txns) { + MDB_reader *r = txn->mt_env->me_txns->mti_readers; + for (i = txn->mt_env->me_txns->mti_numreaders; --i >= 0; ) { + if (r[i].mr_pid) { + mr = r[i].mr_txnid; + if (oldest > mr) + oldest = mr; + } + } + } + return oldest; +} + +/** Add a page to the txn's dirty list */ +static void +mdb_page_dirty(MDB_txn *txn, MDB_page *mp) +{ + MDB_ID2 mid; + int rc, (*insert)(MDB_ID2L, MDB_ID2 *); + + if (txn->mt_env->me_flags & MDB_WRITEMAP) { + insert = mdb_mid2l_append; + } else { + insert = mdb_mid2l_insert; + } + mid.mid = mp->mp_pgno; + mid.mptr = mp; + rc = insert(txn->mt_u.dirty_list, &mid); + mdb_tassert(txn, rc == 0); + txn->mt_dirty_room--; +} + +/** Allocate page numbers and memory for writing. Maintain me_pglast, + * me_pghead and mt_next_pgno. + * + * If there are free pages available from older transactions, they + * are re-used first. Otherwise allocate a new page at mt_next_pgno. + * Do not modify the freedB, just merge freeDB records into me_pghead[] + * and move me_pglast to say which records were consumed. Only this + * function can create me_pghead and move me_pglast/mt_next_pgno. + * @param[in] mc cursor A cursor handle identifying the transaction and + * database for which we are allocating. + * @param[in] num the number of pages to allocate. + * @param[out] mp Address of the allocated page(s). Requests for multiple pages + * will always be satisfied by a single contiguous chunk of memory. + * @return 0 on success, non-zero on failure. + */ +static int +mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp) +{ +#ifdef MDB_PARANOID /* Seems like we can ignore this now */ + /* Get at most <Max_retries> more freeDB records once me_pghead + * has enough pages. If not enough, use new pages from the map. + * If <Paranoid> and mc is updating the freeDB, only get new + * records if me_pghead is empty. Then the freelist cannot play + * catch-up with itself by growing while trying to save it. + */ + enum { Paranoid = 1, Max_retries = 500 }; +#else + enum { Paranoid = 0, Max_retries = INT_MAX /*infinite*/ }; +#endif + int rc, retry = num * 60; + MDB_txn *txn = mc->mc_txn; + MDB_env *env = txn->mt_env; + pgno_t pgno, *mop = env->me_pghead; + unsigned i, j, mop_len = mop ? mop[0] : 0, n2 = num-1; + MDB_page *np; + txnid_t oldest = 0, last; + MDB_cursor_op op; + MDB_cursor m2; + int found_old = 0; + + /* If there are any loose pages, just use them */ + if (num == 1 && txn->mt_loose_pgs) { + np = txn->mt_loose_pgs; + txn->mt_loose_pgs = NEXT_LOOSE_PAGE(np); + txn->mt_loose_count--; + DPRINTF(("db %d use loose page %"Z"u", DDBI(mc), + np->mp_pgno)); + *mp = np; + return MDB_SUCCESS; + } + + *mp = NULL; + + /* If our dirty list is already full, we can't do anything */ + if (txn->mt_dirty_room == 0) { + rc = MDB_TXN_FULL; + goto fail; + } + + for (op = MDB_FIRST;; op = MDB_NEXT) { + MDB_val key, data; + MDB_node *leaf; + pgno_t *idl; + + /* Seek a big enough contiguous page range. Prefer + * pages at the tail, just truncating the list. + */ + if (mop_len > n2) { + i = mop_len; + do { + pgno = mop[i]; + if (mop[i-n2] == pgno+n2) + goto search_done; + } while (--i > n2); + if (--retry < 0) + break; + } + + if (op == MDB_FIRST) { /* 1st iteration */ + /* Prepare to fetch more and coalesce */ + last = env->me_pglast; + oldest = env->me_pgoldest; + mdb_cursor_init(&m2, txn, FREE_DBI, NULL); + if (last) { + op = MDB_SET_RANGE; + key.mv_data = &last; /* will look up last+1 */ + key.mv_size = sizeof(last); + } + if (Paranoid && mc->mc_dbi == FREE_DBI) + retry = -1; + } + if (Paranoid && retry < 0 && mop_len) + break; + + last++; + /* Do not fetch more if the record will be too recent */ + if (oldest <= last) { + if (!found_old) { + oldest = mdb_find_oldest(txn); + env->me_pgoldest = oldest; + found_old = 1; + } + if (oldest <= last) + break; + } + rc = mdb_cursor_get(&m2, &key, NULL, op); + if (rc) { + if (rc == MDB_NOTFOUND) + break; + goto fail; + } + last = *(txnid_t*)key.mv_data; + if (oldest <= last) { + if (!found_old) { + oldest = mdb_find_oldest(txn); + env->me_pgoldest = oldest; + found_old = 1; + } + if (oldest <= last) + break; + } + np = m2.mc_pg[m2.mc_top]; + leaf = NODEPTR(np, m2.mc_ki[m2.mc_top]); + if ((rc = mdb_node_read(txn, leaf, &data)) != MDB_SUCCESS) + return rc; + + idl = (MDB_ID *) data.mv_data; + i = idl[0]; + if (!mop) { + if (!(env->me_pghead = mop = mdb_midl_alloc(i))) { + rc = ENOMEM; + goto fail; + } + } else { + if ((rc = mdb_midl_need(&env->me_pghead, i)) != 0) + goto fail; + mop = env->me_pghead; + } + env->me_pglast = last; +#if (MDB_DEBUG) > 1 + DPRINTF(("IDL read txn %"Z"u root %"Z"u num %u", + last, txn->mt_dbs[FREE_DBI].md_root, i)); + for (j = i; j; j--) + DPRINTF(("IDL %"Z"u", idl[j])); +#endif + /* Merge in descending sorted order */ + mdb_midl_xmerge(mop, idl); + mop_len = mop[0]; + } + + /* Use new pages from the map when nothing suitable in the freeDB */ + i = 0; + pgno = txn->mt_next_pgno; + if (pgno + num >= env->me_maxpg) { + DPUTS("DB size maxed out"); + rc = MDB_MAP_FULL; + goto fail; + } + +search_done: + if (env->me_flags & MDB_WRITEMAP) { + np = (MDB_page *)(env->me_map + env->me_psize * pgno); + } else { + if (!(np = mdb_page_malloc(txn, num))) { + rc = ENOMEM; + goto fail; + } + } + if (i) { + mop[0] = mop_len -= num; + /* Move any stragglers down */ + for (j = i-num; j < mop_len; ) + mop[++j] = mop[++i]; + } else { + txn->mt_next_pgno = pgno + num; + } + np->mp_pgno = pgno; + mdb_page_dirty(txn, np); + *mp = np; + + return MDB_SUCCESS; + +fail: + txn->mt_flags |= MDB_TXN_ERROR; + return rc; +} + +/** Copy the used portions of a non-overflow page. + * @param[in] dst page to copy into + * @param[in] src page to copy from + * @param[in] psize size of a page + */ +static void +mdb_page_copy(MDB_page *dst, MDB_page *src, unsigned int psize) +{ + enum { Align = sizeof(pgno_t) }; + indx_t upper = src->mp_upper, lower = src->mp_lower, unused = upper-lower; + + /* If page isn't full, just copy the used portion. Adjust + * alignment so memcpy may copy words instead of bytes. + */ + if ((unused &= -Align) && !IS_LEAF2(src)) { + upper = (upper + PAGEBASE) & -Align; + memcpy(dst, src, (lower + PAGEBASE + (Align-1)) & -Align); + memcpy((pgno_t *)((char *)dst+upper), (pgno_t *)((char *)src+upper), + psize - upper); + } else { + memcpy(dst, src, psize - unused); + } +} + +/** Pull a page off the txn's spill list, if present. + * If a page being referenced was spilled to disk in this txn, bring + * it back and make it dirty/writable again. + * @param[in] txn the transaction handle. + * @param[in] mp the page being referenced. It must not be dirty. + * @param[out] ret the writable page, if any. ret is unchanged if + * mp wasn't spilled. + */ +static int +mdb_page_unspill(MDB_txn *txn, MDB_page *mp, MDB_page **ret) +{ + MDB_env *env = txn->mt_env; + const MDB_txn *tx2; + unsigned x; + pgno_t pgno = mp->mp_pgno, pn = pgno << 1; + + for (tx2 = txn; tx2; tx2=tx2->mt_parent) { + if (!tx2->mt_spill_pgs) + continue; + x = mdb_midl_search(tx2->mt_spill_pgs, pn); + if (x <= tx2->mt_spill_pgs[0] && tx2->mt_spill_pgs[x] == pn) { + MDB_page *np; + int num; + if (txn->mt_dirty_room == 0) + return MDB_TXN_FULL; + if (IS_OVERFLOW(mp)) + num = mp->mp_pages; + else + num = 1; + if (env->me_flags & MDB_WRITEMAP) { + np = mp; + } else { + np = mdb_page_malloc(txn, num); + if (!np) + return ENOMEM; + if (num > 1) + memcpy(np, mp, num * env->me_psize); + else + mdb_page_copy(np, mp, env->me_psize); + } + if (tx2 == txn) { + /* If in current txn, this page is no longer spilled. + * If it happens to be the last page, truncate the spill list. + * Otherwise mark it as deleted by setting the LSB. + */ + if (x == txn->mt_spill_pgs[0]) + txn->mt_spill_pgs[0]--; + else + txn->mt_spill_pgs[x] |= 1; + } /* otherwise, if belonging to a parent txn, the + * page remains spilled until child commits + */ + + mdb_page_dirty(txn, np); + np->mp_flags |= P_DIRTY; + *ret = np; + break; + } + } + return MDB_SUCCESS; +} + +/** Touch a page: make it dirty and re-insert into tree with updated pgno. + * @param[in] mc cursor pointing to the page to be touched + * @return 0 on success, non-zero on failure. + */ +static int +mdb_page_touch(MDB_cursor *mc) +{ + MDB_page *mp = mc->mc_pg[mc->mc_top], *np; + MDB_txn *txn = mc->mc_txn; + MDB_cursor *m2, *m3; + pgno_t pgno; + int rc; + + if (!F_ISSET(mp->mp_flags, P_DIRTY)) { + if (txn->mt_flags & MDB_TXN_SPILLS) { + np = NULL; + rc = mdb_page_unspill(txn, mp, &np); + if (rc) + goto fail; + if (np) + goto done; + } + if ((rc = mdb_midl_need(&txn->mt_free_pgs, 1)) || + (rc = mdb_page_alloc(mc, 1, &np))) + goto fail; + pgno = np->mp_pgno; + DPRINTF(("touched db %d page %"Z"u -> %"Z"u", DDBI(mc), + mp->mp_pgno, pgno)); + mdb_cassert(mc, mp->mp_pgno != pgno); + mdb_midl_xappend(txn->mt_free_pgs, mp->mp_pgno); + /* Update the parent page, if any, to point to the new page */ + if (mc->mc_top) { + MDB_page *parent = mc->mc_pg[mc->mc_top-1]; + MDB_node *node = NODEPTR(parent, mc->mc_ki[mc->mc_top-1]); + SETPGNO(node, pgno); + } else { + mc->mc_db->md_root = pgno; + } + } else if (txn->mt_parent && !IS_SUBP(mp)) { + MDB_ID2 mid, *dl = txn->mt_u.dirty_list; + pgno = mp->mp_pgno; + /* If txn has a parent, make sure the page is in our + * dirty list. + */ + if (dl[0].mid) { + unsigned x = mdb_mid2l_search(dl, pgno); + if (x <= dl[0].mid && dl[x].mid == pgno) { + if (mp != dl[x].mptr) { /* bad cursor? */ + mc->mc_flags &= ~(C_INITIALIZED|C_EOF); + txn->mt_flags |= MDB_TXN_ERROR; + return MDB_CORRUPTED; + } + return 0; + } + } + mdb_cassert(mc, dl[0].mid < MDB_IDL_UM_MAX); + /* No - copy it */ + np = mdb_page_malloc(txn, 1); + if (!np) + return ENOMEM; + mid.mid = pgno; + mid.mptr = np; + rc = mdb_mid2l_insert(dl, &mid); + mdb_cassert(mc, rc == 0); + } else { + return 0; + } + + mdb_page_copy(np, mp, txn->mt_env->me_psize); + np->mp_pgno = pgno; + np->mp_flags |= P_DIRTY; + +done: + /* Adjust cursors pointing to mp */ + mc->mc_pg[mc->mc_top] = np; + m2 = txn->mt_cursors[mc->mc_dbi]; + if (mc->mc_flags & C_SUB) { + for (; m2; m2=m2->mc_next) { + m3 = &m2->mc_xcursor->mx_cursor; + if (m3->mc_snum < mc->mc_snum) continue; + if (m3->mc_pg[mc->mc_top] == mp) + m3->mc_pg[mc->mc_top] = np; + } + } else { + for (; m2; m2=m2->mc_next) { + if (m2->mc_snum < mc->mc_snum) continue; + if (m2->mc_pg[mc->mc_top] == mp) { + m2->mc_pg[mc->mc_top] = np; + if ((mc->mc_db->md_flags & MDB_DUPSORT) && + IS_LEAF(np) && + m2->mc_ki[mc->mc_top] == mc->mc_ki[mc->mc_top]) + { + MDB_node *leaf = NODEPTR(np, mc->mc_ki[mc->mc_top]); + if (!(leaf->mn_flags & F_SUBDATA)) + m2->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(leaf); + } + } + } + } + return 0; + +fail: + txn->mt_flags |= MDB_TXN_ERROR; + return rc; +} + +int +mdb_env_sync(MDB_env *env, int force) +{ + int rc = 0; + if (env->me_flags & MDB_RDONLY) + return EACCES; + if (force || !F_ISSET(env->me_flags, MDB_NOSYNC)) { + if (env->me_flags & MDB_WRITEMAP) { + int flags = ((env->me_flags & MDB_MAPASYNC) && !force) + ? MS_ASYNC : MS_SYNC; + if (MDB_MSYNC(env->me_map, env->me_mapsize, flags)) + rc = ErrCode(); +#ifdef _WIN32 + else if (flags == MS_SYNC && MDB_FDATASYNC(env->me_fd)) + rc = ErrCode(); +#endif + } else { +#ifdef BROKEN_FDATASYNC + if (env->me_flags & MDB_FSYNCONLY) { + if (fsync(env->me_fd)) + rc = ErrCode(); + } else +#endif + if (MDB_FDATASYNC(env->me_fd)) + rc = ErrCode(); + } + } + return rc; +} + +/** Back up parent txn's cursors, then grab the originals for tracking */ +static int +mdb_cursor_shadow(MDB_txn *src, MDB_txn *dst) +{ + MDB_cursor *mc, *bk; + MDB_xcursor *mx; + size_t size; + int i; + + for (i = src->mt_numdbs; --i >= 0; ) { + if ((mc = src->mt_cursors[i]) != NULL) { + size = sizeof(MDB_cursor); + if (mc->mc_xcursor) + size += sizeof(MDB_xcursor); + for (; mc; mc = bk->mc_next) { + bk = malloc(size); + if (!bk) + return ENOMEM; + *bk = *mc; + mc->mc_backup = bk; + mc->mc_db = &dst->mt_dbs[i]; + /* Kill pointers into src - and dst to reduce abuse: The + * user may not use mc until dst ends. Otherwise we'd... + */ + mc->mc_txn = NULL; /* ...set this to dst */ + mc->mc_dbflag = NULL; /* ...and &dst->mt_dbflags[i] */ + if ((mx = mc->mc_xcursor) != NULL) { + *(MDB_xcursor *)(bk+1) = *mx; + mx->mx_cursor.mc_txn = NULL; /* ...and dst. */ + } + mc->mc_next = dst->mt_cursors[i]; + dst->mt_cursors[i] = mc; + } + } + } + return MDB_SUCCESS; +} + +/** Close this write txn's cursors, give parent txn's cursors back to parent. + * @param[in] txn the transaction handle. + * @param[in] merge true to keep changes to parent cursors, false to revert. + * @return 0 on success, non-zero on failure. + */ +static void +mdb_cursors_close(MDB_txn *txn, unsigned merge) +{ + MDB_cursor **cursors = txn->mt_cursors, *mc, *next, *bk; + MDB_xcursor *mx; + int i; + + for (i = txn->mt_numdbs; --i >= 0; ) { + for (mc = cursors[i]; mc; mc = next) { + next = mc->mc_next; + if ((bk = mc->mc_backup) != NULL) { + if (merge) { + /* Commit changes to parent txn */ + mc->mc_next = bk->mc_next; + mc->mc_backup = bk->mc_backup; + mc->mc_txn = bk->mc_txn; + mc->mc_db = bk->mc_db; + mc->mc_dbflag = bk->mc_dbflag; + if ((mx = mc->mc_xcursor) != NULL) + mx->mx_cursor.mc_txn = bk->mc_txn; + } else { + /* Abort nested txn */ + *mc = *bk; + if ((mx = mc->mc_xcursor) != NULL) + *mx = *(MDB_xcursor *)(bk+1); + } + mc = bk; + } + /* Only malloced cursors are permanently tracked. */ + free(mc); + } + cursors[i] = NULL; + } +} + +#if !(MDB_DEBUG) +#define mdb_txn_reset0(txn, act) mdb_txn_reset0(txn) +#endif +static void +mdb_txn_reset0(MDB_txn *txn, const char *act); + +#if !(MDB_PIDLOCK) /* Currently the same as defined(_WIN32) */ +enum Pidlock_op { + Pidset, Pidcheck +}; +#else +enum Pidlock_op { + Pidset = F_SETLK, Pidcheck = F_GETLK +}; +#endif + +/** Set or check a pid lock. Set returns 0 on success. + * Check returns 0 if the process is certainly dead, nonzero if it may + * be alive (the lock exists or an error happened so we do not know). + * + * On Windows Pidset is a no-op, we merely check for the existence + * of the process with the given pid. On POSIX we use a single byte + * lock on the lockfile, set at an offset equal to the pid. + */ +static int +mdb_reader_pid(MDB_env *env, enum Pidlock_op op, MDB_PID_T pid) +{ +#if !(MDB_PIDLOCK) /* Currently the same as defined(_WIN32) */ + int ret = 0; + HANDLE h; + if (op == Pidcheck) { + h = OpenProcess(env->me_pidquery, FALSE, pid); + /* No documented "no such process" code, but other program use this: */ + if (!h) + return ErrCode() != ERROR_INVALID_PARAMETER; + /* A process exists until all handles to it close. Has it exited? */ + ret = WaitForSingleObject(h, 0) != 0; + CloseHandle(h); + } + return ret; +#else + for (;;) { + int rc; + struct flock lock_info; + memset(&lock_info, 0, sizeof(lock_info)); + lock_info.l_type = F_WRLCK; + lock_info.l_whence = SEEK_SET; + lock_info.l_start = pid; + lock_info.l_len = 1; + if ((rc = fcntl(env->me_lfd, op, &lock_info)) == 0) { + if (op == F_GETLK && lock_info.l_type != F_UNLCK) + rc = -1; + } else if ((rc = ErrCode()) == EINTR) { + continue; + } + return rc; + } +#endif +} + +/** Common code for #mdb_txn_begin() and #mdb_txn_renew(). + * @param[in] txn the transaction handle to initialize + * @return 0 on success, non-zero on failure. + */ +static int +mdb_txn_renew0(MDB_txn *txn) +{ + MDB_env *env = txn->mt_env; + MDB_txninfo *ti = env->me_txns; + MDB_meta *meta; + unsigned int i, nr; + uint16_t x; + int rc, new_notls = 0; + + if (txn->mt_flags & MDB_TXN_RDONLY) { + /* Setup db info */ + txn->mt_numdbs = env->me_numdbs; + txn->mt_dbxs = env->me_dbxs; /* mostly static anyway */ + if (!ti) { + meta = env->me_metas[ mdb_env_pick_meta(env) ]; + txn->mt_txnid = meta->mm_txnid; + txn->mt_u.reader = NULL; + } else { + MDB_reader *r = (env->me_flags & MDB_NOTLS) ? txn->mt_u.reader : + pthread_getspecific(env->me_txkey); + if (r) { + if (r->mr_pid != env->me_pid || r->mr_txnid != (txnid_t)-1) + return MDB_BAD_RSLOT; + } else { + MDB_PID_T pid = env->me_pid; + MDB_THR_T tid = pthread_self(); + mdb_mutex_t *rmutex = MDB_MUTEX(env, r); + + if (!env->me_live_reader) { + rc = mdb_reader_pid(env, Pidset, pid); + if (rc) + return rc; + env->me_live_reader = 1; + } + + if (LOCK_MUTEX(rc, env, rmutex)) + return rc; + nr = ti->mti_numreaders; + for (i=0; i<nr; i++) + if (ti->mti_readers[i].mr_pid == 0) + break; + if (i == env->me_maxreaders) { + UNLOCK_MUTEX(rmutex); + return MDB_READERS_FULL; + } + r = &ti->mti_readers[i]; + r->mr_txnid = (txnid_t)-1; + r->mr_tid = tid; + r->mr_pid = pid; /* should be written last, see ITS#7971. */ + if (i == nr) + ti->mti_numreaders = ++nr; + /* Save numreaders for un-mutexed mdb_env_close() */ + env->me_numreaders = nr; + UNLOCK_MUTEX(rmutex); + + new_notls = (env->me_flags & MDB_NOTLS); + if (!new_notls && (rc=pthread_setspecific(env->me_txkey, r))) { + r->mr_pid = 0; + return rc; + } + } + do /* LY: Retry on a race, ITS#7970. */ + r->mr_txnid = ti->mti_txnid; + while(r->mr_txnid != ti->mti_txnid); + txn->mt_txnid = r->mr_txnid; + txn->mt_u.reader = r; + meta = env->me_metas[txn->mt_txnid & 1]; + } + } else { + if (ti) { + if (LOCK_MUTEX(rc, env, MDB_MUTEX(env, w))) + return rc; + txn->mt_txnid = ti->mti_txnid; + meta = env->me_metas[txn->mt_txnid & 1]; + } else { + meta = env->me_metas[ mdb_env_pick_meta(env) ]; + txn->mt_txnid = meta->mm_txnid; + } + /* Setup db info */ + txn->mt_numdbs = env->me_numdbs; + txn->mt_txnid++; +#if MDB_DEBUG + if (txn->mt_txnid == mdb_debug_start) + mdb_debug = 1; +#endif + txn->mt_flags = 0; + txn->mt_child = NULL; + txn->mt_loose_pgs = NULL; + txn->mt_loose_count = 0; + txn->mt_dirty_room = MDB_IDL_UM_MAX; + txn->mt_u.dirty_list = env->me_dirty_list; + txn->mt_u.dirty_list[0].mid = 0; + txn->mt_free_pgs = env->me_free_pgs; + txn->mt_free_pgs[0] = 0; + txn->mt_spill_pgs = NULL; + env->me_txn = txn; + memcpy(txn->mt_dbiseqs, env->me_dbiseqs, env->me_maxdbs * sizeof(unsigned int)); + } + + /* Copy the DB info and flags */ + memcpy(txn->mt_dbs, meta->mm_dbs, 2 * sizeof(MDB_db)); + + /* Moved to here to avoid a data race in read TXNs */ + txn->mt_next_pgno = meta->mm_last_pg+1; + + for (i=2; i<txn->mt_numdbs; i++) { + x = env->me_dbflags[i]; + txn->mt_dbs[i].md_flags = x & PERSISTENT_FLAGS; + txn->mt_dbflags[i] = (x & MDB_VALID) ? DB_VALID|DB_STALE : 0; + } + txn->mt_dbflags[0] = txn->mt_dbflags[1] = DB_VALID; + + if (env->me_maxpg < txn->mt_next_pgno) { + mdb_txn_reset0(txn, "renew0-mapfail"); + if (new_notls) { + txn->mt_u.reader->mr_pid = 0; + txn->mt_u.reader = NULL; + } + return MDB_MAP_RESIZED; + } + + return MDB_SUCCESS; +} + +int +mdb_txn_renew(MDB_txn *txn) +{ + int rc; + + if (!txn || txn->mt_dbxs) /* A reset txn has mt_dbxs==NULL */ + return EINVAL; + + if (txn->mt_env->me_flags & MDB_FATAL_ERROR) { + DPUTS("environment had fatal error, must shutdown!"); + return MDB_PANIC; + } + + rc = mdb_txn_renew0(txn); + if (rc == MDB_SUCCESS) { + DPRINTF(("renew txn %"Z"u%c %p on mdbenv %p, root page %"Z"u", + txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w', + (void *)txn, (void *)txn->mt_env, txn->mt_dbs[MAIN_DBI].md_root)); + } + return rc; +} + +int +mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret) +{ + MDB_txn *txn; + MDB_ntxn *ntxn; + int rc, size, tsize = sizeof(MDB_txn); + + if (env->me_flags & MDB_FATAL_ERROR) { + DPUTS("environment had fatal error, must shutdown!"); + return MDB_PANIC; + } + if ((env->me_flags & MDB_RDONLY) && !(flags & MDB_RDONLY)) + return EACCES; + if (parent) { + /* Nested transactions: Max 1 child, write txns only, no writemap */ + if (parent->mt_child || + (flags & MDB_RDONLY) || + (parent->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_ERROR)) || + (env->me_flags & MDB_WRITEMAP)) + { + return (parent->mt_flags & MDB_TXN_RDONLY) ? EINVAL : MDB_BAD_TXN; + } + tsize = sizeof(MDB_ntxn); + } + size = tsize; + if (!(flags & MDB_RDONLY)) { + if (!parent) { + txn = env->me_txn0; /* just reuse preallocated write txn */ + goto ok; + } + /* child txns use own copy of cursors */ + size += env->me_maxdbs * sizeof(MDB_cursor *); + } + size += env->me_maxdbs * (sizeof(MDB_db)+1); + + if ((txn = calloc(1, size)) == NULL) { + DPRINTF(("calloc: %s", strerror(errno))); + return ENOMEM; + } + txn->mt_dbs = (MDB_db *) ((char *)txn + tsize); + if (flags & MDB_RDONLY) { + txn->mt_flags |= MDB_TXN_RDONLY; + txn->mt_dbflags = (unsigned char *)(txn->mt_dbs + env->me_maxdbs); + txn->mt_dbiseqs = env->me_dbiseqs; + } else { + txn->mt_cursors = (MDB_cursor **)(txn->mt_dbs + env->me_maxdbs); + if (parent) { + txn->mt_dbiseqs = parent->mt_dbiseqs; + txn->mt_dbflags = (unsigned char *)(txn->mt_cursors + env->me_maxdbs); + } else { + txn->mt_dbiseqs = (unsigned int *)(txn->mt_cursors + env->me_maxdbs); + txn->mt_dbflags = (unsigned char *)(txn->mt_dbiseqs + env->me_maxdbs); + } + } + txn->mt_env = env; + +ok: + if (parent) { + unsigned int i; + txn->mt_u.dirty_list = malloc(sizeof(MDB_ID2)*MDB_IDL_UM_SIZE); + if (!txn->mt_u.dirty_list || + !(txn->mt_free_pgs = mdb_midl_alloc(MDB_IDL_UM_MAX))) + { + free(txn->mt_u.dirty_list); + free(txn); + return ENOMEM; + } + txn->mt_txnid = parent->mt_txnid; + txn->mt_dirty_room = parent->mt_dirty_room; + txn->mt_u.dirty_list[0].mid = 0; + txn->mt_spill_pgs = NULL; + txn->mt_next_pgno = parent->mt_next_pgno; + parent->mt_child = txn; + txn->mt_parent = parent; + txn->mt_numdbs = parent->mt_numdbs; + txn->mt_flags = parent->mt_flags; + txn->mt_dbxs = parent->mt_dbxs; + memcpy(txn->mt_dbs, parent->mt_dbs, txn->mt_numdbs * sizeof(MDB_db)); + /* Copy parent's mt_dbflags, but clear DB_NEW */ + for (i=0; i<txn->mt_numdbs; i++) + txn->mt_dbflags[i] = parent->mt_dbflags[i] & ~DB_NEW; + rc = 0; + ntxn = (MDB_ntxn *)txn; + ntxn->mnt_pgstate = env->me_pgstate; /* save parent me_pghead & co */ + if (env->me_pghead) { + size = MDB_IDL_SIZEOF(env->me_pghead); + env->me_pghead = mdb_midl_alloc(env->me_pghead[0]); + if (env->me_pghead) + memcpy(env->me_pghead, ntxn->mnt_pgstate.mf_pghead, size); + else + rc = ENOMEM; + } + if (!rc) + rc = mdb_cursor_shadow(parent, txn); + if (rc) + mdb_txn_reset0(txn, "beginchild-fail"); + } else { + rc = mdb_txn_renew0(txn); + } + if (rc) { + if (txn != env->me_txn0) + free(txn); + } else { + *ret = txn; + DPRINTF(("begin txn %"Z"u%c %p on mdbenv %p, root page %"Z"u", + txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w', + (void *) txn, (void *) env, txn->mt_dbs[MAIN_DBI].md_root)); + } + + return rc; +} + +MDB_env * +mdb_txn_env(MDB_txn *txn) +{ + if(!txn) return NULL; + return txn->mt_env; +} + +size_t +mdb_txn_id(MDB_txn *txn) +{ + if(!txn) return 0; + return txn->mt_txnid; +} + +/** Export or close DBI handles opened in this txn. */ +static void +mdb_dbis_update(MDB_txn *txn, int keep) +{ + int i; + MDB_dbi n = txn->mt_numdbs; + MDB_env *env = txn->mt_env; + unsigned char *tdbflags = txn->mt_dbflags; + + for (i = n; --i >= 2;) { + if (tdbflags[i] & DB_NEW) { + if (keep) { + env->me_dbflags[i] = txn->mt_dbs[i].md_flags | MDB_VALID; + } else { + char *ptr = env->me_dbxs[i].md_name.mv_data; + if (ptr) { + env->me_dbxs[i].md_name.mv_data = NULL; + env->me_dbxs[i].md_name.mv_size = 0; + env->me_dbflags[i] = 0; + env->me_dbiseqs[i]++; + free(ptr); + } + } + } + } + if (keep && env->me_numdbs < n) + env->me_numdbs = n; +} + +/** Common code for #mdb_txn_reset() and #mdb_txn_abort(). + * May be called twice for readonly txns: First reset it, then abort. + * @param[in] txn the transaction handle to reset + * @param[in] act why the transaction is being reset + */ +static void +mdb_txn_reset0(MDB_txn *txn, const char *act) +{ + MDB_env *env = txn->mt_env; + + /* Close any DBI handles opened in this txn */ + mdb_dbis_update(txn, 0); + + DPRINTF(("%s txn %"Z"u%c %p on mdbenv %p, root page %"Z"u", + act, txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w', + (void *) txn, (void *)env, txn->mt_dbs[MAIN_DBI].md_root)); + + if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) { + if (txn->mt_u.reader) { + txn->mt_u.reader->mr_txnid = (txnid_t)-1; + if (!(env->me_flags & MDB_NOTLS)) + txn->mt_u.reader = NULL; /* txn does not own reader */ + } + txn->mt_numdbs = 0; /* close nothing if called again */ + txn->mt_dbxs = NULL; /* mark txn as reset */ + } else { + pgno_t *pghead = env->me_pghead; + + mdb_cursors_close(txn, 0); + if (!(env->me_flags & MDB_WRITEMAP)) { + mdb_dlist_free(txn); + } + + if (!txn->mt_parent) { + if (mdb_midl_shrink(&txn->mt_free_pgs)) + env->me_free_pgs = txn->mt_free_pgs; + /* me_pgstate: */ + env->me_pghead = NULL; + env->me_pglast = 0; + + env->me_txn = NULL; + /* The writer mutex was locked in mdb_txn_begin. */ + if (env->me_txns) + UNLOCK_MUTEX(MDB_MUTEX(env, w)); + } else { + txn->mt_parent->mt_child = NULL; + env->me_pgstate = ((MDB_ntxn *)txn)->mnt_pgstate; + mdb_midl_free(txn->mt_free_pgs); + mdb_midl_free(txn->mt_spill_pgs); + free(txn->mt_u.dirty_list); + } + + mdb_midl_free(pghead); + } +} + +void +mdb_txn_reset(MDB_txn *txn) +{ + if (txn == NULL) + return; + + /* This call is only valid for read-only txns */ + if (!(txn->mt_flags & MDB_TXN_RDONLY)) + return; + + mdb_txn_reset0(txn, "reset"); +} + +void +mdb_txn_abort(MDB_txn *txn) +{ + if (txn == NULL) + return; + + if (txn->mt_child) + mdb_txn_abort(txn->mt_child); + + mdb_txn_reset0(txn, "abort"); + /* Free reader slot tied to this txn (if MDB_NOTLS && writable FS) */ + if ((txn->mt_flags & MDB_TXN_RDONLY) && txn->mt_u.reader) + txn->mt_u.reader->mr_pid = 0; + + if (txn != txn->mt_env->me_txn0) + free(txn); +} + +/** Save the freelist as of this transaction to the freeDB. + * This changes the freelist. Keep trying until it stabilizes. + */ +static int +mdb_freelist_save(MDB_txn *txn) +{ + /* env->me_pghead[] can grow and shrink during this call. + * env->me_pglast and txn->mt_free_pgs[] can only grow. + * Page numbers cannot disappear from txn->mt_free_pgs[]. + */ + MDB_cursor mc; + MDB_env *env = txn->mt_env; + int rc, maxfree_1pg = env->me_maxfree_1pg, more = 1; + txnid_t pglast = 0, head_id = 0; + pgno_t freecnt = 0, *free_pgs, *mop; + ssize_t head_room = 0, total_room = 0, mop_len, clean_limit; + + mdb_cursor_init(&mc, txn, FREE_DBI, NULL); + + if (env->me_pghead) { + /* Make sure first page of freeDB is touched and on freelist */ + rc = mdb_page_search(&mc, NULL, MDB_PS_FIRST|MDB_PS_MODIFY); + if (rc && rc != MDB_NOTFOUND) + return rc; + } + + if (!env->me_pghead && txn->mt_loose_pgs) { + /* Put loose page numbers in mt_free_pgs, since + * we may be unable to return them to me_pghead. + */ + MDB_page *mp = txn->mt_loose_pgs; + if ((rc = mdb_midl_need(&txn->mt_free_pgs, txn->mt_loose_count)) != 0) + return rc; + for (; mp; mp = NEXT_LOOSE_PAGE(mp)) + mdb_midl_xappend(txn->mt_free_pgs, mp->mp_pgno); + txn->mt_loose_pgs = NULL; + txn->mt_loose_count = 0; + } + + /* MDB_RESERVE cancels meminit in ovpage malloc (when no WRITEMAP) */ + clean_limit = (env->me_flags & (MDB_NOMEMINIT|MDB_WRITEMAP)) + ? SSIZE_MAX : maxfree_1pg; + + for (;;) { + /* Come back here after each Put() in case freelist changed */ + MDB_val key, data; + pgno_t *pgs; + ssize_t j; + + /* If using records from freeDB which we have not yet + * deleted, delete them and any we reserved for me_pghead. + */ + while (pglast < env->me_pglast) { + rc = mdb_cursor_first(&mc, &key, NULL); + if (rc) + return rc; + pglast = head_id = *(txnid_t *)key.mv_data; + total_room = head_room = 0; + mdb_tassert(txn, pglast <= env->me_pglast); + rc = mdb_cursor_del(&mc, 0); + if (rc) + return rc; + } + + /* Save the IDL of pages freed by this txn, to a single record */ + if (freecnt < txn->mt_free_pgs[0]) { + if (!freecnt) { + /* Make sure last page of freeDB is touched and on freelist */ + rc = mdb_page_search(&mc, NULL, MDB_PS_LAST|MDB_PS_MODIFY); + if (rc && rc != MDB_NOTFOUND) + return rc; + } + free_pgs = txn->mt_free_pgs; + /* Write to last page of freeDB */ + key.mv_size = sizeof(txn->mt_txnid); + key.mv_data = &txn->mt_txnid; + do { + freecnt = free_pgs[0]; + data.mv_size = MDB_IDL_SIZEOF(free_pgs); + rc = mdb_cursor_put(&mc, &key, &data, MDB_RESERVE); + if (rc) + return rc; + /* Retry if mt_free_pgs[] grew during the Put() */ + free_pgs = txn->mt_free_pgs; + } while (freecnt < free_pgs[0]); + mdb_midl_sort(free_pgs); + memcpy(data.mv_data, free_pgs, data.mv_size); +#if (MDB_DEBUG) > 1 + { + unsigned int i = free_pgs[0]; + DPRINTF(("IDL write txn %"Z"u root %"Z"u num %u", + txn->mt_txnid, txn->mt_dbs[FREE_DBI].md_root, i)); + for (; i; i--) + DPRINTF(("IDL %"Z"u", free_pgs[i])); + } +#endif + continue; + } + + mop = env->me_pghead; + mop_len = (mop ? mop[0] : 0) + txn->mt_loose_count; + + /* Reserve records for me_pghead[]. Split it if multi-page, + * to avoid searching freeDB for a page range. Use keys in + * range [1,me_pglast]: Smaller than txnid of oldest reader. + */ + if (total_room >= mop_len) { + if (total_room == mop_len || --more < 0) + break; + } else if (head_room >= maxfree_1pg && head_id > 1) { + /* Keep current record (overflow page), add a new one */ + head_id--; + head_room = 0; + } + /* (Re)write {key = head_id, IDL length = head_room} */ + total_room -= head_room; + head_room = mop_len - total_room; + if (head_room > maxfree_1pg && head_id > 1) { + /* Overflow multi-page for part of me_pghead */ + head_room /= head_id; /* amortize page sizes */ + head_room += maxfree_1pg - head_room % (maxfree_1pg + 1); + } else if (head_room < 0) { + /* Rare case, not bothering to delete this record */ + head_room = 0; + } + key.mv_size = sizeof(head_id); + key.mv_data = &head_id; + data.mv_size = (head_room + 1) * sizeof(pgno_t); + rc = mdb_cursor_put(&mc, &key, &data, MDB_RESERVE); + if (rc) + return rc; + /* IDL is initially empty, zero out at least the length */ + pgs = (pgno_t *)data.mv_data; + j = head_room > clean_limit ? head_room : 0; + do { + pgs[j] = 0; + } while (--j >= 0); + total_room += head_room; + } + + /* Return loose page numbers to me_pghead, though usually none are + * left at this point. The pages themselves remain in dirty_list. + */ + if (txn->mt_loose_pgs) { + MDB_page *mp = txn->mt_loose_pgs; + unsigned count = txn->mt_loose_count; + MDB_IDL loose; + /* Room for loose pages + temp IDL with same */ + if ((rc = mdb_midl_need(&env->me_pghead, 2*count+1)) != 0) + return rc; + mop = env->me_pghead; + loose = mop + MDB_IDL_ALLOCLEN(mop) - count; + for (count = 0; mp; mp = NEXT_LOOSE_PAGE(mp)) + loose[ ++count ] = mp->mp_pgno; + loose[0] = count; + mdb_midl_sort(loose); + mdb_midl_xmerge(mop, loose); + txn->mt_loose_pgs = NULL; + txn->mt_loose_count = 0; + mop_len = mop[0]; + } + + /* Fill in the reserved me_pghead records */ + rc = MDB_SUCCESS; + if (mop_len) { + MDB_val key, data; + + mop += mop_len; + rc = mdb_cursor_first(&mc, &key, &data); + for (; !rc; rc = mdb_cursor_next(&mc, &key, &data, MDB_NEXT)) { + txnid_t id = *(txnid_t *)key.mv_data; + ssize_t len = (ssize_t)(data.mv_size / sizeof(MDB_ID)) - 1; + MDB_ID save; + + mdb_tassert(txn, len >= 0 && id <= env->me_pglast); + key.mv_data = &id; + if (len > mop_len) { + len = mop_len; + data.mv_size = (len + 1) * sizeof(MDB_ID); + } + data.mv_data = mop -= len; + save = mop[0]; + mop[0] = len; + rc = mdb_cursor_put(&mc, &key, &data, MDB_CURRENT); + mop[0] = save; + if (rc || !(mop_len -= len)) + break; + } + } + return rc; +} + +/** Flush (some) dirty pages to the map, after clearing their dirty flag. + * @param[in] txn the transaction that's being committed + * @param[in] keep number of initial pages in dirty_list to keep dirty. + * @return 0 on success, non-zero on failure. + */ +static int +mdb_page_flush(MDB_txn *txn, int keep) +{ + MDB_env *env = txn->mt_env; + MDB_ID2L dl = txn->mt_u.dirty_list; + unsigned psize = env->me_psize, j; + int i, pagecount = dl[0].mid, rc; + size_t size = 0, pos = 0; + pgno_t pgno = 0; + MDB_page *dp = NULL; +#ifdef _WIN32 + OVERLAPPED ov; +#else + struct iovec iov[MDB_COMMIT_PAGES]; + ssize_t wpos = 0, wsize = 0, wres; + size_t next_pos = 1; /* impossible pos, so pos != next_pos */ + int n = 0; +#endif + + j = i = keep; + + if (env->me_flags & MDB_WRITEMAP) { + /* Clear dirty flags */ + while (++i <= pagecount) { + dp = dl[i].mptr; + /* Don't flush this page yet */ + if (dp->mp_flags & (P_LOOSE|P_KEEP)) { + dp->mp_flags &= ~P_KEEP; + dl[++j] = dl[i]; + continue; + } + dp->mp_flags &= ~P_DIRTY; + } + goto done; + } + + /* Write the pages */ + for (;;) { + if (++i <= pagecount) { + dp = dl[i].mptr; + /* Don't flush this page yet */ + if (dp->mp_flags & (P_LOOSE|P_KEEP)) { + dp->mp_flags &= ~P_KEEP; + dl[i].mid = 0; + continue; + } + pgno = dl[i].mid; + /* clear dirty flag */ + dp->mp_flags &= ~P_DIRTY; + pos = pgno * psize; + size = psize; + if (IS_OVERFLOW(dp)) size *= dp->mp_pages; + } +#ifdef _WIN32 + else break; + + /* Windows actually supports scatter/gather I/O, but only on + * unbuffered file handles. Since we're relying on the OS page + * cache for all our data, that's self-defeating. So we just + * write pages one at a time. We use the ov structure to set + * the write offset, to at least save the overhead of a Seek + * system call. + */ + DPRINTF(("committing page %"Z"u", pgno)); + memset(&ov, 0, sizeof(ov)); + ov.Offset = pos & 0xffffffff; + ov.OffsetHigh = pos >> 16 >> 16; + if (!WriteFile(env->me_fd, dp, size, NULL, &ov)) { + rc = ErrCode(); + DPRINTF(("WriteFile: %d", rc)); + return rc; + } +#else + /* Write up to MDB_COMMIT_PAGES dirty pages at a time. */ + if (pos!=next_pos || n==MDB_COMMIT_PAGES || wsize+size>MAX_WRITE) { + if (n) { + /* Write previous page(s) */ +#ifdef MDB_USE_PWRITEV + wres = pwritev(env->me_fd, iov, n, wpos); +#else + if (n == 1) { + wres = pwrite(env->me_fd, iov[0].iov_base, wsize, wpos); + } else { + if (lseek(env->me_fd, wpos, SEEK_SET) == -1) { + rc = ErrCode(); + DPRINTF(("lseek: %s", strerror(rc))); + return rc; + } + wres = writev(env->me_fd, iov, n); + } +#endif + if (wres != wsize) { + if (wres < 0) { + rc = ErrCode(); + DPRINTF(("Write error: %s", strerror(rc))); + } else { + rc = EIO; /* TODO: Use which error code? */ + DPUTS("short write, filesystem full?"); + } + return rc; + } + n = 0; + } + if (i > pagecount) + break; + wpos = pos; + wsize = 0; + } + DPRINTF(("committing page %"Z"u", pgno)); + next_pos = pos + size; + iov[n].iov_len = size; + iov[n].iov_base = (char *)dp; + wsize += size; + n++; +#endif /* _WIN32 */ + } + + /* MIPS has cache coherency issues, this is a no-op everywhere else + * Note: for any size >= on-chip cache size, entire on-chip cache is + * flushed. + */ + CACHEFLUSH(env->me_map, txn->mt_next_pgno * env->me_psize, DCACHE); + + for (i = keep; ++i <= pagecount; ) { + dp = dl[i].mptr; + /* This is a page we skipped above */ + if (!dl[i].mid) { + dl[++j] = dl[i]; + dl[j].mid = dp->mp_pgno; + continue; + } + mdb_dpage_free(env, dp); + } + +done: + i--; + txn->mt_dirty_room += i - j; + dl[0].mid = j; + return MDB_SUCCESS; +} + +int +mdb_txn_commit(MDB_txn *txn) +{ + int rc; + unsigned int i; + MDB_env *env; + + if (txn == NULL || txn->mt_env == NULL) + return EINVAL; + + if (txn->mt_child) { + rc = mdb_txn_commit(txn->mt_child); + txn->mt_child = NULL; + if (rc) + goto fail; + } + + env = txn->mt_env; + + if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) { + mdb_dbis_update(txn, 1); + txn->mt_numdbs = 2; /* so txn_abort() doesn't close any new handles */ + mdb_txn_abort(txn); + return MDB_SUCCESS; + } + + if (F_ISSET(txn->mt_flags, MDB_TXN_ERROR)) { + DPUTS("error flag is set, can't commit"); + if (txn->mt_parent) + txn->mt_parent->mt_flags |= MDB_TXN_ERROR; + rc = MDB_BAD_TXN; + goto fail; + } + + if (txn->mt_parent) { + MDB_txn *parent = txn->mt_parent; + MDB_page **lp; + MDB_ID2L dst, src; + MDB_IDL pspill; + unsigned x, y, len, ps_len; + + /* Append our free list to parent's */ + rc = mdb_midl_append_list(&parent->mt_free_pgs, txn->mt_free_pgs); + if (rc) + goto fail; + mdb_midl_free(txn->mt_free_pgs); + /* Failures after this must either undo the changes + * to the parent or set MDB_TXN_ERROR in the parent. + */ + + parent->mt_next_pgno = txn->mt_next_pgno; + parent->mt_flags = txn->mt_flags; + + /* Merge our cursors into parent's and close them */ + mdb_cursors_close(txn, 1); + + /* Update parent's DB table. */ + memcpy(parent->mt_dbs, txn->mt_dbs, txn->mt_numdbs * sizeof(MDB_db)); + parent->mt_numdbs = txn->mt_numdbs; + parent->mt_dbflags[0] = txn->mt_dbflags[0]; + parent->mt_dbflags[1] = txn->mt_dbflags[1]; + for (i=2; i<txn->mt_numdbs; i++) { + /* preserve parent's DB_NEW status */ + x = parent->mt_dbflags[i] & DB_NEW; + parent->mt_dbflags[i] = txn->mt_dbflags[i] | x; + } + + dst = parent->mt_u.dirty_list; + src = txn->mt_u.dirty_list; + /* Remove anything in our dirty list from parent's spill list */ + if ((pspill = parent->mt_spill_pgs) && (ps_len = pspill[0])) { + x = y = ps_len; + pspill[0] = (pgno_t)-1; + /* Mark our dirty pages as deleted in parent spill list */ + for (i=0, len=src[0].mid; ++i <= len; ) { + MDB_ID pn = src[i].mid << 1; + while (pn > pspill[x]) + x--; + if (pn == pspill[x]) { + pspill[x] = 1; + y = --x; + } + } + /* Squash deleted pagenums if we deleted any */ + for (x=y; ++x <= ps_len; ) + if (!(pspill[x] & 1)) + pspill[++y] = pspill[x]; + pspill[0] = y; + } + + /* Find len = length of merging our dirty list with parent's */ + x = dst[0].mid; + dst[0].mid = 0; /* simplify loops */ + if (parent->mt_parent) { + len = x + src[0].mid; + y = mdb_mid2l_search(src, dst[x].mid + 1) - 1; + for (i = x; y && i; y--) { + pgno_t yp = src[y].mid; + while (yp < dst[i].mid) + i--; + if (yp == dst[i].mid) { + i--; + len--; + } + } + } else { /* Simplify the above for single-ancestor case */ + len = MDB_IDL_UM_MAX - txn->mt_dirty_room; + } + /* Merge our dirty list with parent's */ + y = src[0].mid; + for (i = len; y; dst[i--] = src[y--]) { + pgno_t yp = src[y].mid; + while (yp < dst[x].mid) + dst[i--] = dst[x--]; + if (yp == dst[x].mid) + free(dst[x--].mptr); + } + mdb_tassert(txn, i == x); + dst[0].mid = len; + free(txn->mt_u.dirty_list); + parent->mt_dirty_room = txn->mt_dirty_room; + if (txn->mt_spill_pgs) { + if (parent->mt_spill_pgs) { + /* TODO: Prevent failure here, so parent does not fail */ + rc = mdb_midl_append_list(&parent->mt_spill_pgs, txn->mt_spill_pgs); + if (rc) + parent->mt_flags |= MDB_TXN_ERROR; + mdb_midl_free(txn->mt_spill_pgs); + mdb_midl_sort(parent->mt_spill_pgs); + } else { + parent->mt_spill_pgs = txn->mt_spill_pgs; + } + } + + /* Append our loose page list to parent's */ + for (lp = &parent->mt_loose_pgs; *lp; lp = &NEXT_LOOSE_PAGE(lp)) + ; + *lp = txn->mt_loose_pgs; + parent->mt_loose_count += txn->mt_loose_count; + + parent->mt_child = NULL; + mdb_midl_free(((MDB_ntxn *)txn)->mnt_pgstate.mf_pghead); + free(txn); + return rc; + } + + if (txn != env->me_txn) { + DPUTS("attempt to commit unknown transaction"); + rc = EINVAL; + goto fail; + } + + mdb_cursors_close(txn, 0); + + if (!txn->mt_u.dirty_list[0].mid && + !(txn->mt_flags & (MDB_TXN_DIRTY|MDB_TXN_SPILLS))) + goto done; + + DPRINTF(("committing txn %"Z"u %p on mdbenv %p, root page %"Z"u", + txn->mt_txnid, (void*)txn, (void*)env, txn->mt_dbs[MAIN_DBI].md_root)); + + /* Update DB root pointers */ + if (txn->mt_numdbs > 2) { + MDB_cursor mc; + MDB_dbi i; + MDB_val data; + data.mv_size = sizeof(MDB_db); + + mdb_cursor_init(&mc, txn, MAIN_DBI, NULL); + for (i = 2; i < txn->mt_numdbs; i++) { + if (txn->mt_dbflags[i] & DB_DIRTY) { + if (TXN_DBI_CHANGED(txn, i)) { + rc = MDB_BAD_DBI; + goto fail; + } + data.mv_data = &txn->mt_dbs[i]; + rc = mdb_cursor_put(&mc, &txn->mt_dbxs[i].md_name, &data, 0); + if (rc) + goto fail; + } + } + } + + rc = mdb_freelist_save(txn); + if (rc) + goto fail; + + mdb_midl_free(env->me_pghead); + env->me_pghead = NULL; + if (mdb_midl_shrink(&txn->mt_free_pgs)) + env->me_free_pgs = txn->mt_free_pgs; + +#if (MDB_DEBUG) > 2 + mdb_audit(txn); +#endif + + if ((rc = mdb_page_flush(txn, 0)) || + (rc = mdb_env_sync(env, 0)) || + (rc = mdb_env_write_meta(txn))) + goto fail; + + /* Free P_LOOSE pages left behind in dirty_list */ + if (!(env->me_flags & MDB_WRITEMAP)) + mdb_dlist_free(txn); + +done: + env->me_pglast = 0; + env->me_txn = NULL; + mdb_dbis_update(txn, 1); + + if (env->me_txns) + UNLOCK_MUTEX(MDB_MUTEX(env, w)); + if (txn != env->me_txn0) + free(txn); + + return MDB_SUCCESS; + +fail: + mdb_txn_abort(txn); + return rc; +} + +/** Read the environment parameters of a DB environment before + * mapping it into memory. + * @param[in] env the environment handle + * @param[out] meta address of where to store the meta information + * @return 0 on success, non-zero on failure. + */ +static int ESECT +mdb_env_read_header(MDB_env *env, MDB_meta *meta) +{ + MDB_metabuf pbuf; + MDB_page *p; + MDB_meta *m; + int i, rc, off; + enum { Size = sizeof(pbuf) }; + + /* We don't know the page size yet, so use a minimum value. + * Read both meta pages so we can use the latest one. + */ + + for (i=off=0; i<2; i++, off = meta->mm_psize) { +#ifdef _WIN32 + DWORD len; + OVERLAPPED ov; + memset(&ov, 0, sizeof(ov)); + ov.Offset = off; + rc = ReadFile(env->me_fd, &pbuf, Size, &len, &ov) ? (int)len : -1; + if (rc == -1 && ErrCode() == ERROR_HANDLE_EOF) + rc = 0; +#else + rc = pread(env->me_fd, &pbuf, Size, off); +#endif + if (rc != Size) { + if (rc == 0 && off == 0) + return ENOENT; + rc = rc < 0 ? (int) ErrCode() : MDB_INVALID; + DPRINTF(("read: %s", mdb_strerror(rc))); + return rc; + } + + p = (MDB_page *)&pbuf; + + if (!F_ISSET(p->mp_flags, P_META)) { + DPRINTF(("page %"Z"u not a meta page", p->mp_pgno)); + return MDB_INVALID; + } + + m = METADATA(p); + if (m->mm_magic != MDB_MAGIC) { + DPUTS("meta has invalid magic"); + return MDB_INVALID; + } + + if (m->mm_version != MDB_DATA_VERSION) { + DPRINTF(("database is version %u, expected version %u", + m->mm_version, MDB_DATA_VERSION)); + return MDB_VERSION_MISMATCH; + } + + if (off == 0 || m->mm_txnid > meta->mm_txnid) + *meta = *m; + } + return 0; +} + +/** Fill in most of the zeroed #MDB_meta for an empty database environment */ +static void ESECT +mdb_env_init_meta0(MDB_env *env, MDB_meta *meta) +{ + meta->mm_magic = MDB_MAGIC; + meta->mm_version = MDB_DATA_VERSION; + meta->mm_mapsize = env->me_mapsize; + meta->mm_psize = env->me_psize; + meta->mm_last_pg = 1; + meta->mm_flags = env->me_flags & 0xffff; + meta->mm_flags |= MDB_INTEGERKEY; + meta->mm_dbs[0].md_root = P_INVALID; + meta->mm_dbs[1].md_root = P_INVALID; +} + +/** Write the environment parameters of a freshly created DB environment. + * @param[in] env the environment handle + * @param[in] meta the #MDB_meta to write + * @return 0 on success, non-zero on failure. + */ +static int ESECT +mdb_env_init_meta(MDB_env *env, MDB_meta *meta) +{ + MDB_page *p, *q; + int rc; + unsigned int psize; +#ifdef _WIN32 + DWORD len; + OVERLAPPED ov; + memset(&ov, 0, sizeof(ov)); +#define DO_PWRITE(rc, fd, ptr, size, len, pos) do { \ + ov.Offset = pos; \ + rc = WriteFile(fd, ptr, size, &len, &ov); } while(0) +#else + int len; +#define DO_PWRITE(rc, fd, ptr, size, len, pos) do { \ + len = pwrite(fd, ptr, size, pos); \ + rc = (len >= 0); } while(0) +#endif + + DPUTS("writing new meta page"); + + psize = env->me_psize; + + p = calloc(2, psize); + p->mp_pgno = 0; + p->mp_flags = P_META; + *(MDB_meta *)METADATA(p) = *meta; + + q = (MDB_page *)((char *)p + psize); + q->mp_pgno = 1; + q->mp_flags = P_META; + *(MDB_meta *)METADATA(q) = *meta; + + DO_PWRITE(rc, env->me_fd, p, psize * 2, len, 0); + if (!rc) + rc = ErrCode(); + else if ((unsigned) len == psize * 2) + rc = MDB_SUCCESS; + else + rc = ENOSPC; + free(p); + return rc; +} + +/** Update the environment info to commit a transaction. + * @param[in] txn the transaction that's being committed + * @return 0 on success, non-zero on failure. + */ +static int +mdb_env_write_meta(MDB_txn *txn) +{ + MDB_env *env; + MDB_meta meta, metab, *mp; + size_t mapsize; + off_t off; + int rc, len, toggle; + char *ptr; + HANDLE mfd; +#ifdef _WIN32 + OVERLAPPED ov; +#else + int r2; +#endif + + toggle = txn->mt_txnid & 1; + DPRINTF(("writing meta page %d for root page %"Z"u", + toggle, txn->mt_dbs[MAIN_DBI].md_root)); + + env = txn->mt_env; + mp = env->me_metas[toggle]; + mapsize = env->me_metas[toggle ^ 1]->mm_mapsize; + /* Persist any increases of mapsize config */ + if (mapsize < env->me_mapsize) + mapsize = env->me_mapsize; + + if (env->me_flags & MDB_WRITEMAP) { + mp->mm_mapsize = mapsize; + mp->mm_dbs[0] = txn->mt_dbs[0]; + mp->mm_dbs[1] = txn->mt_dbs[1]; + mp->mm_last_pg = txn->mt_next_pgno - 1; +#if !(defined(_MSC_VER) || defined(__i386__) || defined(__x86_64__)) + /* LY: issue a memory barrier, if not x86. ITS#7969 */ + __sync_synchronize(); +#endif + mp->mm_txnid = txn->mt_txnid; + if (!(env->me_flags & (MDB_NOMETASYNC|MDB_NOSYNC))) { + unsigned meta_size = env->me_psize; + rc = (env->me_flags & MDB_MAPASYNC) ? MS_ASYNC : MS_SYNC; + ptr = env->me_map; + if (toggle) { +#ifndef _WIN32 /* POSIX msync() requires ptr = start of OS page */ + if (meta_size < env->me_os_psize) + meta_size += meta_size; + else +#endif + ptr += meta_size; + } + if (MDB_MSYNC(ptr, meta_size, rc)) { + rc = ErrCode(); + goto fail; + } + } + goto done; + } + metab.mm_txnid = env->me_metas[toggle]->mm_txnid; + metab.mm_last_pg = env->me_metas[toggle]->mm_last_pg; + + meta.mm_mapsize = mapsize; + meta.mm_dbs[0] = txn->mt_dbs[0]; + meta.mm_dbs[1] = txn->mt_dbs[1]; + meta.mm_last_pg = txn->mt_next_pgno - 1; + meta.mm_txnid = txn->mt_txnid; + + off = offsetof(MDB_meta, mm_mapsize); + ptr = (char *)&meta + off; + len = sizeof(MDB_meta) - off; + if (toggle) + off += env->me_psize; + off += PAGEHDRSZ; + + /* Write to the SYNC fd */ + mfd = env->me_flags & (MDB_NOSYNC|MDB_NOMETASYNC) ? + env->me_fd : env->me_mfd; +#ifdef _WIN32 + { + memset(&ov, 0, sizeof(ov)); + ov.Offset = off; + if (!WriteFile(mfd, ptr, len, (DWORD *)&rc, &ov)) + rc = -1; + } +#else + rc = pwrite(mfd, ptr, len, off); +#endif + if (rc != len) { + rc = rc < 0 ? ErrCode() : EIO; + DPUTS("write failed, disk error?"); + /* On a failure, the pagecache still contains the new data. + * Write some old data back, to prevent it from being used. + * Use the non-SYNC fd; we know it will fail anyway. + */ + meta.mm_last_pg = metab.mm_last_pg; + meta.mm_txnid = metab.mm_txnid; +#ifdef _WIN32 + memset(&ov, 0, sizeof(ov)); + ov.Offset = off; + WriteFile(env->me_fd, ptr, len, NULL, &ov); +#else + r2 = pwrite(env->me_fd, ptr, len, off); + (void)r2; /* Silence warnings. We don't care about pwrite's return value */ +#endif +fail: + env->me_flags |= MDB_FATAL_ERROR; + return rc; + } + /* MIPS has cache coherency issues, this is a no-op everywhere else */ + CACHEFLUSH(env->me_map + off, len, DCACHE); +done: + /* Memory ordering issues are irrelevant; since the entire writer + * is wrapped by wmutex, all of these changes will become visible + * after the wmutex is unlocked. Since the DB is multi-version, + * readers will get consistent data regardless of how fresh or + * how stale their view of these values is. + */ + if (env->me_txns) + env->me_txns->mti_txnid = txn->mt_txnid; + + return MDB_SUCCESS; +} + +/** Check both meta pages to see which one is newer. + * @param[in] env the environment handle + * @return meta toggle (0 or 1). + */ +static int +mdb_env_pick_meta(const MDB_env *env) +{ + return (env->me_metas[0]->mm_txnid < env->me_metas[1]->mm_txnid); +} + +int ESECT +mdb_env_create(MDB_env **env) +{ + MDB_env *e; + + e = calloc(1, sizeof(MDB_env)); + if (!e) + return ENOMEM; + + e->me_maxreaders = DEFAULT_READERS; + e->me_maxdbs = e->me_numdbs = 2; + e->me_fd = INVALID_HANDLE_VALUE; + e->me_lfd = INVALID_HANDLE_VALUE; + e->me_mfd = INVALID_HANDLE_VALUE; +#ifdef MDB_USE_SYSV_SEM + e->me_rmutex.semid = -1; + e->me_wmutex.semid = -1; +#endif + e->me_pid = getpid(); + GET_PAGESIZE(e->me_os_psize); + VGMEMP_CREATE(e,0,0); + *env = e; + return MDB_SUCCESS; +} + +static int ESECT +mdb_env_map(MDB_env *env, void *addr) +{ + MDB_page *p; + unsigned int flags = env->me_flags; +#ifdef _WIN32 + int rc; + HANDLE mh; + LONG sizelo, sizehi; + size_t msize; + + if (flags & MDB_RDONLY) { + /* Don't set explicit map size, use whatever exists */ + msize = 0; + sizelo = 0; + sizehi = 0; + } else { + msize = env->me_mapsize; + sizelo = msize & 0xffffffff; + sizehi = msize >> 16 >> 16; /* only needed on Win64 */ + + /* Windows won't create mappings for zero length files. + * and won't map more than the file size. + * Just set the maxsize right now. + */ + if (SetFilePointer(env->me_fd, sizelo, &sizehi, 0) != (DWORD)sizelo + || !SetEndOfFile(env->me_fd) + || SetFilePointer(env->me_fd, 0, NULL, 0) != 0) + return ErrCode(); + } + + mh = CreateFileMapping(env->me_fd, NULL, flags & MDB_WRITEMAP ? + PAGE_READWRITE : PAGE_READONLY, + sizehi, sizelo, NULL); + if (!mh) + return ErrCode(); + env->me_map = MapViewOfFileEx(mh, flags & MDB_WRITEMAP ? + FILE_MAP_WRITE : FILE_MAP_READ, + 0, 0, msize, addr); + rc = env->me_map ? 0 : ErrCode(); + CloseHandle(mh); + if (rc) + return rc; +#else + int prot = PROT_READ; + if (flags & MDB_WRITEMAP) { + prot |= PROT_WRITE; + if (ftruncate(env->me_fd, env->me_mapsize) < 0) + return ErrCode(); + } + env->me_map = mmap(addr, env->me_mapsize, prot, MAP_SHARED, + env->me_fd, 0); + if (env->me_map == MAP_FAILED) { + env->me_map = NULL; + return ErrCode(); + } + + if (flags & MDB_NORDAHEAD) { + /* Turn off readahead. It's harmful when the DB is larger than RAM. */ +#ifdef MADV_RANDOM + madvise(env->me_map, env->me_mapsize, MADV_RANDOM); +#else +#ifdef POSIX_MADV_RANDOM + posix_madvise(env->me_map, env->me_mapsize, POSIX_MADV_RANDOM); +#endif /* POSIX_MADV_RANDOM */ +#endif /* MADV_RANDOM */ + } +#endif /* _WIN32 */ + + /* Can happen because the address argument to mmap() is just a + * hint. mmap() can pick another, e.g. if the range is in use. + * The MAP_FIXED flag would prevent that, but then mmap could + * instead unmap existing pages to make room for the new map. + */ + if (addr && env->me_map != addr) + return EBUSY; /* TODO: Make a new MDB_* error code? */ + + p = (MDB_page *)env->me_map; + env->me_metas[0] = METADATA(p); + env->me_metas[1] = (MDB_meta *)((char *)env->me_metas[0] + env->me_psize); + + return MDB_SUCCESS; +} + +int ESECT +mdb_env_set_mapsize(MDB_env *env, size_t size) +{ + /* If env is already open, caller is responsible for making + * sure there are no active txns. + */ + if (env->me_map) { + int rc; + MDB_meta *meta; + void *old; + if (env->me_txn) + return EINVAL; + meta = env->me_metas[mdb_env_pick_meta(env)]; + if (!size) + size = meta->mm_mapsize; + { + /* Silently round up to minimum if the size is too small */ + size_t minsize = (meta->mm_last_pg + 1) * env->me_psize; + if (size < minsize) + size = minsize; + } + munmap(env->me_map, env->me_mapsize); + env->me_mapsize = size; + old = (env->me_flags & MDB_FIXEDMAP) ? env->me_map : NULL; + rc = mdb_env_map(env, old); + if (rc) + return rc; + } + env->me_mapsize = size; + if (env->me_psize) + env->me_maxpg = env->me_mapsize / env->me_psize; + return MDB_SUCCESS; +} + +int ESECT +mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs) +{ + if (env->me_map) + return EINVAL; + env->me_maxdbs = dbs + 2; /* Named databases + main and free DB */ + return MDB_SUCCESS; +} + +int ESECT +mdb_env_set_maxreaders(MDB_env *env, unsigned int readers) +{ + if (env->me_map || readers < 1) + return EINVAL; + env->me_maxreaders = readers; + return MDB_SUCCESS; +} + +int ESECT +mdb_env_get_maxreaders(MDB_env *env, unsigned int *readers) +{ + if (!env || !readers) + return EINVAL; + *readers = env->me_maxreaders; + return MDB_SUCCESS; +} + +static int ESECT +mdb_fsize(HANDLE fd, size_t *size) +{ +#ifdef _WIN32 + LARGE_INTEGER fsize; + + if (!GetFileSizeEx(fd, &fsize)) + return ErrCode(); + + *size = fsize.QuadPart; +#else + struct stat st; + + if (fstat(fd, &st)) + return ErrCode(); + + *size = st.st_size; +#endif + return MDB_SUCCESS; +} + +#ifdef BROKEN_FDATASYNC +#include <sys/utsname.h> +#include <sys/vfs.h> +#endif + +/** Further setup required for opening an LMDB environment + */ +static int ESECT +mdb_env_open2(MDB_env *env) +{ + unsigned int flags = env->me_flags; + int i, newenv = 0, rc; + MDB_meta meta; + +#ifdef _WIN32 + /* See if we should use QueryLimited */ + rc = GetVersion(); + if ((rc & 0xff) > 5) + env->me_pidquery = MDB_PROCESS_QUERY_LIMITED_INFORMATION; + else + env->me_pidquery = PROCESS_QUERY_INFORMATION; +#endif /* _WIN32 */ + +#ifdef BROKEN_FDATASYNC + /* ext3/ext4 fdatasync is broken on some older Linux kernels. + * https://lkml.org/lkml/2012/9/3/83 + * Kernels after 3.6-rc6 are known good. + * https://lkml.org/lkml/2012/9/10/556 + * See if the DB is on ext3/ext4, then check for new enough kernel + * Kernels 2.6.32.60, 2.6.34.15, 3.2.30, and 3.5.4 are also known + * to be patched. + */ + { + struct statfs st; + fstatfs(env->me_fd, &st); + while (st.f_type == 0xEF53) { + struct utsname uts; + int i; + uname(&uts); + if (uts.release[0] < '3') { + if (!strncmp(uts.release, "2.6.32.", 7)) { + i = atoi(uts.release+7); + if (i >= 60) + break; /* 2.6.32.60 and newer is OK */ + } else if (!strncmp(uts.release, "2.6.34.", 7)) { + i = atoi(uts.release+7); + if (i >= 15) + break; /* 2.6.34.15 and newer is OK */ + } + } else if (uts.release[0] == '3') { + i = atoi(uts.release+2); + if (i > 5) + break; /* 3.6 and newer is OK */ + if (i == 5) { + i = atoi(uts.release+4); + if (i >= 4) + break; /* 3.5.4 and newer is OK */ + } else if (i == 2) { + i = atoi(uts.release+4); + if (i >= 30) + break; /* 3.2.30 and newer is OK */ + } + } else { /* 4.x and newer is OK */ + break; + } + env->me_flags |= MDB_FSYNCONLY; + break; + } + } +#endif + + if ((i = mdb_env_read_header(env, &meta)) != 0) { + if (i != ENOENT) + return i; + DPUTS("new mdbenv"); + newenv = 1; + env->me_psize = env->me_os_psize; + if (env->me_psize > MAX_PAGESIZE) + env->me_psize = MAX_PAGESIZE; + memset(&meta, 0, sizeof(meta)); + mdb_env_init_meta0(env, &meta); + meta.mm_mapsize = DEFAULT_MAPSIZE; + } else { + env->me_psize = meta.mm_psize; + } + + /* Was a mapsize configured? */ + if (!env->me_mapsize) { + env->me_mapsize = meta.mm_mapsize; + } + { + /* Make sure mapsize >= committed data size. Even when using + * mm_mapsize, which could be broken in old files (ITS#7789). + */ + size_t minsize = (meta.mm_last_pg + 1) * meta.mm_psize; + if (env->me_mapsize < minsize) + env->me_mapsize = minsize; + } + meta.mm_mapsize = env->me_mapsize; + + if (newenv && !(flags & MDB_FIXEDMAP)) { + /* mdb_env_map() may grow the datafile. Write the metapages + * first, so the file will be valid if initialization fails. + * Except with FIXEDMAP, since we do not yet know mm_address. + * We could fill in mm_address later, but then a different + * program might end up doing that - one with a memory layout + * and map address which does not suit the main program. + */ + rc = mdb_env_init_meta(env, &meta); + if (rc) + return rc; + newenv = 0; + } + + rc = mdb_env_map(env, (flags & MDB_FIXEDMAP) ? meta.mm_address : NULL); + if (rc) + return rc; + + if (newenv) { + if (flags & MDB_FIXEDMAP) + meta.mm_address = env->me_map; + i = mdb_env_init_meta(env, &meta); + if (i != MDB_SUCCESS) { + return i; + } + } + + env->me_maxfree_1pg = (env->me_psize - PAGEHDRSZ) / sizeof(pgno_t) - 1; + env->me_nodemax = (((env->me_psize - PAGEHDRSZ) / MDB_MINKEYS) & -2) + - sizeof(indx_t); +#if !(MDB_MAXKEYSIZE) + env->me_maxkey = env->me_nodemax - (NODESIZE + sizeof(MDB_db)); +#endif + env->me_maxpg = env->me_mapsize / env->me_psize; + +#if MDB_DEBUG + { + int toggle = mdb_env_pick_meta(env); + MDB_db *db = &env->me_metas[toggle]->mm_dbs[MAIN_DBI]; + + DPRINTF(("opened database version %u, pagesize %u", + env->me_metas[0]->mm_version, env->me_psize)); + DPRINTF(("using meta page %d", toggle)); + DPRINTF(("depth: %u", db->md_depth)); + DPRINTF(("entries: %"Z"u", db->md_entries)); + DPRINTF(("branch pages: %"Z"u", db->md_branch_pages)); + DPRINTF(("leaf pages: %"Z"u", db->md_leaf_pages)); + DPRINTF(("overflow pages: %"Z"u", db->md_overflow_pages)); + DPRINTF(("root: %"Z"u", db->md_root)); + } +#endif + + return MDB_SUCCESS; +} + + +/** Release a reader thread's slot in the reader lock table. + * This function is called automatically when a thread exits. + * @param[in] ptr This points to the slot in the reader lock table. + */ +static void +mdb_env_reader_dest(void *ptr) +{ + MDB_reader *reader = ptr; + + reader->mr_pid = 0; +} + +#ifdef _WIN32 +/** Junk for arranging thread-specific callbacks on Windows. This is + * necessarily platform and compiler-specific. Windows supports up + * to 1088 keys. Let's assume nobody opens more than 64 environments + * in a single process, for now. They can override this if needed. + */ +#ifndef MAX_TLS_KEYS +#define MAX_TLS_KEYS 64 +#endif +static pthread_key_t mdb_tls_keys[MAX_TLS_KEYS]; +static int mdb_tls_nkeys; + +static void NTAPI mdb_tls_callback(PVOID module, DWORD reason, PVOID ptr) +{ + int i; + switch(reason) { + case DLL_PROCESS_ATTACH: break; + case DLL_THREAD_ATTACH: break; + case DLL_THREAD_DETACH: + for (i=0; i<mdb_tls_nkeys; i++) { + MDB_reader *r = pthread_getspecific(mdb_tls_keys[i]); + if (r) { + mdb_env_reader_dest(r); + } + } + break; + case DLL_PROCESS_DETACH: break; + } +} +#ifdef __GNUC__ +#ifdef _WIN64 +const PIMAGE_TLS_CALLBACK mdb_tls_cbp __attribute__((section (".CRT$XLB"))) = mdb_tls_callback; +#else +PIMAGE_TLS_CALLBACK mdb_tls_cbp __attribute__((section (".CRT$XLB"))) = mdb_tls_callback; +#endif +#else +#ifdef _WIN64 +/* Force some symbol references. + * _tls_used forces the linker to create the TLS directory if not already done + * mdb_tls_cbp prevents whole-program-optimizer from dropping the symbol. + */ +#pragma comment(linker, "/INCLUDE:_tls_used") +#pragma comment(linker, "/INCLUDE:mdb_tls_cbp") +#pragma const_seg(".CRT$XLB") +extern const PIMAGE_TLS_CALLBACK mdb_tls_cbp; +const PIMAGE_TLS_CALLBACK mdb_tls_cbp = mdb_tls_callback; +#pragma const_seg() +#else /* _WIN32 */ +#pragma comment(linker, "/INCLUDE:__tls_used") +#pragma comment(linker, "/INCLUDE:_mdb_tls_cbp") +#pragma data_seg(".CRT$XLB") +PIMAGE_TLS_CALLBACK mdb_tls_cbp = mdb_tls_callback; +#pragma data_seg() +#endif /* WIN 32/64 */ +#endif /* !__GNUC__ */ +#endif + +/** Downgrade the exclusive lock on the region back to shared */ +static int ESECT +mdb_env_share_locks(MDB_env *env, int *excl) +{ + int rc = 0, toggle = mdb_env_pick_meta(env); + + env->me_txns->mti_txnid = env->me_metas[toggle]->mm_txnid; + +#ifdef _WIN32 + { + OVERLAPPED ov; + /* First acquire a shared lock. The Unlock will + * then release the existing exclusive lock. + */ + memset(&ov, 0, sizeof(ov)); + if (!LockFileEx(env->me_lfd, 0, 0, 1, 0, &ov)) { + rc = ErrCode(); + } else { + UnlockFile(env->me_lfd, 0, 0, 1, 0); + *excl = 0; + } + } +#else + { + struct flock lock_info; + /* The shared lock replaces the existing lock */ + memset((void *)&lock_info, 0, sizeof(lock_info)); + lock_info.l_type = F_RDLCK; + lock_info.l_whence = SEEK_SET; + lock_info.l_start = 0; + lock_info.l_len = 1; + while ((rc = fcntl(env->me_lfd, F_SETLK, &lock_info)) && + (rc = ErrCode()) == EINTR) ; + *excl = rc ? -1 : 0; /* error may mean we lost the lock */ + } +#endif + + return rc; +} + +/** Try to get exclusive lock, otherwise shared. + * Maintain *excl = -1: no/unknown lock, 0: shared, 1: exclusive. + */ +static int ESECT +mdb_env_excl_lock(MDB_env *env, int *excl) +{ + int rc = 0; +#ifdef _WIN32 + if (LockFile(env->me_lfd, 0, 0, 1, 0)) { + *excl = 1; + } else { + OVERLAPPED ov; + memset(&ov, 0, sizeof(ov)); + if (LockFileEx(env->me_lfd, 0, 0, 1, 0, &ov)) { + *excl = 0; + } else { + rc = ErrCode(); + } + } +#else + struct flock lock_info; + memset((void *)&lock_info, 0, sizeof(lock_info)); + lock_info.l_type = F_WRLCK; + lock_info.l_whence = SEEK_SET; + lock_info.l_start = 0; + lock_info.l_len = 1; + while ((rc = fcntl(env->me_lfd, F_SETLK, &lock_info)) && + (rc = ErrCode()) == EINTR) ; + if (!rc) { + *excl = 1; + } else +# ifdef MDB_USE_SYSV_SEM + if (*excl < 0) /* always true when !MDB_USE_SYSV_SEM */ +# endif + { + lock_info.l_type = F_RDLCK; + while ((rc = fcntl(env->me_lfd, F_SETLKW, &lock_info)) && + (rc = ErrCode()) == EINTR) ; + if (rc == 0) + *excl = 0; + } +#endif + return rc; +} + +#ifdef MDB_USE_HASH +/* + * hash_64 - 64 bit Fowler/Noll/Vo-0 FNV-1a hash code + * + * @(#) $Revision: 5.1 $ + * @(#) $Id: hash_64a.c,v 5.1 2009/06/30 09:01:38 chongo Exp $ + * @(#) $Source: /usr/local/src/cmd/fnv/RCS/hash_64a.c,v $ + * + * http://www.isthe.com/chongo/tech/comp/fnv/index.html + * + *** + * + * Please do not copyright this code. This code is in the public domain. + * + * LANDON CURT NOLL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO + * EVENT SHALL LANDON CURT NOLL BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF + * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR + * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + * + * By: + * chongo <Landon Curt Noll> /\oo/\ + * http://www.isthe.com/chongo/ + * + * Share and Enjoy! :-) + */ + +typedef unsigned long long mdb_hash_t; +#define MDB_HASH_INIT ((mdb_hash_t)0xcbf29ce484222325ULL) + +/** perform a 64 bit Fowler/Noll/Vo FNV-1a hash on a buffer + * @param[in] val value to hash + * @param[in] hval initial value for hash + * @return 64 bit hash + * + * NOTE: To use the recommended 64 bit FNV-1a hash, use MDB_HASH_INIT as the + * hval arg on the first call. + */ +static mdb_hash_t +mdb_hash_val(MDB_val *val, mdb_hash_t hval) +{ + unsigned char *s = (unsigned char *)val->mv_data; /* unsigned string */ + unsigned char *end = s + val->mv_size; + /* + * FNV-1a hash each octet of the string + */ + while (s < end) { + /* xor the bottom with the current octet */ + hval ^= (mdb_hash_t)*s++; + + /* multiply by the 64 bit FNV magic prime mod 2^64 */ + hval += (hval << 1) + (hval << 4) + (hval << 5) + + (hval << 7) + (hval << 8) + (hval << 40); + } + /* return our new hash value */ + return hval; +} + +/** Hash the string and output the encoded hash. + * This uses modified RFC1924 Ascii85 encoding to accommodate systems with + * very short name limits. We don't care about the encoding being reversible, + * we just want to preserve as many bits of the input as possible in a + * small printable string. + * @param[in] str string to hash + * @param[out] encbuf an array of 11 chars to hold the hash + */ +static const char mdb_a85[]= "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~"; + +static void +mdb_pack85(unsigned long l, char *out) +{ + int i; + + for (i=0; i<5; i++) { + *out++ = mdb_a85[l % 85]; + l /= 85; + } +} + +static void +mdb_hash_enc(MDB_val *val, char *encbuf) +{ + mdb_hash_t h = mdb_hash_val(val, MDB_HASH_INIT); + + mdb_pack85(h, encbuf); + mdb_pack85(h>>32, encbuf+5); + encbuf[10] = '\0'; +} +#endif + +/** Open and/or initialize the lock region for the environment. + * @param[in] env The LMDB environment. + * @param[in] lpath The pathname of the file used for the lock region. + * @param[in] mode The Unix permissions for the file, if we create it. + * @param[in,out] excl In -1, out lock type: -1 none, 0 shared, 1 exclusive + * @return 0 on success, non-zero on failure. + */ +static int ESECT +mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) +{ +#ifdef _WIN32 +# define MDB_ERRCODE_ROFS ERROR_WRITE_PROTECT +#else +# define MDB_ERRCODE_ROFS EROFS +#ifdef O_CLOEXEC /* Linux: Open file and set FD_CLOEXEC atomically */ +# define MDB_CLOEXEC O_CLOEXEC +#else + int fdflags; +# define MDB_CLOEXEC 0 +#endif +#endif +#ifdef MDB_USE_SYSV_SEM + int semid; + union semun semu; +#endif + int rc; + off_t size, rsize; + +#ifdef _WIN32 + env->me_lfd = CreateFile(lpath, GENERIC_READ|GENERIC_WRITE, + FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, OPEN_ALWAYS, + FILE_ATTRIBUTE_NORMAL, NULL); +#else + env->me_lfd = open(lpath, O_RDWR|O_CREAT|MDB_CLOEXEC, mode); +#endif + if (env->me_lfd == INVALID_HANDLE_VALUE) { + rc = ErrCode(); + if (rc == MDB_ERRCODE_ROFS && (env->me_flags & MDB_RDONLY)) { + return MDB_SUCCESS; + } + goto fail_errno; + } +#if ! ((MDB_CLOEXEC) || defined(_WIN32)) + /* Lose record locks when exec*() */ + if ((fdflags = fcntl(env->me_lfd, F_GETFD) | FD_CLOEXEC) >= 0) + fcntl(env->me_lfd, F_SETFD, fdflags); +#endif + + if (!(env->me_flags & MDB_NOTLS)) { + rc = pthread_key_create(&env->me_txkey, mdb_env_reader_dest); + if (rc) + goto fail; + env->me_flags |= MDB_ENV_TXKEY; +#ifdef _WIN32 + /* Windows TLS callbacks need help finding their TLS info. */ + if (mdb_tls_nkeys >= MAX_TLS_KEYS) { + rc = MDB_TLS_FULL; + goto fail; + } + mdb_tls_keys[mdb_tls_nkeys++] = env->me_txkey; +#endif + } + + /* Try to get exclusive lock. If we succeed, then + * nobody is using the lock region and we should initialize it. + */ + if ((rc = mdb_env_excl_lock(env, excl))) goto fail; + +#ifdef _WIN32 + size = GetFileSize(env->me_lfd, NULL); +#else + size = lseek(env->me_lfd, 0, SEEK_END); + if (size == -1) goto fail_errno; +#endif + rsize = (env->me_maxreaders-1) * sizeof(MDB_reader) + sizeof(MDB_txninfo); + if (size < rsize && *excl > 0) { +#ifdef _WIN32 + if (SetFilePointer(env->me_lfd, rsize, NULL, FILE_BEGIN) != (DWORD)rsize + || !SetEndOfFile(env->me_lfd)) + goto fail_errno; +#else + if (ftruncate(env->me_lfd, rsize) != 0) goto fail_errno; +#endif + } else { + rsize = size; + size = rsize - sizeof(MDB_txninfo); + env->me_maxreaders = size/sizeof(MDB_reader) + 1; + } + { +#ifdef _WIN32 + HANDLE mh; + mh = CreateFileMapping(env->me_lfd, NULL, PAGE_READWRITE, + 0, 0, NULL); + if (!mh) goto fail_errno; + env->me_txns = MapViewOfFileEx(mh, FILE_MAP_WRITE, 0, 0, rsize, NULL); + CloseHandle(mh); + if (!env->me_txns) goto fail_errno; +#else + void *m = mmap(NULL, rsize, PROT_READ|PROT_WRITE, MAP_SHARED, + env->me_lfd, 0); + if (m == MAP_FAILED) goto fail_errno; + env->me_txns = m; +#endif + } + if (*excl > 0) { +#ifdef _WIN32 + BY_HANDLE_FILE_INFORMATION stbuf; + struct { + DWORD volume; + DWORD nhigh; + DWORD nlow; + } idbuf; + MDB_val val; + char encbuf[11]; + + if (!mdb_sec_inited) { + InitializeSecurityDescriptor(&mdb_null_sd, + SECURITY_DESCRIPTOR_REVISION); + SetSecurityDescriptorDacl(&mdb_null_sd, TRUE, 0, FALSE); + mdb_all_sa.nLength = sizeof(SECURITY_ATTRIBUTES); + mdb_all_sa.bInheritHandle = FALSE; + mdb_all_sa.lpSecurityDescriptor = &mdb_null_sd; + mdb_sec_inited = 1; + } + if (!GetFileInformationByHandle(env->me_lfd, &stbuf)) goto fail_errno; + idbuf.volume = stbuf.dwVolumeSerialNumber; + idbuf.nhigh = stbuf.nFileIndexHigh; + idbuf.nlow = stbuf.nFileIndexLow; + val.mv_data = &idbuf; + val.mv_size = sizeof(idbuf); + mdb_hash_enc(&val, encbuf); + sprintf(env->me_txns->mti_rmname, "Global\\MDBr%s", encbuf); + sprintf(env->me_txns->mti_wmname, "Global\\MDBw%s", encbuf); + env->me_rmutex = CreateMutex(&mdb_all_sa, FALSE, env->me_txns->mti_rmname); + if (!env->me_rmutex) goto fail_errno; + env->me_wmutex = CreateMutex(&mdb_all_sa, FALSE, env->me_txns->mti_wmname); + if (!env->me_wmutex) goto fail_errno; +#elif defined(MDB_USE_SYSV_SEM) + unsigned short vals[2] = {1, 1}; + semid = semget(IPC_PRIVATE, 2, mode); + if (semid < 0) + goto fail_errno; + semu.array = vals; + if (semctl(semid, 0, SETALL, semu) < 0) + goto fail_errno; + env->me_txns->mti_semid = semid; +#else /* MDB_USE_SYSV_SEM */ + pthread_mutexattr_t mattr; + + if ((rc = pthread_mutexattr_init(&mattr)) + || (rc = pthread_mutexattr_setpshared(&mattr, PTHREAD_PROCESS_SHARED)) +#ifdef MDB_ROBUST_SUPPORTED + || (rc = pthread_mutexattr_setrobust(&mattr, PTHREAD_MUTEX_ROBUST)) +#endif + || (rc = pthread_mutex_init(&env->me_txns->mti_rmutex, &mattr)) + || (rc = pthread_mutex_init(&env->me_txns->mti_wmutex, &mattr))) + goto fail; + pthread_mutexattr_destroy(&mattr); +#endif /* _WIN32 || MDB_USE_SYSV_SEM */ + + env->me_txns->mti_magic = MDB_MAGIC; + env->me_txns->mti_format = MDB_LOCK_FORMAT; + env->me_txns->mti_txnid = 0; + env->me_txns->mti_numreaders = 0; + + } else { +#ifdef MDB_USE_SYSV_SEM + struct semid_ds buf; +#endif + if (env->me_txns->mti_magic != MDB_MAGIC) { + DPUTS("lock region has invalid magic"); + rc = MDB_INVALID; + goto fail; + } + if (env->me_txns->mti_format != MDB_LOCK_FORMAT) { + DPRINTF(("lock region has format+version 0x%x, expected 0x%x", + env->me_txns->mti_format, MDB_LOCK_FORMAT)); + rc = MDB_VERSION_MISMATCH; + goto fail; + } + rc = ErrCode(); + if (rc && rc != EACCES && rc != EAGAIN) { + goto fail; + } +#ifdef _WIN32 + env->me_rmutex = OpenMutex(SYNCHRONIZE, FALSE, env->me_txns->mti_rmname); + if (!env->me_rmutex) goto fail_errno; + env->me_wmutex = OpenMutex(SYNCHRONIZE, FALSE, env->me_txns->mti_wmname); + if (!env->me_wmutex) goto fail_errno; +#elif defined(MDB_USE_SYSV_SEM) + semid = env->me_txns->mti_semid; + semu.buf = &buf; + /* check for read access */ + if (semctl(semid, 0, IPC_STAT, semu) < 0) + goto fail_errno; + /* check for write access */ + if (semctl(semid, 0, IPC_SET, semu) < 0) + goto fail_errno; +#endif + } +#ifdef MDB_USE_SYSV_SEM + env->me_rmutex.semid = semid; + env->me_wmutex.semid = semid; + env->me_rmutex.semnum = 0; + env->me_wmutex.semnum = 1; + env->me_rmutex.locked = &env->me_txns->mti_rlocked; + env->me_wmutex.locked = &env->me_txns->mti_wlocked; +#endif + + return MDB_SUCCESS; + +fail_errno: + rc = ErrCode(); +fail: + return rc; +} + + /** The name of the lock file in the DB environment */ +#define LOCKNAME "/lock.mdb" + /** The name of the data file in the DB environment */ +#define DATANAME "/data.mdb" + /** The suffix of the lock file when no subdir is used */ +#define LOCKSUFF "-lock" + /** Only a subset of the @ref mdb_env flags can be changed + * at runtime. Changing other flags requires closing the + * environment and re-opening it with the new flags. + */ +#define CHANGEABLE (MDB_NOSYNC|MDB_NOMETASYNC|MDB_MAPASYNC|MDB_NOMEMINIT) +#define CHANGELESS (MDB_FIXEDMAP|MDB_NOSUBDIR|MDB_RDONLY| \ + MDB_WRITEMAP|MDB_NOTLS|MDB_NOLOCK|MDB_NORDAHEAD) + +#if VALID_FLAGS & PERSISTENT_FLAGS & (CHANGEABLE|CHANGELESS) +# error "Persistent DB flags & env flags overlap, but both go in mm_flags" +#endif + +int ESECT +mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode) +{ + int oflags, rc, len, excl = -1; + char *lpath, *dpath; + + if (env->me_fd!=INVALID_HANDLE_VALUE || (flags & ~(CHANGEABLE|CHANGELESS))) + return EINVAL; + + len = strlen(path); + if (flags & MDB_NOSUBDIR) { + rc = len + sizeof(LOCKSUFF) + len + 1; + } else { + rc = len + sizeof(LOCKNAME) + len + sizeof(DATANAME); + } + lpath = malloc(rc); + if (!lpath) + return ENOMEM; + if (flags & MDB_NOSUBDIR) { + dpath = lpath + len + sizeof(LOCKSUFF); + sprintf(lpath, "%s" LOCKSUFF, path); + strcpy(dpath, path); + } else { + dpath = lpath + len + sizeof(LOCKNAME); + sprintf(lpath, "%s" LOCKNAME, path); + sprintf(dpath, "%s" DATANAME, path); + } + + rc = MDB_SUCCESS; + flags |= env->me_flags; + if (flags & MDB_RDONLY) { + /* silently ignore WRITEMAP when we're only getting read access */ + flags &= ~MDB_WRITEMAP; + } else { + if (!((env->me_free_pgs = mdb_midl_alloc(MDB_IDL_UM_MAX)) && + (env->me_dirty_list = calloc(MDB_IDL_UM_SIZE, sizeof(MDB_ID2))))) + rc = ENOMEM; + } + env->me_flags = flags |= MDB_ENV_ACTIVE; + if (rc) + goto leave; + + env->me_path = strdup(path); + env->me_dbxs = calloc(env->me_maxdbs, sizeof(MDB_dbx)); + env->me_dbflags = calloc(env->me_maxdbs, sizeof(uint16_t)); + env->me_dbiseqs = calloc(env->me_maxdbs, sizeof(unsigned int)); + if (!(env->me_dbxs && env->me_path && env->me_dbflags && env->me_dbiseqs)) { + rc = ENOMEM; + goto leave; + } + + /* For RDONLY, get lockfile after we know datafile exists */ + if (!(flags & (MDB_RDONLY|MDB_NOLOCK))) { + rc = mdb_env_setup_locks(env, lpath, mode, &excl); + if (rc) + goto leave; + } + +#ifdef _WIN32 + if (F_ISSET(flags, MDB_RDONLY)) { + oflags = GENERIC_READ; + len = OPEN_EXISTING; + } else { + oflags = GENERIC_READ|GENERIC_WRITE; + len = OPEN_ALWAYS; + } + mode = FILE_ATTRIBUTE_NORMAL; + env->me_fd = CreateFile(dpath, oflags, FILE_SHARE_READ|FILE_SHARE_WRITE, + NULL, len, mode, NULL); +#else + if (F_ISSET(flags, MDB_RDONLY)) + oflags = O_RDONLY; + else + oflags = O_RDWR | O_CREAT; + + env->me_fd = open(dpath, oflags, mode); +#endif + if (env->me_fd == INVALID_HANDLE_VALUE) { + rc = ErrCode(); + goto leave; + } + + if ((flags & (MDB_RDONLY|MDB_NOLOCK)) == MDB_RDONLY) { + rc = mdb_env_setup_locks(env, lpath, mode, &excl); + if (rc) + goto leave; + } + + if ((rc = mdb_env_open2(env)) == MDB_SUCCESS) { + if (flags & (MDB_RDONLY|MDB_WRITEMAP)) { + env->me_mfd = env->me_fd; + } else { + /* Synchronous fd for meta writes. Needed even with + * MDB_NOSYNC/MDB_NOMETASYNC, in case these get reset. + */ +#ifdef _WIN32 + len = OPEN_EXISTING; + env->me_mfd = CreateFile(dpath, oflags, + FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, len, + mode | FILE_FLAG_WRITE_THROUGH, NULL); +#else + oflags &= ~O_CREAT; + env->me_mfd = open(dpath, oflags | MDB_DSYNC, mode); +#endif + if (env->me_mfd == INVALID_HANDLE_VALUE) { + rc = ErrCode(); + goto leave; + } + } + DPRINTF(("opened dbenv %p", (void *) env)); + if (excl > 0) { + rc = mdb_env_share_locks(env, &excl); + if (rc) + goto leave; + } + if (!((flags & MDB_RDONLY) || + (env->me_pbuf = calloc(1, env->me_psize)))) + rc = ENOMEM; + if (!(flags & MDB_RDONLY)) { + MDB_txn *txn; + int tsize = sizeof(MDB_txn), size = tsize + env->me_maxdbs * + (sizeof(MDB_db)+sizeof(MDB_cursor *)+sizeof(unsigned int)+1); + txn = calloc(1, size); + if (txn) { + txn->mt_dbs = (MDB_db *)((char *)txn + tsize); + txn->mt_cursors = (MDB_cursor **)(txn->mt_dbs + env->me_maxdbs); + txn->mt_dbiseqs = (unsigned int *)(txn->mt_cursors + env->me_maxdbs); + txn->mt_dbflags = (unsigned char *)(txn->mt_dbiseqs + env->me_maxdbs); + txn->mt_env = env; + txn->mt_dbxs = env->me_dbxs; + env->me_txn0 = txn; + } else { + rc = ENOMEM; + } + } + } + +leave: + if (rc) { + mdb_env_close0(env, excl); + } + free(lpath); + return rc; +} + +/** Destroy resources from mdb_env_open(), clear our readers & DBIs */ +static void ESECT +mdb_env_close0(MDB_env *env, int excl) +{ + int i; + + if (!(env->me_flags & MDB_ENV_ACTIVE)) + return; + + /* Doing this here since me_dbxs may not exist during mdb_env_close */ + if (env->me_dbxs) { + for (i = env->me_maxdbs; --i > MAIN_DBI; ) + free(env->me_dbxs[i].md_name.mv_data); + free(env->me_dbxs); + } + + free(env->me_pbuf); + free(env->me_dbiseqs); + free(env->me_dbflags); + free(env->me_path); + free(env->me_dirty_list); + free(env->me_txn0); + mdb_midl_free(env->me_free_pgs); + + if (env->me_flags & MDB_ENV_TXKEY) { + pthread_key_delete(env->me_txkey); +#ifdef _WIN32 + /* Delete our key from the global list */ + for (i=0; i<mdb_tls_nkeys; i++) + if (mdb_tls_keys[i] == env->me_txkey) { + mdb_tls_keys[i] = mdb_tls_keys[mdb_tls_nkeys-1]; + mdb_tls_nkeys--; + break; + } +#endif + } + + if (env->me_map) { + munmap(env->me_map, env->me_mapsize); + } + if (env->me_mfd != env->me_fd && env->me_mfd != INVALID_HANDLE_VALUE) + (void) close(env->me_mfd); + if (env->me_fd != INVALID_HANDLE_VALUE) + (void) close(env->me_fd); + if (env->me_txns) { + MDB_PID_T pid = env->me_pid; + /* Clearing readers is done in this function because + * me_txkey with its destructor must be disabled first. + */ + for (i = env->me_numreaders; --i >= 0; ) + if (env->me_txns->mti_readers[i].mr_pid == pid) + env->me_txns->mti_readers[i].mr_pid = 0; +#ifdef _WIN32 + if (env->me_rmutex) { + CloseHandle(env->me_rmutex); + if (env->me_wmutex) CloseHandle(env->me_wmutex); + } + /* Windows automatically destroys the mutexes when + * the last handle closes. + */ +#elif defined(MDB_USE_SYSV_SEM) + if (env->me_rmutex.semid != -1) { + /* If we have the filelock: If we are the + * only remaining user, clean up semaphores. + */ + if (excl == 0) + mdb_env_excl_lock(env, &excl); + if (excl > 0) + semctl(env->me_rmutex.semid, 0, IPC_RMID); + } +#endif + munmap((void *)env->me_txns, (env->me_maxreaders-1)*sizeof(MDB_reader)+sizeof(MDB_txninfo)); + } + if (env->me_lfd != INVALID_HANDLE_VALUE) { +#ifdef _WIN32 + if (excl >= 0) { + /* Unlock the lockfile. Windows would have unlocked it + * after closing anyway, but not necessarily at once. + */ + UnlockFile(env->me_lfd, 0, 0, 1, 0); + } +#endif + (void) close(env->me_lfd); + } + + env->me_flags &= ~(MDB_ENV_ACTIVE|MDB_ENV_TXKEY); +} + +void ESECT +mdb_env_close(MDB_env *env) +{ + MDB_page *dp; + + if (env == NULL) + return; + + VGMEMP_DESTROY(env); + while ((dp = env->me_dpages) != NULL) { + VGMEMP_DEFINED(&dp->mp_next, sizeof(dp->mp_next)); + env->me_dpages = dp->mp_next; + free(dp); + } + + mdb_env_close0(env, 0); + free(env); +} + +/** Compare two items pointing at aligned size_t's */ +static int +mdb_cmp_long(const MDB_val *a, const MDB_val *b) +{ + return (*(size_t *)a->mv_data < *(size_t *)b->mv_data) ? -1 : + *(size_t *)a->mv_data > *(size_t *)b->mv_data; +} + +/** Compare two items pointing at aligned unsigned int's */ +static int +mdb_cmp_int(const MDB_val *a, const MDB_val *b) +{ + return (*(unsigned int *)a->mv_data < *(unsigned int *)b->mv_data) ? -1 : + *(unsigned int *)a->mv_data > *(unsigned int *)b->mv_data; +} + +/** Compare two items pointing at unsigned ints of unknown alignment. + * Nodes and keys are guaranteed to be 2-byte aligned. + */ +static int +mdb_cmp_cint(const MDB_val *a, const MDB_val *b) +{ +#if BYTE_ORDER == LITTLE_ENDIAN + unsigned short *u, *c; + int x; + + u = (unsigned short *) ((char *) a->mv_data + a->mv_size); + c = (unsigned short *) ((char *) b->mv_data + a->mv_size); + do { + x = *--u - *--c; + } while(!x && u > (unsigned short *)a->mv_data); + return x; +#else + unsigned short *u, *c, *end; + int x; + + end = (unsigned short *) ((char *) a->mv_data + a->mv_size); + u = (unsigned short *)a->mv_data; + c = (unsigned short *)b->mv_data; + do { + x = *u++ - *c++; + } while(!x && u < end); + return x; +#endif +} + +/** Compare two items pointing at size_t's of unknown alignment. */ +#ifdef MISALIGNED_OK +# define mdb_cmp_clong mdb_cmp_long +#else +# define mdb_cmp_clong mdb_cmp_cint +#endif + +/** Compare two items lexically */ +static int +mdb_cmp_memn(const MDB_val *a, const MDB_val *b) +{ + int diff; + ssize_t len_diff; + unsigned int len; + + len = a->mv_size; + len_diff = (ssize_t) a->mv_size - (ssize_t) b->mv_size; + if (len_diff > 0) { + len = b->mv_size; + len_diff = 1; + } + + diff = memcmp(a->mv_data, b->mv_data, len); + return diff ? diff : len_diff<0 ? -1 : len_diff; +} + +/** Compare two items in reverse byte order */ +static int +mdb_cmp_memnr(const MDB_val *a, const MDB_val *b) +{ + const unsigned char *p1, *p2, *p1_lim; + ssize_t len_diff; + int diff; + + p1_lim = (const unsigned char *)a->mv_data; + p1 = (const unsigned char *)a->mv_data + a->mv_size; + p2 = (const unsigned char *)b->mv_data + b->mv_size; + + len_diff = (ssize_t) a->mv_size - (ssize_t) b->mv_size; + if (len_diff > 0) { + p1_lim += len_diff; + len_diff = 1; + } + + while (p1 > p1_lim) { + diff = *--p1 - *--p2; + if (diff) + return diff; + } + return len_diff<0 ? -1 : len_diff; +} + +/** Search for key within a page, using binary search. + * Returns the smallest entry larger or equal to the key. + * If exactp is non-null, stores whether the found entry was an exact match + * in *exactp (1 or 0). + * Updates the cursor index with the index of the found entry. + * If no entry larger or equal to the key is found, returns NULL. + */ +static MDB_node * +mdb_node_search(MDB_cursor *mc, MDB_val *key, int *exactp) +{ + unsigned int i = 0, nkeys; + int low, high; + int rc = 0; + MDB_page *mp = mc->mc_pg[mc->mc_top]; + MDB_node *node = NULL; + MDB_val nodekey; + MDB_cmp_func *cmp; + DKBUF; + + nkeys = NUMKEYS(mp); + + DPRINTF(("searching %u keys in %s %spage %"Z"u", + nkeys, IS_LEAF(mp) ? "leaf" : "branch", IS_SUBP(mp) ? "sub-" : "", + mdb_dbg_pgno(mp))); + + low = IS_LEAF(mp) ? 0 : 1; + high = nkeys - 1; + cmp = mc->mc_dbx->md_cmp; + + /* Branch pages have no data, so if using integer keys, + * alignment is guaranteed. Use faster mdb_cmp_int. + */ + if (cmp == mdb_cmp_cint && IS_BRANCH(mp)) { + if (NODEPTR(mp, 1)->mn_ksize == sizeof(size_t)) + cmp = mdb_cmp_long; + else + cmp = mdb_cmp_int; + } + + if (IS_LEAF2(mp)) { + nodekey.mv_size = mc->mc_db->md_pad; + node = NODEPTR(mp, 0); /* fake */ + while (low <= high) { + i = (low + high) >> 1; + nodekey.mv_data = LEAF2KEY(mp, i, nodekey.mv_size); + rc = cmp(key, &nodekey); + DPRINTF(("found leaf index %u [%s], rc = %i", + i, DKEY(&nodekey), rc)); + if (rc == 0) + break; + if (rc > 0) + low = i + 1; + else + high = i - 1; + } + } else { + while (low <= high) { + i = (low + high) >> 1; + + node = NODEPTR(mp, i); + nodekey.mv_size = NODEKSZ(node); + nodekey.mv_data = NODEKEY(node); + + rc = cmp(key, &nodekey); +#if MDB_DEBUG + if (IS_LEAF(mp)) + DPRINTF(("found leaf index %u [%s], rc = %i", + i, DKEY(&nodekey), rc)); + else + DPRINTF(("found branch index %u [%s -> %"Z"u], rc = %i", + i, DKEY(&nodekey), NODEPGNO(node), rc)); +#endif + if (rc == 0) + break; + if (rc > 0) + low = i + 1; + else + high = i - 1; + } + } + + if (rc > 0) { /* Found entry is less than the key. */ + i++; /* Skip to get the smallest entry larger than key. */ + if (!IS_LEAF2(mp)) + node = NODEPTR(mp, i); + } + if (exactp) + *exactp = (rc == 0 && nkeys > 0); + /* store the key index */ + mc->mc_ki[mc->mc_top] = i; + if (i >= nkeys) + /* There is no entry larger or equal to the key. */ + return NULL; + + /* nodeptr is fake for LEAF2 */ + return node; +} + +#if 0 +static void +mdb_cursor_adjust(MDB_cursor *mc, func) +{ + MDB_cursor *m2; + + for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2=m2->mc_next) { + if (m2->mc_pg[m2->mc_top] == mc->mc_pg[mc->mc_top]) { + func(mc, m2); + } + } +} +#endif + +/** Pop a page off the top of the cursor's stack. */ +static void +mdb_cursor_pop(MDB_cursor *mc) +{ + if (mc->mc_snum) { +#if MDB_DEBUG + MDB_page *top = mc->mc_pg[mc->mc_top]; +#endif + mc->mc_snum--; + if (mc->mc_snum) + mc->mc_top--; + + DPRINTF(("popped page %"Z"u off db %d cursor %p", top->mp_pgno, + DDBI(mc), (void *) mc)); + } +} + +/** Push a page onto the top of the cursor's stack. */ +static int +mdb_cursor_push(MDB_cursor *mc, MDB_page *mp) +{ + DPRINTF(("pushing page %"Z"u on db %d cursor %p", mp->mp_pgno, + DDBI(mc), (void *) mc)); + + if (mc->mc_snum >= CURSOR_STACK) { + mc->mc_txn->mt_flags |= MDB_TXN_ERROR; + return MDB_CURSOR_FULL; + } + + mc->mc_top = mc->mc_snum++; + mc->mc_pg[mc->mc_top] = mp; + mc->mc_ki[mc->mc_top] = 0; + + return MDB_SUCCESS; +} + +/** Find the address of the page corresponding to a given page number. + * @param[in] txn the transaction for this access. + * @param[in] pgno the page number for the page to retrieve. + * @param[out] ret address of a pointer where the page's address will be stored. + * @param[out] lvl dirty_list inheritance level of found page. 1=current txn, 0=mapped page. + * @return 0 on success, non-zero on failure. + */ +static int +mdb_page_get(MDB_txn *txn, pgno_t pgno, MDB_page **ret, int *lvl) +{ + MDB_env *env = txn->mt_env; + MDB_page *p = NULL; + int level; + + if (!((txn->mt_flags & MDB_TXN_RDONLY) | (env->me_flags & MDB_WRITEMAP))) { + MDB_txn *tx2 = txn; + level = 1; + do { + MDB_ID2L dl = tx2->mt_u.dirty_list; + unsigned x; + /* Spilled pages were dirtied in this txn and flushed + * because the dirty list got full. Bring this page + * back in from the map (but don't unspill it here, + * leave that unless page_touch happens again). + */ + if (tx2->mt_spill_pgs) { + MDB_ID pn = pgno << 1; + x = mdb_midl_search(tx2->mt_spill_pgs, pn); + if (x <= tx2->mt_spill_pgs[0] && tx2->mt_spill_pgs[x] == pn) { + p = (MDB_page *)(env->me_map + env->me_psize * pgno); + goto done; + } + } + if (dl[0].mid) { + unsigned x = mdb_mid2l_search(dl, pgno); + if (x <= dl[0].mid && dl[x].mid == pgno) { + p = dl[x].mptr; + goto done; + } + } + level++; + } while ((tx2 = tx2->mt_parent) != NULL); + } + + if (pgno < txn->mt_next_pgno) { + level = 0; + p = (MDB_page *)(env->me_map + env->me_psize * pgno); + } else { + DPRINTF(("page %"Z"u not found", pgno)); + txn->mt_flags |= MDB_TXN_ERROR; + return MDB_PAGE_NOTFOUND; + } + +done: + *ret = p; + if (lvl) + *lvl = level; + return MDB_SUCCESS; +} + +/** Finish #mdb_page_search() / #mdb_page_search_lowest(). + * The cursor is at the root page, set up the rest of it. + */ +static int +mdb_page_search_root(MDB_cursor *mc, MDB_val *key, int flags) +{ + MDB_page *mp = mc->mc_pg[mc->mc_top]; + int rc; + DKBUF; + + while (IS_BRANCH(mp)) { + MDB_node *node; + indx_t i; + + DPRINTF(("branch page %"Z"u has %u keys", mp->mp_pgno, NUMKEYS(mp))); + mdb_cassert(mc, NUMKEYS(mp) > 1); + DPRINTF(("found index 0 to page %"Z"u", NODEPGNO(NODEPTR(mp, 0)))); + + if (flags & (MDB_PS_FIRST|MDB_PS_LAST)) { + i = 0; + if (flags & MDB_PS_LAST) + i = NUMKEYS(mp) - 1; + } else { + int exact; + node = mdb_node_search(mc, key, &exact); + if (node == NULL) + i = NUMKEYS(mp) - 1; + else { + i = mc->mc_ki[mc->mc_top]; + if (!exact) { + mdb_cassert(mc, i > 0); + i--; + } + } + DPRINTF(("following index %u for key [%s]", i, DKEY(key))); + } + + mdb_cassert(mc, i < NUMKEYS(mp)); + node = NODEPTR(mp, i); + + if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(node), &mp, NULL)) != 0) + return rc; + + mc->mc_ki[mc->mc_top] = i; + if ((rc = mdb_cursor_push(mc, mp))) + return rc; + + if (flags & MDB_PS_MODIFY) { + if ((rc = mdb_page_touch(mc)) != 0) + return rc; + mp = mc->mc_pg[mc->mc_top]; + } + } + + if (!IS_LEAF(mp)) { + DPRINTF(("internal error, index points to a %02X page!?", + mp->mp_flags)); + mc->mc_txn->mt_flags |= MDB_TXN_ERROR; + return MDB_CORRUPTED; + } + + DPRINTF(("found leaf page %"Z"u for key [%s]", mp->mp_pgno, + key ? DKEY(key) : "null")); + mc->mc_flags |= C_INITIALIZED; + mc->mc_flags &= ~C_EOF; + + return MDB_SUCCESS; +} + +/** Search for the lowest key under the current branch page. + * This just bypasses a NUMKEYS check in the current page + * before calling mdb_page_search_root(), because the callers + * are all in situations where the current page is known to + * be underfilled. + */ +static int +mdb_page_search_lowest(MDB_cursor *mc) +{ + MDB_page *mp = mc->mc_pg[mc->mc_top]; + MDB_node *node = NODEPTR(mp, 0); + int rc; + + if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(node), &mp, NULL)) != 0) + return rc; + + mc->mc_ki[mc->mc_top] = 0; + if ((rc = mdb_cursor_push(mc, mp))) + return rc; + return mdb_page_search_root(mc, NULL, MDB_PS_FIRST); +} + +/** Search for the page a given key should be in. + * Push it and its parent pages on the cursor stack. + * @param[in,out] mc the cursor for this operation. + * @param[in] key the key to search for, or NULL for first/last page. + * @param[in] flags If MDB_PS_MODIFY is set, visited pages in the DB + * are touched (updated with new page numbers). + * If MDB_PS_FIRST or MDB_PS_LAST is set, find first or last leaf. + * This is used by #mdb_cursor_first() and #mdb_cursor_last(). + * If MDB_PS_ROOTONLY set, just fetch root node, no further lookups. + * @return 0 on success, non-zero on failure. + */ +static int +mdb_page_search(MDB_cursor *mc, MDB_val *key, int flags) +{ + int rc; + pgno_t root; + + /* Make sure the txn is still viable, then find the root from + * the txn's db table and set it as the root of the cursor's stack. + */ + if (F_ISSET(mc->mc_txn->mt_flags, MDB_TXN_ERROR)) { + DPUTS("transaction has failed, must abort"); + return MDB_BAD_TXN; + } else { + /* Make sure we're using an up-to-date root */ + if (*mc->mc_dbflag & DB_STALE) { + MDB_cursor mc2; + if (TXN_DBI_CHANGED(mc->mc_txn, mc->mc_dbi)) + return MDB_BAD_DBI; + mdb_cursor_init(&mc2, mc->mc_txn, MAIN_DBI, NULL); + rc = mdb_page_search(&mc2, &mc->mc_dbx->md_name, 0); + if (rc) + return rc; + { + MDB_val data; + int exact = 0; + uint16_t flags; + MDB_node *leaf = mdb_node_search(&mc2, + &mc->mc_dbx->md_name, &exact); + if (!exact) + return MDB_NOTFOUND; + rc = mdb_node_read(mc->mc_txn, leaf, &data); + if (rc) + return rc; + memcpy(&flags, ((char *) data.mv_data + offsetof(MDB_db, md_flags)), + sizeof(uint16_t)); + /* The txn may not know this DBI, or another process may + * have dropped and recreated the DB with other flags. + */ + if ((mc->mc_db->md_flags & PERSISTENT_FLAGS) != flags) + return MDB_INCOMPATIBLE; + memcpy(mc->mc_db, data.mv_data, sizeof(MDB_db)); + } + *mc->mc_dbflag &= ~DB_STALE; + } + root = mc->mc_db->md_root; + + if (root == P_INVALID) { /* Tree is empty. */ + DPUTS("tree is empty"); + return MDB_NOTFOUND; + } + } + + mdb_cassert(mc, root > 1); + if (!mc->mc_pg[0] || mc->mc_pg[0]->mp_pgno != root) + if ((rc = mdb_page_get(mc->mc_txn, root, &mc->mc_pg[0], NULL)) != 0) + return rc; + + mc->mc_snum = 1; + mc->mc_top = 0; + + DPRINTF(("db %d root page %"Z"u has flags 0x%X", + DDBI(mc), root, mc->mc_pg[0]->mp_flags)); + + if (flags & MDB_PS_MODIFY) { + if ((rc = mdb_page_touch(mc))) + return rc; + } + + if (flags & MDB_PS_ROOTONLY) + return MDB_SUCCESS; + + return mdb_page_search_root(mc, key, flags); +} + +static int +mdb_ovpage_free(MDB_cursor *mc, MDB_page *mp) +{ + MDB_txn *txn = mc->mc_txn; + pgno_t pg = mp->mp_pgno; + unsigned x = 0, ovpages = mp->mp_pages; + MDB_env *env = txn->mt_env; + MDB_IDL sl = txn->mt_spill_pgs; + MDB_ID pn = pg << 1; + int rc; + + DPRINTF(("free ov page %"Z"u (%d)", pg, ovpages)); + /* If the page is dirty or on the spill list we just acquired it, + * so we should give it back to our current free list, if any. + * Otherwise put it onto the list of pages we freed in this txn. + * + * Won't create me_pghead: me_pglast must be inited along with it. + * Unsupported in nested txns: They would need to hide the page + * range in ancestor txns' dirty and spilled lists. + */ + if (env->me_pghead && + !txn->mt_parent && + ((mp->mp_flags & P_DIRTY) || + (sl && (x = mdb_midl_search(sl, pn)) <= sl[0] && sl[x] == pn))) + { + unsigned i, j; + pgno_t *mop; + MDB_ID2 *dl, ix, iy; + rc = mdb_midl_need(&env->me_pghead, ovpages); + if (rc) + return rc; + if (!(mp->mp_flags & P_DIRTY)) { + /* This page is no longer spilled */ + if (x == sl[0]) + sl[0]--; + else + sl[x] |= 1; + goto release; + } + /* Remove from dirty list */ + dl = txn->mt_u.dirty_list; + x = dl[0].mid--; + for (ix = dl[x]; ix.mptr != mp; ix = iy) { + if (x > 1) { + x--; + iy = dl[x]; + dl[x] = ix; + } else { + mdb_cassert(mc, x > 1); + j = ++(dl[0].mid); + dl[j] = ix; /* Unsorted. OK when MDB_TXN_ERROR. */ + txn->mt_flags |= MDB_TXN_ERROR; + return MDB_CORRUPTED; + } + } + if (!(env->me_flags & MDB_WRITEMAP)) + mdb_dpage_free(env, mp); +release: + /* Insert in me_pghead */ + mop = env->me_pghead; + j = mop[0] + ovpages; + for (i = mop[0]; i && mop[i] < pg; i--) + mop[j--] = mop[i]; + while (j>i) + mop[j--] = pg++; + mop[0] += ovpages; + } else { + rc = mdb_midl_append_range(&txn->mt_free_pgs, pg, ovpages); + if (rc) + return rc; + } + mc->mc_db->md_overflow_pages -= ovpages; + return 0; +} + +/** Return the data associated with a given node. + * @param[in] txn The transaction for this operation. + * @param[in] leaf The node being read. + * @param[out] data Updated to point to the node's data. + * @return 0 on success, non-zero on failure. + */ +static int +mdb_node_read(MDB_txn *txn, MDB_node *leaf, MDB_val *data) +{ + MDB_page *omp; /* overflow page */ + pgno_t pgno; + int rc; + + if (!F_ISSET(leaf->mn_flags, F_BIGDATA)) { + data->mv_size = NODEDSZ(leaf); + data->mv_data = NODEDATA(leaf); + return MDB_SUCCESS; + } + + /* Read overflow data. + */ + data->mv_size = NODEDSZ(leaf); + memcpy(&pgno, NODEDATA(leaf), sizeof(pgno)); + if ((rc = mdb_page_get(txn, pgno, &omp, NULL)) != 0) { + DPRINTF(("read overflow page %"Z"u failed", pgno)); + return rc; + } + data->mv_data = METADATA(omp); + + return MDB_SUCCESS; +} + +int +mdb_get(MDB_txn *txn, MDB_dbi dbi, + MDB_val *key, MDB_val *data) +{ + MDB_cursor mc; + MDB_xcursor mx; + int exact = 0; + DKBUF; + + DPRINTF(("===> get db %u key [%s]", dbi, DKEY(key))); + + if (!key || !data || dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi)) + return EINVAL; + + if (txn->mt_flags & MDB_TXN_ERROR) + return MDB_BAD_TXN; + + mdb_cursor_init(&mc, txn, dbi, &mx); + return mdb_cursor_set(&mc, key, data, MDB_SET, &exact); +} + +/** Find a sibling for a page. + * Replaces the page at the top of the cursor's stack with the + * specified sibling, if one exists. + * @param[in] mc The cursor for this operation. + * @param[in] move_right Non-zero if the right sibling is requested, + * otherwise the left sibling. + * @return 0 on success, non-zero on failure. + */ +static int +mdb_cursor_sibling(MDB_cursor *mc, int move_right) +{ + int rc; + MDB_node *indx; + MDB_page *mp; + + if (mc->mc_snum < 2) { + return MDB_NOTFOUND; /* root has no siblings */ + } + + mdb_cursor_pop(mc); + DPRINTF(("parent page is page %"Z"u, index %u", + mc->mc_pg[mc->mc_top]->mp_pgno, mc->mc_ki[mc->mc_top])); + + if (move_right ? (mc->mc_ki[mc->mc_top] + 1u >= NUMKEYS(mc->mc_pg[mc->mc_top])) + : (mc->mc_ki[mc->mc_top] == 0)) { + DPRINTF(("no more keys left, moving to %s sibling", + move_right ? "right" : "left")); + if ((rc = mdb_cursor_sibling(mc, move_right)) != MDB_SUCCESS) { + /* undo cursor_pop before returning */ + mc->mc_top++; + mc->mc_snum++; + return rc; + } + } else { + if (move_right) + mc->mc_ki[mc->mc_top]++; + else + mc->mc_ki[mc->mc_top]--; + DPRINTF(("just moving to %s index key %u", + move_right ? "right" : "left", mc->mc_ki[mc->mc_top])); + } + mdb_cassert(mc, IS_BRANCH(mc->mc_pg[mc->mc_top])); + + indx = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); + if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(indx), &mp, NULL)) != 0) { + /* mc will be inconsistent if caller does mc_snum++ as above */ + mc->mc_flags &= ~(C_INITIALIZED|C_EOF); + return rc; + } + + mdb_cursor_push(mc, mp); + if (!move_right) + mc->mc_ki[mc->mc_top] = NUMKEYS(mp)-1; + + return MDB_SUCCESS; +} + +/** Move the cursor to the next data item. */ +static int +mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) +{ + MDB_page *mp; + MDB_node *leaf; + int rc; + + if (mc->mc_flags & C_EOF) { + return MDB_NOTFOUND; + } + + mdb_cassert(mc, mc->mc_flags & C_INITIALIZED); + + mp = mc->mc_pg[mc->mc_top]; + + if (mc->mc_db->md_flags & MDB_DUPSORT) { + leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); + if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { + if (op == MDB_NEXT || op == MDB_NEXT_DUP) { + rc = mdb_cursor_next(&mc->mc_xcursor->mx_cursor, data, NULL, MDB_NEXT); + if (op != MDB_NEXT || rc != MDB_NOTFOUND) { + if (rc == MDB_SUCCESS) + MDB_GET_KEY(leaf, key); + return rc; + } + } + } else { + mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); + if (op == MDB_NEXT_DUP) + return MDB_NOTFOUND; + } + } + + DPRINTF(("cursor_next: top page is %"Z"u in cursor %p", + mdb_dbg_pgno(mp), (void *) mc)); + if (mc->mc_flags & C_DEL) + goto skip; + + if (mc->mc_ki[mc->mc_top] + 1u >= NUMKEYS(mp)) { + DPUTS("=====> move to next sibling page"); + if ((rc = mdb_cursor_sibling(mc, 1)) != MDB_SUCCESS) { + mc->mc_flags |= C_EOF; + return rc; + } + mp = mc->mc_pg[mc->mc_top]; + DPRINTF(("next page is %"Z"u, key index %u", mp->mp_pgno, mc->mc_ki[mc->mc_top])); + } else + mc->mc_ki[mc->mc_top]++; + +skip: + DPRINTF(("==> cursor points to page %"Z"u with %u keys, key index %u", + mdb_dbg_pgno(mp), NUMKEYS(mp), mc->mc_ki[mc->mc_top])); + + if (IS_LEAF2(mp)) { + key->mv_size = mc->mc_db->md_pad; + key->mv_data = LEAF2KEY(mp, mc->mc_ki[mc->mc_top], key->mv_size); + return MDB_SUCCESS; + } + + mdb_cassert(mc, IS_LEAF(mp)); + leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); + + if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { + mdb_xcursor_init1(mc, leaf); + } + if (data) { + if ((rc = mdb_node_read(mc->mc_txn, leaf, data)) != MDB_SUCCESS) + return rc; + + if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { + rc = mdb_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL); + if (rc != MDB_SUCCESS) + return rc; + } + } + + MDB_GET_KEY(leaf, key); + return MDB_SUCCESS; +} + +/** Move the cursor to the previous data item. */ +static int +mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) +{ + MDB_page *mp; + MDB_node *leaf; + int rc; + + mdb_cassert(mc, mc->mc_flags & C_INITIALIZED); + + mp = mc->mc_pg[mc->mc_top]; + + if (mc->mc_db->md_flags & MDB_DUPSORT) { + leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); + if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { + if (op == MDB_PREV || op == MDB_PREV_DUP) { + rc = mdb_cursor_prev(&mc->mc_xcursor->mx_cursor, data, NULL, MDB_PREV); + if (op != MDB_PREV || rc != MDB_NOTFOUND) { + if (rc == MDB_SUCCESS) { + MDB_GET_KEY(leaf, key); + mc->mc_flags &= ~C_EOF; + } + return rc; + } + } + } else { + mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); + if (op == MDB_PREV_DUP) + return MDB_NOTFOUND; + } + } + + DPRINTF(("cursor_prev: top page is %"Z"u in cursor %p", + mdb_dbg_pgno(mp), (void *) mc)); + + if (mc->mc_ki[mc->mc_top] == 0) { + DPUTS("=====> move to prev sibling page"); + if ((rc = mdb_cursor_sibling(mc, 0)) != MDB_SUCCESS) { + return rc; + } + mp = mc->mc_pg[mc->mc_top]; + mc->mc_ki[mc->mc_top] = NUMKEYS(mp) - 1; + DPRINTF(("prev page is %"Z"u, key index %u", mp->mp_pgno, mc->mc_ki[mc->mc_top])); + } else + mc->mc_ki[mc->mc_top]--; + + mc->mc_flags &= ~C_EOF; + + DPRINTF(("==> cursor points to page %"Z"u with %u keys, key index %u", + mdb_dbg_pgno(mp), NUMKEYS(mp), mc->mc_ki[mc->mc_top])); + + if (IS_LEAF2(mp)) { + key->mv_size = mc->mc_db->md_pad; + key->mv_data = LEAF2KEY(mp, mc->mc_ki[mc->mc_top], key->mv_size); + return MDB_SUCCESS; + } + + mdb_cassert(mc, IS_LEAF(mp)); + leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); + + if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { + mdb_xcursor_init1(mc, leaf); + } + if (data) { + if ((rc = mdb_node_read(mc->mc_txn, leaf, data)) != MDB_SUCCESS) + return rc; + + if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { + rc = mdb_cursor_last(&mc->mc_xcursor->mx_cursor, data, NULL); + if (rc != MDB_SUCCESS) + return rc; + } + } + + MDB_GET_KEY(leaf, key); + return MDB_SUCCESS; +} + +/** Set the cursor on a specific data item. */ +static int +mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data, + MDB_cursor_op op, int *exactp) +{ + int rc; + MDB_page *mp; + MDB_node *leaf = NULL; + DKBUF; + + if (key->mv_size == 0) + return MDB_BAD_VALSIZE; + + if (mc->mc_xcursor) + mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); + + /* See if we're already on the right page */ + if (mc->mc_flags & C_INITIALIZED) { + MDB_val nodekey; + + mp = mc->mc_pg[mc->mc_top]; + if (!NUMKEYS(mp)) { + mc->mc_ki[mc->mc_top] = 0; + return MDB_NOTFOUND; + } + if (mp->mp_flags & P_LEAF2) { + nodekey.mv_size = mc->mc_db->md_pad; + nodekey.mv_data = LEAF2KEY(mp, 0, nodekey.mv_size); + } else { + leaf = NODEPTR(mp, 0); + MDB_GET_KEY2(leaf, nodekey); + } + rc = mc->mc_dbx->md_cmp(key, &nodekey); + if (rc == 0) { + /* Probably happens rarely, but first node on the page + * was the one we wanted. + */ + mc->mc_ki[mc->mc_top] = 0; + if (exactp) + *exactp = 1; + goto set1; + } + if (rc > 0) { + unsigned int i; + unsigned int nkeys = NUMKEYS(mp); + if (nkeys > 1) { + if (mp->mp_flags & P_LEAF2) { + nodekey.mv_data = LEAF2KEY(mp, + nkeys-1, nodekey.mv_size); + } else { + leaf = NODEPTR(mp, nkeys-1); + MDB_GET_KEY2(leaf, nodekey); + } + rc = mc->mc_dbx->md_cmp(key, &nodekey); + if (rc == 0) { + /* last node was the one we wanted */ + mc->mc_ki[mc->mc_top] = nkeys-1; + if (exactp) + *exactp = 1; + goto set1; + } + if (rc < 0) { + if (mc->mc_ki[mc->mc_top] < NUMKEYS(mp)) { + /* This is definitely the right page, skip search_page */ + if (mp->mp_flags & P_LEAF2) { + nodekey.mv_data = LEAF2KEY(mp, + mc->mc_ki[mc->mc_top], nodekey.mv_size); + } else { + leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); + MDB_GET_KEY2(leaf, nodekey); + } + rc = mc->mc_dbx->md_cmp(key, &nodekey); + if (rc == 0) { + /* current node was the one we wanted */ + if (exactp) + *exactp = 1; + goto set1; + } + } + rc = 0; + goto set2; + } + } + /* If any parents have right-sibs, search. + * Otherwise, there's nothing further. + */ + for (i=0; i<mc->mc_top; i++) + if (mc->mc_ki[i] < + NUMKEYS(mc->mc_pg[i])-1) + break; + if (i == mc->mc_top) { + /* There are no other pages */ + mc->mc_ki[mc->mc_top] = nkeys; + return MDB_NOTFOUND; + } + } + if (!mc->mc_top) { + /* There are no other pages */ + mc->mc_ki[mc->mc_top] = 0; + if (op == MDB_SET_RANGE && !exactp) { + rc = 0; + goto set1; + } else + return MDB_NOTFOUND; + } + } + + rc = mdb_page_search(mc, key, 0); + if (rc != MDB_SUCCESS) + return rc; + + mp = mc->mc_pg[mc->mc_top]; + mdb_cassert(mc, IS_LEAF(mp)); + +set2: + leaf = mdb_node_search(mc, key, exactp); + if (exactp != NULL && !*exactp) { + /* MDB_SET specified and not an exact match. */ + return MDB_NOTFOUND; + } + + if (leaf == NULL) { + DPUTS("===> inexact leaf not found, goto sibling"); + if ((rc = mdb_cursor_sibling(mc, 1)) != MDB_SUCCESS) + return rc; /* no entries matched */ + mp = mc->mc_pg[mc->mc_top]; + mdb_cassert(mc, IS_LEAF(mp)); + leaf = NODEPTR(mp, 0); + } + +set1: + mc->mc_flags |= C_INITIALIZED; + mc->mc_flags &= ~C_EOF; + + if (IS_LEAF2(mp)) { + if (op == MDB_SET_RANGE || op == MDB_SET_KEY) { + key->mv_size = mc->mc_db->md_pad; + key->mv_data = LEAF2KEY(mp, mc->mc_ki[mc->mc_top], key->mv_size); + } + return MDB_SUCCESS; + } + + if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { + mdb_xcursor_init1(mc, leaf); + } + if (data) { + if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { + if (op == MDB_SET || op == MDB_SET_KEY || op == MDB_SET_RANGE) { + rc = mdb_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL); + } else { + int ex2, *ex2p; + if (op == MDB_GET_BOTH) { + ex2p = &ex2; + ex2 = 0; + } else { + ex2p = NULL; + } + rc = mdb_cursor_set(&mc->mc_xcursor->mx_cursor, data, NULL, MDB_SET_RANGE, ex2p); + if (rc != MDB_SUCCESS) + return rc; + } + } else if (op == MDB_GET_BOTH || op == MDB_GET_BOTH_RANGE) { + MDB_val d2; + if ((rc = mdb_node_read(mc->mc_txn, leaf, &d2)) != MDB_SUCCESS) + return rc; + rc = mc->mc_dbx->md_dcmp(data, &d2); + if (rc) { + if (op == MDB_GET_BOTH || rc > 0) + return MDB_NOTFOUND; + rc = 0; + *data = d2; + } + + } else { + if (mc->mc_xcursor) + mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); + if ((rc = mdb_node_read(mc->mc_txn, leaf, data)) != MDB_SUCCESS) + return rc; + } + } + + /* The key already matches in all other cases */ + if (op == MDB_SET_RANGE || op == MDB_SET_KEY) + MDB_GET_KEY(leaf, key); + DPRINTF(("==> cursor placed on key [%s]", DKEY(key))); + + return rc; +} + +/** Move the cursor to the first item in the database. */ +static int +mdb_cursor_first(MDB_cursor *mc, MDB_val *key, MDB_val *data) +{ + int rc; + MDB_node *leaf; + + if (mc->mc_xcursor) + mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); + + if (!(mc->mc_flags & C_INITIALIZED) || mc->mc_top) { + rc = mdb_page_search(mc, NULL, MDB_PS_FIRST); + if (rc != MDB_SUCCESS) + return rc; + } + mdb_cassert(mc, IS_LEAF(mc->mc_pg[mc->mc_top])); + + leaf = NODEPTR(mc->mc_pg[mc->mc_top], 0); + mc->mc_flags |= C_INITIALIZED; + mc->mc_flags &= ~C_EOF; + + mc->mc_ki[mc->mc_top] = 0; + + if (IS_LEAF2(mc->mc_pg[mc->mc_top])) { + key->mv_size = mc->mc_db->md_pad; + key->mv_data = LEAF2KEY(mc->mc_pg[mc->mc_top], 0, key->mv_size); + return MDB_SUCCESS; + } + + if (data) { + if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { + mdb_xcursor_init1(mc, leaf); + rc = mdb_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL); + if (rc) + return rc; + } else { + if ((rc = mdb_node_read(mc->mc_txn, leaf, data)) != MDB_SUCCESS) + return rc; + } + } + MDB_GET_KEY(leaf, key); + return MDB_SUCCESS; +} + +/** Move the cursor to the last item in the database. */ +static int +mdb_cursor_last(MDB_cursor *mc, MDB_val *key, MDB_val *data) +{ + int rc; + MDB_node *leaf; + + if (mc->mc_xcursor) + mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); + + if (!(mc->mc_flags & C_EOF)) { + + if (!(mc->mc_flags & C_INITIALIZED) || mc->mc_top) { + rc = mdb_page_search(mc, NULL, MDB_PS_LAST); + if (rc != MDB_SUCCESS) + return rc; + } + mdb_cassert(mc, IS_LEAF(mc->mc_pg[mc->mc_top])); + + } + mc->mc_ki[mc->mc_top] = NUMKEYS(mc->mc_pg[mc->mc_top]) - 1; + mc->mc_flags |= C_INITIALIZED|C_EOF; + leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); + + if (IS_LEAF2(mc->mc_pg[mc->mc_top])) { + key->mv_size = mc->mc_db->md_pad; + key->mv_data = LEAF2KEY(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], key->mv_size); + return MDB_SUCCESS; + } + + if (data) { + if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { + mdb_xcursor_init1(mc, leaf); + rc = mdb_cursor_last(&mc->mc_xcursor->mx_cursor, data, NULL); + if (rc) + return rc; + } else { + if ((rc = mdb_node_read(mc->mc_txn, leaf, data)) != MDB_SUCCESS) + return rc; + } + } + + MDB_GET_KEY(leaf, key); + return MDB_SUCCESS; +} + +int +mdb_cursor_get(MDB_cursor *mc, MDB_val *key, MDB_val *data, + MDB_cursor_op op) +{ + int rc; + int exact = 0; + int (*mfunc)(MDB_cursor *mc, MDB_val *key, MDB_val *data); + + if (mc == NULL) + return EINVAL; + + if (mc->mc_txn->mt_flags & MDB_TXN_ERROR) + return MDB_BAD_TXN; + + switch (op) { + case MDB_GET_CURRENT: + if (!(mc->mc_flags & C_INITIALIZED)) { + rc = EINVAL; + } else { + MDB_page *mp = mc->mc_pg[mc->mc_top]; + int nkeys = NUMKEYS(mp); + if (!nkeys || mc->mc_ki[mc->mc_top] >= nkeys) { + mc->mc_ki[mc->mc_top] = nkeys; + rc = MDB_NOTFOUND; + break; + } + rc = MDB_SUCCESS; + if (IS_LEAF2(mp)) { + key->mv_size = mc->mc_db->md_pad; + key->mv_data = LEAF2KEY(mp, mc->mc_ki[mc->mc_top], key->mv_size); + } else { + MDB_node *leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); + MDB_GET_KEY(leaf, key); + if (data) { + if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { + if (mc->mc_flags & C_DEL) + mdb_xcursor_init1(mc, leaf); + rc = mdb_cursor_get(&mc->mc_xcursor->mx_cursor, data, NULL, MDB_GET_CURRENT); + } else { + rc = mdb_node_read(mc->mc_txn, leaf, data); + } + } + } + } + break; + case MDB_GET_BOTH: + case MDB_GET_BOTH_RANGE: + if (data == NULL) { + rc = EINVAL; + break; + } + if (mc->mc_xcursor == NULL) { + rc = MDB_INCOMPATIBLE; + break; + } + /* FALLTHRU */ + case MDB_SET: + case MDB_SET_KEY: + case MDB_SET_RANGE: + if (key == NULL) { + rc = EINVAL; + } else { + rc = mdb_cursor_set(mc, key, data, op, + op == MDB_SET_RANGE ? NULL : &exact); + } + break; + case MDB_GET_MULTIPLE: + if (data == NULL || !(mc->mc_flags & C_INITIALIZED)) { + rc = EINVAL; + break; + } + if (!(mc->mc_db->md_flags & MDB_DUPFIXED)) { + rc = MDB_INCOMPATIBLE; + break; + } + rc = MDB_SUCCESS; + if (!(mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) || + (mc->mc_xcursor->mx_cursor.mc_flags & C_EOF)) + break; + goto fetchm; + case MDB_NEXT_MULTIPLE: + if (data == NULL) { + rc = EINVAL; + break; + } + if (!(mc->mc_db->md_flags & MDB_DUPFIXED)) { + rc = MDB_INCOMPATIBLE; + break; + } + if (!(mc->mc_flags & C_INITIALIZED)) + rc = mdb_cursor_first(mc, key, data); + else + rc = mdb_cursor_next(mc, key, data, MDB_NEXT_DUP); + if (rc == MDB_SUCCESS) { + if (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) { + MDB_cursor *mx; +fetchm: + mx = &mc->mc_xcursor->mx_cursor; + data->mv_size = NUMKEYS(mx->mc_pg[mx->mc_top]) * + mx->mc_db->md_pad; + data->mv_data = METADATA(mx->mc_pg[mx->mc_top]); + mx->mc_ki[mx->mc_top] = NUMKEYS(mx->mc_pg[mx->mc_top])-1; + } else { + rc = MDB_NOTFOUND; + } + } + break; + case MDB_NEXT: + case MDB_NEXT_DUP: + case MDB_NEXT_NODUP: + if (!(mc->mc_flags & C_INITIALIZED)) + rc = mdb_cursor_first(mc, key, data); + else + rc = mdb_cursor_next(mc, key, data, op); + break; + case MDB_PREV: + case MDB_PREV_DUP: + case MDB_PREV_NODUP: + if (!(mc->mc_flags & C_INITIALIZED)) { + rc = mdb_cursor_last(mc, key, data); + if (rc) + break; + mc->mc_flags |= C_INITIALIZED; + mc->mc_ki[mc->mc_top]++; + } + rc = mdb_cursor_prev(mc, key, data, op); + break; + case MDB_FIRST: + rc = mdb_cursor_first(mc, key, data); + break; + case MDB_FIRST_DUP: + mfunc = mdb_cursor_first; + mmove: + if (data == NULL || !(mc->mc_flags & C_INITIALIZED)) { + rc = EINVAL; + break; + } + if (mc->mc_xcursor == NULL) { + rc = MDB_INCOMPATIBLE; + break; + } + { + MDB_node *leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); + if (!F_ISSET(leaf->mn_flags, F_DUPDATA)) { + MDB_GET_KEY(leaf, key); + rc = mdb_node_read(mc->mc_txn, leaf, data); + break; + } + } + if (!(mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) { + rc = EINVAL; + break; + } + rc = mfunc(&mc->mc_xcursor->mx_cursor, data, NULL); + break; + case MDB_LAST: + rc = mdb_cursor_last(mc, key, data); + break; + case MDB_LAST_DUP: + mfunc = mdb_cursor_last; + goto mmove; + default: + DPRINTF(("unhandled/unimplemented cursor operation %u", op)); + rc = EINVAL; + break; + } + + if (mc->mc_flags & C_DEL) + mc->mc_flags ^= C_DEL; + + return rc; +} + +/** Touch all the pages in the cursor stack. Set mc_top. + * Makes sure all the pages are writable, before attempting a write operation. + * @param[in] mc The cursor to operate on. + */ +static int +mdb_cursor_touch(MDB_cursor *mc) +{ + int rc = MDB_SUCCESS; + + if (mc->mc_dbi > MAIN_DBI && !(*mc->mc_dbflag & DB_DIRTY)) { + MDB_cursor mc2; + MDB_xcursor mcx; + if (TXN_DBI_CHANGED(mc->mc_txn, mc->mc_dbi)) + return MDB_BAD_DBI; + mdb_cursor_init(&mc2, mc->mc_txn, MAIN_DBI, &mcx); + rc = mdb_page_search(&mc2, &mc->mc_dbx->md_name, MDB_PS_MODIFY); + if (rc) + return rc; + *mc->mc_dbflag |= DB_DIRTY; + } + mc->mc_top = 0; + if (mc->mc_snum) { + do { + rc = mdb_page_touch(mc); + } while (!rc && ++(mc->mc_top) < mc->mc_snum); + mc->mc_top = mc->mc_snum-1; + } + return rc; +} + +/** Do not spill pages to disk if txn is getting full, may fail instead */ +#define MDB_NOSPILL 0x8000 + +int +mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, + unsigned int flags) +{ + MDB_env *env; + MDB_node *leaf = NULL; + MDB_page *fp, *mp; + uint16_t fp_flags; + MDB_val xdata, *rdata, dkey, olddata; + MDB_db dummy; + int do_sub = 0, insert_key, insert_data; + unsigned int mcount = 0, dcount = 0, nospill; + size_t nsize; + int rc, rc2; + unsigned int nflags; + DKBUF; + + if (mc == NULL || key == NULL) + return EINVAL; + + env = mc->mc_txn->mt_env; + + /* Check this first so counter will always be zero on any + * early failures. + */ + if (flags & MDB_MULTIPLE) { + dcount = data[1].mv_size; + data[1].mv_size = 0; + if (!F_ISSET(mc->mc_db->md_flags, MDB_DUPFIXED)) + return MDB_INCOMPATIBLE; + } + + nospill = flags & MDB_NOSPILL; + flags &= ~MDB_NOSPILL; + + if (mc->mc_txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_ERROR)) + return (mc->mc_txn->mt_flags & MDB_TXN_RDONLY) ? EACCES : MDB_BAD_TXN; + + if (key->mv_size-1 >= ENV_MAXKEY(env)) + return MDB_BAD_VALSIZE; + +#if SIZE_MAX > MAXDATASIZE + if (data->mv_size > ((mc->mc_db->md_flags & MDB_DUPSORT) ? ENV_MAXKEY(env) : MAXDATASIZE)) + return MDB_BAD_VALSIZE; +#else + if ((mc->mc_db->md_flags & MDB_DUPSORT) && data->mv_size > ENV_MAXKEY(env)) + return MDB_BAD_VALSIZE; +#endif + + DPRINTF(("==> put db %d key [%s], size %"Z"u, data size %"Z"u", + DDBI(mc), DKEY(key), key ? key->mv_size : 0, data->mv_size)); + + dkey.mv_size = 0; + + if (flags == MDB_CURRENT) { + if (!(mc->mc_flags & C_INITIALIZED)) + return EINVAL; + rc = MDB_SUCCESS; + } else if (mc->mc_db->md_root == P_INVALID) { + /* new database, cursor has nothing to point to */ + mc->mc_snum = 0; + mc->mc_top = 0; + mc->mc_flags &= ~C_INITIALIZED; + rc = MDB_NO_ROOT; + } else { + int exact = 0; + MDB_val d2; + if (flags & MDB_APPEND) { + MDB_val k2; + rc = mdb_cursor_last(mc, &k2, &d2); + if (rc == 0) { + rc = mc->mc_dbx->md_cmp(key, &k2); + if (rc > 0) { + rc = MDB_NOTFOUND; + mc->mc_ki[mc->mc_top]++; + } else { + /* new key is <= last key */ + rc = MDB_KEYEXIST; + } + } + } else { + rc = mdb_cursor_set(mc, key, &d2, MDB_SET, &exact); + } + if ((flags & MDB_NOOVERWRITE) && rc == 0) { + DPRINTF(("duplicate key [%s]", DKEY(key))); + *data = d2; + return MDB_KEYEXIST; + } + if (rc && rc != MDB_NOTFOUND) + return rc; + } + + if (mc->mc_flags & C_DEL) + mc->mc_flags ^= C_DEL; + + /* Cursor is positioned, check for room in the dirty list */ + if (!nospill) { + if (flags & MDB_MULTIPLE) { + rdata = &xdata; + xdata.mv_size = data->mv_size * dcount; + } else { + rdata = data; + } + if ((rc2 = mdb_page_spill(mc, key, rdata))) + return rc2; + } + + if (rc == MDB_NO_ROOT) { + MDB_page *np; + /* new database, write a root leaf page */ + DPUTS("allocating new root leaf page"); + if ((rc2 = mdb_page_new(mc, P_LEAF, 1, &np))) { + return rc2; + } + mdb_cursor_push(mc, np); + mc->mc_db->md_root = np->mp_pgno; + mc->mc_db->md_depth++; + *mc->mc_dbflag |= DB_DIRTY; + if ((mc->mc_db->md_flags & (MDB_DUPSORT|MDB_DUPFIXED)) + == MDB_DUPFIXED) + np->mp_flags |= P_LEAF2; + mc->mc_flags |= C_INITIALIZED; + } else { + /* make sure all cursor pages are writable */ + rc2 = mdb_cursor_touch(mc); + if (rc2) + return rc2; + } + + insert_key = insert_data = rc; + if (insert_key) { + /* The key does not exist */ + DPRINTF(("inserting key at index %i", mc->mc_ki[mc->mc_top])); + if ((mc->mc_db->md_flags & MDB_DUPSORT) && + LEAFSIZE(key, data) > env->me_nodemax) + { + /* Too big for a node, insert in sub-DB. Set up an empty + * "old sub-page" for prep_subDB to expand to a full page. + */ + fp_flags = P_LEAF|P_DIRTY; + fp = env->me_pbuf; + fp->mp_pad = data->mv_size; /* used if MDB_DUPFIXED */ + fp->mp_lower = fp->mp_upper = (PAGEHDRSZ-PAGEBASE); + olddata.mv_size = PAGEHDRSZ; + goto prep_subDB; + } + } else { + /* there's only a key anyway, so this is a no-op */ + if (IS_LEAF2(mc->mc_pg[mc->mc_top])) { + char *ptr; + unsigned int ksize = mc->mc_db->md_pad; + if (key->mv_size != ksize) + return MDB_BAD_VALSIZE; + ptr = LEAF2KEY(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], ksize); + memcpy(ptr, key->mv_data, ksize); +fix_parent: + /* if overwriting slot 0 of leaf, need to + * update branch key if there is a parent page + */ + if (mc->mc_top && !mc->mc_ki[mc->mc_top]) { + unsigned short top = mc->mc_top; + mc->mc_top--; + /* slot 0 is always an empty key, find real slot */ + while (mc->mc_top && !mc->mc_ki[mc->mc_top]) + mc->mc_top--; + if (mc->mc_ki[mc->mc_top]) + rc2 = mdb_update_key(mc, key); + else + rc2 = MDB_SUCCESS; + mc->mc_top = top; + if (rc2) + return rc2; + } + return MDB_SUCCESS; + } + +more: + leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); + olddata.mv_size = NODEDSZ(leaf); + olddata.mv_data = NODEDATA(leaf); + + /* DB has dups? */ + if (F_ISSET(mc->mc_db->md_flags, MDB_DUPSORT)) { + /* Prepare (sub-)page/sub-DB to accept the new item, + * if needed. fp: old sub-page or a header faking + * it. mp: new (sub-)page. offset: growth in page + * size. xdata: node data with new page or DB. + */ + unsigned i, offset = 0; + mp = fp = xdata.mv_data = env->me_pbuf; + mp->mp_pgno = mc->mc_pg[mc->mc_top]->mp_pgno; + + /* Was a single item before, must convert now */ + if (!F_ISSET(leaf->mn_flags, F_DUPDATA)) { + /* Just overwrite the current item */ + if (flags == MDB_CURRENT) + goto current; + +#if UINT_MAX < SIZE_MAX + if (mc->mc_dbx->md_dcmp == mdb_cmp_int && olddata.mv_size == sizeof(size_t)) + mc->mc_dbx->md_dcmp = mdb_cmp_clong; +#endif + /* does data match? */ + if (!mc->mc_dbx->md_dcmp(data, &olddata)) { + if (flags & MDB_NODUPDATA) + return MDB_KEYEXIST; + /* overwrite it */ + goto current; + } + + /* Back up original data item */ + dkey.mv_size = olddata.mv_size; + dkey.mv_data = memcpy(fp+1, olddata.mv_data, olddata.mv_size); + + /* Make sub-page header for the dup items, with dummy body */ + fp->mp_flags = P_LEAF|P_DIRTY|P_SUBP; + fp->mp_lower = (PAGEHDRSZ-PAGEBASE); + xdata.mv_size = PAGEHDRSZ + dkey.mv_size + data->mv_size; + if (mc->mc_db->md_flags & MDB_DUPFIXED) { + fp->mp_flags |= P_LEAF2; + fp->mp_pad = data->mv_size; + xdata.mv_size += 2 * data->mv_size; /* leave space for 2 more */ + } else { + xdata.mv_size += 2 * (sizeof(indx_t) + NODESIZE) + + (dkey.mv_size & 1) + (data->mv_size & 1); + } + fp->mp_upper = xdata.mv_size - PAGEBASE; + olddata.mv_size = xdata.mv_size; /* pretend olddata is fp */ + } else if (leaf->mn_flags & F_SUBDATA) { + /* Data is on sub-DB, just store it */ + flags |= F_DUPDATA|F_SUBDATA; + goto put_sub; + } else { + /* Data is on sub-page */ + fp = olddata.mv_data; + switch (flags) { + default: + if (!(mc->mc_db->md_flags & MDB_DUPFIXED)) { + offset = EVEN(NODESIZE + sizeof(indx_t) + + data->mv_size); + break; + } + offset = fp->mp_pad; + if (SIZELEFT(fp) < offset) { + offset *= 4; /* space for 4 more */ + break; + } + /* FALLTHRU: Big enough MDB_DUPFIXED sub-page */ + case MDB_CURRENT: + fp->mp_flags |= P_DIRTY; + COPY_PGNO(fp->mp_pgno, mp->mp_pgno); + mc->mc_xcursor->mx_cursor.mc_pg[0] = fp; + flags |= F_DUPDATA; + goto put_sub; + } + xdata.mv_size = olddata.mv_size + offset; + } + + fp_flags = fp->mp_flags; + if (NODESIZE + NODEKSZ(leaf) + xdata.mv_size > env->me_nodemax) { + /* Too big for a sub-page, convert to sub-DB */ + fp_flags &= ~P_SUBP; +prep_subDB: + if (mc->mc_db->md_flags & MDB_DUPFIXED) { + fp_flags |= P_LEAF2; + dummy.md_pad = fp->mp_pad; + dummy.md_flags = MDB_DUPFIXED; + if (mc->mc_db->md_flags & MDB_INTEGERDUP) + dummy.md_flags |= MDB_INTEGERKEY; + } else { + dummy.md_pad = 0; + dummy.md_flags = 0; + } + dummy.md_depth = 1; + dummy.md_branch_pages = 0; + dummy.md_leaf_pages = 1; + dummy.md_overflow_pages = 0; + dummy.md_entries = NUMKEYS(fp); + xdata.mv_size = sizeof(MDB_db); + xdata.mv_data = &dummy; + if ((rc = mdb_page_alloc(mc, 1, &mp))) + return rc; + offset = env->me_psize - olddata.mv_size; + flags |= F_DUPDATA|F_SUBDATA; + dummy.md_root = mp->mp_pgno; + } + if (mp != fp) { + mp->mp_flags = fp_flags | P_DIRTY; + mp->mp_pad = fp->mp_pad; + mp->mp_lower = fp->mp_lower; + mp->mp_upper = fp->mp_upper + offset; + if (fp_flags & P_LEAF2) { + memcpy(METADATA(mp), METADATA(fp), NUMKEYS(fp) * fp->mp_pad); + } else { + memcpy((char *)mp + mp->mp_upper + PAGEBASE, (char *)fp + fp->mp_upper + PAGEBASE, + olddata.mv_size - fp->mp_upper - PAGEBASE); + for (i=0; i<NUMKEYS(fp); i++) + mp->mp_ptrs[i] = fp->mp_ptrs[i] + offset; + } + } + + rdata = &xdata; + flags |= F_DUPDATA; + do_sub = 1; + if (!insert_key) + mdb_node_del(mc, 0); + goto new_sub; + } +current: + /* overflow page overwrites need special handling */ + if (F_ISSET(leaf->mn_flags, F_BIGDATA)) { + MDB_page *omp; + pgno_t pg; + int level, ovpages, dpages = OVPAGES(data->mv_size, env->me_psize); + + memcpy(&pg, olddata.mv_data, sizeof(pg)); + if ((rc2 = mdb_page_get(mc->mc_txn, pg, &omp, &level)) != 0) + return rc2; + ovpages = omp->mp_pages; + + /* Is the ov page large enough? */ + if (ovpages >= dpages) { + if (!(omp->mp_flags & P_DIRTY) && + (level || (env->me_flags & MDB_WRITEMAP))) + { + rc = mdb_page_unspill(mc->mc_txn, omp, &omp); + if (rc) + return rc; + level = 0; /* dirty in this txn or clean */ + } + /* Is it dirty? */ + if (omp->mp_flags & P_DIRTY) { + /* yes, overwrite it. Note in this case we don't + * bother to try shrinking the page if the new data + * is smaller than the overflow threshold. + */ + if (level > 1) { + /* It is writable only in a parent txn */ + size_t sz = (size_t) env->me_psize * ovpages, off; + MDB_page *np = mdb_page_malloc(mc->mc_txn, ovpages); + MDB_ID2 id2; + if (!np) + return ENOMEM; + id2.mid = pg; + id2.mptr = np; + rc2 = mdb_mid2l_insert(mc->mc_txn->mt_u.dirty_list, &id2); + mdb_cassert(mc, rc2 == 0); + if (!(flags & MDB_RESERVE)) { + /* Copy end of page, adjusting alignment so + * compiler may copy words instead of bytes. + */ + off = (PAGEHDRSZ + data->mv_size) & -sizeof(size_t); + memcpy((size_t *)((char *)np + off), + (size_t *)((char *)omp + off), sz - off); + sz = PAGEHDRSZ; + } + memcpy(np, omp, sz); /* Copy beginning of page */ + omp = np; + } + SETDSZ(leaf, data->mv_size); + if (F_ISSET(flags, MDB_RESERVE)) + data->mv_data = METADATA(omp); + else + memcpy(METADATA(omp), data->mv_data, data->mv_size); + return MDB_SUCCESS; + } + } + if ((rc2 = mdb_ovpage_free(mc, omp)) != MDB_SUCCESS) + return rc2; + } else if (data->mv_size == olddata.mv_size) { + /* same size, just replace it. Note that we could + * also reuse this node if the new data is smaller, + * but instead we opt to shrink the node in that case. + */ + if (F_ISSET(flags, MDB_RESERVE)) + data->mv_data = olddata.mv_data; + else if (!(mc->mc_flags & C_SUB)) + memcpy(olddata.mv_data, data->mv_data, data->mv_size); + else { + memcpy(NODEKEY(leaf), key->mv_data, key->mv_size); + goto fix_parent; + } + return MDB_SUCCESS; + } + mdb_node_del(mc, 0); + } + + rdata = data; + +new_sub: + nflags = flags & NODE_ADD_FLAGS; + nsize = IS_LEAF2(mc->mc_pg[mc->mc_top]) ? key->mv_size : mdb_leaf_size(env, key, rdata); + if (SIZELEFT(mc->mc_pg[mc->mc_top]) < nsize) { + if (( flags & (F_DUPDATA|F_SUBDATA)) == F_DUPDATA ) + nflags &= ~MDB_APPEND; /* sub-page may need room to grow */ + if (!insert_key) + nflags |= MDB_SPLIT_REPLACE; + rc = mdb_page_split(mc, key, rdata, P_INVALID, nflags); + } else { + /* There is room already in this leaf page. */ + rc = mdb_node_add(mc, mc->mc_ki[mc->mc_top], key, rdata, 0, nflags); + if (rc == 0 && insert_key) { + /* Adjust other cursors pointing to mp */ + MDB_cursor *m2, *m3; + MDB_dbi dbi = mc->mc_dbi; + unsigned i = mc->mc_top; + MDB_page *mp = mc->mc_pg[i]; + + for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { + if (mc->mc_flags & C_SUB) + m3 = &m2->mc_xcursor->mx_cursor; + else + m3 = m2; + if (m3 == mc || m3->mc_snum < mc->mc_snum) continue; + if (m3->mc_pg[i] == mp && m3->mc_ki[i] >= mc->mc_ki[i]) { + m3->mc_ki[i]++; + } + } + } + } + + if (rc == MDB_SUCCESS) { + /* Now store the actual data in the child DB. Note that we're + * storing the user data in the keys field, so there are strict + * size limits on dupdata. The actual data fields of the child + * DB are all zero size. + */ + if (do_sub) { + int xflags; + size_t ecount; +put_sub: + xdata.mv_size = 0; + xdata.mv_data = ""; + leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); + if (flags & MDB_CURRENT) { + xflags = MDB_CURRENT|MDB_NOSPILL; + } else { + mdb_xcursor_init1(mc, leaf); + xflags = (flags & MDB_NODUPDATA) ? + MDB_NOOVERWRITE|MDB_NOSPILL : MDB_NOSPILL; + } + /* converted, write the original data first */ + if (dkey.mv_size) { + rc = mdb_cursor_put(&mc->mc_xcursor->mx_cursor, &dkey, &xdata, xflags); + if (rc) + goto bad_sub; + { + /* Adjust other cursors pointing to mp */ + MDB_cursor *m2; + unsigned i = mc->mc_top; + MDB_page *mp = mc->mc_pg[i]; + + for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2=m2->mc_next) { + if (m2 == mc || m2->mc_snum < mc->mc_snum) continue; + if (!(m2->mc_flags & C_INITIALIZED)) continue; + if (m2->mc_pg[i] == mp && m2->mc_ki[i] == mc->mc_ki[i]) { + mdb_xcursor_init1(m2, leaf); + } + } + } + /* we've done our job */ + dkey.mv_size = 0; + } + ecount = mc->mc_xcursor->mx_db.md_entries; + if (flags & MDB_APPENDDUP) + xflags |= MDB_APPEND; + rc = mdb_cursor_put(&mc->mc_xcursor->mx_cursor, data, &xdata, xflags); + if (flags & F_SUBDATA) { + void *db = NODEDATA(leaf); + memcpy(db, &mc->mc_xcursor->mx_db, sizeof(MDB_db)); + } + insert_data = mc->mc_xcursor->mx_db.md_entries - ecount; + } + /* Increment count unless we just replaced an existing item. */ + if (insert_data) + mc->mc_db->md_entries++; + if (insert_key) { + /* Invalidate txn if we created an empty sub-DB */ + if (rc) + goto bad_sub; + /* If we succeeded and the key didn't exist before, + * make sure the cursor is marked valid. + */ + mc->mc_flags |= C_INITIALIZED; + } + if (flags & MDB_MULTIPLE) { + if (!rc) { + mcount++; + /* let caller know how many succeeded, if any */ + data[1].mv_size = mcount; + if (mcount < dcount) { + data[0].mv_data = (char *)data[0].mv_data + data[0].mv_size; + insert_key = insert_data = 0; + goto more; + } + } + } + return rc; +bad_sub: + if (rc == MDB_KEYEXIST) /* should not happen, we deleted that item */ + rc = MDB_CORRUPTED; + } + mc->mc_txn->mt_flags |= MDB_TXN_ERROR; + return rc; +} + +int +mdb_cursor_del(MDB_cursor *mc, unsigned int flags) +{ + MDB_node *leaf; + MDB_page *mp; + int rc; + + if (mc->mc_txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_ERROR)) + return (mc->mc_txn->mt_flags & MDB_TXN_RDONLY) ? EACCES : MDB_BAD_TXN; + + if (!(mc->mc_flags & C_INITIALIZED)) + return EINVAL; + + if (mc->mc_ki[mc->mc_top] >= NUMKEYS(mc->mc_pg[mc->mc_top])) + return MDB_NOTFOUND; + + if (!(flags & MDB_NOSPILL) && (rc = mdb_page_spill(mc, NULL, NULL))) + return rc; + + rc = mdb_cursor_touch(mc); + if (rc) + return rc; + + mp = mc->mc_pg[mc->mc_top]; + if (IS_LEAF2(mp)) + goto del_key; + leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); + + if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { + if (flags & MDB_NODUPDATA) { + /* mdb_cursor_del0() will subtract the final entry */ + mc->mc_db->md_entries -= mc->mc_xcursor->mx_db.md_entries - 1; + } else { + if (!F_ISSET(leaf->mn_flags, F_SUBDATA)) { + mc->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(leaf); + } + rc = mdb_cursor_del(&mc->mc_xcursor->mx_cursor, MDB_NOSPILL); + if (rc) + return rc; + /* If sub-DB still has entries, we're done */ + if (mc->mc_xcursor->mx_db.md_entries) { + if (leaf->mn_flags & F_SUBDATA) { + /* update subDB info */ + void *db = NODEDATA(leaf); + memcpy(db, &mc->mc_xcursor->mx_db, sizeof(MDB_db)); + } else { + MDB_cursor *m2; + /* shrink fake page */ + mdb_node_shrink(mp, mc->mc_ki[mc->mc_top]); + leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); + mc->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(leaf); + /* fix other sub-DB cursors pointed at this fake page */ + for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2=m2->mc_next) { + if (m2 == mc || m2->mc_snum < mc->mc_snum) continue; + if (m2->mc_pg[mc->mc_top] == mp && + m2->mc_ki[mc->mc_top] == mc->mc_ki[mc->mc_top]) + m2->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(leaf); + } + } + mc->mc_db->md_entries--; + mc->mc_flags |= C_DEL; + return rc; + } + /* otherwise fall thru and delete the sub-DB */ + } + + if (leaf->mn_flags & F_SUBDATA) { + /* add all the child DB's pages to the free list */ + rc = mdb_drop0(&mc->mc_xcursor->mx_cursor, 0); + if (rc) + goto fail; + } + } + + /* add overflow pages to free list */ + if (F_ISSET(leaf->mn_flags, F_BIGDATA)) { + MDB_page *omp; + pgno_t pg; + + memcpy(&pg, NODEDATA(leaf), sizeof(pg)); + if ((rc = mdb_page_get(mc->mc_txn, pg, &omp, NULL)) || + (rc = mdb_ovpage_free(mc, omp))) + goto fail; + } + +del_key: + return mdb_cursor_del0(mc); + +fail: + mc->mc_txn->mt_flags |= MDB_TXN_ERROR; + return rc; +} + +/** Allocate and initialize new pages for a database. + * @param[in] mc a cursor on the database being added to. + * @param[in] flags flags defining what type of page is being allocated. + * @param[in] num the number of pages to allocate. This is usually 1, + * unless allocating overflow pages for a large record. + * @param[out] mp Address of a page, or NULL on failure. + * @return 0 on success, non-zero on failure. + */ +static int +mdb_page_new(MDB_cursor *mc, uint32_t flags, int num, MDB_page **mp) +{ + MDB_page *np; + int rc; + + if ((rc = mdb_page_alloc(mc, num, &np))) + return rc; + DPRINTF(("allocated new mpage %"Z"u, page size %u", + np->mp_pgno, mc->mc_txn->mt_env->me_psize)); + np->mp_flags = flags | P_DIRTY; + np->mp_lower = (PAGEHDRSZ-PAGEBASE); + np->mp_upper = mc->mc_txn->mt_env->me_psize - PAGEBASE; + + if (IS_BRANCH(np)) + mc->mc_db->md_branch_pages++; + else if (IS_LEAF(np)) + mc->mc_db->md_leaf_pages++; + else if (IS_OVERFLOW(np)) { + mc->mc_db->md_overflow_pages += num; + np->mp_pages = num; + } + *mp = np; + + return 0; +} + +/** Calculate the size of a leaf node. + * The size depends on the environment's page size; if a data item + * is too large it will be put onto an overflow page and the node + * size will only include the key and not the data. Sizes are always + * rounded up to an even number of bytes, to guarantee 2-byte alignment + * of the #MDB_node headers. + * @param[in] env The environment handle. + * @param[in] key The key for the node. + * @param[in] data The data for the node. + * @return The number of bytes needed to store the node. + */ +static size_t +mdb_leaf_size(MDB_env *env, MDB_val *key, MDB_val *data) +{ + size_t sz; + + sz = LEAFSIZE(key, data); + if (sz > env->me_nodemax) { + /* put on overflow page */ + sz -= data->mv_size - sizeof(pgno_t); + } + + return EVEN(sz + sizeof(indx_t)); +} + +/** Calculate the size of a branch node. + * The size should depend on the environment's page size but since + * we currently don't support spilling large keys onto overflow + * pages, it's simply the size of the #MDB_node header plus the + * size of the key. Sizes are always rounded up to an even number + * of bytes, to guarantee 2-byte alignment of the #MDB_node headers. + * @param[in] env The environment handle. + * @param[in] key The key for the node. + * @return The number of bytes needed to store the node. + */ +static size_t +mdb_branch_size(MDB_env *env, MDB_val *key) +{ + size_t sz; + + sz = INDXSIZE(key); + if (sz > env->me_nodemax) { + /* put on overflow page */ + /* not implemented */ + /* sz -= key->size - sizeof(pgno_t); */ + } + + return sz + sizeof(indx_t); +} + +/** Add a node to the page pointed to by the cursor. + * @param[in] mc The cursor for this operation. + * @param[in] indx The index on the page where the new node should be added. + * @param[in] key The key for the new node. + * @param[in] data The data for the new node, if any. + * @param[in] pgno The page number, if adding a branch node. + * @param[in] flags Flags for the node. + * @return 0 on success, non-zero on failure. Possible errors are: + * <ul> + * <li>ENOMEM - failed to allocate overflow pages for the node. + * <li>MDB_PAGE_FULL - there is insufficient room in the page. This error + * should never happen since all callers already calculate the + * page's free space before calling this function. + * </ul> + */ +static int +mdb_node_add(MDB_cursor *mc, indx_t indx, + MDB_val *key, MDB_val *data, pgno_t pgno, unsigned int flags) +{ + unsigned int i; + size_t node_size = NODESIZE; + ssize_t room; + indx_t ofs; + MDB_node *node; + MDB_page *mp = mc->mc_pg[mc->mc_top]; + MDB_page *ofp = NULL; /* overflow page */ + DKBUF; + + mdb_cassert(mc, mp->mp_upper >= mp->mp_lower); + + DPRINTF(("add to %s %spage %"Z"u index %i, data size %"Z"u key size %"Z"u [%s]", + IS_LEAF(mp) ? "leaf" : "branch", + IS_SUBP(mp) ? "sub-" : "", + mdb_dbg_pgno(mp), indx, data ? data->mv_size : 0, + key ? key->mv_size : 0, key ? DKEY(key) : "null")); + + if (IS_LEAF2(mp)) { + /* Move higher keys up one slot. */ + int ksize = mc->mc_db->md_pad, dif; + char *ptr = LEAF2KEY(mp, indx, ksize); + dif = NUMKEYS(mp) - indx; + if (dif > 0) + memmove(ptr+ksize, ptr, dif*ksize); + /* insert new key */ + memcpy(ptr, key->mv_data, ksize); + + /* Just using these for counting */ + mp->mp_lower += sizeof(indx_t); + mp->mp_upper -= ksize - sizeof(indx_t); + return MDB_SUCCESS; + } + + room = (ssize_t)SIZELEFT(mp) - (ssize_t)sizeof(indx_t); + if (key != NULL) + node_size += key->mv_size; + if (IS_LEAF(mp)) { + mdb_cassert(mc, data); + if (F_ISSET(flags, F_BIGDATA)) { + /* Data already on overflow page. */ + node_size += sizeof(pgno_t); + } else if (node_size + data->mv_size > mc->mc_txn->mt_env->me_nodemax) { + int ovpages = OVPAGES(data->mv_size, mc->mc_txn->mt_env->me_psize); + int rc; + /* Put data on overflow page. */ + DPRINTF(("data size is %"Z"u, node would be %"Z"u, put data on overflow page", + data->mv_size, node_size+data->mv_size)); + node_size = EVEN(node_size + sizeof(pgno_t)); + if ((ssize_t)node_size > room) + goto full; + if ((rc = mdb_page_new(mc, P_OVERFLOW, ovpages, &ofp))) + return rc; + DPRINTF(("allocated overflow page %"Z"u", ofp->mp_pgno)); + flags |= F_BIGDATA; + goto update; + } else { + node_size += data->mv_size; + } + } + node_size = EVEN(node_size); + if ((ssize_t)node_size > room) + goto full; + +update: + /* Move higher pointers up one slot. */ + for (i = NUMKEYS(mp); i > indx; i--) + mp->mp_ptrs[i] = mp->mp_ptrs[i - 1]; + + /* Adjust free space offsets. */ + ofs = mp->mp_upper - node_size; + mdb_cassert(mc, ofs >= mp->mp_lower + sizeof(indx_t)); + mp->mp_ptrs[indx] = ofs; + mp->mp_upper = ofs; + mp->mp_lower += sizeof(indx_t); + + /* Write the node data. */ + node = NODEPTR(mp, indx); + node->mn_ksize = (key == NULL) ? 0 : key->mv_size; + node->mn_flags = flags; + if (IS_LEAF(mp)) + SETDSZ(node,data->mv_size); + else + SETPGNO(node,pgno); + + if (key) + memcpy(NODEKEY(node), key->mv_data, key->mv_size); + + if (IS_LEAF(mp)) { + mdb_cassert(mc, key); + if (ofp == NULL) { + if (F_ISSET(flags, F_BIGDATA)) + memcpy(node->mn_data + key->mv_size, data->mv_data, + sizeof(pgno_t)); + else if (F_ISSET(flags, MDB_RESERVE)) + data->mv_data = node->mn_data + key->mv_size; + else + memcpy(node->mn_data + key->mv_size, data->mv_data, + data->mv_size); + } else { + memcpy(node->mn_data + key->mv_size, &ofp->mp_pgno, + sizeof(pgno_t)); + if (F_ISSET(flags, MDB_RESERVE)) + data->mv_data = METADATA(ofp); + else + memcpy(METADATA(ofp), data->mv_data, data->mv_size); + } + } + + return MDB_SUCCESS; + +full: + DPRINTF(("not enough room in page %"Z"u, got %u ptrs", + mdb_dbg_pgno(mp), NUMKEYS(mp))); + DPRINTF(("upper-lower = %u - %u = %"Z"d", mp->mp_upper,mp->mp_lower,room)); + DPRINTF(("node size = %"Z"u", node_size)); + mc->mc_txn->mt_flags |= MDB_TXN_ERROR; + return MDB_PAGE_FULL; +} + +/** Delete the specified node from a page. + * @param[in] mc Cursor pointing to the node to delete. + * @param[in] ksize The size of a node. Only used if the page is + * part of a #MDB_DUPFIXED database. + */ +static void +mdb_node_del(MDB_cursor *mc, int ksize) +{ + MDB_page *mp = mc->mc_pg[mc->mc_top]; + indx_t indx = mc->mc_ki[mc->mc_top]; + unsigned int sz; + indx_t i, j, numkeys, ptr; + MDB_node *node; + char *base; + + DPRINTF(("delete node %u on %s page %"Z"u", indx, + IS_LEAF(mp) ? "leaf" : "branch", mdb_dbg_pgno(mp))); + numkeys = NUMKEYS(mp); + mdb_cassert(mc, indx < numkeys); + + if (IS_LEAF2(mp)) { + int x = numkeys - 1 - indx; + base = LEAF2KEY(mp, indx, ksize); + if (x) + memmove(base, base + ksize, x * ksize); + mp->mp_lower -= sizeof(indx_t); + mp->mp_upper += ksize - sizeof(indx_t); + return; + } + + node = NODEPTR(mp, indx); + sz = NODESIZE + node->mn_ksize; + if (IS_LEAF(mp)) { + if (F_ISSET(node->mn_flags, F_BIGDATA)) + sz += sizeof(pgno_t); + else + sz += NODEDSZ(node); + } + sz = EVEN(sz); + + ptr = mp->mp_ptrs[indx]; + for (i = j = 0; i < numkeys; i++) { + if (i != indx) { + mp->mp_ptrs[j] = mp->mp_ptrs[i]; + if (mp->mp_ptrs[i] < ptr) + mp->mp_ptrs[j] += sz; + j++; + } + } + + base = (char *)mp + mp->mp_upper + PAGEBASE; + memmove(base + sz, base, ptr - mp->mp_upper); + + mp->mp_lower -= sizeof(indx_t); + mp->mp_upper += sz; +} + +/** Compact the main page after deleting a node on a subpage. + * @param[in] mp The main page to operate on. + * @param[in] indx The index of the subpage on the main page. + */ +static void +mdb_node_shrink(MDB_page *mp, indx_t indx) +{ + MDB_node *node; + MDB_page *sp, *xp; + char *base; + int nsize, delta; + indx_t i, numkeys, ptr; + + node = NODEPTR(mp, indx); + sp = (MDB_page *)NODEDATA(node); + delta = SIZELEFT(sp); + xp = (MDB_page *)((char *)sp + delta); + + /* shift subpage upward */ + if (IS_LEAF2(sp)) { + nsize = NUMKEYS(sp) * sp->mp_pad; + if (nsize & 1) + return; /* do not make the node uneven-sized */ + memmove(METADATA(xp), METADATA(sp), nsize); + } else { + int i; + numkeys = NUMKEYS(sp); + for (i=numkeys-1; i>=0; i--) + xp->mp_ptrs[i] = sp->mp_ptrs[i] - delta; + } + xp->mp_upper = sp->mp_lower; + xp->mp_lower = sp->mp_lower; + xp->mp_flags = sp->mp_flags; + xp->mp_pad = sp->mp_pad; + COPY_PGNO(xp->mp_pgno, mp->mp_pgno); + + nsize = NODEDSZ(node) - delta; + SETDSZ(node, nsize); + + /* shift lower nodes upward */ + ptr = mp->mp_ptrs[indx]; + numkeys = NUMKEYS(mp); + for (i = 0; i < numkeys; i++) { + if (mp->mp_ptrs[i] <= ptr) + mp->mp_ptrs[i] += delta; + } + + base = (char *)mp + mp->mp_upper + PAGEBASE; + memmove(base + delta, base, ptr - mp->mp_upper + NODESIZE + NODEKSZ(node)); + mp->mp_upper += delta; +} + +/** Initial setup of a sorted-dups cursor. + * Sorted duplicates are implemented as a sub-database for the given key. + * The duplicate data items are actually keys of the sub-database. + * Operations on the duplicate data items are performed using a sub-cursor + * initialized when the sub-database is first accessed. This function does + * the preliminary setup of the sub-cursor, filling in the fields that + * depend only on the parent DB. + * @param[in] mc The main cursor whose sorted-dups cursor is to be initialized. + */ +static void +mdb_xcursor_init0(MDB_cursor *mc) +{ + MDB_xcursor *mx = mc->mc_xcursor; + + mx->mx_cursor.mc_xcursor = NULL; + mx->mx_cursor.mc_txn = mc->mc_txn; + mx->mx_cursor.mc_db = &mx->mx_db; + mx->mx_cursor.mc_dbx = &mx->mx_dbx; + mx->mx_cursor.mc_dbi = mc->mc_dbi; + mx->mx_cursor.mc_dbflag = &mx->mx_dbflag; + mx->mx_cursor.mc_snum = 0; + mx->mx_cursor.mc_top = 0; + mx->mx_cursor.mc_flags = C_SUB; + mx->mx_dbx.md_name.mv_size = 0; + mx->mx_dbx.md_name.mv_data = NULL; + mx->mx_dbx.md_cmp = mc->mc_dbx->md_dcmp; + mx->mx_dbx.md_dcmp = NULL; + mx->mx_dbx.md_rel = mc->mc_dbx->md_rel; +} + +/** Final setup of a sorted-dups cursor. + * Sets up the fields that depend on the data from the main cursor. + * @param[in] mc The main cursor whose sorted-dups cursor is to be initialized. + * @param[in] node The data containing the #MDB_db record for the + * sorted-dup database. + */ +static void +mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node) +{ + MDB_xcursor *mx = mc->mc_xcursor; + + if (node->mn_flags & F_SUBDATA) { + memcpy(&mx->mx_db, NODEDATA(node), sizeof(MDB_db)); + mx->mx_cursor.mc_pg[0] = 0; + mx->mx_cursor.mc_snum = 0; + mx->mx_cursor.mc_top = 0; + mx->mx_cursor.mc_flags = C_SUB; + } else { + MDB_page *fp = NODEDATA(node); + mx->mx_db.md_pad = mc->mc_pg[mc->mc_top]->mp_pad; + mx->mx_db.md_flags = 0; + mx->mx_db.md_depth = 1; + mx->mx_db.md_branch_pages = 0; + mx->mx_db.md_leaf_pages = 1; + mx->mx_db.md_overflow_pages = 0; + mx->mx_db.md_entries = NUMKEYS(fp); + COPY_PGNO(mx->mx_db.md_root, fp->mp_pgno); + mx->mx_cursor.mc_snum = 1; + mx->mx_cursor.mc_top = 0; + mx->mx_cursor.mc_flags = C_INITIALIZED|C_SUB; + mx->mx_cursor.mc_pg[0] = fp; + mx->mx_cursor.mc_ki[0] = 0; + if (mc->mc_db->md_flags & MDB_DUPFIXED) { + mx->mx_db.md_flags = MDB_DUPFIXED; + mx->mx_db.md_pad = fp->mp_pad; + if (mc->mc_db->md_flags & MDB_INTEGERDUP) + mx->mx_db.md_flags |= MDB_INTEGERKEY; + } + } + DPRINTF(("Sub-db -%u root page %"Z"u", mx->mx_cursor.mc_dbi, + mx->mx_db.md_root)); + mx->mx_dbflag = DB_VALID|DB_DIRTY; /* DB_DIRTY guides mdb_cursor_touch */ +#if UINT_MAX < SIZE_MAX + if (mx->mx_dbx.md_cmp == mdb_cmp_int && mx->mx_db.md_pad == sizeof(size_t)) + mx->mx_dbx.md_cmp = mdb_cmp_clong; +#endif +} + +/** Initialize a cursor for a given transaction and database. */ +static void +mdb_cursor_init(MDB_cursor *mc, MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx) +{ + mc->mc_next = NULL; + mc->mc_backup = NULL; + mc->mc_dbi = dbi; + mc->mc_txn = txn; + mc->mc_db = &txn->mt_dbs[dbi]; + mc->mc_dbx = &txn->mt_dbxs[dbi]; + mc->mc_dbflag = &txn->mt_dbflags[dbi]; + mc->mc_snum = 0; + mc->mc_top = 0; + mc->mc_pg[0] = 0; + mc->mc_flags = 0; + if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT) { + mdb_tassert(txn, mx != NULL); + mc->mc_xcursor = mx; + mdb_xcursor_init0(mc); + } else { + mc->mc_xcursor = NULL; + } + if (*mc->mc_dbflag & DB_STALE) { + mdb_page_search(mc, NULL, MDB_PS_ROOTONLY); + } +} + +int +mdb_cursor_open(MDB_txn *txn, MDB_dbi dbi, MDB_cursor **ret) +{ + MDB_cursor *mc; + size_t size = sizeof(MDB_cursor); + + if (!ret || !TXN_DBI_EXIST(txn, dbi)) + return EINVAL; + + if (txn->mt_flags & MDB_TXN_ERROR) + return MDB_BAD_TXN; + + /* Allow read access to the freelist */ + if (!dbi && !F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) + return EINVAL; + + if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT) + size += sizeof(MDB_xcursor); + + if ((mc = malloc(size)) != NULL) { + mdb_cursor_init(mc, txn, dbi, (MDB_xcursor *)(mc + 1)); + if (txn->mt_cursors) { + mc->mc_next = txn->mt_cursors[dbi]; + txn->mt_cursors[dbi] = mc; + mc->mc_flags |= C_UNTRACK; + } + } else { + return ENOMEM; + } + + *ret = mc; + + return MDB_SUCCESS; +} + +int +mdb_cursor_renew(MDB_txn *txn, MDB_cursor *mc) +{ + if (!mc || !TXN_DBI_EXIST(txn, mc->mc_dbi)) + return EINVAL; + + if ((mc->mc_flags & C_UNTRACK) || txn->mt_cursors) + return EINVAL; + + if (txn->mt_flags & MDB_TXN_ERROR) + return MDB_BAD_TXN; + + mdb_cursor_init(mc, txn, mc->mc_dbi, mc->mc_xcursor); + return MDB_SUCCESS; +} + +/* Return the count of duplicate data items for the current key */ +int +mdb_cursor_count(MDB_cursor *mc, size_t *countp) +{ + MDB_node *leaf; + + if (mc == NULL || countp == NULL) + return EINVAL; + + if (mc->mc_xcursor == NULL) + return MDB_INCOMPATIBLE; + + if (mc->mc_txn->mt_flags & MDB_TXN_ERROR) + return MDB_BAD_TXN; + + if (!(mc->mc_flags & C_INITIALIZED)) + return EINVAL; + + if (!mc->mc_snum || (mc->mc_flags & C_EOF)) + return MDB_NOTFOUND; + + leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); + if (!F_ISSET(leaf->mn_flags, F_DUPDATA)) { + *countp = 1; + } else { + if (!(mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) + return EINVAL; + + *countp = mc->mc_xcursor->mx_db.md_entries; + } + return MDB_SUCCESS; +} + +void +mdb_cursor_close(MDB_cursor *mc) +{ + if (mc && !mc->mc_backup) { + /* remove from txn, if tracked */ + if ((mc->mc_flags & C_UNTRACK) && mc->mc_txn->mt_cursors) { + MDB_cursor **prev = &mc->mc_txn->mt_cursors[mc->mc_dbi]; + while (*prev && *prev != mc) prev = &(*prev)->mc_next; + if (*prev == mc) + *prev = mc->mc_next; + } + free(mc); + } +} + +MDB_txn * +mdb_cursor_txn(MDB_cursor *mc) +{ + if (!mc) return NULL; + return mc->mc_txn; +} + +MDB_dbi +mdb_cursor_dbi(MDB_cursor *mc) +{ + return mc->mc_dbi; +} + +/** Replace the key for a branch node with a new key. + * @param[in] mc Cursor pointing to the node to operate on. + * @param[in] key The new key to use. + * @return 0 on success, non-zero on failure. + */ +static int +mdb_update_key(MDB_cursor *mc, MDB_val *key) +{ + MDB_page *mp; + MDB_node *node; + char *base; + size_t len; + int delta, ksize, oksize; + indx_t ptr, i, numkeys, indx; + DKBUF; + + indx = mc->mc_ki[mc->mc_top]; + mp = mc->mc_pg[mc->mc_top]; + node = NODEPTR(mp, indx); + ptr = mp->mp_ptrs[indx]; +#if MDB_DEBUG + { + MDB_val k2; + char kbuf2[DKBUF_MAXKEYSIZE*2+1]; + k2.mv_data = NODEKEY(node); + k2.mv_size = node->mn_ksize; + DPRINTF(("update key %u (ofs %u) [%s] to [%s] on page %"Z"u", + indx, ptr, + mdb_dkey(&k2, kbuf2), + DKEY(key), + mp->mp_pgno)); + } +#endif + + /* Sizes must be 2-byte aligned. */ + ksize = EVEN(key->mv_size); + oksize = EVEN(node->mn_ksize); + delta = ksize - oksize; + + /* Shift node contents if EVEN(key length) changed. */ + if (delta) { + if (delta > 0 && SIZELEFT(mp) < delta) { + pgno_t pgno; + /* not enough space left, do a delete and split */ + DPRINTF(("Not enough room, delta = %d, splitting...", delta)); + pgno = NODEPGNO(node); + mdb_node_del(mc, 0); + return mdb_page_split(mc, key, NULL, pgno, MDB_SPLIT_REPLACE); + } + + numkeys = NUMKEYS(mp); + for (i = 0; i < numkeys; i++) { + if (mp->mp_ptrs[i] <= ptr) + mp->mp_ptrs[i] -= delta; + } + + base = (char *)mp + mp->mp_upper + PAGEBASE; + len = ptr - mp->mp_upper + NODESIZE; + memmove(base - delta, base, len); + mp->mp_upper -= delta; + + node = NODEPTR(mp, indx); + } + + /* But even if no shift was needed, update ksize */ + if (node->mn_ksize != key->mv_size) + node->mn_ksize = key->mv_size; + + if (key->mv_size) + memcpy(NODEKEY(node), key->mv_data, key->mv_size); + + return MDB_SUCCESS; +} + +static void +mdb_cursor_copy(const MDB_cursor *csrc, MDB_cursor *cdst); + +/** Move a node from csrc to cdst. + */ +static int +mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst) +{ + MDB_node *srcnode; + MDB_val key, data; + pgno_t srcpg; + MDB_cursor mn; + int rc; + unsigned short flags; + + DKBUF; + + /* Mark src and dst as dirty. */ + if ((rc = mdb_page_touch(csrc)) || + (rc = mdb_page_touch(cdst))) + return rc; + + if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) { + key.mv_size = csrc->mc_db->md_pad; + key.mv_data = LEAF2KEY(csrc->mc_pg[csrc->mc_top], csrc->mc_ki[csrc->mc_top], key.mv_size); + data.mv_size = 0; + data.mv_data = NULL; + srcpg = 0; + flags = 0; + } else { + srcnode = NODEPTR(csrc->mc_pg[csrc->mc_top], csrc->mc_ki[csrc->mc_top]); + mdb_cassert(csrc, !((size_t)srcnode & 1)); + srcpg = NODEPGNO(srcnode); + flags = srcnode->mn_flags; + if (csrc->mc_ki[csrc->mc_top] == 0 && IS_BRANCH(csrc->mc_pg[csrc->mc_top])) { + unsigned int snum = csrc->mc_snum; + MDB_node *s2; + /* must find the lowest key below src */ + rc = mdb_page_search_lowest(csrc); + if (rc) + return rc; + if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) { + key.mv_size = csrc->mc_db->md_pad; + key.mv_data = LEAF2KEY(csrc->mc_pg[csrc->mc_top], 0, key.mv_size); + } else { + s2 = NODEPTR(csrc->mc_pg[csrc->mc_top], 0); + key.mv_size = NODEKSZ(s2); + key.mv_data = NODEKEY(s2); + } + csrc->mc_snum = snum--; + csrc->mc_top = snum; + } else { + key.mv_size = NODEKSZ(srcnode); + key.mv_data = NODEKEY(srcnode); + } + data.mv_size = NODEDSZ(srcnode); + data.mv_data = NODEDATA(srcnode); + } + if (IS_BRANCH(cdst->mc_pg[cdst->mc_top]) && cdst->mc_ki[cdst->mc_top] == 0) { + unsigned int snum = cdst->mc_snum; + MDB_node *s2; + MDB_val bkey; + /* must find the lowest key below dst */ + mdb_cursor_copy(cdst, &mn); + rc = mdb_page_search_lowest(&mn); + if (rc) + return rc; + if (IS_LEAF2(mn.mc_pg[mn.mc_top])) { + bkey.mv_size = mn.mc_db->md_pad; + bkey.mv_data = LEAF2KEY(mn.mc_pg[mn.mc_top], 0, bkey.mv_size); + } else { + s2 = NODEPTR(mn.mc_pg[mn.mc_top], 0); + bkey.mv_size = NODEKSZ(s2); + bkey.mv_data = NODEKEY(s2); + } + mn.mc_snum = snum--; + mn.mc_top = snum; + mn.mc_ki[snum] = 0; + rc = mdb_update_key(&mn, &bkey); + if (rc) + return rc; + } + + DPRINTF(("moving %s node %u [%s] on page %"Z"u to node %u on page %"Z"u", + IS_LEAF(csrc->mc_pg[csrc->mc_top]) ? "leaf" : "branch", + csrc->mc_ki[csrc->mc_top], + DKEY(&key), + csrc->mc_pg[csrc->mc_top]->mp_pgno, + cdst->mc_ki[cdst->mc_top], cdst->mc_pg[cdst->mc_top]->mp_pgno)); + + /* Add the node to the destination page. + */ + rc = mdb_node_add(cdst, cdst->mc_ki[cdst->mc_top], &key, &data, srcpg, flags); + if (rc != MDB_SUCCESS) + return rc; + + /* Delete the node from the source page. + */ + mdb_node_del(csrc, key.mv_size); + + { + /* Adjust other cursors pointing to mp */ + MDB_cursor *m2, *m3; + MDB_dbi dbi = csrc->mc_dbi; + MDB_page *mp = csrc->mc_pg[csrc->mc_top]; + + for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { + if (csrc->mc_flags & C_SUB) + m3 = &m2->mc_xcursor->mx_cursor; + else + m3 = m2; + if (m3 == csrc) continue; + if (m3->mc_pg[csrc->mc_top] == mp && m3->mc_ki[csrc->mc_top] == + csrc->mc_ki[csrc->mc_top]) { + m3->mc_pg[csrc->mc_top] = cdst->mc_pg[cdst->mc_top]; + m3->mc_ki[csrc->mc_top] = cdst->mc_ki[cdst->mc_top]; + } + } + } + + /* Update the parent separators. + */ + if (csrc->mc_ki[csrc->mc_top] == 0) { + if (csrc->mc_ki[csrc->mc_top-1] != 0) { + if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) { + key.mv_data = LEAF2KEY(csrc->mc_pg[csrc->mc_top], 0, key.mv_size); + } else { + srcnode = NODEPTR(csrc->mc_pg[csrc->mc_top], 0); + key.mv_size = NODEKSZ(srcnode); + key.mv_data = NODEKEY(srcnode); + } + DPRINTF(("update separator for source page %"Z"u to [%s]", + csrc->mc_pg[csrc->mc_top]->mp_pgno, DKEY(&key))); + mdb_cursor_copy(csrc, &mn); + mn.mc_snum--; + mn.mc_top--; + if ((rc = mdb_update_key(&mn, &key)) != MDB_SUCCESS) + return rc; + } + if (IS_BRANCH(csrc->mc_pg[csrc->mc_top])) { + MDB_val nullkey; + indx_t ix = csrc->mc_ki[csrc->mc_top]; + nullkey.mv_size = 0; + csrc->mc_ki[csrc->mc_top] = 0; + rc = mdb_update_key(csrc, &nullkey); + csrc->mc_ki[csrc->mc_top] = ix; + mdb_cassert(csrc, rc == MDB_SUCCESS); + } + } + + if (cdst->mc_ki[cdst->mc_top] == 0) { + if (cdst->mc_ki[cdst->mc_top-1] != 0) { + if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) { + key.mv_data = LEAF2KEY(cdst->mc_pg[cdst->mc_top], 0, key.mv_size); + } else { + srcnode = NODEPTR(cdst->mc_pg[cdst->mc_top], 0); + key.mv_size = NODEKSZ(srcnode); + key.mv_data = NODEKEY(srcnode); + } + DPRINTF(("update separator for destination page %"Z"u to [%s]", + cdst->mc_pg[cdst->mc_top]->mp_pgno, DKEY(&key))); + mdb_cursor_copy(cdst, &mn); + mn.mc_snum--; + mn.mc_top--; + if ((rc = mdb_update_key(&mn, &key)) != MDB_SUCCESS) + return rc; + } + if (IS_BRANCH(cdst->mc_pg[cdst->mc_top])) { + MDB_val nullkey; + indx_t ix = cdst->mc_ki[cdst->mc_top]; + nullkey.mv_size = 0; + cdst->mc_ki[cdst->mc_top] = 0; + rc = mdb_update_key(cdst, &nullkey); + cdst->mc_ki[cdst->mc_top] = ix; + mdb_cassert(cdst, rc == MDB_SUCCESS); + } + } + + return MDB_SUCCESS; +} + +/** Merge one page into another. + * The nodes from the page pointed to by \b csrc will + * be copied to the page pointed to by \b cdst and then + * the \b csrc page will be freed. + * @param[in] csrc Cursor pointing to the source page. + * @param[in] cdst Cursor pointing to the destination page. + * @return 0 on success, non-zero on failure. + */ +static int +mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst) +{ + MDB_page *psrc, *pdst; + MDB_node *srcnode; + MDB_val key, data; + unsigned nkeys; + int rc; + indx_t i, j; + + psrc = csrc->mc_pg[csrc->mc_top]; + pdst = cdst->mc_pg[cdst->mc_top]; + + DPRINTF(("merging page %"Z"u into %"Z"u", psrc->mp_pgno, pdst->mp_pgno)); + + mdb_cassert(csrc, csrc->mc_snum > 1); /* can't merge root page */ + mdb_cassert(csrc, cdst->mc_snum > 1); + + /* Mark dst as dirty. */ + if ((rc = mdb_page_touch(cdst))) + return rc; + + /* Move all nodes from src to dst. + */ + j = nkeys = NUMKEYS(pdst); + if (IS_LEAF2(psrc)) { + key.mv_size = csrc->mc_db->md_pad; + key.mv_data = METADATA(psrc); + for (i = 0; i < NUMKEYS(psrc); i++, j++) { + rc = mdb_node_add(cdst, j, &key, NULL, 0, 0); + if (rc != MDB_SUCCESS) + return rc; + key.mv_data = (char *)key.mv_data + key.mv_size; + } + } else { + for (i = 0; i < NUMKEYS(psrc); i++, j++) { + srcnode = NODEPTR(psrc, i); + if (i == 0 && IS_BRANCH(psrc)) { + MDB_cursor mn; + MDB_node *s2; + mdb_cursor_copy(csrc, &mn); + /* must find the lowest key below src */ + rc = mdb_page_search_lowest(&mn); + if (rc) + return rc; + if (IS_LEAF2(mn.mc_pg[mn.mc_top])) { + key.mv_size = mn.mc_db->md_pad; + key.mv_data = LEAF2KEY(mn.mc_pg[mn.mc_top], 0, key.mv_size); + } else { + s2 = NODEPTR(mn.mc_pg[mn.mc_top], 0); + key.mv_size = NODEKSZ(s2); + key.mv_data = NODEKEY(s2); + } + } else { + key.mv_size = srcnode->mn_ksize; + key.mv_data = NODEKEY(srcnode); + } + + data.mv_size = NODEDSZ(srcnode); + data.mv_data = NODEDATA(srcnode); + rc = mdb_node_add(cdst, j, &key, &data, NODEPGNO(srcnode), srcnode->mn_flags); + if (rc != MDB_SUCCESS) + return rc; + } + } + + DPRINTF(("dst page %"Z"u now has %u keys (%.1f%% filled)", + pdst->mp_pgno, NUMKEYS(pdst), + (float)PAGEFILL(cdst->mc_txn->mt_env, pdst) / 10)); + + /* Unlink the src page from parent and add to free list. + */ + csrc->mc_top--; + mdb_node_del(csrc, 0); + if (csrc->mc_ki[csrc->mc_top] == 0) { + key.mv_size = 0; + rc = mdb_update_key(csrc, &key); + if (rc) { + csrc->mc_top++; + return rc; + } + } + csrc->mc_top++; + + psrc = csrc->mc_pg[csrc->mc_top]; + /* If not operating on FreeDB, allow this page to be reused + * in this txn. Otherwise just add to free list. + */ + rc = mdb_page_loose(csrc, psrc); + if (rc) + return rc; + if (IS_LEAF(psrc)) + csrc->mc_db->md_leaf_pages--; + else + csrc->mc_db->md_branch_pages--; + { + /* Adjust other cursors pointing to mp */ + MDB_cursor *m2, *m3; + MDB_dbi dbi = csrc->mc_dbi; + + for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { + if (csrc->mc_flags & C_SUB) + m3 = &m2->mc_xcursor->mx_cursor; + else + m3 = m2; + if (m3 == csrc) continue; + if (m3->mc_snum < csrc->mc_snum) continue; + if (m3->mc_pg[csrc->mc_top] == psrc) { + m3->mc_pg[csrc->mc_top] = pdst; + m3->mc_ki[csrc->mc_top] += nkeys; + } + } + } + { + unsigned int snum = cdst->mc_snum; + uint16_t depth = cdst->mc_db->md_depth; + mdb_cursor_pop(cdst); + rc = mdb_rebalance(cdst); + /* Did the tree shrink? */ + if (depth > cdst->mc_db->md_depth) + snum--; + cdst->mc_snum = snum; + cdst->mc_top = snum-1; + } + return rc; +} + +/** Copy the contents of a cursor. + * @param[in] csrc The cursor to copy from. + * @param[out] cdst The cursor to copy to. + */ +static void +mdb_cursor_copy(const MDB_cursor *csrc, MDB_cursor *cdst) +{ + unsigned int i; + + cdst->mc_txn = csrc->mc_txn; + cdst->mc_dbi = csrc->mc_dbi; + cdst->mc_db = csrc->mc_db; + cdst->mc_dbx = csrc->mc_dbx; + cdst->mc_snum = csrc->mc_snum; + cdst->mc_top = csrc->mc_top; + cdst->mc_flags = csrc->mc_flags; + + for (i=0; i<csrc->mc_snum; i++) { + cdst->mc_pg[i] = csrc->mc_pg[i]; + cdst->mc_ki[i] = csrc->mc_ki[i]; + } +} + +/** Rebalance the tree after a delete operation. + * @param[in] mc Cursor pointing to the page where rebalancing + * should begin. + * @return 0 on success, non-zero on failure. + */ +static int +mdb_rebalance(MDB_cursor *mc) +{ + MDB_node *node; + int rc; + unsigned int ptop, minkeys; + MDB_cursor mn; + indx_t oldki; + + minkeys = 1 + (IS_BRANCH(mc->mc_pg[mc->mc_top])); + DPRINTF(("rebalancing %s page %"Z"u (has %u keys, %.1f%% full)", + IS_LEAF(mc->mc_pg[mc->mc_top]) ? "leaf" : "branch", + mdb_dbg_pgno(mc->mc_pg[mc->mc_top]), NUMKEYS(mc->mc_pg[mc->mc_top]), + (float)PAGEFILL(mc->mc_txn->mt_env, mc->mc_pg[mc->mc_top]) / 10)); + + if (PAGEFILL(mc->mc_txn->mt_env, mc->mc_pg[mc->mc_top]) >= FILL_THRESHOLD && + NUMKEYS(mc->mc_pg[mc->mc_top]) >= minkeys) { + DPRINTF(("no need to rebalance page %"Z"u, above fill threshold", + mdb_dbg_pgno(mc->mc_pg[mc->mc_top]))); + return MDB_SUCCESS; + } + + if (mc->mc_snum < 2) { + MDB_page *mp = mc->mc_pg[0]; + if (IS_SUBP(mp)) { + DPUTS("Can't rebalance a subpage, ignoring"); + return MDB_SUCCESS; + } + if (NUMKEYS(mp) == 0) { + DPUTS("tree is completely empty"); + mc->mc_db->md_root = P_INVALID; + mc->mc_db->md_depth = 0; + mc->mc_db->md_leaf_pages = 0; + rc = mdb_midl_append(&mc->mc_txn->mt_free_pgs, mp->mp_pgno); + if (rc) + return rc; + /* Adjust cursors pointing to mp */ + mc->mc_snum = 0; + mc->mc_top = 0; + mc->mc_flags &= ~C_INITIALIZED; + { + MDB_cursor *m2, *m3; + MDB_dbi dbi = mc->mc_dbi; + + for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { + if (mc->mc_flags & C_SUB) + m3 = &m2->mc_xcursor->mx_cursor; + else + m3 = m2; + if (m3->mc_snum < mc->mc_snum) continue; + if (m3->mc_pg[0] == mp) { + m3->mc_snum = 0; + m3->mc_top = 0; + m3->mc_flags &= ~C_INITIALIZED; + } + } + } + } else if (IS_BRANCH(mp) && NUMKEYS(mp) == 1) { + int i; + DPUTS("collapsing root page!"); + rc = mdb_midl_append(&mc->mc_txn->mt_free_pgs, mp->mp_pgno); + if (rc) + return rc; + mc->mc_db->md_root = NODEPGNO(NODEPTR(mp, 0)); + rc = mdb_page_get(mc->mc_txn,mc->mc_db->md_root,&mc->mc_pg[0],NULL); + if (rc) + return rc; + mc->mc_db->md_depth--; + mc->mc_db->md_branch_pages--; + mc->mc_ki[0] = mc->mc_ki[1]; + for (i = 1; i<mc->mc_db->md_depth; i++) { + mc->mc_pg[i] = mc->mc_pg[i+1]; + mc->mc_ki[i] = mc->mc_ki[i+1]; + } + { + /* Adjust other cursors pointing to mp */ + MDB_cursor *m2, *m3; + MDB_dbi dbi = mc->mc_dbi; + + for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { + if (mc->mc_flags & C_SUB) + m3 = &m2->mc_xcursor->mx_cursor; + else + m3 = m2; + if (m3 == mc || m3->mc_snum < mc->mc_snum) continue; + if (m3->mc_pg[0] == mp) { + m3->mc_snum--; + m3->mc_top--; + for (i=0; i<m3->mc_snum; i++) { + m3->mc_pg[i] = m3->mc_pg[i+1]; + m3->mc_ki[i] = m3->mc_ki[i+1]; + } + } + } + } + } else + DPUTS("root page doesn't need rebalancing"); + return MDB_SUCCESS; + } + + /* The parent (branch page) must have at least 2 pointers, + * otherwise the tree is invalid. + */ + ptop = mc->mc_top-1; + mdb_cassert(mc, NUMKEYS(mc->mc_pg[ptop]) > 1); + + /* Leaf page fill factor is below the threshold. + * Try to move keys from left or right neighbor, or + * merge with a neighbor page. + */ + + /* Find neighbors. + */ + mdb_cursor_copy(mc, &mn); + mn.mc_xcursor = NULL; + + oldki = mc->mc_ki[mc->mc_top]; + if (mc->mc_ki[ptop] == 0) { + /* We're the leftmost leaf in our parent. + */ + DPUTS("reading right neighbor"); + mn.mc_ki[ptop]++; + node = NODEPTR(mc->mc_pg[ptop], mn.mc_ki[ptop]); + rc = mdb_page_get(mc->mc_txn,NODEPGNO(node),&mn.mc_pg[mn.mc_top],NULL); + if (rc) + return rc; + mn.mc_ki[mn.mc_top] = 0; + mc->mc_ki[mc->mc_top] = NUMKEYS(mc->mc_pg[mc->mc_top]); + } else { + /* There is at least one neighbor to the left. + */ + DPUTS("reading left neighbor"); + mn.mc_ki[ptop]--; + node = NODEPTR(mc->mc_pg[ptop], mn.mc_ki[ptop]); + rc = mdb_page_get(mc->mc_txn,NODEPGNO(node),&mn.mc_pg[mn.mc_top],NULL); + if (rc) + return rc; + mn.mc_ki[mn.mc_top] = NUMKEYS(mn.mc_pg[mn.mc_top]) - 1; + mc->mc_ki[mc->mc_top] = 0; + } + + DPRINTF(("found neighbor page %"Z"u (%u keys, %.1f%% full)", + mn.mc_pg[mn.mc_top]->mp_pgno, NUMKEYS(mn.mc_pg[mn.mc_top]), + (float)PAGEFILL(mc->mc_txn->mt_env, mn.mc_pg[mn.mc_top]) / 10)); + + /* If the neighbor page is above threshold and has enough keys, + * move one key from it. Otherwise we should try to merge them. + * (A branch page must never have less than 2 keys.) + */ + minkeys = 1 + (IS_BRANCH(mn.mc_pg[mn.mc_top])); + if (PAGEFILL(mc->mc_txn->mt_env, mn.mc_pg[mn.mc_top]) >= FILL_THRESHOLD && NUMKEYS(mn.mc_pg[mn.mc_top]) > minkeys) { + rc = mdb_node_move(&mn, mc); + if (mc->mc_ki[ptop]) { + oldki++; + } + } else { + if (mc->mc_ki[ptop] == 0) { + rc = mdb_page_merge(&mn, mc); + } else { + oldki += NUMKEYS(mn.mc_pg[mn.mc_top]); + mn.mc_ki[mn.mc_top] += mc->mc_ki[mn.mc_top] + 1; + rc = mdb_page_merge(mc, &mn); + mdb_cursor_copy(&mn, mc); + } + mc->mc_flags &= ~C_EOF; + } + mc->mc_ki[mc->mc_top] = oldki; + return rc; +} + +/** Complete a delete operation started by #mdb_cursor_del(). */ +static int +mdb_cursor_del0(MDB_cursor *mc) +{ + int rc; + MDB_page *mp; + indx_t ki; + unsigned int nkeys; + + ki = mc->mc_ki[mc->mc_top]; + mdb_node_del(mc, mc->mc_db->md_pad); + mc->mc_db->md_entries--; + rc = mdb_rebalance(mc); + + if (rc == MDB_SUCCESS) { + MDB_cursor *m2, *m3; + MDB_dbi dbi = mc->mc_dbi; + + mp = mc->mc_pg[mc->mc_top]; + nkeys = NUMKEYS(mp); + + /* if mc points past last node in page, find next sibling */ + if (mc->mc_ki[mc->mc_top] >= nkeys) { + rc = mdb_cursor_sibling(mc, 1); + if (rc == MDB_NOTFOUND) { + mc->mc_flags |= C_EOF; + rc = MDB_SUCCESS; + } + } + + /* Adjust other cursors pointing to mp */ + for (m2 = mc->mc_txn->mt_cursors[dbi]; !rc && m2; m2=m2->mc_next) { + m3 = (mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2; + if (! (m2->mc_flags & m3->mc_flags & C_INITIALIZED)) + continue; + if (m3 == mc || m3->mc_snum < mc->mc_snum) + continue; + if (m3->mc_pg[mc->mc_top] == mp) { + if (m3->mc_ki[mc->mc_top] >= ki) { + m3->mc_flags |= C_DEL; + if (m3->mc_ki[mc->mc_top] > ki) + m3->mc_ki[mc->mc_top]--; + else if (mc->mc_db->md_flags & MDB_DUPSORT) + m3->mc_xcursor->mx_cursor.mc_flags |= C_EOF; + } + if (m3->mc_ki[mc->mc_top] >= nkeys) { + rc = mdb_cursor_sibling(m3, 1); + if (rc == MDB_NOTFOUND) { + m3->mc_flags |= C_EOF; + rc = MDB_SUCCESS; + } + } + } + } + mc->mc_flags |= C_DEL; + } + + if (rc) + mc->mc_txn->mt_flags |= MDB_TXN_ERROR; + return rc; +} + +int +mdb_del(MDB_txn *txn, MDB_dbi dbi, + MDB_val *key, MDB_val *data) +{ + if (!key || dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi)) + return EINVAL; + + if (txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_ERROR)) + return (txn->mt_flags & MDB_TXN_RDONLY) ? EACCES : MDB_BAD_TXN; + + if (!F_ISSET(txn->mt_dbs[dbi].md_flags, MDB_DUPSORT)) { + /* must ignore any data */ + data = NULL; + } + + return mdb_del0(txn, dbi, key, data, 0); +} + +static int +mdb_del0(MDB_txn *txn, MDB_dbi dbi, + MDB_val *key, MDB_val *data, unsigned flags) +{ + MDB_cursor mc; + MDB_xcursor mx; + MDB_cursor_op op; + MDB_val rdata, *xdata; + int rc, exact = 0; + DKBUF; + + DPRINTF(("====> delete db %u key [%s]", dbi, DKEY(key))); + + mdb_cursor_init(&mc, txn, dbi, &mx); + + if (data) { + op = MDB_GET_BOTH; + rdata = *data; + xdata = &rdata; + } else { + op = MDB_SET; + xdata = NULL; + flags |= MDB_NODUPDATA; + } + rc = mdb_cursor_set(&mc, key, xdata, op, &exact); + if (rc == 0) { + /* let mdb_page_split know about this cursor if needed: + * delete will trigger a rebalance; if it needs to move + * a node from one page to another, it will have to + * update the parent's separator key(s). If the new sepkey + * is larger than the current one, the parent page may + * run out of space, triggering a split. We need this + * cursor to be consistent until the end of the rebalance. + */ + mc.mc_flags |= C_UNTRACK; + mc.mc_next = txn->mt_cursors[dbi]; + txn->mt_cursors[dbi] = &mc; + rc = mdb_cursor_del(&mc, flags); + txn->mt_cursors[dbi] = mc.mc_next; + } + return rc; +} + +/** Split a page and insert a new node. + * @param[in,out] mc Cursor pointing to the page and desired insertion index. + * The cursor will be updated to point to the actual page and index where + * the node got inserted after the split. + * @param[in] newkey The key for the newly inserted node. + * @param[in] newdata The data for the newly inserted node. + * @param[in] newpgno The page number, if the new node is a branch node. + * @param[in] nflags The #NODE_ADD_FLAGS for the new node. + * @return 0 on success, non-zero on failure. + */ +static int +mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno, + unsigned int nflags) +{ + unsigned int flags; + int rc = MDB_SUCCESS, new_root = 0, did_split = 0; + indx_t newindx; + pgno_t pgno = 0; + int i, j, split_indx, nkeys, pmax; + MDB_env *env = mc->mc_txn->mt_env; + MDB_node *node; + MDB_val sepkey, rkey, xdata, *rdata = &xdata; + MDB_page *copy = NULL; + MDB_page *mp, *rp, *pp; + int ptop; + MDB_cursor mn; + DKBUF; + + mp = mc->mc_pg[mc->mc_top]; + newindx = mc->mc_ki[mc->mc_top]; + nkeys = NUMKEYS(mp); + + DPRINTF(("-----> splitting %s page %"Z"u and adding [%s] at index %i/%i", + IS_LEAF(mp) ? "leaf" : "branch", mp->mp_pgno, + DKEY(newkey), mc->mc_ki[mc->mc_top], nkeys)); + + /* Create a right sibling. */ + if ((rc = mdb_page_new(mc, mp->mp_flags, 1, &rp))) + return rc; + DPRINTF(("new right sibling: page %"Z"u", rp->mp_pgno)); + + if (mc->mc_snum < 2) { + if ((rc = mdb_page_new(mc, P_BRANCH, 1, &pp))) + goto done; + /* shift current top to make room for new parent */ + mc->mc_pg[1] = mc->mc_pg[0]; + mc->mc_ki[1] = mc->mc_ki[0]; + mc->mc_pg[0] = pp; + mc->mc_ki[0] = 0; + mc->mc_db->md_root = pp->mp_pgno; + DPRINTF(("root split! new root = %"Z"u", pp->mp_pgno)); + mc->mc_db->md_depth++; + new_root = 1; + + /* Add left (implicit) pointer. */ + if ((rc = mdb_node_add(mc, 0, NULL, NULL, mp->mp_pgno, 0)) != MDB_SUCCESS) { + /* undo the pre-push */ + mc->mc_pg[0] = mc->mc_pg[1]; + mc->mc_ki[0] = mc->mc_ki[1]; + mc->mc_db->md_root = mp->mp_pgno; + mc->mc_db->md_depth--; + goto done; + } + mc->mc_snum = 2; + mc->mc_top = 1; + ptop = 0; + } else { + ptop = mc->mc_top-1; + DPRINTF(("parent branch page is %"Z"u", mc->mc_pg[ptop]->mp_pgno)); + } + + mc->mc_flags |= C_SPLITTING; + mdb_cursor_copy(mc, &mn); + mn.mc_pg[mn.mc_top] = rp; + mn.mc_ki[ptop] = mc->mc_ki[ptop]+1; + + if (nflags & MDB_APPEND) { + mn.mc_ki[mn.mc_top] = 0; + sepkey = *newkey; + split_indx = newindx; + nkeys = 0; + } else { + + split_indx = (nkeys+1) / 2; + + if (IS_LEAF2(rp)) { + char *split, *ins; + int x; + unsigned int lsize, rsize, ksize; + /* Move half of the keys to the right sibling */ + x = mc->mc_ki[mc->mc_top] - split_indx; + ksize = mc->mc_db->md_pad; + split = LEAF2KEY(mp, split_indx, ksize); + rsize = (nkeys - split_indx) * ksize; + lsize = (nkeys - split_indx) * sizeof(indx_t); + mp->mp_lower -= lsize; + rp->mp_lower += lsize; + mp->mp_upper += rsize - lsize; + rp->mp_upper -= rsize - lsize; + sepkey.mv_size = ksize; + if (newindx == split_indx) { + sepkey.mv_data = newkey->mv_data; + } else { + sepkey.mv_data = split; + } + if (x<0) { + ins = LEAF2KEY(mp, mc->mc_ki[mc->mc_top], ksize); + memcpy(rp->mp_ptrs, split, rsize); + sepkey.mv_data = rp->mp_ptrs; + memmove(ins+ksize, ins, (split_indx - mc->mc_ki[mc->mc_top]) * ksize); + memcpy(ins, newkey->mv_data, ksize); + mp->mp_lower += sizeof(indx_t); + mp->mp_upper -= ksize - sizeof(indx_t); + } else { + if (x) + memcpy(rp->mp_ptrs, split, x * ksize); + ins = LEAF2KEY(rp, x, ksize); + memcpy(ins, newkey->mv_data, ksize); + memcpy(ins+ksize, split + x * ksize, rsize - x * ksize); + rp->mp_lower += sizeof(indx_t); + rp->mp_upper -= ksize - sizeof(indx_t); + mc->mc_ki[mc->mc_top] = x; + mc->mc_pg[mc->mc_top] = rp; + } + } else { + int psize, nsize, k; + /* Maximum free space in an empty page */ + pmax = env->me_psize - PAGEHDRSZ; + if (IS_LEAF(mp)) + nsize = mdb_leaf_size(env, newkey, newdata); + else + nsize = mdb_branch_size(env, newkey); + nsize = EVEN(nsize); + + /* grab a page to hold a temporary copy */ + copy = mdb_page_malloc(mc->mc_txn, 1); + if (copy == NULL) { + rc = ENOMEM; + goto done; + } + copy->mp_pgno = mp->mp_pgno; + copy->mp_flags = mp->mp_flags; + copy->mp_lower = (PAGEHDRSZ-PAGEBASE); + copy->mp_upper = env->me_psize - PAGEBASE; + + /* prepare to insert */ + for (i=0, j=0; i<nkeys; i++) { + if (i == newindx) { + copy->mp_ptrs[j++] = 0; + } + copy->mp_ptrs[j++] = mp->mp_ptrs[i]; + } + + /* When items are relatively large the split point needs + * to be checked, because being off-by-one will make the + * difference between success or failure in mdb_node_add. + * + * It's also relevant if a page happens to be laid out + * such that one half of its nodes are all "small" and + * the other half of its nodes are "large." If the new + * item is also "large" and falls on the half with + * "large" nodes, it also may not fit. + * + * As a final tweak, if the new item goes on the last + * spot on the page (and thus, onto the new page), bias + * the split so the new page is emptier than the old page. + * This yields better packing during sequential inserts. + */ + if (nkeys < 20 || nsize > pmax/16 || newindx >= nkeys) { + /* Find split point */ + psize = 0; + if (newindx <= split_indx || newindx >= nkeys) { + i = 0; j = 1; + k = newindx >= nkeys ? nkeys : split_indx+2; + } else { + i = nkeys; j = -1; + k = split_indx-1; + } + for (; i!=k; i+=j) { + if (i == newindx) { + psize += nsize; + node = NULL; + } else { + node = (MDB_node *)((char *)mp + copy->mp_ptrs[i] + PAGEBASE); + psize += NODESIZE + NODEKSZ(node) + sizeof(indx_t); + if (IS_LEAF(mp)) { + if (F_ISSET(node->mn_flags, F_BIGDATA)) + psize += sizeof(pgno_t); + else + psize += NODEDSZ(node); + } + psize = EVEN(psize); + } + if (psize > pmax || i == k-j) { + split_indx = i + (j<0); + break; + } + } + } + if (split_indx == newindx) { + sepkey.mv_size = newkey->mv_size; + sepkey.mv_data = newkey->mv_data; + } else { + node = (MDB_node *)((char *)mp + copy->mp_ptrs[split_indx] + PAGEBASE); + sepkey.mv_size = node->mn_ksize; + sepkey.mv_data = NODEKEY(node); + } + } + } + + DPRINTF(("separator is %d [%s]", split_indx, DKEY(&sepkey))); + + /* Copy separator key to the parent. + */ + if (SIZELEFT(mn.mc_pg[ptop]) < mdb_branch_size(env, &sepkey)) { + mn.mc_snum--; + mn.mc_top--; + did_split = 1; + rc = mdb_page_split(&mn, &sepkey, NULL, rp->mp_pgno, 0); + if (rc) + goto done; + + /* root split? */ + if (mn.mc_snum == mc->mc_snum) { + mc->mc_pg[mc->mc_snum] = mc->mc_pg[mc->mc_top]; + mc->mc_ki[mc->mc_snum] = mc->mc_ki[mc->mc_top]; + mc->mc_pg[mc->mc_top] = mc->mc_pg[ptop]; + mc->mc_ki[mc->mc_top] = mc->mc_ki[ptop]; + mc->mc_snum++; + mc->mc_top++; + ptop++; + } + /* Right page might now have changed parent. + * Check if left page also changed parent. + */ + if (mn.mc_pg[ptop] != mc->mc_pg[ptop] && + mc->mc_ki[ptop] >= NUMKEYS(mc->mc_pg[ptop])) { + for (i=0; i<ptop; i++) { + mc->mc_pg[i] = mn.mc_pg[i]; + mc->mc_ki[i] = mn.mc_ki[i]; + } + mc->mc_pg[ptop] = mn.mc_pg[ptop]; + if (mn.mc_ki[ptop]) { + mc->mc_ki[ptop] = mn.mc_ki[ptop] - 1; + } else { + /* find right page's left sibling */ + mc->mc_ki[ptop] = mn.mc_ki[ptop]; + mdb_cursor_sibling(mc, 0); + } + } + } else { + mn.mc_top--; + rc = mdb_node_add(&mn, mn.mc_ki[ptop], &sepkey, NULL, rp->mp_pgno, 0); + mn.mc_top++; + } + mc->mc_flags ^= C_SPLITTING; + if (rc != MDB_SUCCESS) { + goto done; + } + if (nflags & MDB_APPEND) { + mc->mc_pg[mc->mc_top] = rp; + mc->mc_ki[mc->mc_top] = 0; + rc = mdb_node_add(mc, 0, newkey, newdata, newpgno, nflags); + if (rc) + goto done; + for (i=0; i<mc->mc_top; i++) + mc->mc_ki[i] = mn.mc_ki[i]; + } else if (!IS_LEAF2(mp)) { + /* Move nodes */ + mc->mc_pg[mc->mc_top] = rp; + i = split_indx; + j = 0; + do { + if (i == newindx) { + rkey.mv_data = newkey->mv_data; + rkey.mv_size = newkey->mv_size; + if (IS_LEAF(mp)) { + rdata = newdata; + } else + pgno = newpgno; + flags = nflags; + /* Update index for the new key. */ + mc->mc_ki[mc->mc_top] = j; + } else { + node = (MDB_node *)((char *)mp + copy->mp_ptrs[i] + PAGEBASE); + rkey.mv_data = NODEKEY(node); + rkey.mv_size = node->mn_ksize; + if (IS_LEAF(mp)) { + xdata.mv_data = NODEDATA(node); + xdata.mv_size = NODEDSZ(node); + rdata = &xdata; + } else + pgno = NODEPGNO(node); + flags = node->mn_flags; + } + + if (!IS_LEAF(mp) && j == 0) { + /* First branch index doesn't need key data. */ + rkey.mv_size = 0; + } + + rc = mdb_node_add(mc, j, &rkey, rdata, pgno, flags); + if (rc) + goto done; + if (i == nkeys) { + i = 0; + j = 0; + mc->mc_pg[mc->mc_top] = copy; + } else { + i++; + j++; + } + } while (i != split_indx); + + nkeys = NUMKEYS(copy); + for (i=0; i<nkeys; i++) + mp->mp_ptrs[i] = copy->mp_ptrs[i]; + mp->mp_lower = copy->mp_lower; + mp->mp_upper = copy->mp_upper; + memcpy(NODEPTR(mp, nkeys-1), NODEPTR(copy, nkeys-1), + env->me_psize - copy->mp_upper - PAGEBASE); + + /* reset back to original page */ + if (newindx < split_indx) { + mc->mc_pg[mc->mc_top] = mp; + if (nflags & MDB_RESERVE) { + node = NODEPTR(mp, mc->mc_ki[mc->mc_top]); + if (!(node->mn_flags & F_BIGDATA)) + newdata->mv_data = NODEDATA(node); + } + } else { + mc->mc_pg[mc->mc_top] = rp; + mc->mc_ki[ptop]++; + /* Make sure mc_ki is still valid. + */ + if (mn.mc_pg[ptop] != mc->mc_pg[ptop] && + mc->mc_ki[ptop] >= NUMKEYS(mc->mc_pg[ptop])) { + for (i=0; i<=ptop; i++) { + mc->mc_pg[i] = mn.mc_pg[i]; + mc->mc_ki[i] = mn.mc_ki[i]; + } + } + } + } + + { + /* Adjust other cursors pointing to mp */ + MDB_cursor *m2, *m3; + MDB_dbi dbi = mc->mc_dbi; + int fixup = NUMKEYS(mp); + + for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { + if (mc->mc_flags & C_SUB) + m3 = &m2->mc_xcursor->mx_cursor; + else + m3 = m2; + if (m3 == mc) + continue; + if (!(m2->mc_flags & m3->mc_flags & C_INITIALIZED)) + continue; + if (m3->mc_flags & C_SPLITTING) + continue; + if (new_root) { + int k; + /* root split */ + for (k=m3->mc_top; k>=0; k--) { + m3->mc_ki[k+1] = m3->mc_ki[k]; + m3->mc_pg[k+1] = m3->mc_pg[k]; + } + if (m3->mc_ki[0] >= split_indx) { + m3->mc_ki[0] = 1; + } else { + m3->mc_ki[0] = 0; + } + m3->mc_pg[0] = mc->mc_pg[0]; + m3->mc_snum++; + m3->mc_top++; + } + if (m3->mc_top >= mc->mc_top && m3->mc_pg[mc->mc_top] == mp) { + if (m3->mc_ki[mc->mc_top] >= newindx && !(nflags & MDB_SPLIT_REPLACE)) + m3->mc_ki[mc->mc_top]++; + if (m3->mc_ki[mc->mc_top] >= fixup) { + m3->mc_pg[mc->mc_top] = rp; + m3->mc_ki[mc->mc_top] -= fixup; + m3->mc_ki[ptop] = mn.mc_ki[ptop]; + } + } else if (!did_split && m3->mc_top >= ptop && m3->mc_pg[ptop] == mc->mc_pg[ptop] && + m3->mc_ki[ptop] >= mc->mc_ki[ptop]) { + m3->mc_ki[ptop]++; + } + } + } + DPRINTF(("mp left: %d, rp left: %d", SIZELEFT(mp), SIZELEFT(rp))); + +done: + if (copy) /* tmp page */ + mdb_page_free(env, copy); + if (rc) + mc->mc_txn->mt_flags |= MDB_TXN_ERROR; + return rc; +} + +int +mdb_put(MDB_txn *txn, MDB_dbi dbi, + MDB_val *key, MDB_val *data, unsigned int flags) +{ + MDB_cursor mc; + MDB_xcursor mx; + + if (!key || !data || dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi)) + return EINVAL; + + if ((flags & (MDB_NOOVERWRITE|MDB_NODUPDATA|MDB_RESERVE|MDB_APPEND|MDB_APPENDDUP)) != flags) + return EINVAL; + + mdb_cursor_init(&mc, txn, dbi, &mx); + return mdb_cursor_put(&mc, key, data, flags); +} + +#ifndef MDB_WBUF +#define MDB_WBUF (1024*1024) +#endif + + /** State needed for a compacting copy. */ +typedef struct mdb_copy { + pthread_mutex_t mc_mutex; + pthread_cond_t mc_cond; + char *mc_wbuf[2]; + char *mc_over[2]; + MDB_env *mc_env; + MDB_txn *mc_txn; + int mc_wlen[2]; + int mc_olen[2]; + pgno_t mc_next_pgno; + HANDLE mc_fd; + int mc_status; + volatile int mc_new; + int mc_toggle; + +} mdb_copy; + + /** Dedicated writer thread for compacting copy. */ +static THREAD_RET ESECT +mdb_env_copythr(void *arg) +{ + mdb_copy *my = arg; + char *ptr; + int toggle = 0, wsize, rc; +#ifdef _WIN32 + DWORD len; +#define DO_WRITE(rc, fd, ptr, w2, len) rc = WriteFile(fd, ptr, w2, &len, NULL) +#else + int len; +#define DO_WRITE(rc, fd, ptr, w2, len) len = write(fd, ptr, w2); rc = (len >= 0) +#endif + + pthread_mutex_lock(&my->mc_mutex); + my->mc_new = 0; + pthread_cond_signal(&my->mc_cond); + for(;;) { + while (!my->mc_new) + pthread_cond_wait(&my->mc_cond, &my->mc_mutex); + if (my->mc_new < 0) { + my->mc_new = 0; + break; + } + my->mc_new = 0; + wsize = my->mc_wlen[toggle]; + ptr = my->mc_wbuf[toggle]; +again: + while (wsize > 0) { + DO_WRITE(rc, my->mc_fd, ptr, wsize, len); + if (!rc) { + rc = ErrCode(); + break; + } else if (len > 0) { + rc = MDB_SUCCESS; + ptr += len; + wsize -= len; + continue; + } else { + rc = EIO; + break; + } + } + if (rc) { + my->mc_status = rc; + break; + } + /* If there's an overflow page tail, write it too */ + if (my->mc_olen[toggle]) { + wsize = my->mc_olen[toggle]; + ptr = my->mc_over[toggle]; + my->mc_olen[toggle] = 0; + goto again; + } + my->mc_wlen[toggle] = 0; + toggle ^= 1; + pthread_cond_signal(&my->mc_cond); + } + pthread_cond_signal(&my->mc_cond); + pthread_mutex_unlock(&my->mc_mutex); + return (THREAD_RET)0; +#undef DO_WRITE +} + + /** Tell the writer thread there's a buffer ready to write */ +static int ESECT +mdb_env_cthr_toggle(mdb_copy *my, int st) +{ + int toggle = my->mc_toggle ^ 1; + pthread_mutex_lock(&my->mc_mutex); + if (my->mc_status) { + pthread_mutex_unlock(&my->mc_mutex); + return my->mc_status; + } + while (my->mc_new == 1) + pthread_cond_wait(&my->mc_cond, &my->mc_mutex); + my->mc_new = st; + my->mc_toggle = toggle; + pthread_cond_signal(&my->mc_cond); + pthread_mutex_unlock(&my->mc_mutex); + return 0; +} + + /** Depth-first tree traversal for compacting copy. */ +static int ESECT +mdb_env_cwalk(mdb_copy *my, pgno_t *pg, int flags) +{ + MDB_cursor mc; + MDB_txn *txn = my->mc_txn; + MDB_node *ni; + MDB_page *mo, *mp, *leaf; + char *buf, *ptr; + int rc, toggle; + unsigned int i; + + /* Empty DB, nothing to do */ + if (*pg == P_INVALID) + return MDB_SUCCESS; + + mc.mc_snum = 1; + mc.mc_top = 0; + mc.mc_txn = txn; + + rc = mdb_page_get(my->mc_txn, *pg, &mc.mc_pg[0], NULL); + if (rc) + return rc; + rc = mdb_page_search_root(&mc, NULL, MDB_PS_FIRST); + if (rc) + return rc; + + /* Make cursor pages writable */ + buf = ptr = malloc(my->mc_env->me_psize * mc.mc_snum); + if (buf == NULL) + return ENOMEM; + + for (i=0; i<mc.mc_top; i++) { + mdb_page_copy((MDB_page *)ptr, mc.mc_pg[i], my->mc_env->me_psize); + mc.mc_pg[i] = (MDB_page *)ptr; + ptr += my->mc_env->me_psize; + } + + /* This is writable space for a leaf page. Usually not needed. */ + leaf = (MDB_page *)ptr; + + toggle = my->mc_toggle; + while (mc.mc_snum > 0) { + unsigned n; + mp = mc.mc_pg[mc.mc_top]; + n = NUMKEYS(mp); + + if (IS_LEAF(mp)) { + if (!IS_LEAF2(mp) && !(flags & F_DUPDATA)) { + for (i=0; i<n; i++) { + ni = NODEPTR(mp, i); + if (ni->mn_flags & F_BIGDATA) { + MDB_page *omp; + pgno_t pg; + + /* Need writable leaf */ + if (mp != leaf) { + mc.mc_pg[mc.mc_top] = leaf; + mdb_page_copy(leaf, mp, my->mc_env->me_psize); + mp = leaf; + ni = NODEPTR(mp, i); + } + + memcpy(&pg, NODEDATA(ni), sizeof(pg)); + rc = mdb_page_get(txn, pg, &omp, NULL); + if (rc) + goto done; + if (my->mc_wlen[toggle] >= MDB_WBUF) { + rc = mdb_env_cthr_toggle(my, 1); + if (rc) + goto done; + toggle = my->mc_toggle; + } + mo = (MDB_page *)(my->mc_wbuf[toggle] + my->mc_wlen[toggle]); + memcpy(mo, omp, my->mc_env->me_psize); + mo->mp_pgno = my->mc_next_pgno; + my->mc_next_pgno += omp->mp_pages; + my->mc_wlen[toggle] += my->mc_env->me_psize; + if (omp->mp_pages > 1) { + my->mc_olen[toggle] = my->mc_env->me_psize * (omp->mp_pages - 1); + my->mc_over[toggle] = (char *)omp + my->mc_env->me_psize; + rc = mdb_env_cthr_toggle(my, 1); + if (rc) + goto done; + toggle = my->mc_toggle; + } + memcpy(NODEDATA(ni), &mo->mp_pgno, sizeof(pgno_t)); + } else if (ni->mn_flags & F_SUBDATA) { + MDB_db db; + + /* Need writable leaf */ + if (mp != leaf) { + mc.mc_pg[mc.mc_top] = leaf; + mdb_page_copy(leaf, mp, my->mc_env->me_psize); + mp = leaf; + ni = NODEPTR(mp, i); + } + + memcpy(&db, NODEDATA(ni), sizeof(db)); + my->mc_toggle = toggle; + rc = mdb_env_cwalk(my, &db.md_root, ni->mn_flags & F_DUPDATA); + if (rc) + goto done; + toggle = my->mc_toggle; + memcpy(NODEDATA(ni), &db, sizeof(db)); + } + } + } + } else { + mc.mc_ki[mc.mc_top]++; + if (mc.mc_ki[mc.mc_top] < n) { + pgno_t pg; +again: + ni = NODEPTR(mp, mc.mc_ki[mc.mc_top]); + pg = NODEPGNO(ni); + rc = mdb_page_get(txn, pg, &mp, NULL); + if (rc) + goto done; + mc.mc_top++; + mc.mc_snum++; + mc.mc_ki[mc.mc_top] = 0; + if (IS_BRANCH(mp)) { + /* Whenever we advance to a sibling branch page, + * we must proceed all the way down to its first leaf. + */ + mdb_page_copy(mc.mc_pg[mc.mc_top], mp, my->mc_env->me_psize); + goto again; + } else + mc.mc_pg[mc.mc_top] = mp; + continue; + } + } + if (my->mc_wlen[toggle] >= MDB_WBUF) { + rc = mdb_env_cthr_toggle(my, 1); + if (rc) + goto done; + toggle = my->mc_toggle; + } + mo = (MDB_page *)(my->mc_wbuf[toggle] + my->mc_wlen[toggle]); + mdb_page_copy(mo, mp, my->mc_env->me_psize); + mo->mp_pgno = my->mc_next_pgno++; + my->mc_wlen[toggle] += my->mc_env->me_psize; + if (mc.mc_top) { + /* Update parent if there is one */ + ni = NODEPTR(mc.mc_pg[mc.mc_top-1], mc.mc_ki[mc.mc_top-1]); + SETPGNO(ni, mo->mp_pgno); + mdb_cursor_pop(&mc); + } else { + /* Otherwise we're done */ + *pg = mo->mp_pgno; + break; + } + } +done: + free(buf); + return rc; +} + + /** Copy environment with compaction. */ +static int ESECT +mdb_env_copyfd1(MDB_env *env, HANDLE fd) +{ + MDB_meta *mm; + MDB_page *mp; + mdb_copy my; + MDB_txn *txn = NULL; + pthread_t thr; + int rc; + +#ifdef _WIN32 + my.mc_mutex = CreateMutex(NULL, FALSE, NULL); + my.mc_cond = CreateEvent(NULL, FALSE, FALSE, NULL); + my.mc_wbuf[0] = _aligned_malloc(MDB_WBUF*2, env->me_os_psize); + if (my.mc_wbuf[0] == NULL) + return errno; +#else + pthread_mutex_init(&my.mc_mutex, NULL); + pthread_cond_init(&my.mc_cond, NULL); +#ifdef HAVE_MEMALIGN + my.mc_wbuf[0] = memalign(env->me_os_psize, MDB_WBUF*2); + if (my.mc_wbuf[0] == NULL) + return errno; +#else + rc = posix_memalign((void **)&my.mc_wbuf[0], env->me_os_psize, MDB_WBUF*2); + if (rc) + return rc; +#endif +#endif + memset(my.mc_wbuf[0], 0, MDB_WBUF*2); + my.mc_wbuf[1] = my.mc_wbuf[0] + MDB_WBUF; + my.mc_wlen[0] = 0; + my.mc_wlen[1] = 0; + my.mc_olen[0] = 0; + my.mc_olen[1] = 0; + my.mc_next_pgno = 2; + my.mc_status = 0; + my.mc_new = 1; + my.mc_toggle = 0; + my.mc_env = env; + my.mc_fd = fd; + THREAD_CREATE(thr, mdb_env_copythr, &my); + + rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn); + if (rc) + return rc; + + mp = (MDB_page *)my.mc_wbuf[0]; + memset(mp, 0, 2*env->me_psize); + mp->mp_pgno = 0; + mp->mp_flags = P_META; + mm = (MDB_meta *)METADATA(mp); + mdb_env_init_meta0(env, mm); + mm->mm_address = env->me_metas[0]->mm_address; + + mp = (MDB_page *)(my.mc_wbuf[0] + env->me_psize); + mp->mp_pgno = 1; + mp->mp_flags = P_META; + *(MDB_meta *)METADATA(mp) = *mm; + mm = (MDB_meta *)METADATA(mp); + + /* Count the number of free pages, subtract from lastpg to find + * number of active pages + */ + { + MDB_ID freecount = 0; + MDB_cursor mc; + MDB_val key, data; + mdb_cursor_init(&mc, txn, FREE_DBI, NULL); + while ((rc = mdb_cursor_get(&mc, &key, &data, MDB_NEXT)) == 0) + freecount += *(MDB_ID *)data.mv_data; + freecount += txn->mt_dbs[0].md_branch_pages + + txn->mt_dbs[0].md_leaf_pages + + txn->mt_dbs[0].md_overflow_pages; + + /* Set metapage 1 */ + mm->mm_last_pg = txn->mt_next_pgno - freecount - 1; + mm->mm_dbs[1] = txn->mt_dbs[1]; + if (mm->mm_last_pg > 1) { + mm->mm_dbs[1].md_root = mm->mm_last_pg; + mm->mm_txnid = 1; + } else { + mm->mm_dbs[1].md_root = P_INVALID; + } + } + my.mc_wlen[0] = env->me_psize * 2; + my.mc_txn = txn; + pthread_mutex_lock(&my.mc_mutex); + while(my.mc_new) + pthread_cond_wait(&my.mc_cond, &my.mc_mutex); + pthread_mutex_unlock(&my.mc_mutex); + rc = mdb_env_cwalk(&my, &txn->mt_dbs[1].md_root, 0); + if (rc == MDB_SUCCESS && my.mc_wlen[my.mc_toggle]) + rc = mdb_env_cthr_toggle(&my, 1); + mdb_env_cthr_toggle(&my, -1); + pthread_mutex_lock(&my.mc_mutex); + while(my.mc_new) + pthread_cond_wait(&my.mc_cond, &my.mc_mutex); + pthread_mutex_unlock(&my.mc_mutex); + THREAD_FINISH(thr); + + mdb_txn_abort(txn); +#ifdef _WIN32 + CloseHandle(my.mc_cond); + CloseHandle(my.mc_mutex); + _aligned_free(my.mc_wbuf[0]); +#else + pthread_cond_destroy(&my.mc_cond); + pthread_mutex_destroy(&my.mc_mutex); + free(my.mc_wbuf[0]); +#endif + return rc; +} + + /** Copy environment as-is. */ +static int ESECT +mdb_env_copyfd0(MDB_env *env, HANDLE fd) +{ + MDB_txn *txn = NULL; + mdb_mutex_t *wmutex = NULL; + int rc; + size_t wsize; + char *ptr; +#ifdef _WIN32 + DWORD len, w2; +#define DO_WRITE(rc, fd, ptr, w2, len) rc = WriteFile(fd, ptr, w2, &len, NULL) +#else + ssize_t len; + size_t w2; +#define DO_WRITE(rc, fd, ptr, w2, len) len = write(fd, ptr, w2); rc = (len >= 0) +#endif + + /* Do the lock/unlock of the reader mutex before starting the + * write txn. Otherwise other read txns could block writers. + */ + rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn); + if (rc) + return rc; + + if (env->me_txns) { + /* We must start the actual read txn after blocking writers */ + mdb_txn_reset0(txn, "reset-stage1"); + + /* Temporarily block writers until we snapshot the meta pages */ + wmutex = MDB_MUTEX(env, w); + if (LOCK_MUTEX(rc, env, wmutex)) + goto leave; + + rc = mdb_txn_renew0(txn); + if (rc) { + UNLOCK_MUTEX(wmutex); + goto leave; + } + } + + wsize = env->me_psize * 2; + ptr = env->me_map; + w2 = wsize; + while (w2 > 0) { + DO_WRITE(rc, fd, ptr, w2, len); + if (!rc) { + rc = ErrCode(); + break; + } else if (len > 0) { + rc = MDB_SUCCESS; + ptr += len; + w2 -= len; + continue; + } else { + /* Non-blocking or async handles are not supported */ + rc = EIO; + break; + } + } + if (wmutex) + UNLOCK_MUTEX(wmutex); + + if (rc) + goto leave; + + w2 = txn->mt_next_pgno * env->me_psize; + { + size_t fsize = 0; + if ((rc = mdb_fsize(env->me_fd, &fsize))) + goto leave; + if (w2 > fsize) + w2 = fsize; + } + wsize = w2 - wsize; + while (wsize > 0) { + if (wsize > MAX_WRITE) + w2 = MAX_WRITE; + else + w2 = wsize; + DO_WRITE(rc, fd, ptr, w2, len); + if (!rc) { + rc = ErrCode(); + break; + } else if (len > 0) { + rc = MDB_SUCCESS; + ptr += len; + wsize -= len; + continue; + } else { + rc = EIO; + break; + } + } + +leave: + mdb_txn_abort(txn); + return rc; +} + +int ESECT +mdb_env_copyfd2(MDB_env *env, HANDLE fd, unsigned int flags) +{ + if (flags & MDB_CP_COMPACT) + return mdb_env_copyfd1(env, fd); + else + return mdb_env_copyfd0(env, fd); +} + +int ESECT +mdb_env_copyfd(MDB_env *env, HANDLE fd) +{ + return mdb_env_copyfd2(env, fd, 0); +} + +int ESECT +mdb_env_copy2(MDB_env *env, const char *path, unsigned int flags) +{ + int rc, len; + char *lpath; + HANDLE newfd = INVALID_HANDLE_VALUE; + + if (env->me_flags & MDB_NOSUBDIR) { + lpath = (char *)path; + } else { + len = strlen(path); + len += sizeof(DATANAME); + lpath = malloc(len); + if (!lpath) + return ENOMEM; + sprintf(lpath, "%s" DATANAME, path); + } + + /* The destination path must exist, but the destination file must not. + * We don't want the OS to cache the writes, since the source data is + * already in the OS cache. + */ +#ifdef _WIN32 + newfd = CreateFile(lpath, GENERIC_WRITE, 0, NULL, CREATE_NEW, + FILE_FLAG_NO_BUFFERING|FILE_FLAG_WRITE_THROUGH, NULL); +#else + newfd = open(lpath, O_WRONLY|O_CREAT|O_EXCL, 0666); +#endif + if (newfd == INVALID_HANDLE_VALUE) { + rc = ErrCode(); + goto leave; + } + + if (env->me_psize >= env->me_os_psize) { +#ifdef O_DIRECT + /* Set O_DIRECT if the file system supports it */ + if ((rc = fcntl(newfd, F_GETFL)) != -1) + (void) fcntl(newfd, F_SETFL, rc | O_DIRECT); +#endif +#ifdef F_NOCACHE /* __APPLE__ */ + rc = fcntl(newfd, F_NOCACHE, 1); + if (rc) { + rc = ErrCode(); + goto leave; + } +#endif + } + + rc = mdb_env_copyfd2(env, newfd, flags); + +leave: + if (!(env->me_flags & MDB_NOSUBDIR)) + free(lpath); + if (newfd != INVALID_HANDLE_VALUE) + if (close(newfd) < 0 && rc == MDB_SUCCESS) + rc = ErrCode(); + + return rc; +} + +int ESECT +mdb_env_copy(MDB_env *env, const char *path) +{ + return mdb_env_copy2(env, path, 0); +} + +int ESECT +mdb_env_set_flags(MDB_env *env, unsigned int flag, int onoff) +{ + if (flag & (env->me_map ? ~CHANGEABLE : ~(CHANGEABLE|CHANGELESS))) + return EINVAL; + if (onoff) + env->me_flags |= flag; + else + env->me_flags &= ~flag; + return MDB_SUCCESS; +} + +int ESECT +mdb_env_get_flags(MDB_env *env, unsigned int *arg) +{ + if (!env || !arg) + return EINVAL; + + *arg = env->me_flags; + return MDB_SUCCESS; +} + +int ESECT +mdb_env_set_userctx(MDB_env *env, void *ctx) +{ + if (!env) + return EINVAL; + env->me_userctx = ctx; + return MDB_SUCCESS; +} + +void * ESECT +mdb_env_get_userctx(MDB_env *env) +{ + return env ? env->me_userctx : NULL; +} + +int ESECT +mdb_env_set_assert(MDB_env *env, MDB_assert_func *func) +{ + if (!env) + return EINVAL; +#ifndef NDEBUG + env->me_assert_func = func; +#endif + return MDB_SUCCESS; +} + +int ESECT +mdb_env_get_path(MDB_env *env, const char **arg) +{ + if (!env || !arg) + return EINVAL; + + *arg = env->me_path; + return MDB_SUCCESS; +} + +int ESECT +mdb_env_get_fd(MDB_env *env, mdb_filehandle_t *arg) +{ + if (!env || !arg) + return EINVAL; + + *arg = env->me_fd; + return MDB_SUCCESS; +} + +/** Common code for #mdb_stat() and #mdb_env_stat(). + * @param[in] env the environment to operate in. + * @param[in] db the #MDB_db record containing the stats to return. + * @param[out] arg the address of an #MDB_stat structure to receive the stats. + * @return 0, this function always succeeds. + */ +static int ESECT +mdb_stat0(MDB_env *env, MDB_db *db, MDB_stat *arg) +{ + arg->ms_psize = env->me_psize; + arg->ms_depth = db->md_depth; + arg->ms_branch_pages = db->md_branch_pages; + arg->ms_leaf_pages = db->md_leaf_pages; + arg->ms_overflow_pages = db->md_overflow_pages; + arg->ms_entries = db->md_entries; + + return MDB_SUCCESS; +} + +int ESECT +mdb_env_stat(MDB_env *env, MDB_stat *arg) +{ + int toggle; + + if (env == NULL || arg == NULL) + return EINVAL; + + toggle = mdb_env_pick_meta(env); + + return mdb_stat0(env, &env->me_metas[toggle]->mm_dbs[MAIN_DBI], arg); +} + +int ESECT +mdb_env_info(MDB_env *env, MDB_envinfo *arg) +{ + int toggle; + + if (env == NULL || arg == NULL) + return EINVAL; + + toggle = mdb_env_pick_meta(env); + arg->me_mapaddr = env->me_metas[toggle]->mm_address; + arg->me_mapsize = env->me_mapsize; + arg->me_maxreaders = env->me_maxreaders; + + /* me_numreaders may be zero if this process never used any readers. Use + * the shared numreader count if it exists. + */ + arg->me_numreaders = env->me_txns ? env->me_txns->mti_numreaders : env->me_numreaders; + + arg->me_last_pgno = env->me_metas[toggle]->mm_last_pg; + arg->me_last_txnid = env->me_metas[toggle]->mm_txnid; + return MDB_SUCCESS; +} + +/** Set the default comparison functions for a database. + * Called immediately after a database is opened to set the defaults. + * The user can then override them with #mdb_set_compare() or + * #mdb_set_dupsort(). + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + */ +static void +mdb_default_cmp(MDB_txn *txn, MDB_dbi dbi) +{ + uint16_t f = txn->mt_dbs[dbi].md_flags; + + txn->mt_dbxs[dbi].md_cmp = + (f & MDB_REVERSEKEY) ? mdb_cmp_memnr : + (f & MDB_INTEGERKEY) ? mdb_cmp_cint : mdb_cmp_memn; + + txn->mt_dbxs[dbi].md_dcmp = + !(f & MDB_DUPSORT) ? 0 : + ((f & MDB_INTEGERDUP) + ? ((f & MDB_DUPFIXED) ? mdb_cmp_int : mdb_cmp_cint) + : ((f & MDB_REVERSEDUP) ? mdb_cmp_memnr : mdb_cmp_memn)); +} + +int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *dbi) +{ + MDB_val key, data; + MDB_dbi i; + MDB_cursor mc; + MDB_db dummy; + int rc, dbflag, exact; + unsigned int unused = 0, seq; + size_t len; + + if (txn->mt_dbxs[FREE_DBI].md_cmp == NULL) { + mdb_default_cmp(txn, FREE_DBI); + } + + if ((flags & VALID_FLAGS) != flags) + return EINVAL; + if (txn->mt_flags & MDB_TXN_ERROR) + return MDB_BAD_TXN; + + /* main DB? */ + if (!name) { + *dbi = MAIN_DBI; + if (flags & PERSISTENT_FLAGS) { + uint16_t f2 = flags & PERSISTENT_FLAGS; + /* make sure flag changes get committed */ + if ((txn->mt_dbs[MAIN_DBI].md_flags | f2) != txn->mt_dbs[MAIN_DBI].md_flags) { + txn->mt_dbs[MAIN_DBI].md_flags |= f2; + txn->mt_flags |= MDB_TXN_DIRTY; + } + } + mdb_default_cmp(txn, MAIN_DBI); + return MDB_SUCCESS; + } + + if (txn->mt_dbxs[MAIN_DBI].md_cmp == NULL) { + mdb_default_cmp(txn, MAIN_DBI); + } + + /* Is the DB already open? */ + len = strlen(name); + for (i=2; i<txn->mt_numdbs; i++) { + if (!txn->mt_dbxs[i].md_name.mv_size) { + /* Remember this free slot */ + if (!unused) unused = i; + continue; + } + if (len == txn->mt_dbxs[i].md_name.mv_size && + !strncmp(name, txn->mt_dbxs[i].md_name.mv_data, len)) { + *dbi = i; + return MDB_SUCCESS; + } + } + + /* If no free slot and max hit, fail */ + if (!unused && txn->mt_numdbs >= txn->mt_env->me_maxdbs) + return MDB_DBS_FULL; + + /* Cannot mix named databases with some mainDB flags */ + if (txn->mt_dbs[MAIN_DBI].md_flags & (MDB_DUPSORT|MDB_INTEGERKEY)) + return (flags & MDB_CREATE) ? MDB_INCOMPATIBLE : MDB_NOTFOUND; + + /* Find the DB info */ + dbflag = DB_NEW|DB_VALID; + exact = 0; + key.mv_size = len; + key.mv_data = (void *)name; + mdb_cursor_init(&mc, txn, MAIN_DBI, NULL); + rc = mdb_cursor_set(&mc, &key, &data, MDB_SET, &exact); + if (rc == MDB_SUCCESS) { + /* make sure this is actually a DB */ + MDB_node *node = NODEPTR(mc.mc_pg[mc.mc_top], mc.mc_ki[mc.mc_top]); + if (!(node->mn_flags & F_SUBDATA)) + return MDB_INCOMPATIBLE; + } else if (rc == MDB_NOTFOUND && (flags & MDB_CREATE)) { + /* Create if requested */ + data.mv_size = sizeof(MDB_db); + data.mv_data = &dummy; + memset(&dummy, 0, sizeof(dummy)); + dummy.md_root = P_INVALID; + dummy.md_flags = flags & PERSISTENT_FLAGS; + rc = mdb_cursor_put(&mc, &key, &data, F_SUBDATA); + dbflag |= DB_DIRTY; + } + + /* OK, got info, add to table */ + if (rc == MDB_SUCCESS) { + unsigned int slot = unused ? unused : txn->mt_numdbs; + txn->mt_dbxs[slot].md_name.mv_data = strdup(name); + txn->mt_dbxs[slot].md_name.mv_size = len; + txn->mt_dbxs[slot].md_rel = NULL; + txn->mt_dbflags[slot] = dbflag; + /* txn-> and env-> are the same in read txns, use + * tmp variable to avoid undefined assignment + */ + seq = ++txn->mt_env->me_dbiseqs[slot]; + txn->mt_dbiseqs[slot] = seq; + + memcpy(&txn->mt_dbs[slot], data.mv_data, sizeof(MDB_db)); + *dbi = slot; + mdb_default_cmp(txn, slot); + if (!unused) { + txn->mt_numdbs++; + } + } + + return rc; +} + +int mdb_stat(MDB_txn *txn, MDB_dbi dbi, MDB_stat *arg) +{ + if (!arg || !TXN_DBI_EXIST(txn, dbi)) + return EINVAL; + + if (txn->mt_flags & MDB_TXN_ERROR) + return MDB_BAD_TXN; + + if (txn->mt_dbflags[dbi] & DB_STALE) { + MDB_cursor mc; + MDB_xcursor mx; + /* Stale, must read the DB's root. cursor_init does it for us. */ + mdb_cursor_init(&mc, txn, dbi, &mx); + } + return mdb_stat0(txn->mt_env, &txn->mt_dbs[dbi], arg); +} + +void mdb_dbi_close(MDB_env *env, MDB_dbi dbi) +{ + char *ptr; + if (dbi <= MAIN_DBI || dbi >= env->me_maxdbs) + return; + ptr = env->me_dbxs[dbi].md_name.mv_data; + /* If there was no name, this was already closed */ + if (ptr) { + env->me_dbxs[dbi].md_name.mv_data = NULL; + env->me_dbxs[dbi].md_name.mv_size = 0; + env->me_dbflags[dbi] = 0; + env->me_dbiseqs[dbi]++; + free(ptr); + } +} + +int mdb_dbi_flags(MDB_txn *txn, MDB_dbi dbi, unsigned int *flags) +{ + /* We could return the flags for the FREE_DBI too but what's the point? */ + if (dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi)) + return EINVAL; + *flags = txn->mt_dbs[dbi].md_flags & PERSISTENT_FLAGS; + return MDB_SUCCESS; +} + +/** Add all the DB's pages to the free list. + * @param[in] mc Cursor on the DB to free. + * @param[in] subs non-Zero to check for sub-DBs in this DB. + * @return 0 on success, non-zero on failure. + */ +static int +mdb_drop0(MDB_cursor *mc, int subs) +{ + int rc; + + rc = mdb_page_search(mc, NULL, MDB_PS_FIRST); + if (rc == MDB_SUCCESS) { + MDB_txn *txn = mc->mc_txn; + MDB_node *ni; + MDB_cursor mx; + unsigned int i; + + /* LEAF2 pages have no nodes, cannot have sub-DBs */ + if (IS_LEAF2(mc->mc_pg[mc->mc_top])) + mdb_cursor_pop(mc); + + mdb_cursor_copy(mc, &mx); + while (mc->mc_snum > 0) { + MDB_page *mp = mc->mc_pg[mc->mc_top]; + unsigned n = NUMKEYS(mp); + if (IS_LEAF(mp)) { + for (i=0; i<n; i++) { + ni = NODEPTR(mp, i); + if (ni->mn_flags & F_BIGDATA) { + MDB_page *omp; + pgno_t pg; + memcpy(&pg, NODEDATA(ni), sizeof(pg)); + rc = mdb_page_get(txn, pg, &omp, NULL); + if (rc != 0) + goto done; + mdb_cassert(mc, IS_OVERFLOW(omp)); + rc = mdb_midl_append_range(&txn->mt_free_pgs, + pg, omp->mp_pages); + if (rc) + goto done; + } else if (subs && (ni->mn_flags & F_SUBDATA)) { + mdb_xcursor_init1(mc, ni); + rc = mdb_drop0(&mc->mc_xcursor->mx_cursor, 0); + if (rc) + goto done; + } + } + } else { + if ((rc = mdb_midl_need(&txn->mt_free_pgs, n)) != 0) + goto done; + for (i=0; i<n; i++) { + pgno_t pg; + ni = NODEPTR(mp, i); + pg = NODEPGNO(ni); + /* free it */ + mdb_midl_xappend(txn->mt_free_pgs, pg); + } + } + if (!mc->mc_top) + break; + mc->mc_ki[mc->mc_top] = i; + rc = mdb_cursor_sibling(mc, 1); + if (rc) { + if (rc != MDB_NOTFOUND) + goto done; + /* no more siblings, go back to beginning + * of previous level. + */ + mdb_cursor_pop(mc); + mc->mc_ki[0] = 0; + for (i=1; i<mc->mc_snum; i++) { + mc->mc_ki[i] = 0; + mc->mc_pg[i] = mx.mc_pg[i]; + } + } + } + /* free it */ + rc = mdb_midl_append(&txn->mt_free_pgs, mc->mc_db->md_root); +done: + if (rc) + txn->mt_flags |= MDB_TXN_ERROR; + } else if (rc == MDB_NOTFOUND) { + rc = MDB_SUCCESS; + } + return rc; +} + +int mdb_drop(MDB_txn *txn, MDB_dbi dbi, int del) +{ + MDB_cursor *mc, *m2; + int rc; + + if ((unsigned)del > 1 || dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi)) + return EINVAL; + + if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) + return EACCES; + + if (dbi > MAIN_DBI && TXN_DBI_CHANGED(txn, dbi)) + return MDB_BAD_DBI; + + rc = mdb_cursor_open(txn, dbi, &mc); + if (rc) + return rc; + + rc = mdb_drop0(mc, mc->mc_db->md_flags & MDB_DUPSORT); + /* Invalidate the dropped DB's cursors */ + for (m2 = txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) + m2->mc_flags &= ~(C_INITIALIZED|C_EOF); + if (rc) + goto leave; + + /* Can't delete the main DB */ + if (del && dbi > MAIN_DBI) { + rc = mdb_del0(txn, MAIN_DBI, &mc->mc_dbx->md_name, NULL, 0); + if (!rc) { + txn->mt_dbflags[dbi] = DB_STALE; + mdb_dbi_close(txn->mt_env, dbi); + } else { + txn->mt_flags |= MDB_TXN_ERROR; + } + } else { + /* reset the DB record, mark it dirty */ + txn->mt_dbflags[dbi] |= DB_DIRTY; + txn->mt_dbs[dbi].md_depth = 0; + txn->mt_dbs[dbi].md_branch_pages = 0; + txn->mt_dbs[dbi].md_leaf_pages = 0; + txn->mt_dbs[dbi].md_overflow_pages = 0; + txn->mt_dbs[dbi].md_entries = 0; + txn->mt_dbs[dbi].md_root = P_INVALID; + + txn->mt_flags |= MDB_TXN_DIRTY; + } +leave: + mdb_cursor_close(mc); + return rc; +} + +int mdb_set_compare(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp) +{ + if (dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi)) + return EINVAL; + + txn->mt_dbxs[dbi].md_cmp = cmp; + return MDB_SUCCESS; +} + +int mdb_set_dupsort(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp) +{ + if (dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi)) + return EINVAL; + + txn->mt_dbxs[dbi].md_dcmp = cmp; + return MDB_SUCCESS; +} + +int mdb_set_relfunc(MDB_txn *txn, MDB_dbi dbi, MDB_rel_func *rel) +{ + if (dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi)) + return EINVAL; + + txn->mt_dbxs[dbi].md_rel = rel; + return MDB_SUCCESS; +} + +int mdb_set_relctx(MDB_txn *txn, MDB_dbi dbi, void *ctx) +{ + if (dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi)) + return EINVAL; + + txn->mt_dbxs[dbi].md_relctx = ctx; + return MDB_SUCCESS; +} + +int ESECT +mdb_env_get_maxkeysize(MDB_env *env) +{ + return ENV_MAXKEY(env); +} + +int ESECT +mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx) +{ + unsigned int i, rdrs; + MDB_reader *mr; + char buf[64]; + int rc = 0, first = 1; + + if (!env || !func) + return -1; + if (!env->me_txns) { + return func("(no reader locks)\n", ctx); + } + rdrs = env->me_txns->mti_numreaders; + mr = env->me_txns->mti_readers; + for (i=0; i<rdrs; i++) { + if (mr[i].mr_pid) { + txnid_t txnid = mr[i].mr_txnid; + sprintf(buf, txnid == (txnid_t)-1 ? + "%10d %"Z"x -\n" : "%10d %"Z"x %"Z"u\n", + (int)mr[i].mr_pid, (size_t)mr[i].mr_tid, txnid); + if (first) { + first = 0; + rc = func(" pid thread txnid\n", ctx); + if (rc < 0) + break; + } + rc = func(buf, ctx); + if (rc < 0) + break; + } + } + if (first) { + rc = func("(no active readers)\n", ctx); + } + return rc; +} + +/** Insert pid into list if not already present. + * return -1 if already present. + */ +static int ESECT +mdb_pid_insert(MDB_PID_T *ids, MDB_PID_T pid) +{ + /* binary search of pid in list */ + unsigned base = 0; + unsigned cursor = 1; + int val = 0; + unsigned n = ids[0]; + + while( 0 < n ) { + unsigned pivot = n >> 1; + cursor = base + pivot + 1; + val = pid - ids[cursor]; + + if( val < 0 ) { + n = pivot; + + } else if ( val > 0 ) { + base = cursor; + n -= pivot + 1; + + } else { + /* found, so it's a duplicate */ + return -1; + } + } + + if( val > 0 ) { + ++cursor; + } + ids[0]++; + for (n = ids[0]; n > cursor; n--) + ids[n] = ids[n-1]; + ids[n] = pid; + return 0; +} + +int ESECT +mdb_reader_check(MDB_env *env, int *dead) +{ + if (!env) + return EINVAL; + if (dead) + *dead = 0; + return env->me_txns ? mdb_reader_check0(env, 0, dead) : MDB_SUCCESS; +} + +/** As #mdb_reader_check(). rlocked = <caller locked the reader mutex>. */ +static int mdb_reader_check0(MDB_env *env, int rlocked, int *dead) +{ + mdb_mutex_t *rmutex = rlocked ? NULL : MDB_MUTEX(env, r); + unsigned int i, j, rdrs; + MDB_reader *mr; + MDB_PID_T *pids, pid; + int rc = MDB_SUCCESS, count = 0; + + rdrs = env->me_txns->mti_numreaders; + pids = malloc((rdrs+1) * sizeof(MDB_PID_T)); + if (!pids) + return ENOMEM; + pids[0] = 0; + mr = env->me_txns->mti_readers; + for (i=0; i<rdrs; i++) { + pid = mr[i].mr_pid; + if (pid && pid != env->me_pid) { + if (mdb_pid_insert(pids, pid) == 0) { + if (!mdb_reader_pid(env, Pidcheck, pid)) { + /* Stale reader found */ + j = i; + if (rmutex) { + if ((rc = LOCK_MUTEX0(rmutex)) != 0) { + if ((rc = mdb_mutex_failed(env, rmutex, rc))) + break; + rdrs = 0; /* the above checked all readers */ + } else { + /* Recheck, a new process may have reused pid */ + if (mdb_reader_pid(env, Pidcheck, pid)) + j = rdrs; + } + } + for (; j<rdrs; j++) + if (mr[j].mr_pid == pid) { + DPRINTF(("clear stale reader pid %u txn %"Z"d", + (unsigned) pid, mr[j].mr_txnid)); + mr[j].mr_pid = 0; + count++; + } + if (rmutex) + UNLOCK_MUTEX(rmutex); + } + } + } + } + free(pids); + if (dead) + *dead = count; + return rc; +} + +#ifdef MDB_ROBUST_SUPPORTED +/** Handle #LOCK_MUTEX0() failure. + * Try to repair the lock file if the mutex owner died. + * @param[in] env the environment handle + * @param[in] mutex LOCK_MUTEX0() mutex + * @param[in] rc LOCK_MUTEX0() error (nonzero) + * @return 0 on success with the mutex locked, or an error code on failure. + */ +static int mdb_mutex_failed(MDB_env *env, mdb_mutex_t *mutex, int rc) +{ + int toggle, rlocked, rc2; + + if (rc == MDB_OWNERDEAD) { + /* We own the mutex. Clean up after dead previous owner. */ + rc = MDB_SUCCESS; + rlocked = (mutex == MDB_MUTEX(env, r)); + if (!rlocked) { + /* Keep mti_txnid updated, otherwise next writer can + * overwrite data which latest meta page refers to. + */ + toggle = mdb_env_pick_meta(env); + env->me_txns->mti_txnid = env->me_metas[toggle]->mm_txnid; + /* env is hosed if the dead thread was ours */ + if (env->me_txn) { + env->me_flags |= MDB_FATAL_ERROR; + env->me_txn = NULL; + rc = MDB_PANIC; + } + } + DPRINTF(("%cmutex owner died, %s", (rlocked ? 'r' : 'w'), + (rc ? "this process' env is hosed" : "recovering"))); + rc2 = mdb_reader_check0(env, rlocked, NULL); + if (rc2 == 0) + rc2 = mdb_mutex_consistent(mutex); + if (rc || (rc = rc2)) { + DPRINTF(("LOCK_MUTEX recovery failed, %s", mdb_strerror(rc))); + UNLOCK_MUTEX(mutex); + } + } else { +#ifdef _WIN32 + rc = ErrCode(); +#endif + DPRINTF(("LOCK_MUTEX failed, %s", mdb_strerror(rc))); + } + + return rc; +} +#endif /* MDB_ROBUST_SUPPORTED */ +/** @} */ diff --git a/external/db_drivers/liblmdb64/mdb_copy.1 b/external/db_drivers/liblmdb64/mdb_copy.1 new file mode 100644 index 000000000..094b26056 --- /dev/null +++ b/external/db_drivers/liblmdb64/mdb_copy.1 @@ -0,0 +1,54 @@ +.TH MDB_COPY 1 "2014/06/20" "LMDB 0.9.14" +.\" Copyright 2012-2014 Howard Chu, Symas Corp. All Rights Reserved. +.\" Copying restrictions apply. See COPYRIGHT/LICENSE. +.SH NAME +mdb_copy \- LMDB environment copy tool +.SH SYNOPSIS +.B mdb_copy +[\c +.BR \-V ] +[\c +.BR \-c ] +[\c +.BR \-n ] +.B srcpath +[\c +.BR dstpath ] +.SH DESCRIPTION +The +.B mdb_copy +utility copies an LMDB environment. The environment can +be copied regardless of whether it is currently in use. +No lockfile is created, since it gets recreated at need. + +If +.I dstpath +is specified it must be the path of an empty directory +for storing the backup. Otherwise, the backup will be +written to stdout. + +.SH OPTIONS +.TP +.BR \-V +Write the library version number to the standard output, and exit. +.TP +.BR \-c +Compact while copying. Only current data pages will be copied; freed +or unused pages will be omitted from the copy. This option will +slow down the backup process as it is more CPU-intensive. +.TP +.BR \-n +Open LDMB environment(s) which do not use subdirectories. + +.SH DIAGNOSTICS +Exit status is zero if no errors occur. +Errors result in a non-zero exit status and +a diagnostic message being written to standard error. +.SH CAVEATS +This utility can trigger significant file size growth if run +in parallel with write transactions, because pages which they +free during copying cannot be reused until the copy is done. +.SH "SEE ALSO" +.BR mdb_stat (1) +.SH AUTHOR +Howard Chu of Symas Corporation <http://www.symas.com> diff --git a/external/db_drivers/liblmdb64/mdb_copy.c b/external/db_drivers/liblmdb64/mdb_copy.c new file mode 100644 index 000000000..af0a94111 --- /dev/null +++ b/external/db_drivers/liblmdb64/mdb_copy.c @@ -0,0 +1,82 @@ +/* mdb_copy.c - memory-mapped database backup tool */ +/* + * Copyright 2012 Howard Chu, Symas Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>. + */ +#ifdef _WIN32 +#include <windows.h> +#define MDB_STDOUT GetStdHandle(STD_OUTPUT_HANDLE) +#else +#define MDB_STDOUT 1 +#endif +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include "lmdb.h" + +static void +sighandle(int sig) +{ +} + +int main(int argc,char * argv[]) +{ + int rc; + MDB_env *env; + const char *progname = argv[0], *act; + unsigned flags = MDB_RDONLY; + unsigned cpflags = 0; + + for (; argc > 1 && argv[1][0] == '-'; argc--, argv++) { + if (argv[1][1] == 'n' && argv[1][2] == '\0') + flags |= MDB_NOSUBDIR; + else if (argv[1][1] == 'c' && argv[1][2] == '\0') + cpflags |= MDB_CP_COMPACT; + else if (argv[1][1] == 'V' && argv[1][2] == '\0') { + printf("%s\n", MDB_VERSION_STRING); + exit(0); + } else + argc = 0; + } + + if (argc<2 || argc>3) { + fprintf(stderr, "usage: %s [-V] [-c] [-n] srcpath [dstpath]\n", progname); + exit(EXIT_FAILURE); + } + +#ifdef SIGPIPE + signal(SIGPIPE, sighandle); +#endif +#ifdef SIGHUP + signal(SIGHUP, sighandle); +#endif + signal(SIGINT, sighandle); + signal(SIGTERM, sighandle); + + act = "opening environment"; + rc = mdb_env_create(&env); + if (rc == MDB_SUCCESS) { + rc = mdb_env_open(env, argv[1], flags, 0600); + } + if (rc == MDB_SUCCESS) { + act = "copying"; + if (argc == 2) + rc = mdb_env_copyfd2(env, MDB_STDOUT, cpflags); + else + rc = mdb_env_copy2(env, argv[2], cpflags); + } + if (rc) + fprintf(stderr, "%s: %s failed, error %d (%s)\n", + progname, act, rc, mdb_strerror(rc)); + mdb_env_close(env); + + return rc ? EXIT_FAILURE : EXIT_SUCCESS; +} diff --git a/external/db_drivers/liblmdb64/mdb_dump.1 b/external/db_drivers/liblmdb64/mdb_dump.1 new file mode 100644 index 000000000..6fcc93081 --- /dev/null +++ b/external/db_drivers/liblmdb64/mdb_dump.1 @@ -0,0 +1,75 @@ +.TH MDB_DUMP 1 "2014/06/20" "LMDB 0.9.14" +.\" Copyright 2014 Howard Chu, Symas Corp. All Rights Reserved. +.\" Copying restrictions apply. See COPYRIGHT/LICENSE. +.SH NAME +mdb_dump \- LMDB environment export tool +.SH SYNOPSIS +.B mdb_dump +.BR \ envpath +[\c +.BR \-V ] +[\c +.BI \-f \ file\fR] +[\c +.BR \-l ] +[\c +.BR \-n ] +[\c +.BR \-p ] +[\c +.BR \-a \ | +.BI \-s \ subdb\fR] +.SH DESCRIPTION +The +.B mdb_dump +utility reads a database and writes its contents to the +standard output using a portable flat-text format +understood by the +.BR mdb_load (1) +utility. +.SH OPTIONS +.TP +.BR \-V +Write the library version number to the standard output, and exit. +.TP +.BR \-f \ file +Write to the specified file instead of to the standard output. +.TP +.BR \-l +List the databases stored in the environment. Just the +names will be listed, no data will be output. +.TP +.BR \-n +Dump an LMDB database which does not use subdirectories. +.TP +.BR \-p +If characters in either the key or data items are printing characters (as +defined by isprint(3)), output them directly. This option permits users to +use standard text editors and tools to modify the contents of databases. + +Note: different systems may have different notions about what characters +are considered printing characters, and databases dumped in this manner may +be less portable to external systems. +.TP +.BR \-a +Dump all of the subdatabases in the environment. +.TP +.BR \-s \ subdb +Dump a specific subdatabase. If no database is specified, only the main database is dumped. +.SH DIAGNOSTICS +Exit status is zero if no errors occur. +Errors result in a non-zero exit status and +a diagnostic message being written to standard error. + +Dumping and reloading databases that use user-defined comparison functions +will result in new databases that use the default comparison functions. +\fBIn this case it is quite likely that the reloaded database will be +damaged beyond repair permitting neither record storage nor retrieval.\fP + +The only available workaround is to modify the source for the +.BR mdb_load (1) +utility to load the database using the correct comparison functions. +.SH "SEE ALSO" +.BR mdb_load (1) +.SH AUTHOR +Howard Chu of Symas Corporation <http://www.symas.com> diff --git a/external/db_drivers/liblmdb64/mdb_dump.c b/external/db_drivers/liblmdb64/mdb_dump.c new file mode 100644 index 000000000..0eb85fd20 --- /dev/null +++ b/external/db_drivers/liblmdb64/mdb_dump.c @@ -0,0 +1,317 @@ +/* mdb_dump.c - memory-mapped database dump tool */ +/* + * Copyright 2011-2014 Howard Chu, Symas Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>. + */ +#include <stdio.h> +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <unistd.h> +#include <signal.h> +#include "lmdb.h" + +#ifdef _WIN32 +#define Z "I" +#else +#define Z "z" +#endif + +#define PRINT 1 +static int mode; + +typedef struct flagbit { + int bit; + char *name; +} flagbit; + +flagbit dbflags[] = { + { MDB_REVERSEKEY, "reversekey" }, + { MDB_DUPSORT, "dupsort" }, + { MDB_INTEGERKEY, "integerkey" }, + { MDB_DUPFIXED, "dupfixed" }, + { MDB_INTEGERDUP, "integerdup" }, + { MDB_REVERSEDUP, "reversedup" }, + { 0, NULL } +}; + +static volatile sig_atomic_t gotsig; + +static void dumpsig( int sig ) +{ + gotsig=1; +} + +static const char hexc[] = "0123456789abcdef"; + +static void hex(unsigned char c) +{ + putchar(hexc[c >> 4]); + putchar(hexc[c & 0xf]); +} + +static void text(MDB_val *v) +{ + unsigned char *c, *end; + + putchar(' '); + c = v->mv_data; + end = c + v->mv_size; + while (c < end) { + if (isprint(*c)) { + putchar(*c); + } else { + putchar('\\'); + hex(*c); + } + c++; + } + putchar('\n'); +} + +static void byte(MDB_val *v) +{ + unsigned char *c, *end; + + putchar(' '); + c = v->mv_data; + end = c + v->mv_size; + while (c < end) { + hex(*c++); + } + putchar('\n'); +} + +/* Dump in BDB-compatible format */ +static int dumpit(MDB_txn *txn, MDB_dbi dbi, char *name) +{ + MDB_cursor *mc; + MDB_stat ms; + MDB_val key, data; + MDB_envinfo info; + unsigned int flags; + int rc, i; + + rc = mdb_dbi_flags(txn, dbi, &flags); + if (rc) return rc; + + rc = mdb_stat(txn, dbi, &ms); + if (rc) return rc; + + rc = mdb_env_info(mdb_txn_env(txn), &info); + if (rc) return rc; + + printf("VERSION=3\n"); + printf("format=%s\n", mode & PRINT ? "print" : "bytevalue"); + if (name) + printf("database=%s\n", name); + printf("type=btree\n"); + printf("mapsize=%" Z "u\n", info.me_mapsize); + if (info.me_mapaddr) + printf("mapaddr=%p\n", info.me_mapaddr); + printf("maxreaders=%u\n", info.me_maxreaders); + + if (flags & MDB_DUPSORT) + printf("duplicates=1\n"); + + for (i=0; dbflags[i].bit; i++) + if (flags & dbflags[i].bit) + printf("%s=1\n", dbflags[i].name); + + printf("db_pagesize=%d\n", ms.ms_psize); + printf("HEADER=END\n"); + + rc = mdb_cursor_open(txn, dbi, &mc); + if (rc) return rc; + + while ((rc = mdb_cursor_get(mc, &key, &data, MDB_NEXT) == MDB_SUCCESS)) { + if (gotsig) { + rc = EINTR; + break; + } + if (mode & PRINT) { + text(&key); + text(&data); + } else { + byte(&key); + byte(&data); + } + } + printf("DATA=END\n"); + if (rc == MDB_NOTFOUND) + rc = MDB_SUCCESS; + + return rc; +} + +static void usage(char *prog) +{ + fprintf(stderr, "usage: %s dbpath [-V] [-f output] [-l] [-n] [-p] [-a|-s subdb]\n", prog); + exit(EXIT_FAILURE); +} + +int main(int argc, char *argv[]) +{ + int i, rc; + MDB_env *env; + MDB_txn *txn; + MDB_dbi dbi; + char *prog = argv[0]; + char *envname; + char *subname = NULL; + int alldbs = 0, envflags = 0, list = 0; + + if (argc < 2) { + usage(prog); + } + + /* -a: dump main DB and all subDBs + * -s: dump only the named subDB + * -n: use NOSUBDIR flag on env_open + * -p: use printable characters + * -f: write to file instead of stdout + * -V: print version and exit + * (default) dump only the main DB + */ + while ((i = getopt(argc, argv, "af:lnps:V")) != EOF) { + switch(i) { + case 'V': + printf("%s\n", MDB_VERSION_STRING); + exit(0); + break; + case 'l': + list = 1; + /*FALLTHROUGH*/; + case 'a': + if (subname) + usage(prog); + alldbs++; + break; + case 'f': + if (freopen(optarg, "w", stdout) == NULL) { + fprintf(stderr, "%s: %s: reopen: %s\n", + prog, optarg, strerror(errno)); + exit(EXIT_FAILURE); + } + break; + case 'n': + envflags |= MDB_NOSUBDIR; + break; + case 'p': + mode |= PRINT; + break; + case 's': + if (alldbs) + usage(prog); + subname = optarg; + break; + default: + usage(prog); + } + } + + if (optind != argc - 1) + usage(prog); + +#ifdef SIGPIPE + signal(SIGPIPE, dumpsig); +#endif +#ifdef SIGHUP + signal(SIGHUP, dumpsig); +#endif + signal(SIGINT, dumpsig); + signal(SIGTERM, dumpsig); + + envname = argv[optind]; + rc = mdb_env_create(&env); + if (rc) { + fprintf(stderr, "mdb_env_create failed, error %d %s\n", rc, mdb_strerror(rc)); + return EXIT_FAILURE; + } + + if (alldbs || subname) { + mdb_env_set_maxdbs(env, 2); + } + + rc = mdb_env_open(env, envname, envflags | MDB_RDONLY, 0664); + if (rc) { + fprintf(stderr, "mdb_env_open failed, error %d %s\n", rc, mdb_strerror(rc)); + goto env_close; + } + + rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn); + if (rc) { + fprintf(stderr, "mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc)); + goto env_close; + } + + rc = mdb_open(txn, subname, 0, &dbi); + if (rc) { + fprintf(stderr, "mdb_open failed, error %d %s\n", rc, mdb_strerror(rc)); + goto txn_abort; + } + + if (alldbs) { + MDB_cursor *cursor; + MDB_val key; + int count = 0; + + rc = mdb_cursor_open(txn, dbi, &cursor); + if (rc) { + fprintf(stderr, "mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc)); + goto txn_abort; + } + while ((rc = mdb_cursor_get(cursor, &key, NULL, MDB_NEXT_NODUP)) == 0) { + char *str; + MDB_dbi db2; + if (memchr(key.mv_data, '\0', key.mv_size)) + continue; + count++; + str = malloc(key.mv_size+1); + memcpy(str, key.mv_data, key.mv_size); + str[key.mv_size] = '\0'; + rc = mdb_open(txn, str, 0, &db2); + if (rc == MDB_SUCCESS) { + if (list) { + printf("%s\n", str); + list++; + } else { + rc = dumpit(txn, db2, str); + if (rc) + break; + } + mdb_close(env, db2); + } + free(str); + if (rc) continue; + } + mdb_cursor_close(cursor); + if (!count) { + fprintf(stderr, "%s: %s does not contain multiple databases\n", prog, envname); + rc = MDB_NOTFOUND; + } else if (rc == MDB_NOTFOUND) { + rc = MDB_SUCCESS; + } + } else { + rc = dumpit(txn, dbi, subname); + } + if (rc && rc != MDB_NOTFOUND) + fprintf(stderr, "%s: %s: %s\n", prog, envname, mdb_strerror(rc)); + + mdb_close(env, dbi); +txn_abort: + mdb_txn_abort(txn); +env_close: + mdb_env_close(env); + + return rc ? EXIT_FAILURE : EXIT_SUCCESS; +} diff --git a/external/db_drivers/liblmdb64/mdb_load.1 b/external/db_drivers/liblmdb64/mdb_load.1 new file mode 100644 index 000000000..728024010 --- /dev/null +++ b/external/db_drivers/liblmdb64/mdb_load.1 @@ -0,0 +1,77 @@ +.TH MDB_LOAD 1 "2014/06/20" "LMDB 0.9.14" +.\" Copyright 2014 Howard Chu, Symas Corp. All Rights Reserved. +.\" Copying restrictions apply. See COPYRIGHT/LICENSE. +.SH NAME +mdb_load \- LMDB environment import tool +.SH SYNOPSIS +.B mdb_load +.BR \ envpath +[\c +.BR \-V ] +[\c +.BI \-f \ file\fR] +[\c +.BR \-n ] +[\c +.BI \-s \ subdb\fR] +[\c +.BR \-N ] +[\c +.BR \-T ] +.SH DESCRIPTION +The +.B mdb_load +utility reads from the standard input and loads it into the +LMDB environment +.BR envpath . + +The input to +.B mdb_load +must be in the output format specified by the +.BR mdb_dump (1) +utility or as specified by the +.B -T +option below. +.SH OPTIONS +.TP +.BR \-V +Write the library version number to the standard output, and exit. +.TP +.BR \-f \ file +Read from the specified file instead of from the standard input. +.TP +.BR \-n +Load an LMDB database which does not use subdirectories. +.TP +.BR \-s \ subdb +Load a specific subdatabase. If no database is specified, data is loaded into the main database. +.TP +.BR \-N +Don't overwrite existing records when loading into an already existing database; just skip them. +.TP +.BR \-T +Load data from simple text files. The input must be paired lines of text, where the first +line of the pair is the key item, and the second line of the pair is its corresponding +data item. + +A simple escape mechanism, where newline and backslash (\\) characters are special, is +applied to the text input. Newline characters are interpreted as record separators. +Backslash characters in the text will be interpreted in one of two ways: If the backslash +character precedes another backslash character, the pair will be interpreted as a literal +backslash. If the backslash character precedes any other character, the two characters +following the backslash will be interpreted as a hexadecimal specification of a single +character; for example, \\0a is a newline character in the ASCII character set. + +For this reason, any backslash or newline characters that naturally occur in the text +input must be escaped to avoid misinterpretation by +.BR mdb_load . + +.SH DIAGNOSTICS +Exit status is zero if no errors occur. +Errors result in a non-zero exit status and +a diagnostic message being written to standard error. + +.SH "SEE ALSO" +.BR mdb_dump (1) +.SH AUTHOR +Howard Chu of Symas Corporation <http://www.symas.com> diff --git a/external/db_drivers/liblmdb64/mdb_load.c b/external/db_drivers/liblmdb64/mdb_load.c new file mode 100644 index 000000000..e0d95e13c --- /dev/null +++ b/external/db_drivers/liblmdb64/mdb_load.c @@ -0,0 +1,452 @@ +/* mdb_load.c - memory-mapped database load tool */ +/* + * Copyright 2011-2014 Howard Chu, Symas Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>. + */ +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <ctype.h> +#include <unistd.h> +#include "lmdb.h" + +#define PRINT 1 +#define NOHDR 2 +static int mode; + +static char *subname = NULL; + +static size_t lineno; +static int version; + +static int flags; + +static char *prog; + +static int Eof; + +static MDB_envinfo info; + +static MDB_val kbuf, dbuf; + +#ifdef _WIN32 +#define Z "I" +#else +#define Z "z" +#endif + +#define STRLENOF(s) (sizeof(s)-1) + +typedef struct flagbit { + int bit; + char *name; + int len; +} flagbit; + +#define S(s) s, STRLENOF(s) + +flagbit dbflags[] = { + { MDB_REVERSEKEY, S("reversekey") }, + { MDB_DUPSORT, S("dupsort") }, + { MDB_INTEGERKEY, S("integerkey") }, + { MDB_DUPFIXED, S("dupfixed") }, + { MDB_INTEGERDUP, S("integerdup") }, + { MDB_REVERSEDUP, S("reversedup") }, + { 0, NULL, 0 } +}; + +static void readhdr(void) +{ + char *ptr; + + while (fgets(dbuf.mv_data, dbuf.mv_size, stdin) != NULL) { + lineno++; + if (!strncmp(dbuf.mv_data, "VERSION=", STRLENOF("VERSION="))) { + version=atoi((char *)dbuf.mv_data+STRLENOF("VERSION=")); + if (version > 3) { + fprintf(stderr, "%s: line %" Z "d: unsupported VERSION %d\n", + prog, lineno, version); + exit(EXIT_FAILURE); + } + } else if (!strncmp(dbuf.mv_data, "HEADER=END", STRLENOF("HEADER=END"))) { + break; + } else if (!strncmp(dbuf.mv_data, "format=", STRLENOF("format="))) { + if (!strncmp((char *)dbuf.mv_data+STRLENOF("FORMAT="), "print", STRLENOF("print"))) + mode |= PRINT; + else if (strncmp((char *)dbuf.mv_data+STRLENOF("FORMAT="), "bytevalue", STRLENOF("bytevalue"))) { + fprintf(stderr, "%s: line %" Z "d: unsupported FORMAT %s\n", + prog, lineno, (char *)dbuf.mv_data+STRLENOF("FORMAT=")); + exit(EXIT_FAILURE); + } + } else if (!strncmp(dbuf.mv_data, "database=", STRLENOF("database="))) { + ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size); + if (ptr) *ptr = '\0'; + if (subname) free(subname); + subname = strdup((char *)dbuf.mv_data+STRLENOF("database=")); + } else if (!strncmp(dbuf.mv_data, "type=", STRLENOF("type="))) { + if (strncmp((char *)dbuf.mv_data+STRLENOF("type="), "btree", STRLENOF("btree"))) { + fprintf(stderr, "%s: line %" Z "d: unsupported type %s\n", + prog, lineno, (char *)dbuf.mv_data+STRLENOF("type=")); + exit(EXIT_FAILURE); + } + } else if (!strncmp(dbuf.mv_data, "mapaddr=", STRLENOF("mapaddr="))) { + int i; + ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size); + if (ptr) *ptr = '\0'; + i = sscanf((char *)dbuf.mv_data+STRLENOF("mapaddr="), "%p", &info.me_mapaddr); + if (i != 1) { + fprintf(stderr, "%s: line %" Z "d: invalid mapaddr %s\n", + prog, lineno, (char *)dbuf.mv_data+STRLENOF("mapaddr=")); + exit(EXIT_FAILURE); + } + } else if (!strncmp(dbuf.mv_data, "mapsize=", STRLENOF("mapsize="))) { + int i; + ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size); + if (ptr) *ptr = '\0'; + i = sscanf((char *)dbuf.mv_data+STRLENOF("mapsize="), "%" Z "u", &info.me_mapsize); + if (i != 1) { + fprintf(stderr, "%s: line %" Z "d: invalid mapsize %s\n", + prog, lineno, (char *)dbuf.mv_data+STRLENOF("mapsize=")); + exit(EXIT_FAILURE); + } + } else if (!strncmp(dbuf.mv_data, "maxreaders=", STRLENOF("maxreaders="))) { + int i; + ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size); + if (ptr) *ptr = '\0'; + i = sscanf((char *)dbuf.mv_data+STRLENOF("maxreaders="), "%u", &info.me_maxreaders); + if (i != 1) { + fprintf(stderr, "%s: line %" Z "d: invalid maxreaders %s\n", + prog, lineno, (char *)dbuf.mv_data+STRLENOF("maxreaders=")); + exit(EXIT_FAILURE); + } + } else { + int i; + for (i=0; dbflags[i].bit; i++) { + if (!strncmp(dbuf.mv_data, dbflags[i].name, dbflags[i].len) && + ((char *)dbuf.mv_data)[dbflags[i].len] == '=') { + flags |= dbflags[i].bit; + break; + } + } + if (!dbflags[i].bit) { + ptr = memchr(dbuf.mv_data, '=', dbuf.mv_size); + if (!ptr) { + fprintf(stderr, "%s: line %" Z "d: unexpected format\n", + prog, lineno); + exit(EXIT_FAILURE); + } else { + *ptr = '\0'; + fprintf(stderr, "%s: line %" Z "d: unrecognized keyword ignored: %s\n", + prog, lineno, (char *)dbuf.mv_data); + } + } + } + } +} + +static void badend(void) +{ + fprintf(stderr, "%s: line %" Z "d: unexpected end of input\n", + prog, lineno); +} + +static int unhex(unsigned char *c2) +{ + int x, c; + x = *c2++ & 0x4f; + if (x & 0x40) + x -= 55; + c = x << 4; + x = *c2 & 0x4f; + if (x & 0x40) + x -= 55; + c |= x; + return c; +} + +static int readline(MDB_val *out, MDB_val *buf) +{ + unsigned char *c1, *c2, *end; + size_t len; + int c; + + if (!(mode & NOHDR)) { + c = fgetc(stdin); + if (c == EOF) { + Eof = 1; + return EOF; + } + if (c != ' ') { + lineno++; + if (fgets(buf->mv_data, buf->mv_size, stdin) == NULL) { +badend: + Eof = 1; + badend(); + return EOF; + } + if (c == 'D' && !strncmp(buf->mv_data, "ATA=END", STRLENOF("ATA=END"))) + return EOF; + goto badend; + } + } + if (fgets(buf->mv_data, buf->mv_size, stdin) == NULL) { + Eof = 1; + return EOF; + } + lineno++; + + c1 = buf->mv_data; + len = strlen((char *)c1); + + /* Is buffer too short? */ + while (c1[len-1] != '\n') { + buf->mv_data = realloc(buf->mv_data, buf->mv_size*2); + if (!buf->mv_data) { + Eof = 1; + fprintf(stderr, "%s: line %" Z "d: out of memory, line too long\n", + prog, lineno); + return EOF; + } + c1 = buf->mv_data; + c1 += buf->mv_size; + if (fgets((char *)c1, buf->mv_size, stdin) == NULL) { + Eof = 1; + badend(); + return EOF; + } + buf->mv_size *= 2; + len = strlen((char *)c1); + } + c1 = c2 = buf->mv_data; + len = strlen((char *)c1); + c1[--len] = '\0'; + end = c1 + len; + + if (mode & PRINT) { + while (c2 < end) { + if (*c2 == '\\') { + if (c2[1] == '\\') { + c1++; c2 += 2; + } else { + if (c2+3 > end || !isxdigit(c2[1]) || !isxdigit(c2[2])) { + Eof = 1; + badend(); + return EOF; + } + *c1++ = unhex(++c2); + c2 += 2; + } + } else { + c1++; c2++; + } + } + } else { + /* odd length not allowed */ + if (len & 1) { + Eof = 1; + badend(); + return EOF; + } + while (c2 < end) { + if (!isxdigit(*c2) || !isxdigit(c2[1])) { + Eof = 1; + badend(); + return EOF; + } + *c1++ = unhex(c2); + c2 += 2; + } + } + c2 = out->mv_data = buf->mv_data; + out->mv_size = c1 - c2; + + return 0; +} + +static void usage(void) +{ + fprintf(stderr, "usage: %s dbpath [-V] [-f input] [-n] [-s name] [-N] [-T]\n", prog); + exit(EXIT_FAILURE); +} + +int main(int argc, char *argv[]) +{ + int i, rc; + MDB_env *env; + MDB_txn *txn; + MDB_cursor *mc; + MDB_dbi dbi; + char *envname; + int envflags = 0, putflags = 0; + int dohdr = 0; + + prog = argv[0]; + + if (argc < 2) { + usage(); + } + + /* -f: load file instead of stdin + * -n: use NOSUBDIR flag on env_open + * -s: load into named subDB + * -N: use NOOVERWRITE on puts + * -T: read plaintext + * -V: print version and exit + */ + while ((i = getopt(argc, argv, "f:ns:NTV")) != EOF) { + switch(i) { + case 'V': + printf("%s\n", MDB_VERSION_STRING); + exit(0); + break; + case 'f': + if (freopen(optarg, "r", stdin) == NULL) { + fprintf(stderr, "%s: %s: reopen: %s\n", + prog, optarg, strerror(errno)); + exit(EXIT_FAILURE); + } + break; + case 'n': + envflags |= MDB_NOSUBDIR; + break; + case 's': + subname = strdup(optarg); + break; + case 'N': + putflags = MDB_NOOVERWRITE|MDB_NODUPDATA; + break; + case 'T': + mode |= NOHDR; + break; + default: + usage(); + } + } + + if (optind != argc - 1) + usage(); + + dbuf.mv_size = 4096; + dbuf.mv_data = malloc(dbuf.mv_size); + + if (!(mode & NOHDR)) + readhdr(); + + envname = argv[optind]; + rc = mdb_env_create(&env); + if (rc) { + fprintf(stderr, "mdb_env_create failed, error %d %s\n", rc, mdb_strerror(rc)); + return EXIT_FAILURE; + } + + mdb_env_set_maxdbs(env, 2); + + if (info.me_maxreaders) + mdb_env_set_maxreaders(env, info.me_maxreaders); + + if (info.me_mapsize) + mdb_env_set_mapsize(env, info.me_mapsize); + + if (info.me_mapaddr) + envflags |= MDB_FIXEDMAP; + + rc = mdb_env_open(env, envname, envflags, 0664); + if (rc) { + fprintf(stderr, "mdb_env_open failed, error %d %s\n", rc, mdb_strerror(rc)); + goto env_close; + } + + kbuf.mv_size = mdb_env_get_maxkeysize(env) * 2 + 2; + kbuf.mv_data = malloc(kbuf.mv_size); + + while(!Eof) { + MDB_val key, data; + int batch = 0; + flags = 0; + + if (!dohdr) { + dohdr = 1; + } else if (!(mode & NOHDR)) + readhdr(); + + rc = mdb_txn_begin(env, NULL, 0, &txn); + if (rc) { + fprintf(stderr, "mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc)); + goto env_close; + } + + rc = mdb_open(txn, subname, flags|MDB_CREATE, &dbi); + if (rc) { + fprintf(stderr, "mdb_open failed, error %d %s\n", rc, mdb_strerror(rc)); + goto txn_abort; + } + + rc = mdb_cursor_open(txn, dbi, &mc); + if (rc) { + fprintf(stderr, "mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc)); + goto txn_abort; + } + + while(1) { + rc = readline(&key, &kbuf); + if (rc == EOF) + break; + if (rc) + goto txn_abort; + + rc = readline(&data, &dbuf); + if (rc) + goto txn_abort; + + rc = mdb_cursor_put(mc, &key, &data, putflags); + if (rc == MDB_KEYEXIST && putflags) + continue; + if (rc) + goto txn_abort; + batch++; + if (batch == 100) { + rc = mdb_txn_commit(txn); + if (rc) { + fprintf(stderr, "%s: line %" Z "d: txn_commit: %s\n", + prog, lineno, mdb_strerror(rc)); + goto env_close; + } + rc = mdb_txn_begin(env, NULL, 0, &txn); + if (rc) { + fprintf(stderr, "mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc)); + goto env_close; + } + rc = mdb_cursor_open(txn, dbi, &mc); + if (rc) { + fprintf(stderr, "mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc)); + goto txn_abort; + } + batch = 0; + } + } + rc = mdb_txn_commit(txn); + txn = NULL; + if (rc) { + fprintf(stderr, "%s: line %" Z "d: txn_commit: %s\n", + prog, lineno, mdb_strerror(rc)); + goto env_close; + } + mdb_dbi_close(env, dbi); + } + +txn_abort: + mdb_txn_abort(txn); +env_close: + mdb_env_close(env); + + return rc ? EXIT_FAILURE : EXIT_SUCCESS; +} diff --git a/external/db_drivers/liblmdb64/mdb_stat.1 b/external/db_drivers/liblmdb64/mdb_stat.1 new file mode 100644 index 000000000..3d8d461d9 --- /dev/null +++ b/external/db_drivers/liblmdb64/mdb_stat.1 @@ -0,0 +1,64 @@ +.TH MDB_STAT 1 "2014/06/20" "LMDB 0.9.14" +.\" Copyright 2012-2014 Howard Chu, Symas Corp. All Rights Reserved. +.\" Copying restrictions apply. See COPYRIGHT/LICENSE. +.SH NAME +mdb_stat \- LMDB environment status tool +.SH SYNOPSIS +.B mdb_stat +.BR \ envpath +[\c +.BR \-V ] +[\c +.BR \-e ] +[\c +.BR \-f [ f [ f ]]] +[\c +.BR \-n ] +[\c +.BR \-r [ r ]] +[\c +.BR \-a \ | +.BI \-s \ subdb\fR] +.SH DESCRIPTION +The +.B mdb_stat +utility displays the status of an LMDB environment. +.SH OPTIONS +.TP +.BR \-V +Write the library version number to the standard output, and exit. +.TP +.BR \-e +Display information about the database environment. +.TP +.BR \-f +Display information about the environment freelist. +If \fB\-ff\fP is given, summarize each freelist entry. +If \fB\-fff\fP is given, display the full list of page IDs in the freelist. +.TP +.BR \-n +Display the status of an LMDB database which does not use subdirectories. +.TP +.BR \-r +Display information about the environment reader table. +Shows the process ID, thread ID, and transaction ID for each active +reader slot. The process ID and transaction ID are in decimal, the +thread ID is in hexadecimal. The transaction ID is displayed as "-" +if the reader does not currently have a read transaction open. +If \fB\-rr\fP is given, check for stale entries in the reader +table and clear them. The reader table will be printed again +after the check is performed. +.TP +.BR \-a +Display the status of all of the subdatabases in the environment. +.TP +.BR \-s \ subdb +Display the status of a specific subdatabase. +.SH DIAGNOSTICS +Exit status is zero if no errors occur. +Errors result in a non-zero exit status and +a diagnostic message being written to standard error. +.SH "SEE ALSO" +.BR mdb_copy (1) +.SH AUTHOR +Howard Chu of Symas Corporation <http://www.symas.com> diff --git a/external/db_drivers/liblmdb64/mdb_stat.c b/external/db_drivers/liblmdb64/mdb_stat.c new file mode 100644 index 000000000..1e9229296 --- /dev/null +++ b/external/db_drivers/liblmdb64/mdb_stat.c @@ -0,0 +1,263 @@ +/* mdb_stat.c - memory-mapped database status tool */ +/* + * Copyright 2011-2013 Howard Chu, Symas Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>. + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include "lmdb.h" + +#ifdef _WIN32 +#define Z "I" +#else +#define Z "z" +#endif + +static void prstat(MDB_stat *ms) +{ +#if 0 + printf(" Page size: %u\n", ms->ms_psize); +#endif + printf(" Tree depth: %u\n", ms->ms_depth); + printf(" Branch pages: %"Z"u\n", ms->ms_branch_pages); + printf(" Leaf pages: %"Z"u\n", ms->ms_leaf_pages); + printf(" Overflow pages: %"Z"u\n", ms->ms_overflow_pages); + printf(" Entries: %"Z"u\n", ms->ms_entries); +} + +static void usage(char *prog) +{ + fprintf(stderr, "usage: %s dbpath [-V] [-n] [-e] [-r[r]] [-f[f[f]]] [-a|-s subdb]\n", prog); + exit(EXIT_FAILURE); +} + +int main(int argc, char *argv[]) +{ + int i, rc; + MDB_env *env; + MDB_txn *txn; + MDB_dbi dbi; + MDB_stat mst; + MDB_envinfo mei; + char *prog = argv[0]; + char *envname; + char *subname = NULL; + int alldbs = 0, envinfo = 0, envflags = 0, freinfo = 0, rdrinfo = 0; + + if (argc < 2) { + usage(prog); + } + + /* -a: print stat of main DB and all subDBs + * -s: print stat of only the named subDB + * -e: print env info + * -f: print freelist info + * -r: print reader info + * -n: use NOSUBDIR flag on env_open + * -V: print version and exit + * (default) print stat of only the main DB + */ + while ((i = getopt(argc, argv, "Vaefnrs:")) != EOF) { + switch(i) { + case 'V': + printf("%s\n", MDB_VERSION_STRING); + exit(0); + break; + case 'a': + if (subname) + usage(prog); + alldbs++; + break; + case 'e': + envinfo++; + break; + case 'f': + freinfo++; + break; + case 'n': + envflags |= MDB_NOSUBDIR; + break; + case 'r': + rdrinfo++; + break; + case 's': + if (alldbs) + usage(prog); + subname = optarg; + break; + default: + usage(prog); + } + } + + if (optind != argc - 1) + usage(prog); + + envname = argv[optind]; + rc = mdb_env_create(&env); + if (rc) { + fprintf(stderr, "mdb_env_create failed, error %d %s\n", rc, mdb_strerror(rc)); + return EXIT_FAILURE; + } + + if (alldbs || subname) { + mdb_env_set_maxdbs(env, 4); + } + + rc = mdb_env_open(env, envname, envflags | MDB_RDONLY, 0664); + if (rc) { + fprintf(stderr, "mdb_env_open failed, error %d %s\n", rc, mdb_strerror(rc)); + goto env_close; + } + + if (envinfo) { + (void)mdb_env_stat(env, &mst); + (void)mdb_env_info(env, &mei); + printf("Environment Info\n"); + printf(" Map address: %p\n", mei.me_mapaddr); + printf(" Map size: %"Z"u\n", mei.me_mapsize); + printf(" Page size: %u\n", mst.ms_psize); + printf(" Max pages: %"Z"u\n", mei.me_mapsize / mst.ms_psize); + printf(" Number of pages used: %"Z"u\n", mei.me_last_pgno+1); + printf(" Last transaction ID: %"Z"u\n", mei.me_last_txnid); + printf(" Max readers: %u\n", mei.me_maxreaders); + printf(" Number of readers used: %u\n", mei.me_numreaders); + } + + if (rdrinfo) { + printf("Reader Table Status\n"); + rc = mdb_reader_list(env, (MDB_msg_func *)fputs, stdout); + if (rdrinfo > 1) { + int dead; + mdb_reader_check(env, &dead); + printf(" %d stale readers cleared.\n", dead); + rc = mdb_reader_list(env, (MDB_msg_func *)fputs, stdout); + } + if (!(subname || alldbs || freinfo)) + goto env_close; + } + + rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn); + if (rc) { + fprintf(stderr, "mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc)); + goto env_close; + } + + if (freinfo) { + MDB_cursor *cursor; + MDB_val key, data; + size_t pages = 0, *iptr; + + printf("Freelist Status\n"); + dbi = 0; + rc = mdb_cursor_open(txn, dbi, &cursor); + if (rc) { + fprintf(stderr, "mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc)); + goto txn_abort; + } + rc = mdb_stat(txn, dbi, &mst); + if (rc) { + fprintf(stderr, "mdb_stat failed, error %d %s\n", rc, mdb_strerror(rc)); + goto txn_abort; + } + prstat(&mst); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { + iptr = data.mv_data; + pages += *iptr; + if (freinfo > 1) { + char *bad = ""; + size_t pg, prev; + ssize_t i, j, span = 0; + j = *iptr++; + for (i = j, prev = 1; --i >= 0; ) { + pg = iptr[i]; + if (pg <= prev) + bad = " [bad sequence]"; + prev = pg; + pg += span; + for (; i >= span && iptr[i-span] == pg; span++, pg++) ; + } + printf(" Transaction %"Z"u, %"Z"d pages, maxspan %"Z"d%s\n", + *(size_t *)key.mv_data, j, span, bad); + if (freinfo > 2) { + for (--j; j >= 0; ) { + pg = iptr[j]; + for (span=1; --j >= 0 && iptr[j] == pg+span; span++) ; + printf(span>1 ? " %9"Z"u[%"Z"d]\n" : " %9"Z"u\n", + pg, span); + } + } + } + } + mdb_cursor_close(cursor); + printf(" Free pages: %"Z"u\n", pages); + } + + rc = mdb_open(txn, subname, 0, &dbi); + if (rc) { + fprintf(stderr, "mdb_open failed, error %d %s\n", rc, mdb_strerror(rc)); + goto txn_abort; + } + + rc = mdb_stat(txn, dbi, &mst); + if (rc) { + fprintf(stderr, "mdb_stat failed, error %d %s\n", rc, mdb_strerror(rc)); + goto txn_abort; + } + printf("Status of %s\n", subname ? subname : "Main DB"); + prstat(&mst); + + if (alldbs) { + MDB_cursor *cursor; + MDB_val key; + + rc = mdb_cursor_open(txn, dbi, &cursor); + if (rc) { + fprintf(stderr, "mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc)); + goto txn_abort; + } + while ((rc = mdb_cursor_get(cursor, &key, NULL, MDB_NEXT_NODUP)) == 0) { + char *str; + MDB_dbi db2; + if (memchr(key.mv_data, '\0', key.mv_size)) + continue; + str = malloc(key.mv_size+1); + memcpy(str, key.mv_data, key.mv_size); + str[key.mv_size] = '\0'; + rc = mdb_open(txn, str, 0, &db2); + if (rc == MDB_SUCCESS) + printf("Status of %s\n", str); + free(str); + if (rc) continue; + rc = mdb_stat(txn, db2, &mst); + if (rc) { + fprintf(stderr, "mdb_stat failed, error %d %s\n", rc, mdb_strerror(rc)); + goto txn_abort; + } + prstat(&mst); + mdb_close(env, db2); + } + mdb_cursor_close(cursor); + } + + if (rc == MDB_NOTFOUND) + rc = MDB_SUCCESS; + + mdb_close(env, dbi); +txn_abort: + mdb_txn_abort(txn); +env_close: + mdb_env_close(env); + + return rc ? EXIT_FAILURE : EXIT_SUCCESS; +} diff --git a/external/db_drivers/liblmdb64/midl.c b/external/db_drivers/liblmdb64/midl.c new file mode 100644 index 000000000..88a3aff10 --- /dev/null +++ b/external/db_drivers/liblmdb64/midl.c @@ -0,0 +1,360 @@ +/** @file midl.c + * @brief ldap bdb back-end ID List functions */ +/* $OpenLDAP$ */ +/* This work is part of OpenLDAP Software <http://www.openldap.org/>. + * + * Copyright 2000-2014 The OpenLDAP Foundation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>. + */ + +#include <limits.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <sys/types.h> +#include "midl.h" + +/** @defgroup internal LMDB Internals + * @{ + */ +/** @defgroup idls ID List Management + * @{ + */ +#define CMP(x,y) ( (x) < (y) ? -1 : (x) > (y) ) + +unsigned mdb_midl_search( MDB_IDL ids, MDB_ID id ) +{ + /* + * binary search of id in ids + * if found, returns position of id + * if not found, returns first position greater than id + */ + unsigned base = 0; + unsigned cursor = 1; + int val = 0; + unsigned n = ids[0]; + + while( 0 < n ) { + unsigned pivot = n >> 1; + cursor = base + pivot + 1; + val = CMP( ids[cursor], id ); + + if( val < 0 ) { + n = pivot; + + } else if ( val > 0 ) { + base = cursor; + n -= pivot + 1; + + } else { + return cursor; + } + } + + if( val > 0 ) { + ++cursor; + } + return cursor; +} + +#if 0 /* superseded by append/sort */ +int mdb_midl_insert( MDB_IDL ids, MDB_ID id ) +{ + unsigned x, i; + + x = mdb_midl_search( ids, id ); + assert( x > 0 ); + + if( x < 1 ) { + /* internal error */ + return -2; + } + + if ( x <= ids[0] && ids[x] == id ) { + /* duplicate */ + assert(0); + return -1; + } + + if ( ++ids[0] >= MDB_IDL_DB_MAX ) { + /* no room */ + --ids[0]; + return -2; + + } else { + /* insert id */ + for (i=ids[0]; i>x; i--) + ids[i] = ids[i-1]; + ids[x] = id; + } + + return 0; +} +#endif + +MDB_IDL mdb_midl_alloc(int num) +{ + MDB_IDL ids = malloc((num+2) * sizeof(MDB_ID)); + if (ids) { + *ids++ = num; + *ids = 0; + } + return ids; +} + +void mdb_midl_free(MDB_IDL ids) +{ + if (ids) + free(ids-1); +} + +int mdb_midl_shrink( MDB_IDL *idp ) +{ + MDB_IDL ids = *idp; + if (*(--ids) > MDB_IDL_UM_MAX && + (ids = realloc(ids, (MDB_IDL_UM_MAX+1) * sizeof(MDB_ID)))) + { + *ids++ = MDB_IDL_UM_MAX; + *idp = ids; + return 1; + } + return 0; +} + +static int mdb_midl_grow( MDB_IDL *idp, int num ) +{ + MDB_IDL idn = *idp-1; + /* grow it */ + idn = realloc(idn, (*idn + num + 2) * sizeof(MDB_ID)); + if (!idn) + return ENOMEM; + *idn++ += num; + *idp = idn; + return 0; +} + +int mdb_midl_need( MDB_IDL *idp, unsigned num ) +{ + MDB_IDL ids = *idp; + num += ids[0]; + if (num > ids[-1]) { + num = (num + num/4 + (256 + 2)) & -256; + if (!(ids = realloc(ids-1, num * sizeof(MDB_ID)))) + return ENOMEM; + *ids++ = num - 2; + *idp = ids; + } + return 0; +} + +int mdb_midl_append( MDB_IDL *idp, MDB_ID id ) +{ + MDB_IDL ids = *idp; + /* Too big? */ + if (ids[0] >= ids[-1]) { + if (mdb_midl_grow(idp, MDB_IDL_UM_MAX)) + return ENOMEM; + ids = *idp; + } + ids[0]++; + ids[ids[0]] = id; + return 0; +} + +int mdb_midl_append_list( MDB_IDL *idp, MDB_IDL app ) +{ + MDB_IDL ids = *idp; + /* Too big? */ + if (ids[0] + app[0] >= ids[-1]) { + if (mdb_midl_grow(idp, app[0])) + return ENOMEM; + ids = *idp; + } + memcpy(&ids[ids[0]+1], &app[1], app[0] * sizeof(MDB_ID)); + ids[0] += app[0]; + return 0; +} + +int mdb_midl_append_range( MDB_IDL *idp, MDB_ID id, unsigned n ) +{ + MDB_ID *ids = *idp, len = ids[0]; + /* Too big? */ + if (len + n > ids[-1]) { + if (mdb_midl_grow(idp, n | MDB_IDL_UM_MAX)) + return ENOMEM; + ids = *idp; + } + ids[0] = len + n; + ids += len; + while (n) + ids[n--] = id++; + return 0; +} + +void mdb_midl_xmerge( MDB_IDL idl, MDB_IDL merge ) +{ + MDB_ID old_id, merge_id, i = merge[0], j = idl[0], k = i+j, total = k; + idl[0] = (MDB_ID)-1; /* delimiter for idl scan below */ + old_id = idl[j]; + while (i) { + merge_id = merge[i--]; + for (; old_id < merge_id; old_id = idl[--j]) + idl[k--] = old_id; + idl[k--] = merge_id; + } + idl[0] = total; +} + +/* Quicksort + Insertion sort for small arrays */ + +#define SMALL 8 +#define MIDL_SWAP(a,b) { itmp=(a); (a)=(b); (b)=itmp; } + +void +mdb_midl_sort( MDB_IDL ids ) +{ + /* Max possible depth of int-indexed tree * 2 items/level */ + int istack[sizeof(int)*CHAR_BIT * 2]; + int i,j,k,l,ir,jstack; + MDB_ID a, itmp; + + ir = (int)ids[0]; + l = 1; + jstack = 0; + for(;;) { + if (ir - l < SMALL) { /* Insertion sort */ + for (j=l+1;j<=ir;j++) { + a = ids[j]; + for (i=j-1;i>=1;i--) { + if (ids[i] >= a) break; + ids[i+1] = ids[i]; + } + ids[i+1] = a; + } + if (jstack == 0) break; + ir = istack[jstack--]; + l = istack[jstack--]; + } else { + k = (l + ir) >> 1; /* Choose median of left, center, right */ + MIDL_SWAP(ids[k], ids[l+1]); + if (ids[l] < ids[ir]) { + MIDL_SWAP(ids[l], ids[ir]); + } + if (ids[l+1] < ids[ir]) { + MIDL_SWAP(ids[l+1], ids[ir]); + } + if (ids[l] < ids[l+1]) { + MIDL_SWAP(ids[l], ids[l+1]); + } + i = l+1; + j = ir; + a = ids[l+1]; + for(;;) { + do i++; while(ids[i] > a); + do j--; while(ids[j] < a); + if (j < i) break; + MIDL_SWAP(ids[i],ids[j]); + } + ids[l+1] = ids[j]; + ids[j] = a; + jstack += 2; + if (ir-i+1 >= j-l) { + istack[jstack] = ir; + istack[jstack-1] = i; + ir = j-1; + } else { + istack[jstack] = j-1; + istack[jstack-1] = l; + l = i; + } + } + } +} + +unsigned mdb_mid2l_search( MDB_ID2L ids, MDB_ID id ) +{ + /* + * binary search of id in ids + * if found, returns position of id + * if not found, returns first position greater than id + */ + unsigned base = 0; + unsigned cursor = 1; + int val = 0; + unsigned n = (unsigned)ids[0].mid; + + while( 0 < n ) { + unsigned pivot = n >> 1; + cursor = base + pivot + 1; + val = CMP( id, ids[cursor].mid ); + + if( val < 0 ) { + n = pivot; + + } else if ( val > 0 ) { + base = cursor; + n -= pivot + 1; + + } else { + return cursor; + } + } + + if( val > 0 ) { + ++cursor; + } + return cursor; +} + +int mdb_mid2l_insert( MDB_ID2L ids, MDB_ID2 *id ) +{ + unsigned x, i; + + x = mdb_mid2l_search( ids, id->mid ); + + if( x < 1 ) { + /* internal error */ + return -2; + } + + if ( x <= ids[0].mid && ids[x].mid == id->mid ) { + /* duplicate */ + return -1; + } + + if ( ids[0].mid >= MDB_IDL_UM_MAX ) { + /* too big */ + return -2; + + } else { + /* insert id */ + ids[0].mid++; + for (i=(unsigned)ids[0].mid; i>x; i--) + ids[i] = ids[i-1]; + ids[x] = *id; + } + + return 0; +} + +int mdb_mid2l_append( MDB_ID2L ids, MDB_ID2 *id ) +{ + /* Too big? */ + if (ids[0].mid >= MDB_IDL_UM_MAX) { + return -2; + } + ids[0].mid++; + ids[ids[0].mid] = *id; + return 0; +} + +/** @} */ +/** @} */ diff --git a/external/db_drivers/liblmdb64/midl.h b/external/db_drivers/liblmdb64/midl.h new file mode 100644 index 000000000..a7f25026c --- /dev/null +++ b/external/db_drivers/liblmdb64/midl.h @@ -0,0 +1,186 @@ +/** @file midl.h + * @brief LMDB ID List header file. + * + * This file was originally part of back-bdb but has been + * modified for use in libmdb. Most of the macros defined + * in this file are unused, just left over from the original. + * + * This file is only used internally in libmdb and its definitions + * are not exposed publicly. + */ +/* $OpenLDAP$ */ +/* This work is part of OpenLDAP Software <http://www.openldap.org/>. + * + * Copyright 2000-2014 The OpenLDAP Foundation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>. + */ + +#ifndef _MDB_MIDL_H_ +#define _MDB_MIDL_H_ + +#include <stddef.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** @defgroup internal LMDB Internals + * @{ + */ + +/** @defgroup idls ID List Management + * @{ + */ + /** A generic unsigned ID number. These were entryIDs in back-bdb. + * Preferably it should have the same size as a pointer. + */ +typedef size_t MDB_ID; + + /** An IDL is an ID List, a sorted array of IDs. The first + * element of the array is a counter for how many actual + * IDs are in the list. In the original back-bdb code, IDLs are + * sorted in ascending order. For libmdb IDLs are sorted in + * descending order. + */ +typedef MDB_ID *MDB_IDL; + +/* IDL sizes - likely should be even bigger + * limiting factors: sizeof(ID), thread stack size + */ +#define MDB_IDL_LOGN 16 /* DB_SIZE is 2^16, UM_SIZE is 2^17 */ +#define MDB_IDL_DB_SIZE (1<<MDB_IDL_LOGN) +#define MDB_IDL_UM_SIZE (1<<(MDB_IDL_LOGN+1)) + +#define MDB_IDL_DB_MAX (MDB_IDL_DB_SIZE-1) +#define MDB_IDL_UM_MAX (MDB_IDL_UM_SIZE-1) + +#define MDB_IDL_SIZEOF(ids) (((ids)[0]+1) * sizeof(MDB_ID)) +#define MDB_IDL_IS_ZERO(ids) ( (ids)[0] == 0 ) +#define MDB_IDL_CPY( dst, src ) (memcpy( dst, src, MDB_IDL_SIZEOF( src ) )) +#define MDB_IDL_FIRST( ids ) ( (ids)[1] ) +#define MDB_IDL_LAST( ids ) ( (ids)[(ids)[0]] ) + + /** Current max length of an #mdb_midl_alloc()ed IDL */ +#define MDB_IDL_ALLOCLEN( ids ) ( (ids)[-1] ) + + /** Append ID to IDL. The IDL must be big enough. */ +#define mdb_midl_xappend(idl, id) do { \ + MDB_ID *xidl = (idl), xlen = ++(xidl[0]); \ + xidl[xlen] = (id); \ + } while (0) + + /** Search for an ID in an IDL. + * @param[in] ids The IDL to search. + * @param[in] id The ID to search for. + * @return The index of the first ID greater than or equal to \b id. + */ +unsigned mdb_midl_search( MDB_IDL ids, MDB_ID id ); + + /** Allocate an IDL. + * Allocates memory for an IDL of the given size. + * @return IDL on success, NULL on failure. + */ +MDB_IDL mdb_midl_alloc(int num); + + /** Free an IDL. + * @param[in] ids The IDL to free. + */ +void mdb_midl_free(MDB_IDL ids); + + /** Shrink an IDL. + * Return the IDL to the default size if it has grown larger. + * @param[in,out] idp Address of the IDL to shrink. + * @return 0 on no change, non-zero if shrunk. + */ +int mdb_midl_shrink(MDB_IDL *idp); + + /** Make room for num additional elements in an IDL. + * @param[in,out] idp Address of the IDL. + * @param[in] num Number of elements to make room for. + * @return 0 on success, ENOMEM on failure. + */ +int mdb_midl_need(MDB_IDL *idp, unsigned num); + + /** Append an ID onto an IDL. + * @param[in,out] idp Address of the IDL to append to. + * @param[in] id The ID to append. + * @return 0 on success, ENOMEM if the IDL is too large. + */ +int mdb_midl_append( MDB_IDL *idp, MDB_ID id ); + + /** Append an IDL onto an IDL. + * @param[in,out] idp Address of the IDL to append to. + * @param[in] app The IDL to append. + * @return 0 on success, ENOMEM if the IDL is too large. + */ +int mdb_midl_append_list( MDB_IDL *idp, MDB_IDL app ); + + /** Append an ID range onto an IDL. + * @param[in,out] idp Address of the IDL to append to. + * @param[in] id The lowest ID to append. + * @param[in] n Number of IDs to append. + * @return 0 on success, ENOMEM if the IDL is too large. + */ +int mdb_midl_append_range( MDB_IDL *idp, MDB_ID id, unsigned n ); + + /** Merge an IDL onto an IDL. The destination IDL must be big enough. + * @param[in] idl The IDL to merge into. + * @param[in] merge The IDL to merge. + */ +void mdb_midl_xmerge( MDB_IDL idl, MDB_IDL merge ); + + /** Sort an IDL. + * @param[in,out] ids The IDL to sort. + */ +void mdb_midl_sort( MDB_IDL ids ); + + /** An ID2 is an ID/pointer pair. + */ +typedef struct MDB_ID2 { + MDB_ID mid; /**< The ID */ + void *mptr; /**< The pointer */ +} MDB_ID2; + + /** An ID2L is an ID2 List, a sorted array of ID2s. + * The first element's \b mid member is a count of how many actual + * elements are in the array. The \b mptr member of the first element is unused. + * The array is sorted in ascending order by \b mid. + */ +typedef MDB_ID2 *MDB_ID2L; + + /** Search for an ID in an ID2L. + * @param[in] ids The ID2L to search. + * @param[in] id The ID to search for. + * @return The index of the first ID2 whose \b mid member is greater than or equal to \b id. + */ +unsigned mdb_mid2l_search( MDB_ID2L ids, MDB_ID id ); + + + /** Insert an ID2 into a ID2L. + * @param[in,out] ids The ID2L to insert into. + * @param[in] id The ID2 to insert. + * @return 0 on success, -1 if the ID was already present in the ID2L. + */ +int mdb_mid2l_insert( MDB_ID2L ids, MDB_ID2 *id ); + + /** Append an ID2 into a ID2L. + * @param[in,out] ids The ID2L to append into. + * @param[in] id The ID2 to append. + * @return 0 on success, -2 if the ID2L is too big. + */ +int mdb_mid2l_append( MDB_ID2L ids, MDB_ID2 *id ); + +/** @} */ +/** @} */ +#ifdef __cplusplus +} +#endif +#endif /* _MDB_MIDL_H_ */ diff --git a/external/db_drivers/liblmdb64/mtest.c b/external/db_drivers/liblmdb64/mtest.c new file mode 100644 index 000000000..79b4175e7 --- /dev/null +++ b/external/db_drivers/liblmdb64/mtest.c @@ -0,0 +1,173 @@ +/* mtest.c - memory-mapped database tester/toy */ +/* + * Copyright 2011-2014 Howard Chu, Symas Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>. + */ +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include "lmdb.h" + +#define E(expr) CHECK((rc = (expr)) == MDB_SUCCESS, #expr) +#define RES(err, expr) ((rc = expr) == (err) || (CHECK(!rc, #expr), 0)) +#define CHECK(test, msg) ((test) ? (void)0 : ((void)fprintf(stderr, \ + "%s:%d: %s: %s\n", __FILE__, __LINE__, msg, mdb_strerror(rc)), abort())) + +int main(int argc,char * argv[]) +{ + int i = 0, j = 0, rc; + MDB_env *env; + MDB_dbi dbi; + MDB_val key, data; + MDB_txn *txn; + MDB_stat mst; + MDB_cursor *cursor, *cur2; + MDB_cursor_op op; + int count; + int *values; + char sval[32] = ""; + + srand(time(NULL)); + + count = (rand()%384) + 64; + values = (int *)malloc(count*sizeof(int)); + + for(i = 0;i<count;i++) { + values[i] = rand()%1024; + } + + E(mdb_env_create(&env)); + E(mdb_env_set_mapsize(env, 10485760)); + E(mdb_env_open(env, "./testdb", MDB_FIXEDMAP /*|MDB_NOSYNC*/, 0664)); + E(mdb_txn_begin(env, NULL, 0, &txn)); + E(mdb_open(txn, NULL, 0, &dbi)); + + key.mv_size = sizeof(int); + key.mv_data = sval; + data.mv_size = sizeof(sval); + data.mv_data = sval; + + printf("Adding %d values\n", count); + for (i=0;i<count;i++) { + sprintf(sval, "%03x %d foo bar", values[i], values[i]); + if (RES(MDB_KEYEXIST, mdb_put(txn, dbi, &key, &data, MDB_NOOVERWRITE))) { + j++; + data.mv_size = sizeof(sval); + data.mv_data = sval; + } + } + if (j) printf("%d duplicates skipped\n", j); + E(mdb_txn_commit(txn)); + E(mdb_env_stat(env, &mst)); + + E(mdb_txn_begin(env, NULL, 1, &txn)); + E(mdb_cursor_open(txn, dbi, &cursor)); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { + printf("key: %p %.*s, data: %p %.*s\n", + key.mv_data, (int) key.mv_size, (char *) key.mv_data, + data.mv_data, (int) data.mv_size, (char *) data.mv_data); + } + CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get"); + mdb_cursor_close(cursor); + mdb_txn_abort(txn); + + j=0; + key.mv_data = sval; + for (i= count - 1; i > -1; i-= (rand()%5)) { + j++; + txn=NULL; + E(mdb_txn_begin(env, NULL, 0, &txn)); + sprintf(sval, "%03x ", values[i]); + if (RES(MDB_NOTFOUND, mdb_del(txn, dbi, &key, NULL))) { + j--; + mdb_txn_abort(txn); + } else { + E(mdb_txn_commit(txn)); + } + } + free(values); + printf("Deleted %d values\n", j); + + E(mdb_env_stat(env, &mst)); + E(mdb_txn_begin(env, NULL, 1, &txn)); + E(mdb_cursor_open(txn, dbi, &cursor)); + printf("Cursor next\n"); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + } + CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get"); + printf("Cursor last\n"); + E(mdb_cursor_get(cursor, &key, &data, MDB_LAST)); + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + printf("Cursor prev\n"); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_PREV)) == 0) { + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + } + CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get"); + printf("Cursor last/prev\n"); + E(mdb_cursor_get(cursor, &key, &data, MDB_LAST)); + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + E(mdb_cursor_get(cursor, &key, &data, MDB_PREV)); + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + + mdb_txn_abort(txn); + + printf("Deleting with cursor\n"); + E(mdb_txn_begin(env, NULL, 0, &txn)); + E(mdb_cursor_open(txn, dbi, &cur2)); + for (i=0; i<50; i++) { + if (RES(MDB_NOTFOUND, mdb_cursor_get(cur2, &key, &data, MDB_NEXT))) + break; + printf("key: %p %.*s, data: %p %.*s\n", + key.mv_data, (int) key.mv_size, (char *) key.mv_data, + data.mv_data, (int) data.mv_size, (char *) data.mv_data); + E(mdb_del(txn, dbi, &key, NULL)); + } + + printf("Restarting cursor in txn\n"); + for (op=MDB_FIRST, i=0; i<=32; op=MDB_NEXT, i++) { + if (RES(MDB_NOTFOUND, mdb_cursor_get(cur2, &key, &data, op))) + break; + printf("key: %p %.*s, data: %p %.*s\n", + key.mv_data, (int) key.mv_size, (char *) key.mv_data, + data.mv_data, (int) data.mv_size, (char *) data.mv_data); + } + mdb_cursor_close(cur2); + E(mdb_txn_commit(txn)); + + printf("Restarting cursor outside txn\n"); + E(mdb_txn_begin(env, NULL, 0, &txn)); + E(mdb_cursor_open(txn, dbi, &cursor)); + for (op=MDB_FIRST, i=0; i<=32; op=MDB_NEXT, i++) { + if (RES(MDB_NOTFOUND, mdb_cursor_get(cursor, &key, &data, op))) + break; + printf("key: %p %.*s, data: %p %.*s\n", + key.mv_data, (int) key.mv_size, (char *) key.mv_data, + data.mv_data, (int) data.mv_size, (char *) data.mv_data); + } + mdb_cursor_close(cursor); + mdb_close(env, dbi); + + mdb_txn_abort(txn); + mdb_env_close(env); + + return 0; +} diff --git a/external/db_drivers/liblmdb64/mtest2.c b/external/db_drivers/liblmdb64/mtest2.c new file mode 100644 index 000000000..f1a3dbd6c --- /dev/null +++ b/external/db_drivers/liblmdb64/mtest2.c @@ -0,0 +1,123 @@ +/* mtest2.c - memory-mapped database tester/toy */ +/* + * Copyright 2011-2014 Howard Chu, Symas Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>. + */ + +/* Just like mtest.c, but using a subDB instead of the main DB */ + +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include "lmdb.h" + +#define E(expr) CHECK((rc = (expr)) == MDB_SUCCESS, #expr) +#define RES(err, expr) ((rc = expr) == (err) || (CHECK(!rc, #expr), 0)) +#define CHECK(test, msg) ((test) ? (void)0 : ((void)fprintf(stderr, \ + "%s:%d: %s: %s\n", __FILE__, __LINE__, msg, mdb_strerror(rc)), abort())) + +int main(int argc,char * argv[]) +{ + int i = 0, j = 0, rc; + MDB_env *env; + MDB_dbi dbi; + MDB_val key, data; + MDB_txn *txn; + MDB_stat mst; + MDB_cursor *cursor; + int count; + int *values; + char sval[32] = ""; + + srand(time(NULL)); + + count = (rand()%384) + 64; + values = (int *)malloc(count*sizeof(int)); + + for(i = 0;i<count;i++) { + values[i] = rand()%1024; + } + + E(mdb_env_create(&env)); + E(mdb_env_set_mapsize(env, 10485760)); + E(mdb_env_set_maxdbs(env, 4)); + E(mdb_env_open(env, "./testdb", MDB_FIXEDMAP|MDB_NOSYNC, 0664)); + E(mdb_txn_begin(env, NULL, 0, &txn)); + E(mdb_open(txn, "id1", MDB_CREATE, &dbi)); + + key.mv_size = sizeof(int); + key.mv_data = sval; + data.mv_size = sizeof(sval); + data.mv_data = sval; + + printf("Adding %d values\n", count); + for (i=0;i<count;i++) { + sprintf(sval, "%03x %d foo bar", values[i], values[i]); + if (RES(MDB_KEYEXIST, mdb_put(txn, dbi, &key, &data, MDB_NOOVERWRITE))) + j++; + } + if (j) printf("%d duplicates skipped\n", j); + E(mdb_txn_commit(txn)); + E(mdb_env_stat(env, &mst)); + + E(mdb_txn_begin(env, NULL, 1, &txn)); + E(mdb_cursor_open(txn, dbi, &cursor)); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { + printf("key: %p %.*s, data: %p %.*s\n", + key.mv_data, (int) key.mv_size, (char *) key.mv_data, + data.mv_data, (int) data.mv_size, (char *) data.mv_data); + } + CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get"); + mdb_cursor_close(cursor); + mdb_txn_abort(txn); + + j=0; + key.mv_data = sval; + for (i= count - 1; i > -1; i-= (rand()%5)) { + j++; + txn=NULL; + E(mdb_txn_begin(env, NULL, 0, &txn)); + sprintf(sval, "%03x ", values[i]); + if (RES(MDB_NOTFOUND, mdb_del(txn, dbi, &key, NULL))) { + j--; + mdb_txn_abort(txn); + } else { + E(mdb_txn_commit(txn)); + } + } + free(values); + printf("Deleted %d values\n", j); + + E(mdb_env_stat(env, &mst)); + E(mdb_txn_begin(env, NULL, 1, &txn)); + E(mdb_cursor_open(txn, dbi, &cursor)); + printf("Cursor next\n"); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + } + CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get"); + printf("Cursor prev\n"); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_PREV)) == 0) { + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + } + CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get"); + mdb_cursor_close(cursor); + mdb_close(env, dbi); + + mdb_txn_abort(txn); + mdb_env_close(env); + + return 0; +} diff --git a/external/db_drivers/liblmdb64/mtest3.c b/external/db_drivers/liblmdb64/mtest3.c new file mode 100644 index 000000000..f705c52de --- /dev/null +++ b/external/db_drivers/liblmdb64/mtest3.c @@ -0,0 +1,133 @@ +/* mtest3.c - memory-mapped database tester/toy */ +/* + * Copyright 2011-2014 Howard Chu, Symas Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>. + */ + +/* Tests for sorted duplicate DBs */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include "lmdb.h" + +#define E(expr) CHECK((rc = (expr)) == MDB_SUCCESS, #expr) +#define RES(err, expr) ((rc = expr) == (err) || (CHECK(!rc, #expr), 0)) +#define CHECK(test, msg) ((test) ? (void)0 : ((void)fprintf(stderr, \ + "%s:%d: %s: %s\n", __FILE__, __LINE__, msg, mdb_strerror(rc)), abort())) + +int main(int argc,char * argv[]) +{ + int i = 0, j = 0, rc; + MDB_env *env; + MDB_dbi dbi; + MDB_val key, data; + MDB_txn *txn; + MDB_stat mst; + MDB_cursor *cursor; + int count; + int *values; + char sval[32]; + char kval[sizeof(int)]; + + srand(time(NULL)); + + memset(sval, 0, sizeof(sval)); + + count = (rand()%384) + 64; + values = (int *)malloc(count*sizeof(int)); + + for(i = 0;i<count;i++) { + values[i] = rand()%1024; + } + + E(mdb_env_create(&env)); + E(mdb_env_set_mapsize(env, 10485760)); + E(mdb_env_set_maxdbs(env, 4)); + E(mdb_env_open(env, "./testdb", MDB_FIXEDMAP|MDB_NOSYNC, 0664)); + E(mdb_txn_begin(env, NULL, 0, &txn)); + E(mdb_open(txn, "id2", MDB_CREATE|MDB_DUPSORT, &dbi)); + + key.mv_size = sizeof(int); + key.mv_data = kval; + data.mv_size = sizeof(sval); + data.mv_data = sval; + + printf("Adding %d values\n", count); + for (i=0;i<count;i++) { + if (!(i & 0x0f)) + sprintf(kval, "%03x", values[i]); + sprintf(sval, "%03x %d foo bar", values[i], values[i]); + if (RES(MDB_KEYEXIST, mdb_put(txn, dbi, &key, &data, MDB_NODUPDATA))) + j++; + } + if (j) printf("%d duplicates skipped\n", j); + E(mdb_txn_commit(txn)); + E(mdb_env_stat(env, &mst)); + + E(mdb_txn_begin(env, NULL, 1, &txn)); + E(mdb_cursor_open(txn, dbi, &cursor)); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { + printf("key: %p %.*s, data: %p %.*s\n", + key.mv_data, (int) key.mv_size, (char *) key.mv_data, + data.mv_data, (int) data.mv_size, (char *) data.mv_data); + } + CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get"); + mdb_cursor_close(cursor); + mdb_txn_abort(txn); + + j=0; + + for (i= count - 1; i > -1; i-= (rand()%5)) { + j++; + txn=NULL; + E(mdb_txn_begin(env, NULL, 0, &txn)); + sprintf(kval, "%03x", values[i & ~0x0f]); + sprintf(sval, "%03x %d foo bar", values[i], values[i]); + key.mv_size = sizeof(int); + key.mv_data = kval; + data.mv_size = sizeof(sval); + data.mv_data = sval; + if (RES(MDB_NOTFOUND, mdb_del(txn, dbi, &key, &data))) { + j--; + mdb_txn_abort(txn); + } else { + E(mdb_txn_commit(txn)); + } + } + free(values); + printf("Deleted %d values\n", j); + + E(mdb_env_stat(env, &mst)); + E(mdb_txn_begin(env, NULL, 1, &txn)); + E(mdb_cursor_open(txn, dbi, &cursor)); + printf("Cursor next\n"); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + } + CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get"); + printf("Cursor prev\n"); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_PREV)) == 0) { + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + } + CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get"); + mdb_cursor_close(cursor); + mdb_close(env, dbi); + + mdb_txn_abort(txn); + mdb_env_close(env); + + return 0; +} diff --git a/external/db_drivers/liblmdb64/mtest4.c b/external/db_drivers/liblmdb64/mtest4.c new file mode 100644 index 000000000..da5a95304 --- /dev/null +++ b/external/db_drivers/liblmdb64/mtest4.c @@ -0,0 +1,168 @@ +/* mtest4.c - memory-mapped database tester/toy */ +/* + * Copyright 2011-2014 Howard Chu, Symas Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>. + */ + +/* Tests for sorted duplicate DBs with fixed-size keys */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include "lmdb.h" + +#define E(expr) CHECK((rc = (expr)) == MDB_SUCCESS, #expr) +#define RES(err, expr) ((rc = expr) == (err) || (CHECK(!rc, #expr), 0)) +#define CHECK(test, msg) ((test) ? (void)0 : ((void)fprintf(stderr, \ + "%s:%d: %s: %s\n", __FILE__, __LINE__, msg, mdb_strerror(rc)), abort())) + +int main(int argc,char * argv[]) +{ + int i = 0, j = 0, rc; + MDB_env *env; + MDB_dbi dbi; + MDB_val key, data; + MDB_txn *txn; + MDB_stat mst; + MDB_cursor *cursor; + int count; + int *values; + char sval[8]; + char kval[sizeof(int)]; + + memset(sval, 0, sizeof(sval)); + + count = 510; + values = (int *)malloc(count*sizeof(int)); + + for(i = 0;i<count;i++) { + values[i] = i*5; + } + + E(mdb_env_create(&env)); + E(mdb_env_set_mapsize(env, 10485760)); + E(mdb_env_set_maxdbs(env, 4)); + E(mdb_env_open(env, "./testdb", MDB_FIXEDMAP|MDB_NOSYNC, 0664)); + E(mdb_txn_begin(env, NULL, 0, &txn)); + E(mdb_open(txn, "id4", MDB_CREATE|MDB_DUPSORT|MDB_DUPFIXED, &dbi)); + + key.mv_size = sizeof(int); + key.mv_data = kval; + data.mv_size = sizeof(sval); + data.mv_data = sval; + + printf("Adding %d values\n", count); + strcpy(kval, "001"); + for (i=0;i<count;i++) { + sprintf(sval, "%07x", values[i]); + if (RES(MDB_KEYEXIST, mdb_put(txn, dbi, &key, &data, MDB_NODUPDATA))) + j++; + } + if (j) printf("%d duplicates skipped\n", j); + E(mdb_txn_commit(txn)); + E(mdb_env_stat(env, &mst)); + + /* there should be one full page of dups now. + */ + E(mdb_txn_begin(env, NULL, 1, &txn)); + E(mdb_cursor_open(txn, dbi, &cursor)); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { + printf("key: %p %.*s, data: %p %.*s\n", + key.mv_data, (int) key.mv_size, (char *) key.mv_data, + data.mv_data, (int) data.mv_size, (char *) data.mv_data); + } + CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get"); + mdb_cursor_close(cursor); + mdb_txn_abort(txn); + + /* test all 3 branches of split code: + * 1: new key in lower half + * 2: new key at split point + * 3: new key in upper half + */ + + key.mv_size = sizeof(int); + key.mv_data = kval; + data.mv_size = sizeof(sval); + data.mv_data = sval; + + sprintf(sval, "%07x", values[3]+1); + E(mdb_txn_begin(env, NULL, 0, &txn)); + (void)RES(MDB_KEYEXIST, mdb_put(txn, dbi, &key, &data, MDB_NODUPDATA)); + mdb_txn_abort(txn); + + sprintf(sval, "%07x", values[255]+1); + E(mdb_txn_begin(env, NULL, 0, &txn)); + (void)RES(MDB_KEYEXIST, mdb_put(txn, dbi, &key, &data, MDB_NODUPDATA)); + mdb_txn_abort(txn); + + sprintf(sval, "%07x", values[500]+1); + E(mdb_txn_begin(env, NULL, 0, &txn)); + (void)RES(MDB_KEYEXIST, mdb_put(txn, dbi, &key, &data, MDB_NODUPDATA)); + E(mdb_txn_commit(txn)); + + /* Try MDB_NEXT_MULTIPLE */ + E(mdb_txn_begin(env, NULL, 0, &txn)); + E(mdb_cursor_open(txn, dbi, &cursor)); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT_MULTIPLE)) == 0) { + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + } + CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get"); + mdb_cursor_close(cursor); + mdb_txn_abort(txn); + j=0; + + for (i= count - 1; i > -1; i-= (rand()%3)) { + j++; + txn=NULL; + E(mdb_txn_begin(env, NULL, 0, &txn)); + sprintf(sval, "%07x", values[i]); + key.mv_size = sizeof(int); + key.mv_data = kval; + data.mv_size = sizeof(sval); + data.mv_data = sval; + if (RES(MDB_NOTFOUND, mdb_del(txn, dbi, &key, &data))) { + j--; + mdb_txn_abort(txn); + } else { + E(mdb_txn_commit(txn)); + } + } + free(values); + printf("Deleted %d values\n", j); + + E(mdb_env_stat(env, &mst)); + E(mdb_txn_begin(env, NULL, 1, &txn)); + E(mdb_cursor_open(txn, dbi, &cursor)); + printf("Cursor next\n"); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + } + CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get"); + printf("Cursor prev\n"); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_PREV)) == 0) { + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + } + CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get"); + mdb_cursor_close(cursor); + mdb_close(env, dbi); + + mdb_txn_abort(txn); + mdb_env_close(env); + + return 0; +} diff --git a/external/db_drivers/liblmdb64/mtest5.c b/external/db_drivers/liblmdb64/mtest5.c new file mode 100644 index 000000000..39a8c728a --- /dev/null +++ b/external/db_drivers/liblmdb64/mtest5.c @@ -0,0 +1,135 @@ +/* mtest5.c - memory-mapped database tester/toy */ +/* + * Copyright 2011-2014 Howard Chu, Symas Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>. + */ + +/* Tests for sorted duplicate DBs using cursor_put */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include "lmdb.h" + +#define E(expr) CHECK((rc = (expr)) == MDB_SUCCESS, #expr) +#define RES(err, expr) ((rc = expr) == (err) || (CHECK(!rc, #expr), 0)) +#define CHECK(test, msg) ((test) ? (void)0 : ((void)fprintf(stderr, \ + "%s:%d: %s: %s\n", __FILE__, __LINE__, msg, mdb_strerror(rc)), abort())) + +int main(int argc,char * argv[]) +{ + int i = 0, j = 0, rc; + MDB_env *env; + MDB_dbi dbi; + MDB_val key, data; + MDB_txn *txn; + MDB_stat mst; + MDB_cursor *cursor; + int count; + int *values; + char sval[32]; + char kval[sizeof(int)]; + + srand(time(NULL)); + + memset(sval, 0, sizeof(sval)); + + count = (rand()%384) + 64; + values = (int *)malloc(count*sizeof(int)); + + for(i = 0;i<count;i++) { + values[i] = rand()%1024; + } + + E(mdb_env_create(&env)); + E(mdb_env_set_mapsize(env, 10485760)); + E(mdb_env_set_maxdbs(env, 4)); + E(mdb_env_open(env, "./testdb", MDB_FIXEDMAP|MDB_NOSYNC, 0664)); + E(mdb_txn_begin(env, NULL, 0, &txn)); + E(mdb_open(txn, "id2", MDB_CREATE|MDB_DUPSORT, &dbi)); + E(mdb_cursor_open(txn, dbi, &cursor)); + + key.mv_size = sizeof(int); + key.mv_data = kval; + data.mv_size = sizeof(sval); + data.mv_data = sval; + + printf("Adding %d values\n", count); + for (i=0;i<count;i++) { + if (!(i & 0x0f)) + sprintf(kval, "%03x", values[i]); + sprintf(sval, "%03x %d foo bar", values[i], values[i]); + if (RES(MDB_KEYEXIST, mdb_cursor_put(cursor, &key, &data, MDB_NODUPDATA))) + j++; + } + if (j) printf("%d duplicates skipped\n", j); + mdb_cursor_close(cursor); + E(mdb_txn_commit(txn)); + E(mdb_env_stat(env, &mst)); + + E(mdb_txn_begin(env, NULL, 1, &txn)); + E(mdb_cursor_open(txn, dbi, &cursor)); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { + printf("key: %p %.*s, data: %p %.*s\n", + key.mv_data, (int) key.mv_size, (char *) key.mv_data, + data.mv_data, (int) data.mv_size, (char *) data.mv_data); + } + CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get"); + mdb_cursor_close(cursor); + mdb_txn_abort(txn); + + j=0; + + for (i= count - 1; i > -1; i-= (rand()%5)) { + j++; + txn=NULL; + E(mdb_txn_begin(env, NULL, 0, &txn)); + sprintf(kval, "%03x", values[i & ~0x0f]); + sprintf(sval, "%03x %d foo bar", values[i], values[i]); + key.mv_size = sizeof(int); + key.mv_data = kval; + data.mv_size = sizeof(sval); + data.mv_data = sval; + if (RES(MDB_NOTFOUND, mdb_del(txn, dbi, &key, &data))) { + j--; + mdb_txn_abort(txn); + } else { + E(mdb_txn_commit(txn)); + } + } + free(values); + printf("Deleted %d values\n", j); + + E(mdb_env_stat(env, &mst)); + E(mdb_txn_begin(env, NULL, 1, &txn)); + E(mdb_cursor_open(txn, dbi, &cursor)); + printf("Cursor next\n"); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + } + CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get"); + printf("Cursor prev\n"); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_PREV)) == 0) { + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + } + CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get"); + mdb_cursor_close(cursor); + mdb_close(env, dbi); + + mdb_txn_abort(txn); + mdb_env_close(env); + + return 0; +} diff --git a/external/db_drivers/liblmdb64/mtest6.c b/external/db_drivers/liblmdb64/mtest6.c new file mode 100644 index 000000000..07d575893 --- /dev/null +++ b/external/db_drivers/liblmdb64/mtest6.c @@ -0,0 +1,137 @@ +/* mtest6.c - memory-mapped database tester/toy */ +/* + * Copyright 2011-2014 Howard Chu, Symas Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>. + */ + +/* Tests for DB splits and merges */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include "lmdb.h" + +#define E(expr) CHECK((rc = (expr)) == MDB_SUCCESS, #expr) +#define RES(err, expr) ((rc = expr) == (err) || (CHECK(!rc, #expr), 0)) +#define CHECK(test, msg) ((test) ? (void)0 : ((void)fprintf(stderr, \ + "%s:%d: %s: %s\n", __FILE__, __LINE__, msg, mdb_strerror(rc)), abort())) + +char dkbuf[1024]; + +int main(int argc,char * argv[]) +{ + int i = 0, j = 0, rc; + MDB_env *env; + MDB_dbi dbi; + MDB_val key, data; + MDB_txn *txn; + MDB_stat mst; + MDB_cursor *cursor; + int count; + int *values; + long kval; + char *sval; + + srand(time(NULL)); + + E(mdb_env_create(&env)); + E(mdb_env_set_mapsize(env, 10485760)); + E(mdb_env_set_maxdbs(env, 4)); + E(mdb_env_open(env, "./testdb", MDB_FIXEDMAP|MDB_NOSYNC, 0664)); + E(mdb_txn_begin(env, NULL, 0, &txn)); + E(mdb_open(txn, "id6", MDB_CREATE|MDB_INTEGERKEY, &dbi)); + E(mdb_cursor_open(txn, dbi, &cursor)); + E(mdb_stat(txn, dbi, &mst)); + + sval = calloc(1, mst.ms_psize / 4); + key.mv_size = sizeof(long); + key.mv_data = &kval; + data.mv_size = mst.ms_psize / 4 - 30; + data.mv_data = sval; + + printf("Adding 12 values, should yield 3 splits\n"); + for (i=0;i<12;i++) { + kval = i*5; + sprintf(sval, "%08x", kval); + (void)RES(MDB_KEYEXIST, mdb_cursor_put(cursor, &key, &data, MDB_NOOVERWRITE)); + } + printf("Adding 12 more values, should yield 3 splits\n"); + for (i=0;i<12;i++) { + kval = i*5+4; + sprintf(sval, "%08x", kval); + (void)RES(MDB_KEYEXIST, mdb_cursor_put(cursor, &key, &data, MDB_NOOVERWRITE)); + } + printf("Adding 12 more values, should yield 3 splits\n"); + for (i=0;i<12;i++) { + kval = i*5+1; + sprintf(sval, "%08x", kval); + (void)RES(MDB_KEYEXIST, mdb_cursor_put(cursor, &key, &data, MDB_NOOVERWRITE)); + } + E(mdb_cursor_get(cursor, &key, &data, MDB_FIRST)); + + do { + printf("key: %p %s, data: %p %.*s\n", + key.mv_data, mdb_dkey(&key, dkbuf), + data.mv_data, (int) data.mv_size, (char *) data.mv_data); + } while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0); + CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get"); + mdb_cursor_close(cursor); + mdb_txn_commit(txn); + +#if 0 + j=0; + + for (i= count - 1; i > -1; i-= (rand()%5)) { + j++; + txn=NULL; + E(mdb_txn_begin(env, NULL, 0, &txn)); + sprintf(kval, "%03x", values[i & ~0x0f]); + sprintf(sval, "%03x %d foo bar", values[i], values[i]); + key.mv_size = sizeof(int); + key.mv_data = kval; + data.mv_size = sizeof(sval); + data.mv_data = sval; + if (RES(MDB_NOTFOUND, mdb_del(txn, dbi, &key, &data))) { + j--; + mdb_txn_abort(txn); + } else { + E(mdb_txn_commit(txn)); + } + } + free(values); + printf("Deleted %d values\n", j); + + E(mdb_env_stat(env, &mst)); + E(mdb_txn_begin(env, NULL, 1, &txn)); + E(mdb_cursor_open(txn, dbi, &cursor)); + printf("Cursor next\n"); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + } + CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get"); + printf("Cursor prev\n"); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_PREV)) == 0) { + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + } + CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get"); + mdb_cursor_close(cursor); + mdb_close(env, dbi); + + mdb_txn_abort(txn); +#endif + mdb_env_close(env); + + return 0; +} diff --git a/external/db_drivers/liblmdb64/sample-bdb.txt b/external/db_drivers/liblmdb64/sample-bdb.txt new file mode 100644 index 000000000..6a959bd68 --- /dev/null +++ b/external/db_drivers/liblmdb64/sample-bdb.txt @@ -0,0 +1,73 @@ +/* sample-bdb.txt - BerkeleyDB toy/sample + * + * Do a line-by-line comparison of this and sample-mdb.txt + */ +/* + * Copyright 2012 Howard Chu, Symas Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>. + */ +#include <stdio.h> +#include <string.h> +#include <db.h> + +int main(int argc,char * argv[]) +{ + int rc; + DB_ENV *env; + DB *dbi; + DBT key, data; + DB_TXN *txn; + DBC *cursor; + char sval[32], kval[32]; + + /* Note: Most error checking omitted for simplicity */ + +#define FLAGS (DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_INIT_MPOOL|DB_CREATE|DB_THREAD) + rc = db_env_create(&env, 0); + rc = env->open(env, "./testdb", FLAGS, 0664); + rc = db_create(&dbi, env, 0); + rc = env->txn_begin(env, NULL, &txn, 0); + rc = dbi->open(dbi, txn, "test.bdb", NULL, DB_BTREE, DB_CREATE, 0664); + + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + key.size = sizeof(int); + key.data = sval; + data.size = sizeof(sval); + data.data = sval; + + sprintf(sval, "%03x %d foo bar", 32, 3141592); + rc = dbi->put(dbi, txn, &key, &data, 0); + rc = txn->commit(txn, 0); + if (rc) { + fprintf(stderr, "txn->commit: (%d) %s\n", rc, db_strerror(rc)); + goto leave; + } + rc = env->txn_begin(env, NULL, &txn, 0); + rc = dbi->cursor(dbi, txn, &cursor, 0); + key.flags = DB_DBT_USERMEM; + key.data = kval; + key.ulen = sizeof(kval); + data.flags = DB_DBT_USERMEM; + data.data = sval; + data.ulen = sizeof(sval); + while ((rc = cursor->c_get(cursor, &key, &data, DB_NEXT)) == 0) { + printf("key: %p %.*s, data: %p %.*s\n", + key.data, (int) key.size, (char *) key.data, + data.data, (int) data.size, (char *) data.data); + } + rc = cursor->c_close(cursor); + rc = txn->abort(txn); +leave: + rc = dbi->close(dbi, 0); + rc = env->close(env, 0); + return rc; +} diff --git a/external/db_drivers/liblmdb64/sample-mdb.txt b/external/db_drivers/liblmdb64/sample-mdb.txt new file mode 100644 index 000000000..a233ec5dd --- /dev/null +++ b/external/db_drivers/liblmdb64/sample-mdb.txt @@ -0,0 +1,62 @@ +/* sample-mdb.txt - MDB toy/sample + * + * Do a line-by-line comparison of this and sample-bdb.txt + */ +/* + * Copyright 2012 Howard Chu, Symas Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>. + */ +#include <stdio.h> +#include "lmdb.h" + +int main(int argc,char * argv[]) +{ + int rc; + MDB_env *env; + MDB_dbi dbi; + MDB_val key, data; + MDB_txn *txn; + MDB_cursor *cursor; + char sval[32]; + + /* Note: Most error checking omitted for simplicity */ + + rc = mdb_env_create(&env); + rc = mdb_env_open(env, "./testdb", 0, 0664); + rc = mdb_txn_begin(env, NULL, 0, &txn); + rc = mdb_open(txn, NULL, 0, &dbi); + + key.mv_size = sizeof(int); + key.mv_data = sval; + data.mv_size = sizeof(sval); + data.mv_data = sval; + + sprintf(sval, "%03x %d foo bar", 32, 3141592); + rc = mdb_put(txn, dbi, &key, &data, 0); + rc = mdb_txn_commit(txn); + if (rc) { + fprintf(stderr, "mdb_txn_commit: (%d) %s\n", rc, mdb_strerror(rc)); + goto leave; + } + rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn); + rc = mdb_cursor_open(txn, dbi, &cursor); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { + printf("key: %p %.*s, data: %p %.*s\n", + key.mv_data, (int) key.mv_size, (char *) key.mv_data, + data.mv_data, (int) data.mv_size, (char *) data.mv_data); + } + mdb_cursor_close(cursor); + mdb_txn_abort(txn); +leave: + mdb_close(env, dbi); + mdb_env_close(env); + return 0; +} diff --git a/external/db_drivers/liblmdb64/tooltag b/external/db_drivers/liblmdb64/tooltag new file mode 100644 index 000000000..229bf16ba --- /dev/null +++ b/external/db_drivers/liblmdb64/tooltag @@ -0,0 +1,22 @@ +<tagfile> + <compound kind="page"> + <name>mdb_copy_1</name> + <title>mdb_copy - environment copy tool</title> + <filename>mdb_copy.1</filename> + </compound> + <compound kind="page"> + <name>mdb_dump_1</name> + <title>mdb_dump - environment export tool</title> + <filename>mdb_dump.1</filename> + </compound> + <compound kind="page"> + <name>mdb_load_1</name> + <title>mdb_load - environment import tool</title> + <filename>mdb_load.1</filename> + </compound> + <compound kind="page"> + <name>mdb_stat_1</name> + <title>mdb_stat - environment status tool</title> + <filename>mdb_stat.1</filename> + </compound> +</tagfile> diff --git a/external/unbound/CMakeLists.txt b/external/unbound/CMakeLists.txt index fdfa0a0fe..b00ff9b3c 100644 --- a/external/unbound/CMakeLists.txt +++ b/external/unbound/CMakeLists.txt @@ -1,3 +1,31 @@ +# Copyright (c) 2014-2015, The Monero Project +# +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, are +# permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this list of +# conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, this list +# of conditions and the following disclaimer in the documentation and/or other +# materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its contributors may be +# used to endorse or promote products derived from this software without specific +# prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +# THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF +# THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + cmake_minimum_required(VERSION 2.8.7) project(unbound C) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 294c6c0f6..88012ee13 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -89,6 +89,7 @@ endfunction () add_subdirectory(common) add_subdirectory(crypto) add_subdirectory(cryptonote_core) +add_subdirectory(blockchain_db) add_subdirectory(mnemonics) add_subdirectory(rpc) add_subdirectory(wallet) @@ -98,3 +99,5 @@ add_subdirectory(miner) add_subdirectory(simplewallet) add_subdirectory(daemonizer) add_subdirectory(daemon) + +add_subdirectory(blockchain_converter) diff --git a/src/blockchain_converter/CMakeLists.txt b/src/blockchain_converter/CMakeLists.txt new file mode 100644 index 000000000..e0f3be901 --- /dev/null +++ b/src/blockchain_converter/CMakeLists.txt @@ -0,0 +1,110 @@ +# Copyright (c) 2014-2015, The Monero Project +# +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, are +# permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this list of +# conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, this list +# of conditions and the following disclaimer in the documentation and/or other +# materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its contributors may be +# used to endorse or promote products derived from this software without specific +# prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +# THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF +# THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +set(blockchain_converter_sources + blockchain_converter.cpp + ) + +set(blockchain_converter_private_headers) + +bitmonero_private_headers(blockchain_converter + ${blockchain_converter_private_headers}) + +set(blockchain_import_sources + blockchain_import.cpp + ) + +set(blockchain_import_private_headers + import.h + fake_core.h + ) + +bitmonero_private_headers(blockchain_import + ${blockchain_import_private_headers}) + +set(blockchain_export_sources + blockchain_export.cpp + ) + +set(blockchain_export_private_headers + import.h + blockchain_export.h + ) + +bitmonero_private_headers(blockchain_export + ${blockchain_export_private_headers}) + + + +if (BLOCKCHAIN_DB STREQUAL DB_LMDB) +bitmonero_add_executable(blockchain_converter + ${blockchain_converter_sources} + ${blockchain_converter_private_headers}) + +target_link_libraries(blockchain_converter + LINK_PRIVATE + cryptonote_core + blockchain_db + ${CMAKE_THREAD_LIBS_INIT}) + +add_dependencies(blockchain_converter + version) +set_property(TARGET blockchain_converter + PROPERTY + OUTPUT_NAME "blockchain_converter") +endif () + +bitmonero_add_executable(blockchain_import + ${blockchain_import_sources} + ${blockchain_import_private_headers}) + +target_link_libraries(blockchain_import + LINK_PRIVATE + cryptonote_core + ${CMAKE_THREAD_LIBS_INIT}) + +add_dependencies(blockchain_import + version) +set_property(TARGET blockchain_import + PROPERTY + OUTPUT_NAME "blockchain_import") + +bitmonero_add_executable(blockchain_export + ${blockchain_export_sources} + ${blockchain_export_private_headers}) + +target_link_libraries(blockchain_export + LINK_PRIVATE + cryptonote_core + ${CMAKE_THREAD_LIBS_INIT}) + +add_dependencies(blockchain_export + version) +set_property(TARGET blockchain_export + PROPERTY + OUTPUT_NAME "blockchain_export") diff --git a/src/blockchain_converter/README.md b/src/blockchain_converter/README.md new file mode 100644 index 000000000..00160c6b9 --- /dev/null +++ b/src/blockchain_converter/README.md @@ -0,0 +1,54 @@ + +For importing into the LMDB database, compile with `DATABASE=lmdb` + +e.g. + +`DATABASE=lmdb make release` + +This is also the default compile setting on the blockchain branch. + +By default, the exporter will use the original in-memory database (blockchain.bin) as its source. +This default is to make migrating to an LMDB database easy, without having to recompile anything. +To change the source, adjust `SOURCE_DB` in `src/blockchain_converter/blockchain_export.h` according to the comments. + +# Usage: + +See also each utility's "--help" option. + +## Export an existing in-memory database + +`$ blockchain_export` + +This loads the existing blockchain, for whichever database type it was compiled for, and exports it to `$MONERO_DATA_DIR/export/blockchain.raw` + +## Import the exported file + +`$ blockchain_import` + +This imports blocks from `$MONERO_DATA_DIR/export/blockchain.raw` into the current database. + +Defaults: `--batch on`, `--batch size 20000`, `--verify on` + +Batch size refers to number of blocks and can be adjusted for performance based on available RAM. + +Verification should only be turned off if importing from a trusted blockchain. + +```bash +# use default settings to import blockchain.raw into database +$ blockchain_import + +# fast import with large batch size, verification off +$ blockchain_import --batch-size 100000 --verify off + +# LMDB flags can be set by appending them to the database type: +# flags: nosync, nometasync, writemap, mapasync +$ blockchain_import --database lmdb#nosync +$ blockchain_import --database lmdb#nosync,nometasync +``` + +## Blockchain converter with batching +`blockchain_converter` has also been updated and includes batching for faster writes. However, on lower RAM systems, this will be slower than using the exporter and importer utilities. The converter needs to keep the blockchain in memory for the duration of the conversion, like the original bitmonerod, thus leaving less memory available to the destination database to operate. + +```bash +$ blockchain_converter --batch on --batch-size 20000 +``` diff --git a/src/blockchain_converter/blockchain_converter.cpp b/src/blockchain_converter/blockchain_converter.cpp new file mode 100644 index 000000000..03ab5e434 --- /dev/null +++ b/src/blockchain_converter/blockchain_converter.cpp @@ -0,0 +1,296 @@ +// Copyright (c) 2014-2015, The Monero Project +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without modification, are +// permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of +// conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list +// of conditions and the following disclaimer in the documentation and/or other +// materials provided with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be +// used to endorse or promote products derived from this software without specific +// prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF +// THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "include_base_utils.h" +#include "common/util.h" +#include "warnings.h" +#include "crypto/crypto.h" +#include "cryptonote_config.h" +#include "cryptonote_core/cryptonote_format_utils.h" +#include "misc_language.h" +#include "cryptonote_core/blockchain_storage.h" +#include "blockchain_db/blockchain_db.h" +#include "cryptonote_core/blockchain.h" +#include "blockchain_db/lmdb/db_lmdb.h" +#include "cryptonote_core/tx_pool.h" +#include "common/command_line.h" +#include "serialization/json_utils.h" +#include "include_base_utils.h" +#include "version.h" +#include <iostream> + +namespace +{ + +// CONFIG +bool opt_batch = true; +bool opt_resume = true; +bool opt_testnet = false; + +// number of blocks per batch transaction +// adjustable through command-line argument according to available RAM +uint64_t db_batch_size = 20000; + +} + +namespace po = boost::program_options; + +using namespace cryptonote; +using namespace epee; + +struct fake_core +{ + Blockchain dummy; + tx_memory_pool m_pool; + + blockchain_storage m_storage; + +#if !defined(BLOCKCHAIN_DB) + // for multi_db_runtime: + fake_core(const boost::filesystem::path &path, const bool use_testnet) : dummy(m_pool), m_pool(&dummy), m_storage(m_pool) +#else + // for multi_db_compile: + fake_core(const boost::filesystem::path &path, const bool use_testnet) : dummy(m_pool), m_pool(dummy), m_storage(&m_pool) +#endif + { + m_pool.init(path.string()); + m_storage.init(path.string(), use_testnet); + } +}; + +int main(int argc, char* argv[]) +{ + uint64_t height = 0; + uint64_t start_block = 0; + uint64_t end_block = 0; + uint64_t num_blocks = 0; + + boost::filesystem::path default_data_path {tools::get_default_data_dir()}; + boost::filesystem::path default_testnet_data_path {default_data_path / "testnet"}; + + + po::options_description desc_cmd_only("Command line options"); + po::options_description desc_cmd_sett("Command line options and settings options"); + const command_line::arg_descriptor<uint32_t> arg_log_level = {"log-level", "", LOG_LEVEL_0}; + const command_line::arg_descriptor<uint64_t> arg_batch_size = {"batch-size", "", db_batch_size}; + const command_line::arg_descriptor<bool> arg_testnet_on = { + "testnet" + , "Run on testnet." + , opt_testnet + }; + const command_line::arg_descriptor<uint64_t> arg_block_number = + {"block-number", "Number of blocks (default: use entire source blockchain)", + 0}; + + command_line::add_arg(desc_cmd_sett, command_line::arg_data_dir, default_data_path.string()); + command_line::add_arg(desc_cmd_sett, command_line::arg_testnet_data_dir, default_testnet_data_path.string()); + command_line::add_arg(desc_cmd_sett, arg_log_level); + command_line::add_arg(desc_cmd_sett, arg_batch_size); + command_line::add_arg(desc_cmd_sett, arg_testnet_on); + command_line::add_arg(desc_cmd_sett, arg_block_number); + + command_line::add_arg(desc_cmd_only, command_line::arg_help); + + const command_line::arg_descriptor<bool> arg_batch = {"batch", + "Batch transactions for faster import", true}; + const command_line::arg_descriptor<bool> arg_resume = {"resume", + "Resume from current height if output database already exists", true}; + + // call add_options() directly for these arguments since command_line helpers + // support only boolean switch, not boolean argument + desc_cmd_sett.add_options() + (arg_batch.name, make_semantic(arg_batch), arg_batch.description) + (arg_resume.name, make_semantic(arg_resume), arg_resume.description) + ; + + po::options_description desc_options("Allowed options"); + desc_options.add(desc_cmd_only).add(desc_cmd_sett); + + + po::variables_map vm; + bool r = command_line::handle_error_helper(desc_options, [&]() + { + po::store(po::parse_command_line(argc, argv, desc_options), vm); + po::notify(vm); + + return true; + }); + if (!r) + return 1; + + int log_level = command_line::get_arg(vm, arg_log_level); + opt_batch = command_line::get_arg(vm, arg_batch); + opt_resume = command_line::get_arg(vm, arg_resume); + db_batch_size = command_line::get_arg(vm, arg_batch_size); + + if (command_line::get_arg(vm, command_line::arg_help)) + { + std::cout << CRYPTONOTE_NAME << " v" << MONERO_VERSION_FULL << ENDL << ENDL; + std::cout << desc_options << std::endl; + return 1; + } + + if (! opt_batch && ! vm["batch-size"].defaulted()) + { + std::cerr << "Error: batch-size set, but batch option not enabled" << ENDL; + return 1; + } + if (! db_batch_size) + { + std::cerr << "Error: batch-size must be > 0" << ENDL; + return 1; + } + + log_space::get_set_log_detalisation_level(true, log_level); + log_space::log_singletone::add_logger(LOGGER_CONSOLE, NULL, NULL); + LOG_PRINT_L0("Starting..."); + + std::string src_folder; + opt_testnet = command_line::get_arg(vm, arg_testnet_on); + auto data_dir_arg = opt_testnet ? command_line::arg_testnet_data_dir : command_line::arg_data_dir; + src_folder = command_line::get_arg(vm, data_dir_arg); + boost::filesystem::path dest_folder(src_folder); + + num_blocks = command_line::get_arg(vm, arg_block_number); + + if (opt_batch) + { + LOG_PRINT_L0("batch: " << std::boolalpha << opt_batch << std::noboolalpha + << " batch size: " << db_batch_size); + } + else + { + LOG_PRINT_L0("batch: " << std::boolalpha << opt_batch << std::noboolalpha); + } + LOG_PRINT_L0("resume: " << std::boolalpha << opt_resume << std::noboolalpha); + LOG_PRINT_L0("testnet: " << std::boolalpha << opt_testnet << std::noboolalpha); + + fake_core c(src_folder, opt_testnet); + + height = c.m_storage.get_current_blockchain_height(); + + BlockchainDB *blockchain; + blockchain = new BlockchainLMDB(opt_batch); + dest_folder /= blockchain->get_db_name(); + LOG_PRINT_L0("Source blockchain: " << src_folder); + LOG_PRINT_L0("Dest blockchain: " << dest_folder.string()); + LOG_PRINT_L0("Opening dest blockchain (BlockchainDB " << blockchain->get_db_name() << ")"); + blockchain->open(dest_folder.string()); + LOG_PRINT_L0("Source blockchain height: " << height); + LOG_PRINT_L0("Dest blockchain height: " << blockchain->height()); + + if (opt_resume) + // next block number to add is same as current height + start_block = blockchain->height(); + + if (! num_blocks || (start_block + num_blocks > height)) + end_block = height - 1; + else + end_block = start_block + num_blocks - 1; + + LOG_PRINT_L0("start height: " << start_block+1 << " stop height: " << + end_block+1); + + if (start_block > end_block) + { + LOG_PRINT_L0("Finished: no blocks to add"); + delete blockchain; + return 0; + } + + if (opt_batch) + blockchain->batch_start(); + uint64_t i = 0; + for (i = start_block; i < end_block + 1; ++i) + { + // block: i height: i+1 end height: end_block + 1 + if ((i+1) % 10 == 0) + { + std::cout << "\r \r" << "height " << i+1 << "/" << + end_block+1 << " (" << (i+1)*100/(end_block+1)<< "%)" << std::flush; + } + // for debugging: + // std::cout << "height " << i+1 << "/" << end_block+1 + // << " ((" << i+1 << ")*100/(end_block+1))" << "%)" << ENDL; + + block b = c.m_storage.get_block(i); + size_t bsize = c.m_storage.get_block_size(i); + difficulty_type bdiff = c.m_storage.get_block_cumulative_difficulty(i); + uint64_t bcoins = c.m_storage.get_block_coins_generated(i); + std::vector<transaction> txs; + std::vector<crypto::hash> missed; + + c.m_storage.get_transactions(b.tx_hashes, txs, missed); + if (missed.size()) + { + std::cout << ENDL; + std::cerr << "Missed transaction(s) for block at height " << i + 1 << ", exiting" << ENDL; + delete blockchain; + return 1; + } + + try + { + blockchain->add_block(b, bsize, bdiff, bcoins, txs); + + if (opt_batch) + { + if ((i < end_block) && ((i + 1) % db_batch_size == 0)) + { + std::cout << "\r \r"; + std::cout << "[- batch commit at height " << i + 1 << " -]" << ENDL; + blockchain->batch_stop(); + blockchain->batch_start(); + std::cout << ENDL; + blockchain->show_stats(); + } + } + } + catch (const std::exception& e) + { + std::cout << ENDL; + std::cerr << "Error adding block " << i << " to new blockchain: " << e.what() << ENDL; + delete blockchain; + return 2; + } + } + if (opt_batch) + { + std::cout << "\r \r" << "height " << i << "/" << + end_block+1 << " (" << (i)*100/(end_block+1)<< "%)" << std::flush; + std::cout << ENDL; + std::cout << "[- batch commit at height " << i << " -]" << ENDL; + blockchain->batch_stop(); + } + std::cout << ENDL; + blockchain->show_stats(); + std::cout << "Finished at height: " << i << " block: " << i-1 << ENDL; + + delete blockchain; + return 0; +} diff --git a/src/blockchain_converter/blockchain_export.cpp b/src/blockchain_converter/blockchain_export.cpp new file mode 100644 index 000000000..4587ce925 --- /dev/null +++ b/src/blockchain_converter/blockchain_export.cpp @@ -0,0 +1,394 @@ +// Copyright (c) 2014-2015, The Monero Project +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without modification, are +// permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of +// conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list +// of conditions and the following disclaimer in the documentation and/or other +// materials provided with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be +// used to endorse or promote products derived from this software without specific +// prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF +// THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <algorithm> +#include <cstdio> +#include <fstream> +#include <boost/iostreams/copy.hpp> +#include <atomic> + +#include <boost/archive/binary_oarchive.hpp> +#include <boost/archive/binary_iarchive.hpp> +#include <boost/iostreams/stream_buffer.hpp> +#include <boost/iostreams/stream.hpp> +#include <boost/iostreams/device/back_inserter.hpp> +#include <boost/iostreams/filtering_streambuf.hpp> +#include <boost/iostreams/filter/bzip2.hpp> +#include "common/command_line.h" +#include "version.h" +#include "blockchain_export.h" +#include "cryptonote_core/cryptonote_boost_serialization.h" + +#include "import.h" + +static int max_chunk = 0; +static size_t height; + +namespace po = boost::program_options; + +using namespace cryptonote; +using namespace epee; + +bool BlockchainExport::open(const boost::filesystem::path& dir_path) +{ + if (boost::filesystem::exists(dir_path)) + { + if (!boost::filesystem::is_directory(dir_path)) + { + LOG_PRINT_RED_L0("export directory path is a file: " << dir_path); + return false; + } + } + else + { + if (!boost::filesystem::create_directory(dir_path)) + { + LOG_PRINT_RED_L0("Failed to create directory " << dir_path); + return false; + } + } + + std::string file_path = (dir_path / BLOCKCHAIN_RAW).string(); + m_raw_data_file = new std::ofstream(); + m_raw_data_file->open(file_path , std::ios_base::binary | std::ios_base::out| std::ios::trunc); + if (m_raw_data_file->fail()) + return false; + + m_output_stream = new boost::iostreams::stream<boost::iostreams::back_insert_device<buffer_type>>(m_buffer); + m_raw_archive = new boost::archive::binary_oarchive(*m_output_stream); + if (m_raw_archive == NULL) + return false; + + return true; +} + +void BlockchainExport::flush_chunk() +{ + m_output_stream->flush(); + char buffer[STR_LENGTH_OF_INT + 1]; + int chunk_size = (int) m_buffer.size(); + if (chunk_size > BUFFER_SIZE) + { + LOG_PRINT_L0("WARNING: chunk_size " << chunk_size << " > BUFFER_SIZE " << BUFFER_SIZE); + } + sprintf(buffer, STR_FORMAT_OF_INT, chunk_size); + m_raw_data_file->write(buffer, STR_LENGTH_OF_INT); + if (max_chunk < chunk_size) + { + max_chunk = chunk_size; + } + long pos_before = m_raw_data_file->tellp(); + std::copy(m_buffer.begin(), m_buffer.end(), std::ostreambuf_iterator<char>(*m_raw_data_file)); + m_raw_data_file->flush(); + long pos_after = m_raw_data_file->tellp(); + long num_chars_written = pos_after - pos_before; + if ((int) num_chars_written != chunk_size) + { + LOG_PRINT_RED_L0("INTERNAL ERROR: num chars wrote NEQ buffer size. height = " << height); + } + + m_buffer.clear(); + delete m_raw_archive; + delete m_output_stream; + m_output_stream = new boost::iostreams::stream<boost::iostreams::back_insert_device<buffer_type>>(m_buffer); + m_raw_archive = new boost::archive::binary_oarchive(*m_output_stream); +} + +void BlockchainExport::serialize_block_to_text_buffer(const block& block) +{ + *m_raw_archive << block; +} + +void BlockchainExport::buffer_serialize_tx(const transaction& tx) +{ + *m_raw_archive << tx; +} + +void BlockchainExport::buffer_write_num_txs(const std::list<transaction> txs) +{ + int n = txs.size(); + *m_raw_archive << n; +} + +void BlockchainExport::write_block(block& block) +{ + serialize_block_to_text_buffer(block); + std::list<transaction> txs; + + uint64_t block_height = boost::get<txin_gen>(block.miner_tx.vin.front()).height; + + // put coinbase transaction first + transaction coinbase_tx = block.miner_tx; + crypto::hash coinbase_tx_hash = get_transaction_hash(coinbase_tx); +#if SOURCE_DB == DB_MEMORY + const transaction* cb_tx_full = m_blockchain_storage->get_tx(coinbase_tx_hash); +#else + transaction cb_tx_full = m_blockchain_storage->get_db().get_tx(coinbase_tx_hash); +#endif + +#if SOURCE_DB == DB_MEMORY + if (cb_tx_full != NULL) + { + txs.push_back(*cb_tx_full); + } +#else + // TODO: should check and abort if cb_tx_full equals null_hash? + txs.push_back(cb_tx_full); +#endif + + // now add all regular transactions + BOOST_FOREACH(const auto& tx_id, block.tx_hashes) + { +#if SOURCE_DB == DB_MEMORY + const transaction* tx = m_blockchain_storage->get_tx(tx_id); +#else + transaction tx = m_blockchain_storage->get_db().get_tx(tx_id); +#endif + +#if SOURCE_DB == DB_MEMORY + if(tx == NULL) + { + if (! m_tx_pool) + throw std::runtime_error("Aborting: tx == NULL, so memory pool required to get tx, but memory pool isn't enabled"); + else + { + transaction tx; + if(m_tx_pool->get_transaction(tx_id, tx)) + txs.push_back(tx); + else + throw std::runtime_error("Aborting: tx not found in pool"); + } + } + else + txs.push_back(*tx); +#else + txs.push_back(tx); +#endif + } + + // serialize all txs to the persistant storage + buffer_write_num_txs(txs); + BOOST_FOREACH(const auto& tx, txs) + { + buffer_serialize_tx(tx); + } + + // These three attributes are currently necessary for a fast import that adds blocks without verification. + bool include_extra_block_data = true; + if (include_extra_block_data) + { +#if SOURCE_DB == DB_MEMORY + size_t block_size = m_blockchain_storage->get_block_size(block_height); +#else + size_t block_size = m_blockchain_storage->get_db().get_block_size(block_height); +#endif +#if SOURCE_DB == DB_MEMORY + difficulty_type cumulative_difficulty = m_blockchain_storage->get_block_cumulative_difficulty(block_height); +#else + difficulty_type cumulative_difficulty = m_blockchain_storage->get_db().get_block_cumulative_difficulty(block_height); +#endif +#if SOURCE_DB == DB_MEMORY + uint64_t coins_generated = m_blockchain_storage->get_block_coins_generated(block_height); +#else + // TODO TEST to verify that this is the equivalent. make sure no off-by-one error with block height vs block number + uint64_t coins_generated = m_blockchain_storage->get_db().get_block_already_generated_coins(block_height); +#endif + + *m_raw_archive << block_size; + *m_raw_archive << cumulative_difficulty; + *m_raw_archive << coins_generated; + } +} + +bool BlockchainExport::BlockchainExport::close() +{ + if (m_raw_data_file->fail()) + return false; + + m_raw_data_file->flush(); + delete m_raw_archive; + delete m_output_stream; + delete m_raw_data_file; + return true; +} + + +#if SOURCE_DB == DB_MEMORY +bool BlockchainExport::store_blockchain_raw(blockchain_storage* _blockchain_storage, tx_memory_pool* _tx_pool, boost::filesystem::path& output_dir, uint64_t requested_block_height) +#else +bool BlockchainExport::store_blockchain_raw(Blockchain* _blockchain_storage, tx_memory_pool* _tx_pool, boost::filesystem::path& output_dir, uint64_t requested_block_height) +#endif +{ + uint64_t block_height = 0; + m_blockchain_storage = _blockchain_storage; + m_tx_pool = _tx_pool; + uint64_t progress_interval = 100; + std::string refresh_string = "\r \r"; + LOG_PRINT_L0("Storing blocks raw data..."); + if (!BlockchainExport::open(output_dir)) + { + LOG_PRINT_RED_L0("failed to open raw file for write"); + return false; + } + block b; + LOG_PRINT_L0("source blockchain height: " << m_blockchain_storage->get_current_blockchain_height()); + LOG_PRINT_L0("requested block height: " << requested_block_height); + if ((requested_block_height > 0) && (requested_block_height < m_blockchain_storage->get_current_blockchain_height())) + block_height = requested_block_height; + else + { + block_height = m_blockchain_storage->get_current_blockchain_height(); + LOG_PRINT_L0("Using block height of source blockchain: " << block_height); + } + for (height=0; height < block_height; ++height) + { + crypto::hash hash = m_blockchain_storage->get_block_id_by_height(height); + m_blockchain_storage->get_block_by_hash(hash, b); + write_block(b); + if (height % NUM_BLOCKS_PER_CHUNK == 0) { + flush_chunk(); + } + if (height % progress_interval == 0) { + std::cout << refresh_string; + std::cout << "height " << height << "/" << block_height << std::flush; + } + } + if (height % NUM_BLOCKS_PER_CHUNK != 0) + { + flush_chunk(); + } + std::cout << refresh_string; + std::cout << "height " << height << "/" << block_height << ENDL; + + LOG_PRINT_L0("longest chunk was " << max_chunk << " bytes"); + return BlockchainExport::close(); +} + + +int main(int argc, char* argv[]) +{ + uint32_t log_level = 0; + uint64_t block_height = 0; + std::string import_filename = BLOCKCHAIN_RAW; + + boost::filesystem::path default_data_path {tools::get_default_data_dir()}; + boost::filesystem::path default_testnet_data_path {default_data_path / "testnet"}; + + po::options_description desc_cmd_only("Command line options"); + po::options_description desc_cmd_sett("Command line options and settings options"); + const command_line::arg_descriptor<uint32_t> arg_log_level = {"log-level", "", log_level}; + const command_line::arg_descriptor<uint64_t> arg_block_height = {"block-number", "", block_height}; + const command_line::arg_descriptor<bool> arg_testnet_on = { + "testnet" + , "Run on testnet." + , false + }; + + + command_line::add_arg(desc_cmd_sett, command_line::arg_data_dir, default_data_path.string()); + command_line::add_arg(desc_cmd_sett, command_line::arg_testnet_data_dir, default_testnet_data_path.string()); + command_line::add_arg(desc_cmd_sett, arg_log_level); + command_line::add_arg(desc_cmd_sett, arg_block_height); + command_line::add_arg(desc_cmd_sett, arg_testnet_on); + + command_line::add_arg(desc_cmd_only, command_line::arg_help); + + po::options_description desc_options("Allowed options"); + desc_options.add(desc_cmd_only).add(desc_cmd_sett); + + po::variables_map vm; + bool r = command_line::handle_error_helper(desc_options, [&]() + { + po::store(po::parse_command_line(argc, argv, desc_options), vm); + po::notify(vm); + return true; + }); + if (! r) + return 1; + + if (command_line::get_arg(vm, command_line::arg_help)) + { + std::cout << CRYPTONOTE_NAME << " v" << MONERO_VERSION_FULL << ENDL << ENDL; + std::cout << desc_options << std::endl; + return 1; + } + + log_level = command_line::get_arg(vm, arg_log_level); + block_height = command_line::get_arg(vm, arg_block_height); + + log_space::get_set_log_detalisation_level(true, log_level); + log_space::log_singletone::add_logger(LOGGER_CONSOLE, NULL, NULL); + LOG_PRINT_L0("Starting..."); + LOG_PRINT_L0("Setting log level = " << log_level); + + bool opt_testnet = command_line::get_arg(vm, arg_testnet_on); + + std::string m_config_folder; + + auto data_dir_arg = opt_testnet ? command_line::arg_testnet_data_dir : command_line::arg_data_dir; + m_config_folder = command_line::get_arg(vm, data_dir_arg); + boost::filesystem::path output_dir {m_config_folder}; + output_dir /= "export"; + LOG_PRINT_L0("Export directory: " << output_dir.string()); + + // If we wanted to use the memory pool, we would set up a fake_core. + +#if SOURCE_DB == DB_MEMORY + // blockchain_storage* core_storage = NULL; + // tx_memory_pool m_mempool(*core_storage); // is this fake anyway? just passing in NULL! so m_mempool can't be used anyway, right? + // core_storage = new blockchain_storage(&m_mempool); + + blockchain_storage* core_storage = new blockchain_storage(NULL); + LOG_PRINT_L0("Initializing source blockchain (in-memory database)"); + r = core_storage->init(m_config_folder, opt_testnet); +#else + // Use Blockchain instead of lower-level BlockchainDB for two reasons: + // 1. Blockchain has the init() method for easy setup + // 2. exporter needs to use get_current_blockchain_height(), get_block_id_by_height(), get_block_by_hash() + // + // cannot match blockchain_storage setup above with just one line, + // e.g. + // Blockchain* core_storage = new Blockchain(NULL); + // because unlike blockchain_storage constructor, which takes a pointer to + // tx_memory_pool, Blockchain's constructor takes tx_memory_pool object. + LOG_PRINT_L0("Initializing source blockchain (BlockchainDB)"); + Blockchain* core_storage = NULL; + tx_memory_pool m_mempool(*core_storage); + core_storage = new Blockchain(m_mempool); + r = core_storage->init(m_config_folder, opt_testnet); +#endif + + CHECK_AND_ASSERT_MES(r, false, "Failed to initialize source blockchain storage"); + LOG_PRINT_L0("Source blockchain storage initialized OK"); + LOG_PRINT_L0("Exporting blockchain raw data..."); + + BlockchainExport be; + r = be.store_blockchain_raw(core_storage, NULL, output_dir, block_height); + CHECK_AND_ASSERT_MES(r, false, "Failed to export blockchain raw data"); + LOG_PRINT_L0("Blockchain raw data exported OK"); +} diff --git a/src/blockchain_converter/blockchain_export.h b/src/blockchain_converter/blockchain_export.h new file mode 100644 index 000000000..43e25c039 --- /dev/null +++ b/src/blockchain_converter/blockchain_export.h @@ -0,0 +1,87 @@ +// Copyright (c) 2014-2015, The Monero Project +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without modification, are +// permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of +// conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list +// of conditions and the following disclaimer in the documentation and/or other +// materials provided with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be +// used to endorse or promote products derived from this software without specific +// prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF +// THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#pragma once + +#include <boost/archive/binary_oarchive.hpp> +#include <boost/iostreams/stream_buffer.hpp> +#include <boost/iostreams/stream.hpp> +#include <boost/iostreams/device/back_inserter.hpp> +#include "cryptonote_core/cryptonote_basic.h" +#include "cryptonote_core/blockchain_storage.h" +#include "cryptonote_core/blockchain.h" +#include "blockchain_db/blockchain_db.h" +#include "blockchain_db/lmdb/db_lmdb.h" + +// CONFIG: choose one of the three #define's +// +// DB_MEMORY is a sensible default for users migrating to LMDB, as it allows +// the exporter to use the in-memory blockchain while the other binaries +// work with LMDB, without recompiling anything. +// +#define SOURCE_DB DB_MEMORY +// #define SOURCE_DB DB_LMDB +// to use global compile-time setting (DB_MEMORY or DB_LMDB): +// #define SOURCE_DB BLOCKCHAIN_DB + +using namespace cryptonote; + +class BlockchainExport +{ +public: +#if SOURCE_DB == DB_MEMORY + bool store_blockchain_raw(cryptonote::blockchain_storage* cs, cryptonote::tx_memory_pool* txp, + boost::filesystem::path& output_dir, uint64_t use_block_height=0); +#else + bool store_blockchain_raw(cryptonote::Blockchain* cs, cryptonote::tx_memory_pool* txp, + boost::filesystem::path& output_dir, uint64_t use_block_height=0); +#endif + +protected: +#if SOURCE_DB == DB_MEMORY + blockchain_storage* m_blockchain_storage; +#else + Blockchain* m_blockchain_storage; +#endif + + tx_memory_pool* m_tx_pool; + typedef std::vector<char> buffer_type; + std::ofstream * m_raw_data_file; + boost::archive::binary_oarchive * m_raw_archive; + buffer_type m_buffer; + boost::iostreams::stream<boost::iostreams::back_insert_device<buffer_type>>* m_output_stream; + + // open export file for write + bool open(const boost::filesystem::path& dir_path); + bool close(); + void write_block(block& block); + void serialize_block_to_text_buffer(const block& block); + void buffer_serialize_tx(const transaction& tx); + void buffer_write_num_txs(const std::list<transaction> txs); + void flush_chunk(); +}; diff --git a/src/blockchain_converter/blockchain_import.cpp b/src/blockchain_converter/blockchain_import.cpp new file mode 100644 index 000000000..46d1ac189 --- /dev/null +++ b/src/blockchain_converter/blockchain_import.cpp @@ -0,0 +1,754 @@ +// Copyright (c) 2014-2015, The Monero Project +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without modification, are +// permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of +// conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list +// of conditions and the following disclaimer in the documentation and/or other +// materials provided with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be +// used to endorse or promote products derived from this software without specific +// prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF +// THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <atomic> +#include <cstdio> +#include <algorithm> +#include <fstream> + +#include <boost/filesystem.hpp> +#include <boost/iostreams/stream.hpp> +#include <boost/archive/binary_iarchive.hpp> +#include "cryptonote_core/cryptonote_basic.h" +#include "cryptonote_core/cryptonote_format_utils.h" +#include "cryptonote_core/cryptonote_boost_serialization.h" +#include "serialization/json_utils.h" // dump_json() +#include "include_base_utils.h" +#include "common/command_line.h" +#include "version.h" + +#include <lmdb.h> // for db flag arguments + +#include "import.h" +#include "fake_core.h" + +// CONFIG +static bool opt_batch = true; +static bool opt_verify = true; // use add_new_block, which does verification before calling add_block +static bool opt_resume = true; +static bool opt_testnet = true; + +// number of blocks per batch transaction +// adjustable through command-line argument according to available RAM +static uint64_t db_batch_size = 20000; + +static std::string refresh_string = "\r \r"; + + +namespace po = boost::program_options; + +using namespace cryptonote; +using namespace epee; + + +int parse_db_arguments(const std::string& db_arg_str, std::string& db_engine, int& mdb_flags) +{ + std::vector<std::string> db_args; + boost::split(db_args, db_arg_str, boost::is_any_of("#")); + db_engine = db_args.front(); + boost::algorithm::trim(db_engine); + + if (db_args.size() == 1) + { + return 0; + } + else if (db_args.size() > 2) + { + std::cerr << "unrecognized database argument format: " << db_arg_str << ENDL; + return 1; + } + + std::string db_arg_str2 = db_args[1]; + boost::split(db_args, db_arg_str2, boost::is_any_of(",")); + for (auto& it : db_args) + { + boost::algorithm::trim(it); + if (it.empty()) + continue; + LOG_PRINT_L1("LMDB flag: " << it); + if (it == "nosync") + { + mdb_flags |= MDB_NOSYNC; + } + else if (it == "nometasync") + { + mdb_flags |= MDB_NOMETASYNC; + } + else if (it == "writemap") + { + mdb_flags |= MDB_WRITEMAP; + } + else if (it == "mapasync") + { + mdb_flags |= MDB_MAPASYNC; + } + else + { + std::cerr << "unrecognized database flag: " << it << ENDL; + return 1; + } + } + return 0; +} + + +int count_blocks(std::string& import_file_path) +{ + boost::filesystem::path raw_file_path(import_file_path); + boost::system::error_code ec; + if (!boost::filesystem::exists(raw_file_path, ec)) + { + LOG_PRINT_L0("import file not found: " << raw_file_path); + throw std::runtime_error("Aborting"); + } + std::ifstream import_file; + import_file.open(import_file_path, std::ios_base::binary | std::ifstream::in); + + uint64_t h = 0; + if (import_file.fail()) + { + LOG_PRINT_L0("import_file.open() fail"); + throw std::runtime_error("Aborting"); + } + LOG_PRINT_L0("Scanning blockchain from import file..."); + char buffer1[STR_LENGTH_OF_INT + 1]; + block b; + transaction tx; + bool quit = false; + uint64_t bytes_read = 0; + int progress_interval = 10; + + while (! quit) + { + int chunk_size; + import_file.read(buffer1, STR_LENGTH_OF_INT); + if (!import_file) { + std::cout << refresh_string; + LOG_PRINT_L1("End of import file reached"); + quit = true; + break; + } + h += NUM_BLOCKS_PER_CHUNK; + if (h % progress_interval == 0) + { + std::cout << refresh_string << "block height: " << h << + std::flush; + } + bytes_read += STR_LENGTH_OF_INT; + buffer1[STR_LENGTH_OF_INT] = '\0'; + chunk_size = atoi(buffer1); + if (chunk_size > BUFFER_SIZE) + { + std::cout << refresh_string; + LOG_PRINT_L0("WARNING: chunk_size " << chunk_size << " > BUFFER_SIZE " << BUFFER_SIZE + << " height: " << h); + throw std::runtime_error("Aborting: chunk size exceeds buffer size"); + } + if (chunk_size > 100000) + { + std::cout << refresh_string; + LOG_PRINT_L0("WARNING: chunk_size " << chunk_size << " > 100000" << " height: " + << h); + } + else if (chunk_size <= 0) { + std::cout << refresh_string; + LOG_PRINT_L0("ERROR: chunk_size " << chunk_size << " <= 0" << " height: " << h); + throw std::runtime_error("Aborting"); + } + // skip to next expected block size value + import_file.seekg(chunk_size, std::ios_base::cur); + if (! import_file) { + std::cout << refresh_string; + LOG_PRINT_L0("ERROR: unexpected end of import file: bytes read before error: " + << import_file.gcount() << " of chunk_size " << chunk_size); + throw std::runtime_error("Aborting"); + } + bytes_read += chunk_size; + std::cout << refresh_string; + + LOG_PRINT_L3("Total bytes scanned: " << bytes_read); + } + + import_file.close(); + + std::cout << ENDL; + std::cout << "Done scanning import file" << ENDL; + std::cout << "Total bytes scanned: " << bytes_read << ENDL; + std::cout << "Height: " << h << ENDL; + + return h; +} + +template <typename FakeCore> +int import_from_file(FakeCore& simple_core, std::string& import_file_path) +{ +#if !defined(BLOCKCHAIN_DB) + static_assert(std::is_same<fake_core_memory, FakeCore>::value || std::is_same<fake_core_lmdb, FakeCore>::value, + "FakeCore constraint error"); +#endif +#if !defined(BLOCKCHAIN_DB) || (BLOCKCHAIN_DB == DB_LMDB) + if (std::is_same<fake_core_lmdb, FakeCore>::value) + { + // Reset stats, in case we're using newly created db, accumulating stats + // from addition of genesis block. + // This aligns internal db counts with importer counts. + simple_core.m_storage.get_db().reset_stats(); + } +#endif + boost::filesystem::path raw_file_path(import_file_path); + boost::system::error_code ec; + if (!boost::filesystem::exists(raw_file_path, ec)) + { + LOG_PRINT_L0("import file not found: " << raw_file_path); + return false; + } + + uint64_t source_height = count_blocks(import_file_path); + LOG_PRINT_L0("import file blockchain height: " << source_height); + + std::ifstream import_file; + import_file.open(import_file_path, std::ios_base::binary | std::ifstream::in); + + uint64_t h = 0; + if (import_file.fail()) + { + LOG_PRINT_L0("import_file.open() fail"); + return false; + } + char buffer1[STR_LENGTH_OF_INT + 1]; + char buffer_block[BUFFER_SIZE]; + block b; + transaction tx; + int quit = 0; + uint64_t bytes_read = 0; + + uint64_t start_height = 1; + if (opt_resume) + start_height = simple_core.m_storage.get_current_blockchain_height(); + + // Note that a new blockchain will start with a height of 1 (block number 0) + // due to genesis block being added at initialization. + + // CONFIG + // TODO: can expand on this, e.g. with --block-number option + uint64_t stop_height = source_height; + + // These are what we'll try to use, and they don't have to be a determination + // from source and destination blockchains, but those are the current + // defaults. + LOG_PRINT_L0("start height: " << start_height << " stop height: " << + stop_height); + + bool use_batch = false; + if (opt_batch) + { + if (simple_core.support_batch) + use_batch = true; + else + LOG_PRINT_L0("WARNING: batch transactions enabled but unsupported or unnecessary for this database engine - ignoring"); + } + + if (use_batch) + simple_core.batch_start(); + + LOG_PRINT_L0("Reading blockchain from import file..."); + std::cout << ENDL; + + // Within the loop, we skip to start_height before we start adding. + // TODO: Not a bottleneck, but we can use what's done in count_blocks() and + // only do the chunk size reads, skipping the chunk content reads until we're + // at start_height. + while (! quit) + { + int chunk_size; + import_file.read(buffer1, STR_LENGTH_OF_INT); + if (! import_file) { + std::cout << refresh_string; + LOG_PRINT_L0("End of import file reached"); + quit = 1; + break; + } + bytes_read += STR_LENGTH_OF_INT; + buffer1[STR_LENGTH_OF_INT] = '\0'; + chunk_size = atoi(buffer1); + if (chunk_size > BUFFER_SIZE) + { + LOG_PRINT_L0("WARNING: chunk_size " << chunk_size << " > BUFFER_SIZE " << BUFFER_SIZE); + throw std::runtime_error("Aborting: chunk size exceeds buffer size"); + } + if (chunk_size > 100000) + { + LOG_PRINT_L0("WARNING: chunk_size " << chunk_size << " > 100000"); + } + else if (chunk_size < 0) { + LOG_PRINT_L0("ERROR: chunk_size " << chunk_size << " < 0"); + return 2; + } + import_file.read(buffer_block, chunk_size); + if (! import_file) { + LOG_PRINT_L0("ERROR: unexpected end of import file: bytes read before error: " + << import_file.gcount() << " of chunk_size " << chunk_size); + return 2; + } + bytes_read += chunk_size; + LOG_PRINT_L3("Total bytes read: " << bytes_read); + + if (h + NUM_BLOCKS_PER_CHUNK < start_height + 1) + { + h += NUM_BLOCKS_PER_CHUNK; + continue; + } + if (h > stop_height) + { + LOG_PRINT_L0("Specified height reached - stopping. height: " << h << " block: " << h-1); + quit = 1; + break; + } + + try + { + boost::iostreams::basic_array_source<char> device(buffer_block, chunk_size); + boost::iostreams::stream<boost::iostreams::basic_array_source<char>> s(device); + boost::archive::binary_iarchive a(s); + + int display_interval = 1000; + int progress_interval = 10; + for (int chunk_ind = 0; chunk_ind < NUM_BLOCKS_PER_CHUNK; chunk_ind++) + { + h++; + if (h % display_interval == 0) + { + std::cout << refresh_string; + LOG_PRINT_L0("loading block height " << h); + } + else + { + LOG_PRINT_L3("loading block height " << h); + } + try { + a >> b; + } + catch (const std::exception& e) + { + std::cout << refresh_string; + LOG_PRINT_RED_L0("exception while de-archiving block, height=" << h); + quit = 1; + break; + } + LOG_PRINT_L2("block prev_id: " << b.prev_id << ENDL); + + if (h % progress_interval == 0) + { + std::cout << refresh_string << "block " << h-1 + << std::flush; + } + + std::vector<transaction> txs; + + int num_txs; + try + { + a >> num_txs; + } + catch (const std::exception& e) + { + std::cout << refresh_string; + LOG_PRINT_RED_L0("exception while de-archiving tx-num, height=" << h); + quit = 1; + break; + } + for(int tx_num = 1; tx_num <= num_txs; tx_num++) + { + try { + a >> tx; + } + catch (const std::exception& e) + { + LOG_PRINT_RED_L0("exception while de-archiving tx, height=" << h <<", tx_num=" << tx_num); + quit = 1; + break; + } + // if (tx_num == 1) { + // std::cout << "coinbase transaction" << ENDL; + // } + // crypto::hash hsh = null_hash; + // size_t blob_size = 0; + // NOTE: all tx hashes except for coinbase tx are available in the block data + // get_transaction_hash(tx, hsh, blob_size); + // LOG_PRINT_L0("tx " << tx_num << " " << hsh << " : " << ENDL); + // LOG_PRINT_L0(obj_to_json_str(tx) << ENDL); + + // add blocks with verification. + // for Blockchain and blockchain_storage add_new_block(). + if (opt_verify) + { + if (tx_num == 1) { + continue; // coinbase transaction. no need to insert to tx_pool. + } + // crypto::hash hsh = null_hash; + // size_t blob_size = 0; + // get_transaction_hash(tx, hsh, blob_size); + tx_verification_context tvc = AUTO_VAL_INIT(tvc); + bool r = true; + r = simple_core.m_pool.add_tx(tx, tvc, true); + if (!r) + { + LOG_PRINT_RED_L0("failed to add transaction to transaction pool, height=" << h <<", tx_num=" << tx_num); + quit = 1; + break; + } + } + else + { + // for add_block() method, without (much) processing. + // don't add coinbase transaction to txs. + // + // because add_block() calls + // add_transaction(blk_hash, blk.miner_tx) first, and + // then a for loop for the transactions in txs. + if (tx_num > 1) + { + txs.push_back(tx); + } + } + } + + if (opt_verify) + { + block_verification_context bvc = boost::value_initialized<block_verification_context>(); + simple_core.m_storage.add_new_block(b, bvc); + + if (bvc.m_verifivation_failed) + { + LOG_PRINT_L0("Failed to add block to blockchain, verification failed, height = " << h); + LOG_PRINT_L0("skipping rest of import file"); + // ok to commit previously batched data because it failed only in + // verification of potential new block with nothing added to batch + // yet + quit = 1; + break; + } + if (! bvc.m_added_to_main_chain) + { + LOG_PRINT_L0("Failed to add block to blockchain, height = " << h); + LOG_PRINT_L0("skipping rest of import file"); + // make sure we don't commit partial block data + quit = 2; + break; + } + } + else + { + size_t block_size; + difficulty_type cumulative_difficulty; + uint64_t coins_generated; + + a >> block_size; + a >> cumulative_difficulty; + a >> coins_generated; + + std::cout << refresh_string; + LOG_PRINT_L2("block_size: " << block_size); + LOG_PRINT_L2("cumulative_difficulty: " << cumulative_difficulty); + LOG_PRINT_L2("coins_generated: " << coins_generated); + + try + { + simple_core.add_block(b, block_size, cumulative_difficulty, coins_generated, txs); + } + catch (const std::exception& e) + { + std::cout << refresh_string; + LOG_PRINT_RED_L0("Error adding block to blockchain: " << e.what()); + quit = 2; // make sure we don't commit partial block data + break; + } + } + + if (use_batch) + { + if (h % db_batch_size == 0) + { + std::cout << refresh_string; + std::cout << ENDL << "[- batch commit at height " << h << " -]" << ENDL; + simple_core.batch_stop(); + simple_core.batch_start(); + std::cout << ENDL; +#if !defined(BLOCKCHAIN_DB) || (BLOCKCHAIN_DB == DB_LMDB) + simple_core.m_storage.get_db().show_stats(); +#endif + } + } + } + } + catch (const std::exception& e) + { + std::cout << refresh_string; + LOG_PRINT_RED_L0("exception while reading from import file, height=" << h); + return 2; + } + } // while + + import_file.close(); + + if (use_batch) + { + if (quit > 1) + { + // There was an error, so don't commit pending data. + // Destructor will abort write txn. + } + else + { + simple_core.batch_stop(); + } +#if !defined(BLOCKCHAIN_DB) || (BLOCKCHAIN_DB == DB_LMDB) + simple_core.m_storage.get_db().show_stats(); +#endif + if (h > 0) + LOG_PRINT_L0("Finished at height: " << h << " block: " << h-1); + } + std::cout << ENDL; + return 0; +} + +int main(int argc, char* argv[]) +{ + std::string import_filename = BLOCKCHAIN_RAW; +#if defined(BLOCKCHAIN_DB) && (BLOCKCHAIN_DB == DB_MEMORY) + std::string default_db_engine = "memory"; +#else + std::string default_db_engine = "lmdb"; +#endif + + uint32_t log_level = LOG_LEVEL_0; + std::string dirname; + std::string db_arg_str; + + boost::filesystem::path default_data_path {tools::get_default_data_dir()}; + boost::filesystem::path default_testnet_data_path {default_data_path / "testnet"}; + + po::options_description desc_cmd_only("Command line options"); + po::options_description desc_cmd_sett("Command line options and settings options"); + const command_line::arg_descriptor<uint32_t> arg_log_level = {"log-level", "", log_level}; + const command_line::arg_descriptor<uint64_t> arg_batch_size = {"batch-size", "", db_batch_size}; + const command_line::arg_descriptor<bool> arg_testnet_on = { + "testnet" + , "Run on testnet." + , false + }; + const command_line::arg_descriptor<bool> arg_count_blocks = { + "count-blocks" + , "Count blocks in import file and exit" + , false + }; + const command_line::arg_descriptor<std::string> arg_database = { + "database", "available: memory, lmdb" + , default_db_engine + }; + const command_line::arg_descriptor<bool> arg_verify = {"verify", + "Verify blocks and transactions during import", true}; + const command_line::arg_descriptor<bool> arg_batch = {"batch", + "Batch transactions for faster import", true}; + const command_line::arg_descriptor<bool> arg_resume = {"resume", + "Resume from current height if output database already exists", true}; + + command_line::add_arg(desc_cmd_sett, command_line::arg_data_dir, default_data_path.string()); + command_line::add_arg(desc_cmd_sett, command_line::arg_testnet_data_dir, default_testnet_data_path.string()); + command_line::add_arg(desc_cmd_sett, arg_log_level); + command_line::add_arg(desc_cmd_sett, arg_batch_size); + command_line::add_arg(desc_cmd_sett, arg_testnet_on); + command_line::add_arg(desc_cmd_sett, arg_database); + + command_line::add_arg(desc_cmd_only, arg_count_blocks); + command_line::add_arg(desc_cmd_only, command_line::arg_help); + + // call add_options() directly for these arguments since + // command_line helpers support only boolean switch, not boolean argument + desc_cmd_sett.add_options() + (arg_verify.name, make_semantic(arg_verify), arg_verify.description) + (arg_batch.name, make_semantic(arg_batch), arg_batch.description) + (arg_resume.name, make_semantic(arg_resume), arg_resume.description) + ; + + po::options_description desc_options("Allowed options"); + desc_options.add(desc_cmd_only).add(desc_cmd_sett); + + po::variables_map vm; + bool r = command_line::handle_error_helper(desc_options, [&]() + { + po::store(po::parse_command_line(argc, argv, desc_options), vm); + po::notify(vm); + return true; + }); + if (! r) + return 1; + + log_level = command_line::get_arg(vm, arg_log_level); + opt_verify = command_line::get_arg(vm, arg_verify); + opt_batch = command_line::get_arg(vm, arg_batch); + opt_resume = command_line::get_arg(vm, arg_resume); + db_batch_size = command_line::get_arg(vm, arg_batch_size); + + if (command_line::get_arg(vm, command_line::arg_help)) + { + std::cout << CRYPTONOTE_NAME << " v" << MONERO_VERSION_FULL << ENDL << ENDL; + std::cout << desc_options << std::endl; + return 1; + } + + if (! opt_batch && ! vm["batch-size"].defaulted()) + { + std::cerr << "Error: batch-size set, but batch option not enabled" << ENDL; + exit(1); + } + if (! db_batch_size) + { + std::cerr << "Error: batch-size must be > 0" << ENDL; + exit(1); + } + + std::vector<std::string> db_engines {"memory", "lmdb"}; + + opt_testnet = command_line::get_arg(vm, arg_testnet_on); + auto data_dir_arg = opt_testnet ? command_line::arg_testnet_data_dir : command_line::arg_data_dir; + dirname = command_line::get_arg(vm, data_dir_arg); + db_arg_str = command_line::get_arg(vm, arg_database); + + log_space::get_set_log_detalisation_level(true, log_level); + log_space::log_singletone::add_logger(LOGGER_CONSOLE, NULL, NULL); + LOG_PRINT_L0("Starting..."); + LOG_PRINT_L0("Setting log level = " << log_level); + + boost::filesystem::path file_path {dirname}; + std::string import_file_path; + + import_file_path = (file_path / "export" / import_filename).string(); + + if (command_line::has_arg(vm, arg_count_blocks)) + { + count_blocks(import_file_path); + exit(0); + } + + + std::string db_engine; + int mdb_flags = 0; + int res = 0; + res = parse_db_arguments(db_arg_str, db_engine, mdb_flags); + if (res) + { + std::cerr << "Error parsing database argument(s)" << ENDL; + exit(1); + } + + if (std::find(db_engines.begin(), db_engines.end(), db_engine) == db_engines.end()) + { + std::cerr << "Invalid database engine: " << db_engine << std::endl; + exit(1); + } + + LOG_PRINT_L0("database: " << db_engine); + LOG_PRINT_L0("verify: " << std::boolalpha << opt_verify << std::noboolalpha); + if (opt_batch) + { + LOG_PRINT_L0("batch: " << std::boolalpha << opt_batch << std::noboolalpha + << " batch size: " << db_batch_size); + } + else + { + LOG_PRINT_L0("batch: " << std::boolalpha << opt_batch << std::noboolalpha); + } + LOG_PRINT_L0("resume: " << std::boolalpha << opt_resume << std::noboolalpha); + LOG_PRINT_L0("testnet: " << std::boolalpha << opt_testnet << std::noboolalpha); + + std::cout << "import file path: " << import_file_path << ENDL; + std::cout << "database path: " << file_path.string() << ENDL; + + try + { + + // fake_core needed for verification to work when enabled. + // + // NOTE: don't need fake_core method of doing things when we're going to call + // BlockchainDB add_block() directly and have available the 3 block + // properties to do so. Both ways work, but fake core isn't necessary in that + // circumstance. + + // for multi_db_runtime: +#if !defined(BLOCKCHAIN_DB) + if (db_engine == "lmdb") + { + fake_core_lmdb simple_core(dirname, opt_testnet, opt_batch, mdb_flags); + import_from_file(simple_core, import_file_path); + } + else if (db_engine == "memory") + { + fake_core_memory simple_core(dirname, opt_testnet); + import_from_file(simple_core, import_file_path); + } + else + { + std::cerr << "database engine unrecognized" << ENDL; + exit(1); + } + + // for multi_db_compile: +#else + if (db_engine != default_db_engine) + { + std::cerr << "Invalid database engine for compiled version: " << db_engine << std::endl; + exit(1); + } +#if BLOCKCHAIN_DB == DB_LMDB + fake_core_lmdb simple_core(dirname, opt_testnet, opt_batch, mdb_flags); +#else + fake_core_memory simple_core(dirname, opt_testnet); +#endif + + import_from_file(simple_core, import_file_path); +#endif + + } + catch (const DB_ERROR& e) + { + std::cout << std::string("Error loading blockchain db: ") + e.what() + " -- shutting down now" << ENDL; + exit(1); + } + + // destructors called at exit: + // + // ensure db closed + // - transactions properly checked and handled + // - disk sync if needed + // + // fake_core object's destructor is called when it goes out of scope. For an + // LMDB fake_core, it calls Blockchain::deinit() on its object, which in turn + // calls delete on its BlockchainDB derived class' object, which closes its + // files. +} diff --git a/src/blockchain_converter/fake_core.h b/src/blockchain_converter/fake_core.h new file mode 100644 index 000000000..175cb4660 --- /dev/null +++ b/src/blockchain_converter/fake_core.h @@ -0,0 +1,143 @@ +// Copyright (c) 2014-2015, The Monero Project +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without modification, are +// permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of +// conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list +// of conditions and the following disclaimer in the documentation and/or other +// materials provided with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be +// used to endorse or promote products derived from this software without specific +// prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF +// THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#pragma once + +#include <boost/filesystem.hpp> +#include "cryptonote_core/blockchain.h" // BlockchainDB and LMDB +#include "cryptonote_core/blockchain_storage.h" // in-memory DB + +using namespace cryptonote; + + +#if !defined(BLOCKCHAIN_DB) || BLOCKCHAIN_DB == DB_LMDB + +struct fake_core_lmdb +{ + Blockchain m_storage; + tx_memory_pool m_pool; + bool support_batch; + bool support_add_block; + + // for multi_db_runtime: +#if !defined(BLOCKCHAIN_DB) + fake_core_lmdb(const boost::filesystem::path &path, const bool use_testnet=false, const bool do_batch=true, const int mdb_flags=0) : m_pool(&m_storage), m_storage(m_pool) + // for multi_db_compile: +#else + fake_core_lmdb(const boost::filesystem::path &path, const bool use_testnet=false, const bool do_batch=true, const int mdb_flags=0) : m_pool(m_storage), m_storage(m_pool) +#endif + { + m_pool.init(path.string()); + m_storage.init(path.string(), use_testnet, mdb_flags); + if (do_batch) + m_storage.get_db().set_batch_transactions(do_batch); + support_batch = true; + support_add_block = true; + } + ~fake_core_lmdb() + { + m_storage.deinit(); + } + + uint64_t add_block(const block& blk + , const size_t& block_size + , const difficulty_type& cumulative_difficulty + , const uint64_t& coins_generated + , const std::vector<transaction>& txs + ) + { + return m_storage.get_db().add_block(blk, block_size, cumulative_difficulty, coins_generated, txs); + } + + void batch_start() + { + m_storage.get_db().batch_start(); + } + + void batch_stop() + { + m_storage.get_db().batch_stop(); + } + +}; +#endif + +#if !defined(BLOCKCHAIN_DB) || BLOCKCHAIN_DB == DB_MEMORY + +struct fake_core_memory +{ + blockchain_storage m_storage; + tx_memory_pool m_pool; + bool support_batch; + bool support_add_block; + + // for multi_db_runtime: +#if !defined(BLOCKCHAIN_DB) + fake_core_memory(const boost::filesystem::path &path, const bool use_testnet=false) : m_pool(&m_storage), m_storage(m_pool) +#else + // for multi_db_compile: + fake_core_memory(const boost::filesystem::path &path, const bool use_testnet=false) : m_pool(m_storage), m_storage(&m_pool) +#endif + { + m_pool.init(path.string()); + m_storage.init(path.string(), use_testnet); + support_batch = false; + support_add_block = false; + } + ~fake_core_memory() + { + LOG_PRINT_L3("fake_core_memory() destructor called - want to see it ripple down"); + m_storage.deinit(); + } + + uint64_t add_block(const block& blk + , const size_t& block_size + , const difficulty_type& cumulative_difficulty + , const uint64_t& coins_generated + , const std::vector<transaction>& txs + ) + { + // TODO: + // would need to refactor handle_block_to_main_chain() to have a direct add_block() method like Blockchain class + throw std::runtime_error("direct add_block() method not implemented for in-memory db"); + return 2; + } + + void batch_start() + { + LOG_PRINT_L0("WARNING: [batch_start] opt_batch set, but this database doesn't support/need transactions - ignoring"); + } + + void batch_stop() + { + LOG_PRINT_L0("WARNING: [batch_stop] opt_batch set, but this database doesn't support/need transactions - ignoring"); + } + +}; + +#endif diff --git a/src/blockchain_converter/import.h b/src/blockchain_converter/import.h new file mode 100644 index 000000000..632b4c0d9 --- /dev/null +++ b/src/blockchain_converter/import.h @@ -0,0 +1,37 @@ +// Copyright (c) 2014-2015, The Monero Project +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without modification, are +// permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of +// conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list +// of conditions and the following disclaimer in the documentation and/or other +// materials provided with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be +// used to endorse or promote products derived from this software without specific +// prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF +// THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#pragma once + +// TODO: bounds checking is done before writing to buffer, but buffer size +// should be a sensible maximum +#define BUFFER_SIZE 1000000 +#define NUM_BLOCKS_PER_CHUNK 1 +#define STR_LENGTH_OF_INT 9 +#define STR_FORMAT_OF_INT "%09d" +#define BLOCKCHAIN_RAW "blockchain.raw" diff --git a/src/blockchain_db/CMakeLists.txt b/src/blockchain_db/CMakeLists.txt new file mode 100644 index 000000000..84b2d6a74 --- /dev/null +++ b/src/blockchain_db/CMakeLists.txt @@ -0,0 +1,60 @@ +# Copyright (c) 2014-2015, The Monero Project +# +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, are +# permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this list of +# conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, this list +# of conditions and the following disclaimer in the documentation and/or other +# materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its contributors may be +# used to endorse or promote products derived from this software without specific +# prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +# THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF +# THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +set(blockchain_db_sources + blockchain_db.cpp + lmdb/db_lmdb.cpp + ) + +set(blockchain_db_headers) + +set(blockchain_db_private_headers + blockchain_db.h + lmdb/db_lmdb.h + ) + +bitmonero_private_headers(blockchain_db + ${crypto_private_headers}) +bitmonero_add_library(blockchain_db + ${blockchain_db_sources} + ${blockchain_db_headers} + ${blockchain_db_private_headers}) +target_link_libraries(blockchain_db + LINK_PUBLIC + common + crypto + cryptonote_core + ${Boost_DATE_TIME_LIBRARY} + ${Boost_PROGRAM_OPTIONS_LIBRARY} + ${Boost_SERIALIZATION_LIBRARY} + LINK_PRIVATE + ${Boost_FILESYSTEM_LIBRARY} + ${Boost_SYSTEM_LIBRARY} + ${Boost_THREAD_LIBRARY} + ${LMDB_LIBRARY} + ${EXTRA_LIBRARIES}) diff --git a/src/blockchain_db/blockchain_db.cpp b/src/blockchain_db/blockchain_db.cpp new file mode 100644 index 000000000..8b08d3805 --- /dev/null +++ b/src/blockchain_db/blockchain_db.cpp @@ -0,0 +1,180 @@ +// Copyright (c) 2014, The Monero Project +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without modification, are +// permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of +// conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list +// of conditions and the following disclaimer in the documentation and/or other +// materials provided with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be +// used to endorse or promote products derived from this software without specific +// prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF +// THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "blockchain_db.h" +#include "cryptonote_core/cryptonote_format_utils.h" +#include "profile_tools.h" + +using epee::string_tools::pod_to_hex; + +namespace cryptonote +{ + +void BlockchainDB::pop_block() +{ + block blk; + std::vector<transaction> txs; + pop_block(blk, txs); +} + +void BlockchainDB::add_transaction(const crypto::hash& blk_hash, const transaction& tx, const crypto::hash* tx_hash_ptr) +{ + crypto::hash tx_hash; + if (!tx_hash_ptr) + { + // should only need to compute hash for miner transactions + tx_hash = get_transaction_hash(tx); + LOG_PRINT_L3("null tx_hash_ptr - needed to compute: " << tx_hash); + } + else + { + tx_hash = *tx_hash_ptr; + } + + add_transaction_data(blk_hash, tx, tx_hash); + + // iterate tx.vout using indices instead of C++11 foreach syntax because + // we need the index + if (tx.vout.size() != 0) // it may be technically possible for a tx to have no outputs + { + for (uint64_t i = 0; i < tx.vout.size(); ++i) + { + add_output(tx_hash, tx.vout[i], i); + } + + for (const txin_v& tx_input : tx.vin) + { + if (tx_input.type() == typeid(txin_to_key)) + { + add_spent_key(boost::get<txin_to_key>(tx_input).k_image); + } + } + } +} + +uint64_t BlockchainDB::add_block( const block& blk + , const size_t& block_size + , const difficulty_type& cumulative_difficulty + , const uint64_t& coins_generated + , const std::vector<transaction>& txs + ) +{ + TIME_MEASURE_START(time1); + crypto::hash blk_hash = get_block_hash(blk); + TIME_MEASURE_FINISH(time1); + time_blk_hash += time1; + + // call out to subclass implementation to add the block & metadata + time1 = epee::misc_utils::get_tick_count(); + add_block(blk, block_size, cumulative_difficulty, coins_generated, blk_hash); + TIME_MEASURE_FINISH(time1); + time_add_block1 += time1; + + // call out to add the transactions + + time1 = epee::misc_utils::get_tick_count(); + add_transaction(blk_hash, blk.miner_tx); + int tx_i = 0; + crypto::hash tx_hash = null_hash; + for (const transaction& tx : txs) + { + tx_hash = blk.tx_hashes[tx_i]; + add_transaction(blk_hash, tx, &tx_hash); + ++tx_i; + } + TIME_MEASURE_FINISH(time1); + time_add_transaction += time1; + + ++num_calls; + + return height(); +} + +void BlockchainDB::pop_block(block& blk, std::vector<transaction>& txs) +{ + blk = get_top_block(); + + remove_block(); + + remove_transaction(get_transaction_hash(blk.miner_tx)); + for (const auto& h : blk.tx_hashes) + { + txs.push_back(get_tx(h)); + remove_transaction(h); + } +} + +void BlockchainDB::remove_transaction(const crypto::hash& tx_hash) +{ + transaction tx = get_tx(tx_hash); + + for (const txin_v& tx_input : tx.vin) + { + if (tx_input.type() == typeid(txin_to_key)) + { + remove_spent_key(boost::get<txin_to_key>(tx_input).k_image); + } + } + + // need tx as tx.vout has the tx outputs, and the output amounts are needed + remove_transaction_data(tx_hash, tx); +} + +void BlockchainDB::reset_stats() +{ + num_calls = 0; + time_blk_hash = 0; + time_tx_exists = 0; + time_add_block1 = 0; + time_add_transaction = 0; + time_commit1 = 0; +} + +void BlockchainDB::show_stats() +{ + LOG_PRINT_L1(ENDL + << "*********************************" + << ENDL + << "num_calls: " << num_calls + << ENDL + << "time_blk_hash: " << time_blk_hash << "ms" + << ENDL + << "time_tx_exists: " << time_tx_exists << "ms" + << ENDL + << "time_add_block1: " << time_add_block1 << "ms" + << ENDL + << "time_add_transaction: " << time_add_transaction << "ms" + << ENDL + << "time_commit1: " << time_commit1 << "ms" + << ENDL + << "*********************************" + << ENDL + ); +} + +} // namespace cryptonote diff --git a/src/blockchain_db/blockchain_db.h b/src/blockchain_db/blockchain_db.h new file mode 100644 index 000000000..7b6b55a40 --- /dev/null +++ b/src/blockchain_db/blockchain_db.h @@ -0,0 +1,490 @@ +// Copyright (c) 2014, The Monero Project +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without modification, are +// permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of +// conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list +// of conditions and the following disclaimer in the documentation and/or other +// materials provided with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be +// used to endorse or promote products derived from this software without specific +// prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF +// THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#ifndef BLOCKCHAIN_DB_H +#define BLOCKCHAIN_DB_H + +#include <list> +#include <string> +#include <exception> +#include "crypto/hash.h" +#include "cryptonote_core/cryptonote_basic.h" +#include "cryptonote_core/difficulty.h" + +/* DB Driver Interface + * + * The DB interface is a store for the canonical block chain. + * It serves as a persistent storage for the blockchain. + * + * For the sake of efficiency, the reference implementation will also + * store some blockchain data outside of the blocks, such as spent + * transfer key images, unspent transaction outputs, etc. + * + * Transactions are duplicated so that we don't have to fetch a whole block + * in order to fetch a transaction from that block. If this is deemed + * unnecessary later, this can change. + * + * Spent key images are duplicated outside of the blocks so it is quick + * to verify an output hasn't already been spent + * + * Unspent transaction outputs are duplicated to quickly gather random + * outputs to use for mixins + * + * IMPORTANT: + * A concrete implementation of this interface should populate these + * duplicated members! It is possible to have a partial implementation + * of this interface call to private members of the interface to be added + * later that will then populate as needed. + * + * General: + * open() + * close() + * sync() + * reset() + * + * Lock and unlock provided for reorg externally, and for block + * additions internally, this way threaded reads are completely fine + * unless the blockchain is changing. + * bool lock() + * unlock() + * + * vector<str> get_filenames() + * + * Blocks: + * bool block_exists(hash) + * height add_block(block, block_size, cumulative_difficulty, coins_generated, transactions) + * block get_block(hash) + * height get_block_height(hash) + * header get_block_header(hash) + * block get_block_from_height(height) + * size_t get_block_size(height) + * difficulty get_block_cumulative_difficulty(height) + * uint64_t get_block_already_generated_coins(height) + * uint64_t get_block_timestamp(height) + * uint64_t get_top_block_timestamp() + * hash get_block_hash_from_height(height) + * blocks get_blocks_range(height1, height2) + * hashes get_hashes_range(height1, height2) + * hash top_block_hash() + * block get_top_block() + * height height() + * void pop_block(block&, tx_list&) + * + * Transactions: + * bool tx_exists(hash) + * uint64_t get_tx_unlock_time(hash) + * tx get_tx(hash) + * uint64_t get_tx_count() + * tx_list get_tx_list(hash_list) + * height get_tx_block_height(hash) + * + * Outputs: + * index get_random_output(amount) + * uint64_t get_num_outputs(amount) + * pub_key get_output_key(amount, index) + * tx_out get_output(tx_hash, index) + * hash,index get_output_tx_and_index_from_global(index) + * hash,index get_output_tx_and_index(amount, index) + * vec<uint64> get_tx_output_indices(tx_hash) + * + * + * Spent Output Key Images: + * bool has_key_image(key_image) + * + * Exceptions: + * DB_ERROR -- generic + * DB_OPEN_FAILURE + * DB_CREATE_FAILURE + * DB_SYNC_FAILURE + * BLOCK_DNE + * BLOCK_PARENT_DNE + * BLOCK_EXISTS + * BLOCK_INVALID -- considering making this multiple errors + * TX_DNE + * TX_EXISTS + * OUTPUT_DNE + * OUTPUT_EXISTS + * KEY_IMAGE_EXISTS + */ + +namespace cryptonote +{ + +// typedef for convenience +typedef std::pair<crypto::hash, uint64_t> tx_out_index; + +/*********************************** + * Exception Definitions + ***********************************/ +class DB_EXCEPTION : public std::exception +{ + private: + std::string m; + + protected: + DB_EXCEPTION(const char *s) : m(s) { } + + public: + virtual ~DB_EXCEPTION() { } + + const char* what() const throw() + { + return m.c_str(); + } +}; + +class DB_ERROR : public DB_EXCEPTION +{ + public: + DB_ERROR() : DB_EXCEPTION("Generic DB Error") { } + DB_ERROR(const char* s) : DB_EXCEPTION(s) { } +}; + +class DB_OPEN_FAILURE : public DB_EXCEPTION +{ + public: + DB_OPEN_FAILURE() : DB_EXCEPTION("Failed to open the db") { } + DB_OPEN_FAILURE(const char* s) : DB_EXCEPTION(s) { } +}; + +class DB_CREATE_FAILURE : public DB_EXCEPTION +{ + public: + DB_CREATE_FAILURE() : DB_EXCEPTION("Failed to create the db") { } + DB_CREATE_FAILURE(const char* s) : DB_EXCEPTION(s) { } +}; + +class DB_SYNC_FAILURE : public DB_EXCEPTION +{ + public: + DB_SYNC_FAILURE() : DB_EXCEPTION("Failed to sync the db") { } + DB_SYNC_FAILURE(const char* s) : DB_EXCEPTION(s) { } +}; + +class BLOCK_DNE : public DB_EXCEPTION +{ + public: + BLOCK_DNE() : DB_EXCEPTION("The block requested does not exist") { } + BLOCK_DNE(const char* s) : DB_EXCEPTION(s) { } +}; + +class BLOCK_PARENT_DNE : public DB_EXCEPTION +{ + public: + BLOCK_PARENT_DNE() : DB_EXCEPTION("The parent of the block does not exist") { } + BLOCK_PARENT_DNE(const char* s) : DB_EXCEPTION(s) { } +}; + +class BLOCK_EXISTS : public DB_EXCEPTION +{ + public: + BLOCK_EXISTS() : DB_EXCEPTION("The block to be added already exists!") { } + BLOCK_EXISTS(const char* s) : DB_EXCEPTION(s) { } +}; + +class BLOCK_INVALID : public DB_EXCEPTION +{ + public: + BLOCK_INVALID() : DB_EXCEPTION("The block to be added did not pass validation!") { } + BLOCK_INVALID(const char* s) : DB_EXCEPTION(s) { } +}; + +class TX_DNE : public DB_EXCEPTION +{ + public: + TX_DNE() : DB_EXCEPTION("The transaction requested does not exist") { } + TX_DNE(const char* s) : DB_EXCEPTION(s) { } +}; + +class TX_EXISTS : public DB_EXCEPTION +{ + public: + TX_EXISTS() : DB_EXCEPTION("The transaction to be added already exists!") { } + TX_EXISTS(const char* s) : DB_EXCEPTION(s) { } +}; + +class OUTPUT_DNE : public DB_EXCEPTION +{ + public: + OUTPUT_DNE() : DB_EXCEPTION("The output requested does not exist!") { } + OUTPUT_DNE(const char* s) : DB_EXCEPTION(s) { } +}; + +class OUTPUT_EXISTS : public DB_EXCEPTION +{ + public: + OUTPUT_EXISTS() : DB_EXCEPTION("The output to be added already exists!") { } + OUTPUT_EXISTS(const char* s) : DB_EXCEPTION(s) { } +}; + +class KEY_IMAGE_EXISTS : public DB_EXCEPTION +{ + public: + KEY_IMAGE_EXISTS() : DB_EXCEPTION("The spent key image to be added already exists!") { } + KEY_IMAGE_EXISTS(const char* s) : DB_EXCEPTION(s) { } +}; + +/*********************************** + * End of Exception Definitions + ***********************************/ + + +class BlockchainDB +{ +private: + /********************************************************************* + * private virtual members + *********************************************************************/ + + // tells the subclass to add the block and metadata to storage + virtual void add_block( const block& blk + , const size_t& block_size + , const difficulty_type& cumulative_difficulty + , const uint64_t& coins_generated + , const crypto::hash& blk_hash + ) = 0; + + // tells the subclass to remove data about the top block + virtual void remove_block() = 0; + + // tells the subclass to store the transaction and its metadata + virtual void add_transaction_data(const crypto::hash& blk_hash, const transaction& tx, const crypto::hash& tx_hash) = 0; + + // tells the subclass to remove data about a transaction + virtual void remove_transaction_data(const crypto::hash& tx_hash, const transaction& tx) = 0; + + // tells the subclass to store an output + virtual void add_output(const crypto::hash& tx_hash, const tx_out& tx_output, const uint64_t& local_index) = 0; + + // tells the subclass to remove an output + virtual void remove_output(const tx_out& tx_output) = 0; + + // tells the subclass to store a spent key + virtual void add_spent_key(const crypto::key_image& k_image) = 0; + + // tells the subclass to remove a spent key + virtual void remove_spent_key(const crypto::key_image& k_image) = 0; + + + /********************************************************************* + * private concrete members + *********************************************************************/ + // private version of pop_block, for undoing if an add_block goes tits up + void pop_block(); + + // helper function for add_transactions, to add each individual tx + void add_transaction(const crypto::hash& blk_hash, const transaction& tx, const crypto::hash* tx_hash_ptr = NULL); + + // helper function to remove transaction from blockchain + void remove_transaction(const crypto::hash& tx_hash); + + uint64_t num_calls = 0; + uint64_t time_blk_hash = 0; + uint64_t time_add_block1 = 0; + uint64_t time_add_transaction = 0; + + +protected: + + mutable uint64_t time_tx_exists = 0; + uint64_t time_commit1 = 0; + + +public: + + // virtual dtor + virtual ~BlockchainDB() { }; + + // reset profiling stats + void reset_stats(); + + // show profiling stats + void show_stats(); + + // open the db at location <filename>, or create it if there isn't one. + virtual void open(const std::string& filename, const int db_flags = 0) = 0; + + // make sure implementation has a create function as well + virtual void create(const std::string& filename) = 0; + + // close and sync the db + virtual void close() = 0; + + // sync the db + virtual void sync() = 0; + + // reset the db -- USE WITH CARE + virtual void reset() = 0; + + // get all files used by this db (if any) + virtual std::vector<std::string> get_filenames() const = 0; + + // return the name of the folder the db's file(s) should reside in + virtual std::string get_db_name() const = 0; + + + // FIXME: these are just for functionality mocking, need to implement + // RAII-friendly and multi-read one-write friendly locking mechanism + // + // acquire db lock + virtual bool lock() = 0; + + // release db lock + virtual void unlock() = 0; + + virtual void batch_start() = 0; + virtual void batch_stop() = 0; + virtual void set_batch_transactions(bool) = 0; + + // adds a block with the given metadata to the top of the blockchain, returns the new height + // NOTE: subclass implementations of this (or the functions it calls) need + // to handle undoing any partially-added blocks in the event of a failure. + virtual uint64_t add_block( const block& blk + , const size_t& block_size + , const difficulty_type& cumulative_difficulty + , const uint64_t& coins_generated + , const std::vector<transaction>& txs + ); + + // return true if a block with hash <h> exists in the blockchain + virtual bool block_exists(const crypto::hash& h) const = 0; + + // return block with hash <h> + virtual block get_block(const crypto::hash& h) const = 0; + + // return the height of the block with hash <h> on the blockchain, + // throw if it doesn't exist + virtual uint64_t get_block_height(const crypto::hash& h) const = 0; + + // return header for block with hash <h> + virtual block_header get_block_header(const crypto::hash& h) const = 0; + + // return block at height <height> + virtual block get_block_from_height(const uint64_t& height) const = 0; + + // return timestamp of block at height <height> + virtual uint64_t get_block_timestamp(const uint64_t& height) const = 0; + + // return timestamp of most recent block + virtual uint64_t get_top_block_timestamp() const = 0; + + // return block size of block at height <height> + virtual size_t get_block_size(const uint64_t& height) const = 0; + + // return cumulative difficulty up to and including block at height <height> + virtual difficulty_type get_block_cumulative_difficulty(const uint64_t& height) const = 0; + + // return difficulty of block at height <height> + virtual difficulty_type get_block_difficulty(const uint64_t& height) const = 0; + + // return number of coins generated up to and including block at height <height> + virtual uint64_t get_block_already_generated_coins(const uint64_t& height) const = 0; + + // return hash of block at height <height> + virtual crypto::hash get_block_hash_from_height(const uint64_t& height) const = 0; + + // return vector of blocks in range <h1,h2> of height (inclusively) + virtual std::vector<block> get_blocks_range(const uint64_t& h1, const uint64_t& h2) const = 0; + + // return vector of block hashes in range <h1, h2> of height (inclusively) + virtual std::vector<crypto::hash> get_hashes_range(const uint64_t& h1, const uint64_t& h2) const = 0; + + // return the hash of the top block on the chain + virtual crypto::hash top_block_hash() const = 0; + + // return the block at the top of the blockchain + virtual block get_top_block() const = 0; + + // return the height of the chain + virtual uint64_t height() const = 0; + + // pops the top block off the blockchain. + // Returns by reference the popped block and its associated transactions + // + // IMPORTANT: + // When a block is popped, the transactions associated with it need to be + // removed, as well as outputs and spent key images associated with + // those transactions. + virtual void pop_block(block& blk, std::vector<transaction>& txs); + + + // return true if a transaction with hash <h> exists + virtual bool tx_exists(const crypto::hash& h) const = 0; + + // return unlock time of tx with hash <h> + virtual uint64_t get_tx_unlock_time(const crypto::hash& h) const = 0; + + // return tx with hash <h> + // throw if no such tx exists + virtual transaction get_tx(const crypto::hash& h) const = 0; + + // returns the total number of transactions in all blocks + virtual uint64_t get_tx_count() const = 0; + + // return list of tx with hashes <hlist>. + // TODO: decide if a missing hash means return empty list + // or just skip that hash + virtual std::vector<transaction> get_tx_list(const std::vector<crypto::hash>& hlist) const = 0; + + // returns height of block that contains transaction with hash <h> + virtual uint64_t get_tx_block_height(const crypto::hash& h) const = 0; + + // return global output index of a random output of amount <amount> + virtual uint64_t get_random_output(const uint64_t& amount) const = 0; + + // returns the total number of outputs of amount <amount> + virtual uint64_t get_num_outputs(const uint64_t& amount) const = 0; + + // return public key for output with global output amount <amount> and index <index> + virtual crypto::public_key get_output_key(const uint64_t& amount, const uint64_t& index) const = 0; + + // returns the output indexed by <index> in the transaction with hash <h> + virtual tx_out get_output(const crypto::hash& h, const uint64_t& index) const = 0; + + // returns the tx hash associated with an output, referenced by global output index + virtual tx_out_index get_output_tx_and_index_from_global(const uint64_t& index) const = 0; + + // returns the transaction-local reference for the output with <amount> at <index> + // return type is pair of tx hash and index + virtual tx_out_index get_output_tx_and_index(const uint64_t& amount, const uint64_t& index) const = 0; + + // return a vector of indices corresponding to the global output index for + // each output in the transaction with hash <h> + virtual std::vector<uint64_t> get_tx_output_indices(const crypto::hash& h) const = 0; + // return a vector of indices corresponding to the amount output index for + // each output in the transaction with hash <h> + virtual std::vector<uint64_t> get_tx_amount_output_indices(const crypto::hash& h) const = 0; + + // returns true if key image <img> is present in spent key images storage + virtual bool has_key_image(const crypto::key_image& img) const = 0; + +}; // class BlockchainDB + + +} // namespace cryptonote + +#endif // BLOCKCHAIN_DB_H diff --git a/src/blockchain_db/lmdb/db_lmdb.cpp b/src/blockchain_db/lmdb/db_lmdb.cpp new file mode 100644 index 000000000..b1da6308f --- /dev/null +++ b/src/blockchain_db/lmdb/db_lmdb.cpp @@ -0,0 +1,1821 @@ +// Copyright (c) 2014, The Monero Project +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without modification, are +// permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of +// conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list +// of conditions and the following disclaimer in the documentation and/or other +// materials provided with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be +// used to endorse or promote products derived from this software without specific +// prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF +// THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "db_lmdb.h" + +#include <boost/filesystem.hpp> +#include <memory> // std::unique_ptr +#include <cstring> // memcpy + +#include "cryptonote_core/cryptonote_format_utils.h" +#include "crypto/crypto.h" +#include "profile_tools.h" + +using epee::string_tools::pod_to_hex; + +namespace +{ + +template <typename T> +inline void throw0(const T &e) +{ + LOG_PRINT_L0(e.what()); + throw e; +} + +template <typename T> +inline void throw1(const T &e) +{ + LOG_PRINT_L1(e.what()); + throw e; +} + +// cursor needs to be closed when it goes out of scope, +// this helps if the function using it throws +struct lmdb_cur +{ + lmdb_cur(MDB_txn* txn, MDB_dbi dbi) + { + if (mdb_cursor_open(txn, dbi, &m_cur)) + throw0(cryptonote::DB_ERROR("Error opening db cursor")); + done = false; + } + + ~lmdb_cur() { close(); } + + operator MDB_cursor*() { return m_cur; } + operator MDB_cursor**() { return &m_cur; } + + void close() + { + if (!done) + { + mdb_cursor_close(m_cur); + done = true; + } + } + +private: + MDB_cursor* m_cur; + bool done; +}; + +template<typename T> +struct MDB_val_copy: public MDB_val +{ + MDB_val_copy(const T &t): t_copy(t) + { + mv_size = sizeof (T); + mv_data = &t_copy; + } +private: + T t_copy; +}; + +template<> +struct MDB_val_copy<cryptonote::blobdata>: public MDB_val +{ + MDB_val_copy(const cryptonote::blobdata &bd): data(new char[bd.size()]) + { + memcpy(data.get(), bd.data(), bd.size()); + mv_size = bd.size(); + mv_data = data.get(); + } +private: + std::unique_ptr<char[]> data; +}; + +auto compare_uint64 = [](const MDB_val *a, const MDB_val *b) { + const uint64_t va = *(const uint64_t*)a->mv_data; + const uint64_t vb = *(const uint64_t*)b->mv_data; + if (va < vb) return -1; + else if (va == vb) return 0; + else return 1; +}; + +const char* const LMDB_BLOCKS = "blocks"; +const char* const LMDB_BLOCK_TIMESTAMPS = "block_timestamps"; +const char* const LMDB_BLOCK_HEIGHTS = "block_heights"; +const char* const LMDB_BLOCK_HASHES = "block_hashes"; +const char* const LMDB_BLOCK_SIZES = "block_sizes"; +const char* const LMDB_BLOCK_DIFFS = "block_diffs"; +const char* const LMDB_BLOCK_COINS = "block_coins"; + +const char* const LMDB_TXS = "txs"; +const char* const LMDB_TX_UNLOCKS = "tx_unlocks"; +const char* const LMDB_TX_HEIGHTS = "tx_heights"; +const char* const LMDB_TX_OUTPUTS = "tx_outputs"; + +const char* const LMDB_OUTPUT_TXS = "output_txs"; +const char* const LMDB_OUTPUT_INDICES = "output_indices"; +const char* const LMDB_OUTPUT_AMOUNTS = "output_amounts"; +const char* const LMDB_OUTPUT_KEYS = "output_keys"; +const char* const LMDB_OUTPUTS = "outputs"; +const char* const LMDB_OUTPUT_GINDICES = "output_gindices"; +const char* const LMDB_SPENT_KEYS = "spent_keys"; + +inline void lmdb_db_open(MDB_txn* txn, const char* name, int flags, MDB_dbi& dbi, const std::string& error_string) +{ + if (mdb_dbi_open(txn, name, flags, &dbi)) + throw0(cryptonote::DB_OPEN_FAILURE(error_string.c_str())); +} + +} // anonymous namespace + +namespace cryptonote +{ + +// If m_batch_active is set, a batch transaction exists beyond this class, such +// as a batch import with verification enabled, or possibly (later) a batch +// network sync. +// +// For some of the lookup methods, such as get_block_timestamp(), tx_exists(), +// and get_tx(), when m_batch_active is set, the lookup uses the batch +// transaction. This isn't only because the transaction is available, but it's +// necessary so that lookups include the database updates only present in the +// current batch write. +// +// A regular network sync without batch writes is expected to open a new read +// transaction, as those lookups are part of the validation done prior to the +// write for block and tx data, so no write transaction is open at the time. + +void BlockchainLMDB::add_block( const block& blk + , const size_t& block_size + , const difficulty_type& cumulative_difficulty + , const uint64_t& coins_generated + , const crypto::hash& blk_hash + ) +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + + MDB_val_copy<crypto::hash> val_h(blk_hash); + MDB_val unused; + if (mdb_get(*m_write_txn, m_block_heights, &val_h, &unused) == 0) + throw1(BLOCK_EXISTS("Attempting to add block that's already in the db")); + + if (m_height > 0) + { + MDB_val_copy<crypto::hash> parent_key(blk.prev_id); + MDB_val parent_h; + if (mdb_get(*m_write_txn, m_block_heights, &parent_key, &parent_h)) + { + LOG_PRINT_L3("m_height: " << m_height); + LOG_PRINT_L3("parent_key: " << blk.prev_id); + throw0(DB_ERROR("Failed to get top block hash to check for new block's parent")); + } + uint64_t parent_height = *(const uint64_t *)parent_h.mv_data; + if (parent_height != m_height - 1) + throw0(BLOCK_PARENT_DNE("Top block is not new block's parent")); + } + + MDB_val_copy<uint64_t> key(m_height); + + MDB_val_copy<blobdata> blob(block_to_blob(blk)); + auto res = mdb_put(*m_write_txn, m_blocks, &key, &blob, 0); + if (res) + throw0(DB_ERROR(std::string("Failed to add block blob to db transaction: ").append(mdb_strerror(res)).c_str())); + + MDB_val_copy<size_t> sz(block_size); + if (mdb_put(*m_write_txn, m_block_sizes, &key, &sz, 0)) + throw0(DB_ERROR("Failed to add block size to db transaction")); + + MDB_val_copy<uint64_t> ts(blk.timestamp); + if (mdb_put(*m_write_txn, m_block_timestamps, &key, &ts, 0)) + throw0(DB_ERROR("Failed to add block timestamp to db transaction")); + + MDB_val_copy<difficulty_type> diff(cumulative_difficulty); + if (mdb_put(*m_write_txn, m_block_diffs, &key, &diff, 0)) + throw0(DB_ERROR("Failed to add block cumulative difficulty to db transaction")); + + MDB_val_copy<uint64_t> coinsgen(coins_generated); + if (mdb_put(*m_write_txn, m_block_coins, &key, &coinsgen, 0)) + throw0(DB_ERROR("Failed to add block total generated coins to db transaction")); + + if (mdb_put(*m_write_txn, m_block_heights, &val_h, &key, 0)) + throw0(DB_ERROR("Failed to add block height by hash to db transaction")); + + if (mdb_put(*m_write_txn, m_block_hashes, &key, &val_h, 0)) + throw0(DB_ERROR("Failed to add block hash to db transaction")); + +} + +void BlockchainLMDB::remove_block() +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + + if (m_height == 0) + throw0(BLOCK_DNE ("Attempting to remove block from an empty blockchain")); + + MDB_val_copy<uint64_t> k(m_height - 1); + MDB_val h; + if (mdb_get(*m_write_txn, m_block_hashes, &k, &h)) + throw1(BLOCK_DNE("Attempting to remove block that's not in the db")); + + if (mdb_del(*m_write_txn, m_blocks, &k, NULL)) + throw1(DB_ERROR("Failed to add removal of block to db transaction")); + + if (mdb_del(*m_write_txn, m_block_sizes, &k, NULL)) + throw1(DB_ERROR("Failed to add removal of block size to db transaction")); + + if (mdb_del(*m_write_txn, m_block_diffs, &k, NULL)) + throw1(DB_ERROR("Failed to add removal of block cumulative difficulty to db transaction")); + + if (mdb_del(*m_write_txn, m_block_coins, &k, NULL)) + throw1(DB_ERROR("Failed to add removal of block total generated coins to db transaction")); + + if (mdb_del(*m_write_txn, m_block_timestamps, &k, NULL)) + throw1(DB_ERROR("Failed to add removal of block timestamp to db transaction")); + + if (mdb_del(*m_write_txn, m_block_heights, &h, NULL)) + throw1(DB_ERROR("Failed to add removal of block height by hash to db transaction")); + + if (mdb_del(*m_write_txn, m_block_hashes, &k, NULL)) + throw1(DB_ERROR("Failed to add removal of block hash to db transaction")); +} + +void BlockchainLMDB::add_transaction_data(const crypto::hash& blk_hash, const transaction& tx, const crypto::hash& tx_hash) +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + + MDB_val_copy<crypto::hash> val_h(tx_hash); + MDB_val unused; + if (mdb_get(*m_write_txn, m_txs, &val_h, &unused) == 0) + throw1(TX_EXISTS("Attempting to add transaction that's already in the db")); + + MDB_val_copy<blobdata> blob(tx_to_blob(tx)); + if (mdb_put(*m_write_txn, m_txs, &val_h, &blob, 0)) + throw0(DB_ERROR("Failed to add tx blob to db transaction")); + + MDB_val_copy<uint64_t> height(m_height); + if (mdb_put(*m_write_txn, m_tx_heights, &val_h, &height, 0)) + throw0(DB_ERROR("Failed to add tx block height to db transaction")); + + MDB_val_copy<uint64_t> unlock_time(tx.unlock_time); + if (mdb_put(*m_write_txn, m_tx_unlocks, &val_h, &unlock_time, 0)) + throw0(DB_ERROR("Failed to add tx unlock time to db transaction")); +} + +void BlockchainLMDB::remove_transaction_data(const crypto::hash& tx_hash, const transaction& tx) +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + + MDB_val_copy<crypto::hash> val_h(tx_hash); + MDB_val unused; + if (mdb_get(*m_write_txn, m_txs, &val_h, &unused)) + throw1(TX_DNE("Attempting to remove transaction that isn't in the db")); + + if (mdb_del(*m_write_txn, m_txs, &val_h, NULL)) + throw1(DB_ERROR("Failed to add removal of tx to db transaction")); + if (mdb_del(*m_write_txn, m_tx_unlocks, &val_h, NULL)) + throw1(DB_ERROR("Failed to add removal of tx unlock time to db transaction")); + if (mdb_del(*m_write_txn, m_tx_heights, &val_h, NULL)) + throw1(DB_ERROR("Failed to add removal of tx block height to db transaction")); + + remove_tx_outputs(tx_hash, tx); + + if (mdb_del(*m_write_txn, m_tx_outputs, &val_h, NULL)) + throw1(DB_ERROR("Failed to add removal of tx outputs to db transaction")); + +} + +void BlockchainLMDB::add_output(const crypto::hash& tx_hash, const tx_out& tx_output, const uint64_t& local_index) +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + + MDB_val_copy<uint64_t> k(m_num_outputs); + MDB_val_copy<crypto::hash> v(tx_hash); + + if (mdb_put(*m_write_txn, m_output_txs, &k, &v, 0)) + throw0(DB_ERROR("Failed to add output tx hash to db transaction")); + if (mdb_put(*m_write_txn, m_tx_outputs, &v, &k, 0)) + throw0(DB_ERROR("Failed to add tx output index to db transaction")); + + MDB_val_copy<uint64_t> val_local_index(local_index); + if (mdb_put(*m_write_txn, m_output_indices, &k, &val_local_index, 0)) + throw0(DB_ERROR("Failed to add tx output index to db transaction")); + + MDB_val_copy<uint64_t> val_amount(tx_output.amount); + if (auto result = mdb_put(*m_write_txn, m_output_amounts, &val_amount, &k, 0)) + throw0(DB_ERROR(std::string("Failed to add output amount to db transaction").append(mdb_strerror(result)).c_str())); + + if (tx_output.target.type() == typeid(txout_to_key)) + { + MDB_val_copy<crypto::public_key> val_pubkey(boost::get<txout_to_key>(tx_output.target).key); + if (mdb_put(*m_write_txn, m_output_keys, &k, &val_pubkey, 0)) + throw0(DB_ERROR("Failed to add output pubkey to db transaction")); + } + + +/****** Uncomment if ever outputs actually need to be stored in this manner + * + blobdata b = output_to_blob(tx_output); + + v.mv_size = b.size(); + v.mv_data = &b; + if (mdb_put(*m_write_txn, m_outputs, &k, &v, 0)) + throw0(DB_ERROR("Failed to add output to db transaction")); + if (mdb_put(*m_write_txn, m_output_gindices, &v, &k, 0)) + throw0(DB_ERROR("Failed to add output global index to db transaction")); +************************************************************************/ + + m_num_outputs++; +} + +void BlockchainLMDB::remove_tx_outputs(const crypto::hash& tx_hash, const transaction& tx) +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + + lmdb_cur cur(*m_write_txn, m_tx_outputs); + + MDB_val_copy<crypto::hash> k(tx_hash); + MDB_val v; + + auto result = mdb_cursor_get(cur, &k, &v, MDB_SET); + if (result == MDB_NOTFOUND) + { + LOG_ERROR("Attempting to remove a tx's outputs, but none found. Continuing, but...be wary, because that's weird."); + } + else if (result) + { + throw0(DB_ERROR("DB error attempting to get an output")); + } + else + { + size_t num_elems = 0; + mdb_cursor_count(cur, &num_elems); + + mdb_cursor_get(cur, &k, &v, MDB_FIRST_DUP); + + for (uint64_t i = 0; i < num_elems; ++i) + { + const tx_out tx_output = tx.vout[i]; + remove_output(*(const uint64_t*)v.mv_data, tx_output.amount); + if (i < num_elems - 1) + { + mdb_cursor_get(cur, &k, &v, MDB_NEXT_DUP); + } + } + } + + cur.close(); +} + +// TODO: probably remove this function +void BlockchainLMDB::remove_output(const tx_out& tx_output) +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__ << " (unused version - does nothing)"); + return; +} + +void BlockchainLMDB::remove_output(const uint64_t& out_index, const uint64_t amount) +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + + MDB_val_copy<uint64_t> k(out_index); + +/****** Uncomment if ever outputs actually need to be stored in this manner + blobdata b; + t_serializable_object_to_blob(tx_output, b); + k.mv_size = b.size(); + k.mv_data = &b; + + if (mdb_get(*m_write_txn, m_output_gindices, &k, &v)) + throw1(OUTPUT_DNE("Attempting to remove output that does not exist")); + + uint64_t gindex = *(uint64_t*)v.mv_data; + + auto result = mdb_del(*m_write_txn, m_output_gindices, &k, NULL); + if (result != 0 && result != MDB_NOTFOUND) + throw1(DB_ERROR("Error adding removal of output global index to db transaction")); + + result = mdb_del(*m_write_txn, m_outputs, &v, NULL); + if (result != 0 && result != MDB_NOTFOUND) + throw1(DB_ERROR("Error adding removal of output to db transaction")); +*********************************************************************/ + + auto result = mdb_del(*m_write_txn, m_output_indices, &k, NULL); + if (result == MDB_NOTFOUND) + { + LOG_PRINT_L0("Unexpected: global output index not found in m_output_indices"); + } + else if (result) + { + throw1(DB_ERROR("Error adding removal of output tx index to db transaction")); + } + + result = mdb_del(*m_write_txn, m_output_txs, &k, NULL); + // if (result != 0 && result != MDB_NOTFOUND) + // throw1(DB_ERROR("Error adding removal of output tx hash to db transaction")); + if (result == MDB_NOTFOUND) + { + LOG_PRINT_L0("Unexpected: global output index not found in m_output_txs"); + } + else if (result) + { + throw1(DB_ERROR("Error adding removal of output tx hash to db transaction")); + } + + result = mdb_del(*m_write_txn, m_output_keys, &k, NULL); + if (result == MDB_NOTFOUND) + { + LOG_PRINT_L0("Unexpected: global output index not found in m_output_keys"); + } + else if (result) + throw1(DB_ERROR("Error adding removal of output pubkey to db transaction")); + + remove_amount_output_index(amount, out_index); + + m_num_outputs--; +} + +void BlockchainLMDB::remove_amount_output_index(const uint64_t amount, const uint64_t global_output_index) +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + + lmdb_cur cur(*m_write_txn, m_output_amounts); + + MDB_val_copy<uint64_t> k(amount); + MDB_val v; + + auto result = mdb_cursor_get(cur, &k, &v, MDB_SET); + if (result == MDB_NOTFOUND) + throw1(OUTPUT_DNE("Attempting to get an output index by amount and amount index, but amount not found")); + else if (result) + throw0(DB_ERROR("DB error attempting to get an output")); + + size_t num_elems = 0; + mdb_cursor_count(cur, &num_elems); + + mdb_cursor_get(cur, &k, &v, MDB_FIRST_DUP); + + uint64_t amount_output_index = 0; + uint64_t goi = 0; + bool found_index = false; + for (uint64_t i = 0; i < num_elems; ++i) + { + mdb_cursor_get(cur, &k, &v, MDB_GET_CURRENT); + goi = *(const uint64_t *)v.mv_data; + if (goi == global_output_index) + { + amount_output_index = i; + found_index = true; + break; + } + mdb_cursor_get(cur, &k, &v, MDB_NEXT_DUP); + } + if (found_index) + { + // found the amount output index + // now delete it + result = mdb_cursor_del(cur, 0); + if (result) + throw0(DB_ERROR(std::string("Error deleting amount output index ").append(boost::lexical_cast<std::string>(amount_output_index)).c_str())); + } + else + { + // not found + cur.close(); + throw1(OUTPUT_DNE("Failed to find amount output index")); + } + cur.close(); +} + +void BlockchainLMDB::add_spent_key(const crypto::key_image& k_image) +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + + MDB_val_copy<crypto::key_image> val_key(k_image); + MDB_val unused; + if (mdb_get(*m_write_txn, m_spent_keys, &val_key, &unused) == 0) + throw1(KEY_IMAGE_EXISTS("Attempting to add spent key image that's already in the db")); + + char anything = '\0'; + unused.mv_size = sizeof(char); + unused.mv_data = &anything; + if (auto result = mdb_put(*m_write_txn, m_spent_keys, &val_key, &unused, 0)) + throw1(DB_ERROR(std::string("Error adding spent key image to db transaction: ").append(mdb_strerror(result)).c_str())); +} + +void BlockchainLMDB::remove_spent_key(const crypto::key_image& k_image) +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + + MDB_val_copy<crypto::key_image> k(k_image); + auto result = mdb_del(*m_write_txn, m_spent_keys, &k, NULL); + if (result != 0 && result != MDB_NOTFOUND) + throw1(DB_ERROR("Error adding removal of key image to db transaction")); +} + +blobdata BlockchainLMDB::output_to_blob(const tx_out& output) +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + blobdata b; + if (!t_serializable_object_to_blob(output, b)) + throw1(DB_ERROR("Error serializing output to blob")); + return b; +} + +tx_out BlockchainLMDB::output_from_blob(const blobdata& blob) const +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + std::stringstream ss; + ss << blob; + binary_archive<false> ba(ss); + tx_out o; + + if (!(::serialization::serialize(ba, o))) + throw1(DB_ERROR("Error deserializing tx output blob")); + + return o; +} + +uint64_t BlockchainLMDB::get_output_global_index(const uint64_t& amount, const uint64_t& index) const +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + + txn_safe txn; + if (mdb_txn_begin(m_env, NULL, MDB_RDONLY, txn)) + throw0(DB_ERROR("Failed to create a transaction for the db")); + + lmdb_cur cur(txn, m_output_amounts); + + MDB_val_copy<uint64_t> k(amount); + MDB_val v; + + auto result = mdb_cursor_get(cur, &k, &v, MDB_SET); + if (result == MDB_NOTFOUND) + throw1(OUTPUT_DNE("Attempting to get an output index by amount and amount index, but amount not found")); + else if (result) + throw0(DB_ERROR("DB error attempting to get an output")); + + size_t num_elems = 0; + mdb_cursor_count(cur, &num_elems); + if (num_elems <= index) + throw1(OUTPUT_DNE("Attempting to get an output index by amount and amount index, but output not found")); + + mdb_cursor_get(cur, &k, &v, MDB_FIRST_DUP); + + for (uint64_t i = 0; i < index; ++i) + { + mdb_cursor_get(cur, &k, &v, MDB_NEXT_DUP); + } + + mdb_cursor_get(cur, &k, &v, MDB_GET_CURRENT); + + uint64_t glob_index = *(const uint64_t*)v.mv_data; + + cur.close(); + + txn.commit(); + + return glob_index; +} + +void BlockchainLMDB::check_open() const +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + if (!m_open) + throw0(DB_ERROR("DB operation attempted on a not-open DB instance")); +} + +BlockchainLMDB::~BlockchainLMDB() +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + + // batch transaction shouldn't be active at this point. If it is, consider it aborted. + if (m_batch_active) + batch_abort(); +} + +BlockchainLMDB::BlockchainLMDB(bool batch_transactions) +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + // initialize folder to something "safe" just in case + // someone accidentally misuses this class... + m_folder = "thishsouldnotexistbecauseitisgibberish"; + m_open = false; + + m_batch_transactions = batch_transactions; + m_write_txn = nullptr; + m_batch_active = false; + m_height = 0; +} + +void BlockchainLMDB::open(const std::string& filename, const int mdb_flags) +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + + if (m_open) + throw0(DB_OPEN_FAILURE("Attempted to open db, but it's already open")); + + boost::filesystem::path direc(filename); + if (boost::filesystem::exists(direc)) + { + if (!boost::filesystem::is_directory(direc)) + throw0(DB_OPEN_FAILURE("LMDB needs a directory path, but a file was passed")); + } + else + { + if (!boost::filesystem::create_directory(direc)) + throw0(DB_OPEN_FAILURE(std::string("Failed to create directory ").append(filename).c_str())); + } + + // check for existing LMDB files in base directory + boost::filesystem::path old_files = direc.parent_path(); + if (boost::filesystem::exists(old_files / "data.mdb") || + boost::filesystem::exists(old_files / "lock.mdb")) + { + LOG_PRINT_L0("Found existing LMDB files in " << old_files.c_str()); + LOG_PRINT_L0("Move data.mdb and/or lock.mdb to " << filename << ", or delete them, and then restart"); + throw DB_ERROR("Database could not be opened"); + } + + m_folder = filename; + + // set up lmdb environment + if (mdb_env_create(&m_env)) + throw0(DB_ERROR("Failed to create lmdb environment")); + if (mdb_env_set_maxdbs(m_env, 20)) + throw0(DB_ERROR("Failed to set max number of dbs")); + + size_t mapsize = 1LL << 34; + if (auto result = mdb_env_set_mapsize(m_env, mapsize)) + throw0(DB_ERROR(std::string("Failed to set max memory map size: ").append(mdb_strerror(result)).c_str())); + if (auto result = mdb_env_open(m_env, filename.c_str(), mdb_flags, 0644)) + throw0(DB_ERROR(std::string("Failed to open lmdb environment: ").append(mdb_strerror(result)).c_str())); + + // get a read/write MDB_txn + txn_safe txn; + if (mdb_txn_begin(m_env, NULL, 0, txn)) + throw0(DB_ERROR("Failed to create a transaction for the db")); + + // open necessary databases, and set properties as needed + // uses macros to avoid having to change things too many places + lmdb_db_open(txn, LMDB_BLOCKS, MDB_INTEGERKEY | MDB_CREATE, m_blocks, "Failed to open db handle for m_blocks"); + + lmdb_db_open(txn, LMDB_BLOCK_TIMESTAMPS, MDB_INTEGERKEY | MDB_CREATE, m_block_timestamps, "Failed to open db handle for m_block_timestamps"); + lmdb_db_open(txn, LMDB_BLOCK_HEIGHTS, MDB_CREATE, m_block_heights, "Failed to open db handle for m_block_heights"); + lmdb_db_open(txn, LMDB_BLOCK_HASHES, MDB_INTEGERKEY | MDB_CREATE, m_block_hashes, "Failed to open db handle for m_block_hashes"); + lmdb_db_open(txn, LMDB_BLOCK_SIZES, MDB_INTEGERKEY | MDB_CREATE, m_block_sizes, "Failed to open db handle for m_block_sizes"); + lmdb_db_open(txn, LMDB_BLOCK_DIFFS, MDB_INTEGERKEY | MDB_CREATE, m_block_diffs, "Failed to open db handle for m_block_diffs"); + lmdb_db_open(txn, LMDB_BLOCK_COINS, MDB_INTEGERKEY | MDB_CREATE, m_block_coins, "Failed to open db handle for m_block_coins"); + + lmdb_db_open(txn, LMDB_TXS, MDB_CREATE, m_txs, "Failed to open db handle for m_txs"); + lmdb_db_open(txn, LMDB_TX_UNLOCKS, MDB_CREATE, m_tx_unlocks, "Failed to open db handle for m_tx_unlocks"); + lmdb_db_open(txn, LMDB_TX_HEIGHTS, MDB_CREATE, m_tx_heights, "Failed to open db handle for m_tx_heights"); + lmdb_db_open(txn, LMDB_TX_OUTPUTS, MDB_DUPSORT | MDB_CREATE, m_tx_outputs, "Failed to open db handle for m_tx_outputs"); + + lmdb_db_open(txn, LMDB_OUTPUT_TXS, MDB_INTEGERKEY | MDB_CREATE, m_output_txs, "Failed to open db handle for m_output_txs"); + lmdb_db_open(txn, LMDB_OUTPUT_INDICES, MDB_INTEGERKEY | MDB_CREATE, m_output_indices, "Failed to open db handle for m_output_indices"); + lmdb_db_open(txn, LMDB_OUTPUT_AMOUNTS, MDB_INTEGERKEY | MDB_DUPSORT | MDB_CREATE, m_output_amounts, "Failed to open db handle for m_output_amounts"); + lmdb_db_open(txn, LMDB_OUTPUT_KEYS, MDB_INTEGERKEY | MDB_CREATE, m_output_keys, "Failed to open db handle for m_output_keys"); + +/*************** not used, but kept for posterity + lmdb_db_open(txn, LMDB_OUTPUTS, MDB_INTEGERKEY | MDB_CREATE, m_outputs, "Failed to open db handle for m_outputs"); + lmdb_db_open(txn, LMDB_OUTPUT_GINDICES, MDB_CREATE, m_output_gindices, "Failed to open db handle for m_output_gindices"); +*************************************************/ + + lmdb_db_open(txn, LMDB_SPENT_KEYS, MDB_CREATE, m_spent_keys, "Failed to open db handle for m_spent_keys"); + + mdb_set_dupsort(txn, m_output_amounts, compare_uint64); + mdb_set_dupsort(txn, m_tx_outputs, compare_uint64); + + // get and keep current height + MDB_stat db_stats; + if (mdb_stat(txn, m_blocks, &db_stats)) + throw0(DB_ERROR("Failed to query m_blocks")); + LOG_PRINT_L2("Setting m_height to: " << db_stats.ms_entries); + m_height = db_stats.ms_entries; + + // get and keep current number of outputs + if (mdb_stat(txn, m_output_indices, &db_stats)) + throw0(DB_ERROR("Failed to query m_output_indices")); + m_num_outputs = db_stats.ms_entries; + + // commit the transaction + txn.commit(); + + m_open = true; + // from here, init should be finished +} + +// unused for now, create will happen on open if doesn't exist +void BlockchainLMDB::create(const std::string& filename) +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + throw DB_CREATE_FAILURE("create() is not implemented for this BlockchainDB, open() will create files if needed."); +} + +void BlockchainLMDB::close() +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + if (m_batch_active) + { + LOG_PRINT_L3("close() first calling batch_abort() due to active batch transaction"); + batch_abort(); + } + this->sync(); + + // FIXME: not yet thread safe!!! Use with care. + mdb_env_close(m_env); +} + +void BlockchainLMDB::sync() +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + + // Does nothing unless LMDB environment was opened with MDB_NOSYNC or in part + // MDB_NOMETASYNC. Force flush to be synchronous. + if (auto result = mdb_env_sync(m_env, true)) + { + throw0(DB_ERROR(std::string("Failed to sync database").append(mdb_strerror(result)).c_str())); + } +} + +void BlockchainLMDB::reset() +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + // TODO: this +} + +std::vector<std::string> BlockchainLMDB::get_filenames() const +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + std::vector<std::string> filenames; + + boost::filesystem::path datafile(m_folder); + datafile /= "data.mdb"; + boost::filesystem::path lockfile(m_folder); + lockfile /= "lock.mdb"; + + filenames.push_back(datafile.string()); + filenames.push_back(lockfile.string()); + + return filenames; +} + +std::string BlockchainLMDB::get_db_name() const +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + + return std::string("lmdb"); +} + +// TODO: this? +bool BlockchainLMDB::lock() +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + return false; +} + +// TODO: this? +void BlockchainLMDB::unlock() +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); +} + +bool BlockchainLMDB::block_exists(const crypto::hash& h) const +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + + txn_safe txn; + if (mdb_txn_begin(m_env, NULL, MDB_RDONLY, txn)) + throw0(DB_ERROR("Failed to create a transaction for the db")); + + MDB_val_copy<crypto::hash> key(h); + MDB_val result; + auto get_result = mdb_get(txn, m_block_heights, &key, &result); + if (get_result == MDB_NOTFOUND) + { + txn.commit(); + LOG_PRINT_L3("Block with hash " << epee::string_tools::pod_to_hex(h) << " not found in db"); + return false; + } + else if (get_result) + throw0(DB_ERROR("DB error attempting to fetch block index from hash")); + + txn.commit(); + return true; +} + +block BlockchainLMDB::get_block(const crypto::hash& h) const +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + + return get_block_from_height(get_block_height(h)); +} + +uint64_t BlockchainLMDB::get_block_height(const crypto::hash& h) const +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + + txn_safe txn; + if (mdb_txn_begin(m_env, NULL, MDB_RDONLY, txn)) + throw0(DB_ERROR("Failed to create a transaction for the db")); + + MDB_val_copy<crypto::hash> key(h); + MDB_val result; + auto get_result = mdb_get(txn, m_block_heights, &key, &result); + if (get_result == MDB_NOTFOUND) + throw1(BLOCK_DNE("Attempted to retrieve non-existent block height")); + else if (get_result) + throw0(DB_ERROR("Error attempting to retrieve a block height from the db")); + + txn.commit(); + return *(const uint64_t*)result.mv_data; +} + +block_header BlockchainLMDB::get_block_header(const crypto::hash& h) const +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + + // block_header object is automatically cast from block object + return get_block(h); +} + +block BlockchainLMDB::get_block_from_height(const uint64_t& height) const +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + + txn_safe txn; + if (mdb_txn_begin(m_env, NULL, MDB_RDONLY, txn)) + throw0(DB_ERROR("Failed to create a transaction for the db")); + + MDB_val_copy<uint64_t> key(height); + MDB_val result; + auto get_result = mdb_get(txn, m_blocks, &key, &result); + if (get_result == MDB_NOTFOUND) + { + throw0(DB_ERROR(std::string("Attempt to get block from height ").append(boost::lexical_cast<std::string>(height)).append(" failed -- block not in db").c_str())); + } + else if (get_result) + throw0(DB_ERROR("Error attempting to retrieve a block from the db")); + + txn.commit(); + + blobdata bd; + bd.assign(reinterpret_cast<char*>(result.mv_data), result.mv_size); + + block b; + if (!parse_and_validate_block_from_blob(bd, b)) + throw0(DB_ERROR("Failed to parse block from blob retrieved from the db")); + + return b; +} + +uint64_t BlockchainLMDB::get_block_timestamp(const uint64_t& height) const +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + + txn_safe txn; + txn_safe* txn_ptr = &txn; + if (m_batch_active) + txn_ptr = m_write_txn; + else + { + if (mdb_txn_begin(m_env, NULL, MDB_RDONLY, txn)) + throw0(DB_ERROR("Failed to create a transaction for the db")); + } + MDB_val_copy<uint64_t> key(height); + MDB_val result; + auto get_result = mdb_get(*txn_ptr, m_block_timestamps, &key, &result); + if (get_result == MDB_NOTFOUND) + { + throw0(DB_ERROR(std::string("Attempt to get timestamp from height ").append(boost::lexical_cast<std::string>(height)).append(" failed -- timestamp not in db").c_str())); + } + else if (get_result) + throw0(DB_ERROR("Error attempting to retrieve a timestamp from the db")); + + if (! m_batch_active) + txn.commit(); + return *(const uint64_t*)result.mv_data; +} + +uint64_t BlockchainLMDB::get_top_block_timestamp() const +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + + // if no blocks, return 0 + if (m_height == 0) + { + return 0; + } + + return get_block_timestamp(m_height - 1); +} + +size_t BlockchainLMDB::get_block_size(const uint64_t& height) const +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + + txn_safe txn; + txn_safe* txn_ptr = &txn; + if (m_batch_active) + txn_ptr = m_write_txn; + else + { + if (mdb_txn_begin(m_env, NULL, MDB_RDONLY, txn)) + throw0(DB_ERROR("Failed to create a transaction for the db")); + } + + MDB_val_copy<uint64_t> key(height); + MDB_val result; + auto get_result = mdb_get(*txn_ptr, m_block_sizes, &key, &result); + if (get_result == MDB_NOTFOUND) + { + throw0(DB_ERROR(std::string("Attempt to get block size from height ").append(boost::lexical_cast<std::string>(height)).append(" failed -- block size not in db").c_str())); + } + else if (get_result) + throw0(DB_ERROR("Error attempting to retrieve a block size from the db")); + + if (! m_batch_active) + txn.commit(); + return *(const size_t*)result.mv_data; +} + +difficulty_type BlockchainLMDB::get_block_cumulative_difficulty(const uint64_t& height) const +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__ << " height: " << height); + check_open(); + + txn_safe txn; + txn_safe* txn_ptr = &txn; + if (m_batch_active) + txn_ptr = m_write_txn; + else + { + if (mdb_txn_begin(m_env, NULL, MDB_RDONLY, txn)) + throw0(DB_ERROR("Failed to create a transaction for the db")); + } + MDB_val_copy<uint64_t> key(height); + MDB_val result; + auto get_result = mdb_get(*txn_ptr, m_block_diffs, &key, &result); + if (get_result == MDB_NOTFOUND) + { + throw0(DB_ERROR(std::string("Attempt to get cumulative difficulty from height ").append(boost::lexical_cast<std::string>(height)).append(" failed -- difficulty not in db").c_str())); + } + else if (get_result) + throw0(DB_ERROR("Error attempting to retrieve a cumulative difficulty from the db")); + + if (! m_batch_active) + txn.commit(); + return *(difficulty_type*)result.mv_data; +} + +difficulty_type BlockchainLMDB::get_block_difficulty(const uint64_t& height) const +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + + difficulty_type diff1 = 0; + difficulty_type diff2 = 0; + + diff1 = get_block_cumulative_difficulty(height); + if (height != 0) + { + diff2 = get_block_cumulative_difficulty(height - 1); + } + + return diff1 - diff2; +} + +uint64_t BlockchainLMDB::get_block_already_generated_coins(const uint64_t& height) const +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + + txn_safe txn; + txn_safe* txn_ptr = &txn; + if (m_batch_active) + txn_ptr = m_write_txn; + else + { + if (mdb_txn_begin(m_env, NULL, MDB_RDONLY, txn)) + throw0(DB_ERROR("Failed to create a transaction for the db")); + } + + MDB_val_copy<uint64_t> key(height); + MDB_val result; + auto get_result = mdb_get(*txn_ptr, m_block_coins, &key, &result); + if (get_result == MDB_NOTFOUND) + { + throw0(DB_ERROR(std::string("Attempt to get generated coins from height ").append(boost::lexical_cast<std::string>(height)).append(" failed -- block size not in db").c_str())); + } + else if (get_result) + throw0(DB_ERROR("Error attempting to retrieve a total generated coins from the db")); + + if (! m_batch_active) + txn.commit(); + return *(const uint64_t*)result.mv_data; +} + +crypto::hash BlockchainLMDB::get_block_hash_from_height(const uint64_t& height) const +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + + txn_safe txn; + txn_safe* txn_ptr = &txn; + if (m_batch_active) + txn_ptr = m_write_txn; + else + { + if (mdb_txn_begin(m_env, NULL, MDB_RDONLY, txn)) + throw0(DB_ERROR("Failed to create a transaction for the db")); + } + + MDB_val_copy<uint64_t> key(height); + MDB_val result; + auto get_result = mdb_get(*txn_ptr, m_block_hashes, &key, &result); + if (get_result == MDB_NOTFOUND) + { + throw0(BLOCK_DNE(std::string("Attempt to get hash from height ").append(boost::lexical_cast<std::string>(height)).append(" failed -- hash not in db").c_str())); + } + else if (get_result) + throw0(DB_ERROR(std::string("Error attempting to retrieve a block hash from the db: "). + append(mdb_strerror(get_result)).c_str())); + + if (! m_batch_active) + txn.commit(); + return *(crypto::hash*)result.mv_data; +} + +std::vector<block> BlockchainLMDB::get_blocks_range(const uint64_t& h1, const uint64_t& h2) const +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + std::vector<block> v; + + for (uint64_t height = h1; height <= h2; ++height) + { + v.push_back(get_block_from_height(height)); + } + + return v; +} + +std::vector<crypto::hash> BlockchainLMDB::get_hashes_range(const uint64_t& h1, const uint64_t& h2) const +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + std::vector<crypto::hash> v; + + for (uint64_t height = h1; height <= h2; ++height) + { + v.push_back(get_block_hash_from_height(height)); + } + + return v; +} + +crypto::hash BlockchainLMDB::top_block_hash() const +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + if (m_height != 0) + { + return get_block_hash_from_height(m_height - 1); + } + + return null_hash; +} + +block BlockchainLMDB::get_top_block() const +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + + if (m_height != 0) + { + return get_block_from_height(m_height - 1); + } + + block b; + return b; +} + +uint64_t BlockchainLMDB::height() const +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + + return m_height; +} + +bool BlockchainLMDB::tx_exists(const crypto::hash& h) const +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + + txn_safe txn; + txn_safe* txn_ptr = &txn; + if (m_batch_active) + txn_ptr = m_write_txn; + else + { + if (mdb_txn_begin(m_env, NULL, MDB_RDONLY, txn)) + throw0(DB_ERROR("Failed to create a transaction for the db")); + } + + MDB_val_copy<crypto::hash> key(h); + MDB_val result; + + TIME_MEASURE_START(time1); + auto get_result = mdb_get(*txn_ptr, m_txs, &key, &result); + TIME_MEASURE_FINISH(time1); + time_tx_exists += time1; + if (get_result == MDB_NOTFOUND) + { + if (! m_batch_active) + txn.commit(); + LOG_PRINT_L1("transaction with hash " << epee::string_tools::pod_to_hex(h) << " not found in db"); + return false; + } + else if (get_result) + throw0(DB_ERROR("DB error attempting to fetch transaction from hash")); + + return true; +} + +uint64_t BlockchainLMDB::get_tx_unlock_time(const crypto::hash& h) const +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + + txn_safe txn; + if (mdb_txn_begin(m_env, NULL, MDB_RDONLY, txn)) + throw0(DB_ERROR("Failed to create a transaction for the db")); + + MDB_val_copy<crypto::hash> key(h); + MDB_val result; + auto get_result = mdb_get(txn, m_tx_unlocks, &key, &result); + if (get_result == MDB_NOTFOUND) + throw1(TX_DNE(std::string("tx unlock time with hash ").append(epee::string_tools::pod_to_hex(h)).append(" not found in db").c_str())); + else if (get_result) + throw0(DB_ERROR("DB error attempting to fetch tx unlock time from hash")); + + return *(const uint64_t*)result.mv_data; +} + +transaction BlockchainLMDB::get_tx(const crypto::hash& h) const +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + + txn_safe txn; + txn_safe* txn_ptr = &txn; + if (m_batch_active) + txn_ptr = m_write_txn; + else + { + if (mdb_txn_begin(m_env, NULL, MDB_RDONLY, txn)) + throw0(DB_ERROR("Failed to create a transaction for the db")); + } + + MDB_val_copy<crypto::hash> key(h); + MDB_val result; + auto get_result = mdb_get(*txn_ptr, m_txs, &key, &result); + if (get_result == MDB_NOTFOUND) + throw1(TX_DNE(std::string("tx with hash ").append(epee::string_tools::pod_to_hex(h)).append(" not found in db").c_str())); + else if (get_result) + throw0(DB_ERROR("DB error attempting to fetch tx from hash")); + + blobdata bd; + bd.assign(reinterpret_cast<char*>(result.mv_data), result.mv_size); + + transaction tx; + if (!parse_and_validate_tx_from_blob(bd, tx)) + throw0(DB_ERROR("Failed to parse tx from blob retrieved from the db")); + if (! m_batch_active) + txn.commit(); + + return tx; +} + +uint64_t BlockchainLMDB::get_tx_count() const +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + + txn_safe txn; + if (mdb_txn_begin(m_env, NULL, MDB_RDONLY, txn)) + throw0(DB_ERROR("Failed to create a transaction for the db")); + + MDB_stat db_stats; + if (mdb_stat(txn, m_txs, &db_stats)) + throw0(DB_ERROR("Failed to query m_txs")); + + txn.commit(); + + return db_stats.ms_entries; +} + +std::vector<transaction> BlockchainLMDB::get_tx_list(const std::vector<crypto::hash>& hlist) const +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + std::vector<transaction> v; + + for (auto& h : hlist) + { + v.push_back(get_tx(h)); + } + + return v; +} + +uint64_t BlockchainLMDB::get_tx_block_height(const crypto::hash& h) const +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + + txn_safe txn; + txn_safe* txn_ptr = &txn; + if (m_batch_active) + txn_ptr = m_write_txn; + else + { + if (mdb_txn_begin(m_env, NULL, MDB_RDONLY, txn)) + throw0(DB_ERROR("Failed to create a transaction for the db")); + } + + MDB_val_copy<crypto::hash> key(h); + MDB_val result; + auto get_result = mdb_get(*txn_ptr, m_tx_heights, &key, &result); + if (get_result == MDB_NOTFOUND) + { + throw1(TX_DNE(std::string("tx height with hash ").append(epee::string_tools::pod_to_hex(h)).append(" not found in db").c_str())); + } + else if (get_result) + throw0(DB_ERROR("DB error attempting to fetch tx height from hash")); + + if (! m_batch_active) + txn.commit(); + + return *(const uint64_t*)result.mv_data; +} + +//FIXME: make sure the random method used here is appropriate +uint64_t BlockchainLMDB::get_random_output(const uint64_t& amount) const +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + + uint64_t num_outputs = get_num_outputs(amount); + if (num_outputs == 0) + throw1(OUTPUT_DNE("Attempting to get a random output for an amount, but none exist")); + + return crypto::rand<uint64_t>() % num_outputs; +} + +uint64_t BlockchainLMDB::get_num_outputs(const uint64_t& amount) const +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + + txn_safe txn; + if (mdb_txn_begin(m_env, NULL, MDB_RDONLY, txn)) + throw0(DB_ERROR("Failed to create a transaction for the db")); + + lmdb_cur cur(txn, m_output_amounts); + + MDB_val_copy<uint64_t> k(amount); + MDB_val v; + auto result = mdb_cursor_get(cur, &k, &v, MDB_SET); + if (result == MDB_NOTFOUND) + { + return 0; + } + else if (result) + throw0(DB_ERROR("DB error attempting to get number of outputs of an amount")); + + size_t num_elems = 0; + mdb_cursor_count(cur, &num_elems); + + txn.commit(); + + return num_elems; +} + +crypto::public_key BlockchainLMDB::get_output_key(const uint64_t& amount, const uint64_t& index) const +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + + uint64_t glob_index = get_output_global_index(amount, index); + + txn_safe txn; + if (mdb_txn_begin(m_env, NULL, MDB_RDONLY, txn)) + throw0(DB_ERROR("Failed to create a transaction for the db")); + + MDB_val_copy<uint64_t> k(glob_index); + MDB_val v; + auto get_result = mdb_get(txn, m_output_keys, &k, &v); + if (get_result == MDB_NOTFOUND) + throw0(DB_ERROR("Attempting to get output pubkey by global index, but key does not exist")); + else if (get_result) + throw0(DB_ERROR("Error attempting to retrieve an output pubkey from the db")); + + return *(crypto::public_key*)v.mv_data; +} + +tx_out BlockchainLMDB::get_output(const crypto::hash& h, const uint64_t& index) const +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + + txn_safe txn; + if (mdb_txn_begin(m_env, NULL, MDB_RDONLY, txn)) + throw0(DB_ERROR("Failed to create a transaction for the db")); + + lmdb_cur cur(txn, m_tx_outputs); + + MDB_val_copy<crypto::hash> k(h); + MDB_val v; + auto result = mdb_cursor_get(cur, &k, &v, MDB_SET); + if (result == MDB_NOTFOUND) + throw1(OUTPUT_DNE("Attempting to get an output by tx hash and tx index, but output not found")); + else if (result) + throw0(DB_ERROR("DB error attempting to get an output")); + + size_t num_elems = 0; + mdb_cursor_count(cur, &num_elems); + if (num_elems <= index) + throw1(OUTPUT_DNE("Attempting to get an output by tx hash and tx index, but output not found")); + + mdb_cursor_get(cur, &k, &v, MDB_FIRST_DUP); + + for (uint64_t i = 0; i < index; ++i) + { + mdb_cursor_get(cur, &k, &v, MDB_NEXT_DUP); + } + + mdb_cursor_get(cur, &k, &v, MDB_GET_CURRENT); + + blobdata b; + b = *(blobdata*)v.mv_data; + + cur.close(); + txn.commit(); + + return output_from_blob(b); +} + +// As this is not used, its return is now a blank output. +// This will save on space in the db. +tx_out BlockchainLMDB::get_output(const uint64_t& index) const +{ + return tx_out(); + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + + txn_safe txn; + if (mdb_txn_begin(m_env, NULL, MDB_RDONLY, txn)) + throw0(DB_ERROR("Failed to create a transaction for the db")); + + MDB_val_copy<uint64_t> k(index); + MDB_val v; + auto get_result = mdb_get(txn, m_outputs, &k, &v); + if (get_result == MDB_NOTFOUND) + { + throw OUTPUT_DNE("Attempting to get output by global index, but output does not exist"); + } + else if (get_result) + throw0(DB_ERROR("Error attempting to retrieve an output from the db")); + + blobdata b = *(blobdata*)v.mv_data; + + return output_from_blob(b); +} + +tx_out_index BlockchainLMDB::get_output_tx_and_index_from_global(const uint64_t& index) const +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + + txn_safe txn; + txn_safe* txn_ptr = &txn; + if (m_batch_active) + txn_ptr = m_write_txn; + else + { + if (mdb_txn_begin(m_env, NULL, MDB_RDONLY, txn)) + throw0(DB_ERROR("Failed to create a transaction for the db")); + } + MDB_val_copy<uint64_t> k(index); + MDB_val v; + + auto get_result = mdb_get(*txn_ptr, m_output_txs, &k, &v); + if (get_result == MDB_NOTFOUND) + throw1(OUTPUT_DNE("output with given index not in db")); + else if (get_result) + throw0(DB_ERROR("DB error attempting to fetch output tx hash")); + + crypto::hash tx_hash = *(crypto::hash*)v.mv_data; + + get_result = mdb_get(*txn_ptr, m_output_indices, &k, &v); + if (get_result == MDB_NOTFOUND) + throw1(OUTPUT_DNE("output with given index not in db")); + else if (get_result) + throw0(DB_ERROR("DB error attempting to fetch output tx index")); + if (! m_batch_active) + txn.commit(); + + return tx_out_index(tx_hash, *(const uint64_t *)v.mv_data); +} + +tx_out_index BlockchainLMDB::get_output_tx_and_index(const uint64_t& amount, const uint64_t& index) const +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + + txn_safe txn; + txn_safe* txn_ptr = &txn; + if (m_batch_active) + txn_ptr = m_write_txn; + else + { + if (mdb_txn_begin(m_env, NULL, MDB_RDONLY, txn)) + throw0(DB_ERROR("Failed to create a transaction for the db")); + } + lmdb_cur cur(*txn_ptr, m_output_amounts); + + MDB_val_copy<uint64_t> k(amount); + MDB_val v; + + auto result = mdb_cursor_get(cur, &k, &v, MDB_SET); + if (result == MDB_NOTFOUND) + throw1(OUTPUT_DNE("Attempting to get an output index by amount and amount index, but amount not found")); + else if (result) + throw0(DB_ERROR("DB error attempting to get an output")); + + size_t num_elems = 0; + mdb_cursor_count(cur, &num_elems); + if (num_elems <= index) + throw1(OUTPUT_DNE("Attempting to get an output index by amount and amount index, but output not found")); + + mdb_cursor_get(cur, &k, &v, MDB_FIRST_DUP); + + for (uint64_t i = 0; i < index; ++i) + { + mdb_cursor_get(cur, &k, &v, MDB_NEXT_DUP); + } + + mdb_cursor_get(cur, &k, &v, MDB_GET_CURRENT); + + uint64_t glob_index = *(const uint64_t*)v.mv_data; + + cur.close(); + + if (! m_batch_active) + txn.commit(); + + return get_output_tx_and_index_from_global(glob_index); +} + +std::vector<uint64_t> BlockchainLMDB::get_tx_output_indices(const crypto::hash& h) const +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + std::vector<uint64_t> index_vec; + + txn_safe txn; + if (mdb_txn_begin(m_env, NULL, MDB_RDONLY, txn)) + throw0(DB_ERROR("Failed to create a transaction for the db")); + + lmdb_cur cur(txn, m_tx_outputs); + + MDB_val_copy<crypto::hash> k(h); + MDB_val v; + auto result = mdb_cursor_get(cur, &k, &v, MDB_SET); + if (result == MDB_NOTFOUND) + throw1(OUTPUT_DNE("Attempting to get an output by tx hash and tx index, but output not found")); + else if (result) + throw0(DB_ERROR("DB error attempting to get an output")); + + size_t num_elems = 0; + mdb_cursor_count(cur, &num_elems); + + mdb_cursor_get(cur, &k, &v, MDB_FIRST_DUP); + + for (uint64_t i = 0; i < num_elems; ++i) + { + mdb_cursor_get(cur, &k, &v, MDB_GET_CURRENT); + index_vec.push_back(*(const uint64_t *)v.mv_data); + mdb_cursor_get(cur, &k, &v, MDB_NEXT_DUP); + } + + cur.close(); + txn.commit(); + + return index_vec; +} + +std::vector<uint64_t> BlockchainLMDB::get_tx_amount_output_indices(const crypto::hash& h) const +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + std::vector<uint64_t> index_vec; + std::vector<uint64_t> index_vec2; + + // get the transaction's global output indices first + index_vec = get_tx_output_indices(h); + // these are next used to obtain the amount output indices + + transaction tx = get_tx(h); + + txn_safe txn; + if (mdb_txn_begin(m_env, NULL, MDB_RDONLY, txn)) + throw0(DB_ERROR("Failed to create a transaction for the db")); + + uint64_t i = 0; + uint64_t global_index; + BOOST_FOREACH(const auto& vout, tx.vout) + { + uint64_t amount = vout.amount; + + global_index = index_vec[i]; + + lmdb_cur cur(txn, m_output_amounts); + + MDB_val_copy<uint64_t> k(amount); + MDB_val v; + + auto result = mdb_cursor_get(cur, &k, &v, MDB_SET); + if (result == MDB_NOTFOUND) + throw1(OUTPUT_DNE("Attempting to get an output index by amount and amount index, but amount not found")); + else if (result) + throw0(DB_ERROR("DB error attempting to get an output")); + + size_t num_elems = 0; + mdb_cursor_count(cur, &num_elems); + + mdb_cursor_get(cur, &k, &v, MDB_FIRST_DUP); + + uint64_t amount_output_index = 0; + uint64_t output_index = 0; + bool found_index = false; + for (uint64_t j = 0; j < num_elems; ++j) + { + mdb_cursor_get(cur, &k, &v, MDB_GET_CURRENT); + output_index = *(const uint64_t *)v.mv_data; + if (output_index == global_index) + { + amount_output_index = j; + found_index = true; + break; + } + mdb_cursor_get(cur, &k, &v, MDB_NEXT_DUP); + } + if (found_index) + { + index_vec2.push_back(amount_output_index); + } + else + { + // not found + cur.close(); + txn.commit(); + throw1(OUTPUT_DNE("specified output not found in db")); + } + + cur.close(); + ++i; + } + + txn.commit(); + + return index_vec2; +} + + + +bool BlockchainLMDB::has_key_image(const crypto::key_image& img) const +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + + txn_safe txn; + if (mdb_txn_begin(m_env, NULL, MDB_RDONLY, txn)) + throw0(DB_ERROR("Failed to create a transaction for the db")); + + MDB_val_copy<crypto::key_image> val_key(img); + MDB_val unused; + if (mdb_get(txn, m_spent_keys, &val_key, &unused) == 0) + { + txn.commit(); + return true; + } + + txn.commit(); + return false; +} + +void BlockchainLMDB::batch_start() +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + if (! m_batch_transactions) + throw0(DB_ERROR("batch transactions not enabled")); + if (m_batch_active) + throw0(DB_ERROR("batch transaction already in progress")); + if (m_write_txn) + throw0(DB_ERROR("batch transaction attempted, but m_write_txn already in use")); + check_open(); + // NOTE: need to make sure it's destroyed properly when done + if (mdb_txn_begin(m_env, NULL, 0, m_write_batch_txn)) + throw0(DB_ERROR("Failed to create a transaction for the db")); + // indicates this transaction is for batch transactions, but not whether it's + // active + m_write_batch_txn.m_batch_txn = true; + m_write_txn = &m_write_batch_txn; + m_batch_active = true; + LOG_PRINT_L3("batch transaction: begin"); +} + +void BlockchainLMDB::batch_commit() +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + if (! m_batch_transactions) + throw0(DB_ERROR("batch transactions not enabled")); + if (! m_batch_active) + throw0(DB_ERROR("batch transaction not in progress")); + check_open(); + LOG_PRINT_L3("batch transaction: committing..."); + TIME_MEASURE_START(time1); + m_write_txn->commit(); + TIME_MEASURE_FINISH(time1); + time_commit1 += time1; + LOG_PRINT_L3("batch transaction: committed"); + + if (mdb_txn_begin(m_env, NULL, 0, m_write_batch_txn)) + throw0(DB_ERROR("Failed to create a transaction for the db")); + if (! m_write_batch_txn.m_batch_txn) + throw0(DB_ERROR("m_write_batch_txn not marked as a batch transaction")); + m_write_txn = &m_write_batch_txn; +} + +void BlockchainLMDB::batch_stop() +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + if (! m_batch_transactions) + throw0(DB_ERROR("batch transactions not enabled")); + if (! m_batch_active) + throw0(DB_ERROR("batch transaction not in progress")); + check_open(); + LOG_PRINT_L3("batch transaction: committing..."); + TIME_MEASURE_START(time1); + m_write_txn->commit(); + TIME_MEASURE_FINISH(time1); + time_commit1 += time1; + // for destruction of batch transaction + m_write_txn = nullptr; + m_batch_active = false; + LOG_PRINT_L3("batch transaction: end"); +} + +void BlockchainLMDB::batch_abort() +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + if (! m_batch_transactions) + throw0(DB_ERROR("batch transactions not enabled")); + if (! m_batch_active) + throw0(DB_ERROR("batch transaction not in progress")); + check_open(); + // for destruction of batch transaction + m_write_txn = nullptr; + // explicitly call in case mdb_env_close() (BlockchainLMDB::close()) called before BlockchainLMDB destructor called. + m_write_batch_txn.abort(); + m_batch_active = false; + LOG_PRINT_L3("batch transaction: aborted"); +} + +void BlockchainLMDB::set_batch_transactions(bool batch_transactions) +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + m_batch_transactions = batch_transactions; + LOG_PRINT_L3("batch transactions " << (m_batch_transactions ? "enabled" : "disabled")); +} + +uint64_t BlockchainLMDB::add_block( const block& blk + , const size_t& block_size + , const difficulty_type& cumulative_difficulty + , const uint64_t& coins_generated + , const std::vector<transaction>& txs + ) +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + + txn_safe txn; + if (! m_batch_active) + { + if (mdb_txn_begin(m_env, NULL, 0, txn)) + throw0(DB_ERROR("Failed to create a transaction for the db")); + m_write_txn = &txn; + } + + uint64_t num_outputs = m_num_outputs; + try + { + BlockchainDB::add_block(blk, block_size, cumulative_difficulty, coins_generated, txs); + if (! m_batch_active) + { + m_write_txn = NULL; + + TIME_MEASURE_START(time1); + txn.commit(); + TIME_MEASURE_FINISH(time1); + time_commit1 += time1; + } + } + catch (...) + { + m_num_outputs = num_outputs; + if (! m_batch_active) + m_write_txn = NULL; + throw; + } + + return ++m_height; +} + +void BlockchainLMDB::pop_block(block& blk, std::vector<transaction>& txs) +{ + LOG_PRINT_L3("BlockchainLMDB::" << __func__); + check_open(); + + txn_safe txn; + if (! m_batch_active) + { + if (mdb_txn_begin(m_env, NULL, 0, txn)) + throw0(DB_ERROR("Failed to create a transaction for the db")); + m_write_txn = &txn; + } + + uint64_t num_outputs = m_num_outputs; + try + { + BlockchainDB::pop_block(blk, txs); + if (! m_batch_active) + { + m_write_txn = NULL; + + txn.commit(); + } + } + catch (...) + { + m_num_outputs = num_outputs; + m_write_txn = NULL; + throw; + } + + --m_height; +} + +} // namespace cryptonote diff --git a/src/blockchain_db/lmdb/db_lmdb.h b/src/blockchain_db/lmdb/db_lmdb.h new file mode 100644 index 000000000..fa7cb988a --- /dev/null +++ b/src/blockchain_db/lmdb/db_lmdb.h @@ -0,0 +1,316 @@ +// Copyright (c) 2014, The Monero Project +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without modification, are +// permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of +// conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list +// of conditions and the following disclaimer in the documentation and/or other +// materials provided with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be +// used to endorse or promote products derived from this software without specific +// prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF +// THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "blockchain_db/blockchain_db.h" +#include "cryptonote_protocol/blobdatatype.h" // for type blobdata + +#include <lmdb.h> + +namespace cryptonote +{ + +struct txn_safe +{ + txn_safe() : m_txn(NULL) { } + ~txn_safe() + { + LOG_PRINT_L3("txn_safe: destructor"); + if (m_txn != NULL) + { + if (m_batch_txn) // this is a batch txn and should have been handled before this point for safety + { + LOG_PRINT_L0("WARNING: txn_safe: m_txn is a batch txn and it's not NULL in destructor - calling mdb_txn_abort()"); + } + else + { + // Example of when this occurs: a lookup fails, so a read-only txn is + // aborted through this destructor. However, successful read-only txns + // ideally should have been committed when done and not end up here. + // + // NOTE: not sure if this is ever reached for a non-batch write + // transaction, but it's probably not ideal if it did. + LOG_PRINT_L3("txn_safe: m_txn not NULL in destructor - calling mdb_txn_abort()"); + } + mdb_txn_abort(m_txn); + } + } + + void commit(std::string message = "") + { + if (message.size() == 0) + { + message = "Failed to commit a transaction to the db"; + } + + if (mdb_txn_commit(m_txn)) + { + m_txn = NULL; + LOG_PRINT_L0(message); + throw DB_ERROR(message.c_str()); + } + m_txn = NULL; + } + + // This should only be needed for batch transaction which must be ensured to + // be aborted before mdb_env_close, not after. So we can't rely on + // BlockchainLMDB destructor to call txn_safe destructor, as that's too late + // to properly abort, since mdb_env_close would have been called earlier. + void abort() + { + LOG_PRINT_L3("txn_safe: abort()"); + if(m_txn != NULL) + { + mdb_txn_abort(m_txn); + m_txn = NULL; + } + else + { + LOG_PRINT_L0("WARNING: txn_safe: abort() called, but m_txn is NULL"); + } + } + + operator MDB_txn*() + { + return m_txn; + } + + operator MDB_txn**() + { + return &m_txn; + } + + MDB_txn* m_txn; + bool m_batch_txn = false; +}; + + +class BlockchainLMDB : public BlockchainDB +{ +public: + BlockchainLMDB(bool batch_transactions=false); + ~BlockchainLMDB(); + + virtual void open(const std::string& filename, const int mdb_flags=0); + + virtual void create(const std::string& filename); + + virtual void close(); + + virtual void sync(); + + virtual void reset(); + + virtual std::vector<std::string> get_filenames() const; + + virtual std::string get_db_name() const; + + virtual bool lock(); + + virtual void unlock(); + + virtual bool block_exists(const crypto::hash& h) const; + + virtual block get_block(const crypto::hash& h) const; + + virtual uint64_t get_block_height(const crypto::hash& h) const; + + virtual block_header get_block_header(const crypto::hash& h) const; + + virtual block get_block_from_height(const uint64_t& height) const; + + virtual uint64_t get_block_timestamp(const uint64_t& height) const; + + virtual uint64_t get_top_block_timestamp() const; + + virtual size_t get_block_size(const uint64_t& height) const; + + virtual difficulty_type get_block_cumulative_difficulty(const uint64_t& height) const; + + virtual difficulty_type get_block_difficulty(const uint64_t& height) const; + + virtual uint64_t get_block_already_generated_coins(const uint64_t& height) const; + + virtual crypto::hash get_block_hash_from_height(const uint64_t& height) const; + + virtual std::vector<block> get_blocks_range(const uint64_t& h1, const uint64_t& h2) const; + + virtual std::vector<crypto::hash> get_hashes_range(const uint64_t& h1, const uint64_t& h2) const; + + virtual crypto::hash top_block_hash() const; + + virtual block get_top_block() const; + + virtual uint64_t height() const; + + virtual bool tx_exists(const crypto::hash& h) const; + + virtual uint64_t get_tx_unlock_time(const crypto::hash& h) const; + + virtual transaction get_tx(const crypto::hash& h) const; + + virtual uint64_t get_tx_count() const; + + virtual std::vector<transaction> get_tx_list(const std::vector<crypto::hash>& hlist) const; + + virtual uint64_t get_tx_block_height(const crypto::hash& h) const; + + virtual uint64_t get_random_output(const uint64_t& amount) const; + + virtual uint64_t get_num_outputs(const uint64_t& amount) const; + + virtual crypto::public_key get_output_key(const uint64_t& amount, const uint64_t& index) const; + + virtual tx_out get_output(const crypto::hash& h, const uint64_t& index) const; + + /** + * @brief get an output from its global index + * + * @param index global index of the output desired + * + * @return the output associated with the index. + * Will throw OUTPUT_DNE if not output has that global index. + * Will throw DB_ERROR if there is a non-specific LMDB error in fetching + */ + tx_out get_output(const uint64_t& index) const; + + virtual tx_out_index get_output_tx_and_index_from_global(const uint64_t& index) const; + + virtual tx_out_index get_output_tx_and_index(const uint64_t& amount, const uint64_t& index) const; + + virtual std::vector<uint64_t> get_tx_output_indices(const crypto::hash& h) const; + virtual std::vector<uint64_t> get_tx_amount_output_indices(const crypto::hash& h) const; + + virtual bool has_key_image(const crypto::key_image& img) const; + + virtual uint64_t add_block( const block& blk + , const size_t& block_size + , const difficulty_type& cumulative_difficulty + , const uint64_t& coins_generated + , const std::vector<transaction>& txs + ); + + virtual void set_batch_transactions(bool batch_transactions); + virtual void batch_start(); + virtual void batch_commit(); + virtual void batch_stop(); + virtual void batch_abort(); + + virtual void pop_block(block& blk, std::vector<transaction>& txs); + +private: + virtual void add_block( const block& blk + , const size_t& block_size + , const difficulty_type& cumulative_difficulty + , const uint64_t& coins_generated + , const crypto::hash& block_hash + ); + + virtual void remove_block(); + + virtual void add_transaction_data(const crypto::hash& blk_hash, const transaction& tx, const crypto::hash& tx_hash); + + virtual void remove_transaction_data(const crypto::hash& tx_hash, const transaction& tx); + + virtual void add_output(const crypto::hash& tx_hash, const tx_out& tx_output, const uint64_t& local_index); + + virtual void remove_output(const tx_out& tx_output); + + void remove_tx_outputs(const crypto::hash& tx_hash, const transaction& tx); + + void remove_output(const uint64_t& out_index, const uint64_t amount); + void remove_amount_output_index(const uint64_t amount, const uint64_t global_output_index); + + virtual void add_spent_key(const crypto::key_image& k_image); + + virtual void remove_spent_key(const crypto::key_image& k_image); + + /** + * @brief convert a tx output to a blob for storage + * + * @param output the output to convert + * + * @return the resultant blob + */ + blobdata output_to_blob(const tx_out& output); + + /** + * @brief convert a tx output blob to a tx output + * + * @param blob the blob to convert + * + * @return the resultant tx output + */ + tx_out output_from_blob(const blobdata& blob) const; + + /** + * @brief get the global index of the index-th output of the given amount + * + * @param amount the output amount + * @param index the index into the set of outputs of that amount + * + * @return the global index of the desired output + */ + uint64_t get_output_global_index(const uint64_t& amount, const uint64_t& index) const; + + void check_open() const; + + MDB_env* m_env; + + MDB_dbi m_blocks; + MDB_dbi m_block_heights; + MDB_dbi m_block_hashes; + MDB_dbi m_block_timestamps; + MDB_dbi m_block_sizes; + MDB_dbi m_block_diffs; + MDB_dbi m_block_coins; + + MDB_dbi m_txs; + MDB_dbi m_tx_unlocks; + MDB_dbi m_tx_heights; + MDB_dbi m_tx_outputs; + + MDB_dbi m_output_txs; + MDB_dbi m_output_indices; + MDB_dbi m_output_gindices; + MDB_dbi m_output_amounts; + MDB_dbi m_output_keys; + MDB_dbi m_outputs; + + MDB_dbi m_spent_keys; + + bool m_open; + uint64_t m_height; + uint64_t m_num_outputs; + std::string m_folder; + txn_safe* m_write_txn; // may point to either a short-lived txn or a batch txn + txn_safe m_write_batch_txn; // persist batch txn outside of BlockchainLMDB + + bool m_batch_transactions; // support for batch transactions + bool m_batch_active; // whether batch transaction is in progress +}; + +} // namespace cryptonote diff --git a/src/cryptonote_core/CMakeLists.txt b/src/cryptonote_core/CMakeLists.txt index 3abf93f3c..26122e367 100644 --- a/src/cryptonote_core/CMakeLists.txt +++ b/src/cryptonote_core/CMakeLists.txt @@ -29,6 +29,7 @@ set(cryptonote_core_sources account.cpp blockchain_storage.cpp + blockchain.cpp checkpoints.cpp checkpoints_create.cpp cryptonote_basic_impl.cpp @@ -45,6 +46,7 @@ set(cryptonote_core_private_headers account_boost_serialization.h blockchain_storage.h blockchain_storage_boost_serialization.h + blockchain.h checkpoints.h checkpoints_create.h connection_context.h @@ -70,6 +72,7 @@ target_link_libraries(cryptonote_core LINK_PUBLIC common crypto + blockchain_db ${Boost_DATE_TIME_LIBRARY} ${Boost_PROGRAM_OPTIONS_LIBRARY} ${Boost_SERIALIZATION_LIBRARY} diff --git a/src/cryptonote_core/blockchain.cpp b/src/cryptonote_core/blockchain.cpp new file mode 100644 index 000000000..57934b3fc --- /dev/null +++ b/src/cryptonote_core/blockchain.cpp @@ -0,0 +1,2363 @@ +// Copyright (c) 2014, The Monero Project +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without modification, are +// permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of +// conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list +// of conditions and the following disclaimer in the documentation and/or other +// materials provided with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be +// used to endorse or promote products derived from this software without specific +// prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF +// THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Parts of this file are originally copyright (c) 2012-2013 The Cryptonote developers + +#include <algorithm> +#include <cstdio> +#include <boost/archive/binary_oarchive.hpp> +#include <boost/archive/binary_iarchive.hpp> +#include <boost/filesystem.hpp> + +#include "include_base_utils.h" +#include "cryptonote_basic_impl.h" +#include "tx_pool.h" +#include "blockchain.h" +#include "blockchain_db/blockchain_db.h" +#include "blockchain_db/lmdb/db_lmdb.h" +#include "cryptonote_format_utils.h" +#include "cryptonote_boost_serialization.h" +#include "cryptonote_config.h" +#include "miner.h" +#include "misc_language.h" +#include "profile_tools.h" +#include "file_io_utils.h" +#include "common/boost_serialization_helper.h" +#include "warnings.h" +#include "crypto/hash.h" +#include "cryptonote_core/checkpoints_create.h" +//#include "serialization/json_archive.h" + +/* TODO: + * Clean up code: + * Possibly change how outputs are referred to/indexed in blockchain and wallets + * + */ + +using namespace cryptonote; +using epee::string_tools::pod_to_hex; + +DISABLE_VS_WARNINGS(4267) + +//------------------------------------------------------------------ +// TODO: initialize m_db with a concrete implementation of BlockchainDB +Blockchain::Blockchain(tx_memory_pool& tx_pool):m_db(), m_tx_pool(tx_pool), m_current_block_cumul_sz_limit(0), m_is_in_checkpoint_zone(false), m_is_blockchain_storing(false), m_testnet(false), m_enforce_dns_checkpoints(false) +{ + LOG_PRINT_L3("Blockchain::" << __func__); +} +//------------------------------------------------------------------ +//TODO: is this still needed? I don't think so - tewinget +template<class archive_t> +void Blockchain::serialize(archive_t & ar, const unsigned int version) +{ + LOG_PRINT_L3("Blockchain::" << __func__); + if(version < 11) + return; + CRITICAL_REGION_LOCAL(m_blockchain_lock); + ar & m_blocks; + ar & m_blocks_index; + ar & m_transactions; + ar & m_spent_keys; + ar & m_alternative_chains; + ar & m_outputs; + ar & m_invalid_blocks; + ar & m_current_block_cumul_sz_limit; + /*serialization bug workaround*/ + if(version > 11) + { + uint64_t total_check_count = m_db->height() + m_blocks_index.size() + m_transactions.size() + m_spent_keys.size() + m_alternative_chains.size() + m_outputs.size() + m_invalid_blocks.size() + m_current_block_cumul_sz_limit; + if(archive_t::is_saving::value) + { + ar & total_check_count; + }else + { + uint64_t total_check_count_loaded = 0; + ar & total_check_count_loaded; + if(total_check_count != total_check_count_loaded) + { + LOG_ERROR("Blockchain storage data corruption detected. total_count loaded from file = " << total_check_count_loaded << ", expected = " << total_check_count); + + LOG_PRINT_L0("Blockchain storage:" << std::endl << + "m_blocks: " << m_db->height() << std::endl << + "m_blocks_index: " << m_blocks_index.size() << std::endl << + "m_transactions: " << m_transactions.size() << std::endl << + "m_spent_keys: " << m_spent_keys.size() << std::endl << + "m_alternative_chains: " << m_alternative_chains.size() << std::endl << + "m_outputs: " << m_outputs.size() << std::endl << + "m_invalid_blocks: " << m_invalid_blocks.size() << std::endl << + "m_current_block_cumul_sz_limit: " << m_current_block_cumul_sz_limit); + + throw std::runtime_error("Blockchain data corruption"); + } + } + } + + + LOG_PRINT_L3("Blockchain storage:" << std::endl << + "m_blocks: " << m_db->height() << std::endl << + "m_blocks_index: " << m_blocks_index.size() << std::endl << + "m_transactions: " << m_transactions.size() << std::endl << + "m_spent_keys: " << m_spent_keys.size() << std::endl << + "m_alternative_chains: " << m_alternative_chains.size() << std::endl << + "m_outputs: " << m_outputs.size() << std::endl << + "m_invalid_blocks: " << m_invalid_blocks.size() << std::endl << + "m_current_block_cumul_sz_limit: " << m_current_block_cumul_sz_limit); +} +//------------------------------------------------------------------ +bool Blockchain::have_tx(const crypto::hash &id) const +{ + LOG_PRINT_L3("Blockchain::" << __func__); + CRITICAL_REGION_LOCAL(m_blockchain_lock); + return m_db->tx_exists(id); +} +//------------------------------------------------------------------ +bool Blockchain::have_tx_keyimg_as_spent(const crypto::key_image &key_im) const +{ + LOG_PRINT_L3("Blockchain::" << __func__); + CRITICAL_REGION_LOCAL(m_blockchain_lock); + return m_db->has_key_image(key_im); +} +//------------------------------------------------------------------ +// This function makes sure that each "input" in an input (mixins) exists +// and collects the public key for each from the transaction it was included in +// via the visitor passed to it. +template<class visitor_t> +bool Blockchain::scan_outputkeys_for_indexes(const txin_to_key& tx_in_to_key, visitor_t& vis, uint64_t* pmax_related_block_height) const +{ + LOG_PRINT_L3("Blockchain::" << __func__); + CRITICAL_REGION_LOCAL(m_blockchain_lock); + + // verify that the input has key offsets (that it exists properly, really) + if(!tx_in_to_key.key_offsets.size()) + return false; + + // cryptonote_format_utils uses relative offsets for indexing to the global + // outputs list. that is to say that absolute offset #2 is absolute offset + // #1 plus relative offset #2. + // TODO: Investigate if this is necessary / why this is done. + std::vector<uint64_t> absolute_offsets = relative_output_offsets_to_absolute(tx_in_to_key.key_offsets); + + + //std::vector<std::pair<crypto::hash, size_t> >& amount_outs_vec = it->second; + size_t count = 0; + for (const uint64_t& i : absolute_offsets) + { + try + { + // get tx hash and output index for output + auto output_index = m_db->get_output_tx_and_index(tx_in_to_key.amount, i); + + // get tx that output is from + auto tx = m_db->get_tx(output_index.first); + + // make sure output index is within range for the given transaction + if (output_index.second >= tx.vout.size()) + { + LOG_PRINT_L0("Output does not exist. tx = " << output_index.first << ", index = " << output_index.second); + return false; + } + + // call to the passed boost visitor to grab the public key for the output + if(!vis.handle_output(tx, tx.vout[output_index.second])) + { + LOG_PRINT_L0("Failed to handle_output for output no = " << count << ", with absolute offset " << i); + return false; + } + + // if on last output and pmax_related_block_height not null pointer + if(++count == absolute_offsets.size() && pmax_related_block_height) + { + // set *pmax_related_block_height to tx block height for this output + auto h = m_db->get_tx_block_height(output_index.first); + if(*pmax_related_block_height < h) + { + *pmax_related_block_height = h; + } + } + + } + catch (const OUTPUT_DNE& e) + { + LOG_PRINT_L0("Output does not exist: " << e.what()); + return false; + } + catch (const TX_DNE& e) + { + LOG_PRINT_L0("Transaction does not exist: " << e.what()); + return false; + } + + } + + return true; +} +//------------------------------------------------------------------ +uint64_t Blockchain::get_current_blockchain_height() const +{ + LOG_PRINT_L3("Blockchain::" << __func__); + CRITICAL_REGION_LOCAL(m_blockchain_lock); + return m_db->height(); +} +//------------------------------------------------------------------ +//FIXME: possibly move this into the constructor, to avoid accidentally +// dereferencing a null BlockchainDB pointer +bool Blockchain::init(const std::string& config_folder, const bool testnet, const int db_flags) +{ + LOG_PRINT_L3("Blockchain::" << __func__); + CRITICAL_REGION_LOCAL(m_blockchain_lock); + + // TODO: make this configurable + m_db = new BlockchainLMDB(); + + m_config_folder = config_folder; + m_testnet = testnet; + + boost::filesystem::path folder(m_config_folder); + + folder /= m_db->get_db_name(); + + LOG_PRINT_L0("Loading blockchain from folder " << folder.c_str() << " ..."); + + const std::string filename = folder.string(); + try + { + m_db->open(filename, db_flags); + } + catch (const DB_OPEN_FAILURE& e) + { + LOG_PRINT_L0("No blockchain file found, attempting to create one."); + try + { + m_db->create(filename); + } + catch (const DB_CREATE_FAILURE& db_create_error) + { + LOG_PRINT_L0("Unable to create BlockchainDB! This is not good..."); + //TODO: make sure whatever calls this handles the return value properly + return false; + } + } + + // if the blockchain is new, add the genesis block + // this feels kinda kludgy to do it this way, but can be looked at later. + // TODO: add function to create and store genesis block, + // taking testnet into account + if(!m_db->height()) + { + LOG_PRINT_L0("Blockchain not loaded, generating genesis block."); + block bl = boost::value_initialized<block>(); + block_verification_context bvc = boost::value_initialized<block_verification_context>(); + if (testnet) + { + generate_genesis_block(bl, config::testnet::GENESIS_TX, config::testnet::GENESIS_NONCE); + } + else + { + generate_genesis_block(bl, config::GENESIS_TX, config::GENESIS_NONCE); + } + add_new_block(bl, bvc); + CHECK_AND_ASSERT_MES(!bvc.m_verifivation_failed, false, "Failed to add genesis block to blockchain"); + } + // TODO: if blockchain load successful, verify blockchain against both + // hard-coded and runtime-loaded (and enforced) checkpoints. + else + { + } + + // check how far behind we are + uint64_t top_block_timestamp = m_db->get_top_block_timestamp(); + uint64_t timestamp_diff = time(NULL) - top_block_timestamp; + + // genesis block has no timestamp, could probably change it to have timestamp of 1341378000... + if(!top_block_timestamp) + timestamp_diff = time(NULL) - 1341378000; + LOG_PRINT_GREEN("Blockchain initialized. last block: " << m_db->height() - 1 << ", " << epee::misc_utils::get_time_interval_string(timestamp_diff) << " time ago, current difficulty: " << get_difficulty_for_next_block(), LOG_LEVEL_0); + + return true; +} +//------------------------------------------------------------------ +bool Blockchain::store_blockchain() +{ + LOG_PRINT_L3("Blockchain::" << __func__); + // TODO: make sure if this throws that it is not simply ignored higher + // up the call stack + try + { + m_db->sync(); + } + catch (const std::exception& e) + { + LOG_PRINT_L0(std::string("Error syncing blockchain db: ") + e.what() + "-- shutting down now to prevent issues!"); + throw; + } + catch (...) + { + LOG_PRINT_L0("There was an issue storing the blockchain, shutting down now to prevent issues!"); + throw; + } + LOG_PRINT_L0("Blockchain stored OK."); + return true; +} +//------------------------------------------------------------------ +bool Blockchain::deinit() +{ + LOG_PRINT_L3("Blockchain::" << __func__); + // as this should be called if handling a SIGSEGV, need to check + // if m_db is a NULL pointer (and thus may have caused the illegal + // memory operation), otherwise we may cause a loop. + if (m_db == NULL) + { + throw new DB_ERROR("The db pointer is null in Blockchain, the blockchain may be corrupt!"); + } + + try + { + m_db->close(); + } + catch (const std::exception& e) + { + LOG_PRINT_L0(std::string("Error closing blockchain db: ") + e.what()); + } + catch (...) + { + LOG_PRINT_L0("There was an issue closing/storing the blockchain, shutting down now to prevent issues!"); + } + + delete m_db; + return true; +} +//------------------------------------------------------------------ +// This function tells BlockchainDB to remove the top block from the +// blockchain and then returns all transactions (except the miner tx, of course) +// from it to the tx_pool +block Blockchain::pop_block_from_blockchain() +{ + LOG_PRINT_L3("Blockchain::" << __func__); + CRITICAL_REGION_LOCAL(m_blockchain_lock); + + block popped_block; + std::vector<transaction> popped_txs; + + try + { + m_db->pop_block(popped_block, popped_txs); + } + // anything that could cause this to throw is likely catastrophic, + // so we re-throw + catch (const std::exception& e) + { + LOG_ERROR("Error popping block from blockchain: " << e.what()); + throw; + } + catch (...) + { + LOG_ERROR("Error popping block from blockchain, throwing!"); + throw; + } + + // return transactions from popped block to the tx_pool + for (transaction& tx : popped_txs) + { + if (!is_coinbase(tx)) + { + cryptonote::tx_verification_context tvc = AUTO_VAL_INIT(tvc); + bool r = m_tx_pool.add_tx(tx, tvc, true); + if (!r) + { + LOG_ERROR("Error returning transaction to tx_pool"); + } + } + } + + return popped_block; +} +//------------------------------------------------------------------ +bool Blockchain::reset_and_set_genesis_block(const block& b) +{ + LOG_PRINT_L3("Blockchain::" << __func__); + CRITICAL_REGION_LOCAL(m_blockchain_lock); + m_transactions.clear(); + m_spent_keys.clear(); + m_blocks.clear(); + m_blocks_index.clear(); + m_alternative_chains.clear(); + m_outputs.clear(); + m_db->reset(); + + block_verification_context bvc = boost::value_initialized<block_verification_context>(); + add_new_block(b, bvc); + return bvc.m_added_to_main_chain && !bvc.m_verifivation_failed; +} +//------------------------------------------------------------------ +//TODO: move to BlockchainDB subclass +bool Blockchain::purge_transaction_keyimages_from_blockchain(const transaction& tx, bool strict_check) +{ + LOG_PRINT_L3("Blockchain::" << __func__); + CRITICAL_REGION_LOCAL(m_blockchain_lock); + struct purge_transaction_visitor: public boost::static_visitor<bool> + { + key_images_container& m_spent_keys; + bool m_strict_check; + purge_transaction_visitor(key_images_container& spent_keys, bool strict_check):m_spent_keys(spent_keys), m_strict_check(strict_check){} + + bool operator()(const txin_to_key& inp) const + { + //const crypto::key_image& ki = inp.k_image; + auto r = m_spent_keys.find(inp.k_image); + if(r != m_spent_keys.end()) + { + m_spent_keys.erase(r); + }else + { + CHECK_AND_ASSERT_MES(!m_strict_check, false, "purge_block_data_from_blockchain: key image in transaction not found"); + } + return true; + } + bool operator()(const txin_gen& inp) const + { + return true; + } + bool operator()(const txin_to_script& tx) const + { + return false; + } + + bool operator()(const txin_to_scripthash& tx) const + { + return false; + } + }; + + BOOST_FOREACH(const txin_v& in, tx.vin) + { + bool r = boost::apply_visitor(purge_transaction_visitor(m_spent_keys, strict_check), in); + CHECK_AND_ASSERT_MES(!strict_check || r, false, "failed to process purge_transaction_visitor"); + } + return true; +} +//------------------------------------------------------------------ +crypto::hash Blockchain::get_tail_id(uint64_t& height) const +{ + LOG_PRINT_L3("Blockchain::" << __func__); + CRITICAL_REGION_LOCAL(m_blockchain_lock); + height = m_db->height() - 1; + return get_tail_id(); +} +//------------------------------------------------------------------ +crypto::hash Blockchain::get_tail_id() const +{ + LOG_PRINT_L3("Blockchain::" << __func__); + CRITICAL_REGION_LOCAL(m_blockchain_lock); + return m_db->top_block_hash(); +} +//------------------------------------------------------------------ +/*TODO: this function was...poorly written. As such, I'm not entirely + * certain on what it was supposed to be doing. Need to look into this, + * but it doesn't seem terribly important just yet. + * + * puts into list <ids> a list of hashes representing certain blocks + * from the blockchain in reverse chronological order + * + * the blocks chosen, at the time of this writing, are: + * the most recent 11 + * powers of 2 less recent from there, so 13, 17, 25, etc... + * + */ +bool Blockchain::get_short_chain_history(std::list<crypto::hash>& ids) const +{ + LOG_PRINT_L3("Blockchain::" << __func__); + CRITICAL_REGION_LOCAL(m_blockchain_lock); + uint64_t i = 0; + uint64_t current_multiplier = 1; + uint64_t sz = m_db->height(); + + if(!sz) + return true; + + bool genesis_included = false; + uint64_t current_back_offset = 1; + while(current_back_offset < sz) + { + ids.push_back(m_db->get_block_hash_from_height(sz - current_back_offset)); + + if(sz-current_back_offset == 0) + { + genesis_included = true; + } + if(i < 10) + { + ++current_back_offset; + } + else + { + current_multiplier *= 2; + current_back_offset += current_multiplier; + } + ++i; + } + + if (!genesis_included) + { + ids.push_back(m_db->get_block_hash_from_height(0)); + } + + return true; +} +//------------------------------------------------------------------ +crypto::hash Blockchain::get_block_id_by_height(uint64_t height) const +{ + LOG_PRINT_L3("Blockchain::" << __func__); + CRITICAL_REGION_LOCAL(m_blockchain_lock); + try + { + return m_db->get_block_hash_from_height(height); + } + catch (const BLOCK_DNE& e) + { + } + catch (const std::exception& e) + { + LOG_PRINT_L0(std::string("Something went wrong fetching block hash by height: ") + e.what()); + throw; + } + catch (...) + { + LOG_PRINT_L0(std::string("Something went wrong fetching block hash by height")); + throw; + } + return null_hash; +} +//------------------------------------------------------------------ +bool Blockchain::get_block_by_hash(const crypto::hash &h, block &blk) const +{ + LOG_PRINT_L3("Blockchain::" << __func__); + CRITICAL_REGION_LOCAL(m_blockchain_lock); + + // try to find block in main chain + try + { + blk = m_db->get_block(h); + return true; + } + // try to find block in alternative chain + catch (const BLOCK_DNE& e) + { + blocks_ext_by_hash::const_iterator it_alt = m_alternative_chains.find(h); + if (m_alternative_chains.end() != it_alt) { + blk = it_alt->second.bl; + return true; + } + } + catch (const std::exception& e) + { + LOG_PRINT_L0(std::string("Something went wrong fetching block by hash: ") + e.what()); + throw; + } + catch (...) + { + LOG_PRINT_L0(std::string("Something went wrong fetching block hash by hash")); + throw; + } + + return false; +} +//------------------------------------------------------------------ +//FIXME: this function does not seem to be called from anywhere, but +// if it ever is, should probably change std::list for std::vector +void Blockchain::get_all_known_block_ids(std::list<crypto::hash> &main, std::list<crypto::hash> &alt, std::list<crypto::hash> &invalid) const +{ + LOG_PRINT_L3("Blockchain::" << __func__); + CRITICAL_REGION_LOCAL(m_blockchain_lock); + + for (auto& a : m_db->get_hashes_range(0, m_db->height() - 1)) + { + main.push_back(a); + } + + BOOST_FOREACH(const blocks_ext_by_hash::value_type &v, m_alternative_chains) + alt.push_back(v.first); + + BOOST_FOREACH(const blocks_ext_by_hash::value_type &v, m_invalid_blocks) + invalid.push_back(v.first); +} +//------------------------------------------------------------------ +// This function aggregates the cumulative difficulties and timestamps of the +// last DIFFICULTY_BLOCKS_COUNT blocks and passes them to next_difficulty, +// returning the result of that call. Ignores the genesis block, and can use +// less blocks than desired if there aren't enough. +difficulty_type Blockchain::get_difficulty_for_next_block() const +{ + LOG_PRINT_L3("Blockchain::" << __func__); + CRITICAL_REGION_LOCAL(m_blockchain_lock); + std::vector<uint64_t> timestamps; + std::vector<difficulty_type> cumulative_difficulties; + auto h = m_db->height(); + + size_t offset = h - std::min<size_t>(h, static_cast<size_t>(DIFFICULTY_BLOCKS_COUNT)); + + if (offset == 0) + { + ++offset; + } + + for(; offset < h; offset++) + { + timestamps.push_back(m_db->get_block_timestamp(offset)); + cumulative_difficulties.push_back(m_db->get_block_cumulative_difficulty(offset)); + } + return next_difficulty(timestamps, cumulative_difficulties); +} +//------------------------------------------------------------------ +// This function removes blocks from the blockchain until it gets to the +// position where the blockchain switch started and then re-adds the blocks +// that had been removed. +bool Blockchain::rollback_blockchain_switching(std::list<block>& original_chain, uint64_t rollback_height) +{ + LOG_PRINT_L3("Blockchain::" << __func__); + CRITICAL_REGION_LOCAL(m_blockchain_lock); + + // remove blocks from blockchain until we get back to where we should be. + while (m_db->height() != rollback_height) + { + pop_block_from_blockchain(); + } + + //return back original chain + for (auto& bl : original_chain) + { + block_verification_context bvc = boost::value_initialized<block_verification_context>(); + bool r = handle_block_to_main_chain(bl, bvc); + CHECK_AND_ASSERT_MES(r && bvc.m_added_to_main_chain, false, "PANIC! failed to add (again) block while chain switching during the rollback!"); + } + + LOG_PRINT_L1("Rollback to height " << rollback_height << " was successful."); + if (original_chain.size()) + { + LOG_PRINT_L1("Restoration to previous blockchain successful as well."); + } + return true; +} +//------------------------------------------------------------------ +// This function attempts to switch to an alternate chain, returning +// boolean based on success therein. +bool Blockchain::switch_to_alternative_blockchain(std::list<blocks_ext_by_hash::iterator>& alt_chain, bool discard_disconnected_chain) +{ + LOG_PRINT_L3("Blockchain::" << __func__); + CRITICAL_REGION_LOCAL(m_blockchain_lock); + + // if empty alt chain passed (not sure how that could happen), return false + CHECK_AND_ASSERT_MES(alt_chain.size(), false, "switch_to_alternative_blockchain: empty chain passed"); + + // verify that main chain has front of alt chain's parent block + if (!m_db->block_exists(alt_chain.front()->second.bl.prev_id)) + { + LOG_ERROR("Attempting to move to an alternate chain, but it doesn't appear to connect to the main chain!"); + return false; + } + + // pop blocks from the blockchain until the top block is the parent + // of the front block of the alt chain. + std::list<block> disconnected_chain; + while (m_db->top_block_hash() != alt_chain.front()->second.bl.prev_id) + { + block b = pop_block_from_blockchain(); + disconnected_chain.push_front(b); + } + + auto split_height = m_db->height(); + + //connecting new alternative chain + for(auto alt_ch_iter = alt_chain.begin(); alt_ch_iter != alt_chain.end(); alt_ch_iter++) + { + auto ch_ent = *alt_ch_iter; + block_verification_context bvc = boost::value_initialized<block_verification_context>(); + + // add block to main chain + bool r = handle_block_to_main_chain(ch_ent->second.bl, bvc); + + // if adding block to main chain failed, rollback to previous state and + // return false + if(!r || !bvc.m_added_to_main_chain) + { + LOG_PRINT_L1("Failed to switch to alternative blockchain"); + + // rollback_blockchain_switching should be moved to two different + // functions: rollback and apply_chain, but for now we pretend it is + // just the latter (because the rollback was done above). + rollback_blockchain_switching(disconnected_chain, m_db->height()); + + // FIXME: Why do we keep invalid blocks around? Possibly in case we hear + // about them again so we can immediately dismiss them, but needs some + // looking into. + add_block_as_invalid(ch_ent->second, get_block_hash(ch_ent->second.bl)); + LOG_PRINT_L1("The block was inserted as invalid while connecting new alternative chain, block_id: " << get_block_hash(ch_ent->second.bl)); + m_alternative_chains.erase(ch_ent); + + for(auto alt_ch_to_orph_iter = ++alt_ch_iter; alt_ch_to_orph_iter != alt_chain.end(); alt_ch_to_orph_iter++) + { + add_block_as_invalid((*alt_ch_iter)->second, (*alt_ch_iter)->first); + m_alternative_chains.erase(*alt_ch_to_orph_iter); + } + return false; + } + } + + // if we're to keep the disconnected blocks, add them as alternates + if(!discard_disconnected_chain) + { + //pushing old chain as alternative chain + for (auto& old_ch_ent : disconnected_chain) + { + block_verification_context bvc = boost::value_initialized<block_verification_context>(); + bool r = handle_alternative_block(old_ch_ent, get_block_hash(old_ch_ent), bvc); + if(!r) + { + LOG_PRINT_L1("Failed to push ex-main chain blocks to alternative chain "); + // previously this would fail the blockchain switching, but I don't + // think this is bad enough to warrant that. + } + } + } + + //removing alt_chain entries from alternative chain + BOOST_FOREACH(auto ch_ent, alt_chain) + { + m_alternative_chains.erase(ch_ent); + } + + LOG_PRINT_GREEN("REORGANIZE SUCCESS! on height: " << split_height << ", new blockchain size: " << m_db->height(), LOG_LEVEL_0); + return true; +} +//------------------------------------------------------------------ +// This function calculates the difficulty target for the block being added to +// an alternate chain. +difficulty_type Blockchain::get_next_difficulty_for_alternative_chain(const std::list<blocks_ext_by_hash::iterator>& alt_chain, block_extended_info& bei) const +{ + LOG_PRINT_L3("Blockchain::" << __func__); + std::vector<uint64_t> timestamps; + std::vector<difficulty_type> cumulative_difficulties; + + // if the alt chain isn't long enough to calculate the difficulty target + // based on its blocks alone, need to get more blocks from the main chain + if(alt_chain.size()< DIFFICULTY_BLOCKS_COUNT) + { + CRITICAL_REGION_LOCAL(m_blockchain_lock); + + // Figure out start and stop offsets for main chain blocks + size_t main_chain_stop_offset = alt_chain.size() ? alt_chain.front()->second.height : bei.height; + size_t main_chain_count = DIFFICULTY_BLOCKS_COUNT - std::min(static_cast<size_t>(DIFFICULTY_BLOCKS_COUNT), alt_chain.size()); + main_chain_count = std::min(main_chain_count, main_chain_stop_offset); + size_t main_chain_start_offset = main_chain_stop_offset - main_chain_count; + + if(!main_chain_start_offset) + ++main_chain_start_offset; //skip genesis block + + // get difficulties and timestamps from relevant main chain blocks + for(; main_chain_start_offset < main_chain_stop_offset; ++main_chain_start_offset) + { + timestamps.push_back(m_db->get_block_timestamp(main_chain_start_offset)); + cumulative_difficulties.push_back(m_db->get_block_cumulative_difficulty(main_chain_start_offset)); + } + + // make sure we haven't accidentally grabbed too many blocks...maybe don't need this check? + CHECK_AND_ASSERT_MES((alt_chain.size() + timestamps.size()) <= DIFFICULTY_BLOCKS_COUNT, false, + "Internal error, alt_chain.size()[" << alt_chain.size() + << "] + vtimestampsec.size()[" << timestamps.size() + << "] NOT <= DIFFICULTY_WINDOW[]" << DIFFICULTY_BLOCKS_COUNT + ); + + for (auto it : alt_chain) + { + timestamps.push_back(it->second.bl.timestamp); + cumulative_difficulties.push_back(it->second.cumulative_difficulty); + } + } + // if the alt chain is long enough for the difficulty calc, grab difficulties + // and timestamps from it alone + else + { + timestamps.resize(static_cast<size_t>(DIFFICULTY_BLOCKS_COUNT)); + cumulative_difficulties.resize(static_cast<size_t>(DIFFICULTY_BLOCKS_COUNT)); + size_t count = 0; + size_t max_i = timestamps.size()-1; + // get difficulties and timestamps from most recent blocks in alt chain + BOOST_REVERSE_FOREACH(auto it, alt_chain) + { + timestamps[max_i - count] = it->second.bl.timestamp; + cumulative_difficulties[max_i - count] = it->second.cumulative_difficulty; + count++; + if(count >= DIFFICULTY_BLOCKS_COUNT) + break; + } + } + + // calculate the difficulty target for the block and return it + return next_difficulty(timestamps, cumulative_difficulties); +} +//------------------------------------------------------------------ +// This function does a sanity check on basic things that all miner +// transactions have in common, such as: +// one input, of type txin_gen, with height set to the block's height +// correct miner tx unlock time +// a non-overflowing tx amount (dubious necessity on this check) +bool Blockchain::prevalidate_miner_transaction(const block& b, uint64_t height) +{ + LOG_PRINT_L3("Blockchain::" << __func__); + CHECK_AND_ASSERT_MES(b.miner_tx.vin.size() == 1, false, "coinbase transaction in the block has no inputs"); + CHECK_AND_ASSERT_MES(b.miner_tx.vin[0].type() == typeid(txin_gen), false, "coinbase transaction in the block has the wrong type"); + if(boost::get<txin_gen>(b.miner_tx.vin[0]).height != height) + { + LOG_PRINT_RED_L1("The miner transaction in block has invalid height: " << boost::get<txin_gen>(b.miner_tx.vin[0]).height << ", expected: " << height); + return false; + } + CHECK_AND_ASSERT_MES(b.miner_tx.unlock_time == height + CRYPTONOTE_MINED_MONEY_UNLOCK_WINDOW, + false, + "coinbase transaction transaction has the wrong unlock time=" << b.miner_tx.unlock_time << ", expected " << height + CRYPTONOTE_MINED_MONEY_UNLOCK_WINDOW); + + + //check outs overflow + //NOTE: not entirely sure this is necessary, given that this function is + // designed simply to make sure the total amount for a transaction + // does not overflow a uint64_t, and this transaction *is* a uint64_t... + if(!check_outs_overflow(b.miner_tx)) + { + LOG_PRINT_RED_L1("miner transaction has money overflow in block " << get_block_hash(b)); + return false; + } + + return true; +} +//------------------------------------------------------------------ +// This function validates the miner transaction reward +bool Blockchain::validate_miner_transaction(const block& b, size_t cumulative_block_size, uint64_t fee, uint64_t& base_reward, uint64_t already_generated_coins) +{ + LOG_PRINT_L3("Blockchain::" << __func__); + //validate reward + uint64_t money_in_use = 0; + BOOST_FOREACH(auto& o, b.miner_tx.vout) + money_in_use += o.amount; + + std::vector<size_t> last_blocks_sizes; + get_last_n_blocks_sizes(last_blocks_sizes, CRYPTONOTE_REWARD_BLOCKS_WINDOW); + if (!get_block_reward(epee::misc_utils::median(last_blocks_sizes), cumulative_block_size, already_generated_coins, base_reward)) { + LOG_PRINT_L1("block size " << cumulative_block_size << " is bigger than allowed for this blockchain"); + return false; + } + if(base_reward + fee < money_in_use) + { + LOG_PRINT_L1("coinbase transaction spend too much money (" << print_money(money_in_use) << "). Block reward is " << print_money(base_reward + fee) << "(" << print_money(base_reward) << "+" << print_money(fee) << ")"); + return false; + } + if(base_reward + fee != money_in_use) + { + LOG_PRINT_L1("coinbase transaction doesn't use full amount of block reward: spent: " + << money_in_use << ", block reward " << base_reward + fee << "(" << base_reward << "+" << fee << ")"); + return false; + } + return true; +} +//------------------------------------------------------------------ +// get the block sizes of the last <count> blocks, starting at <from_height> +// and return by reference <sz>. +void Blockchain::get_last_n_blocks_sizes(std::vector<size_t>& sz, size_t count) const +{ + LOG_PRINT_L3("Blockchain::" << __func__); + CRITICAL_REGION_LOCAL(m_blockchain_lock); + auto h = m_db->height(); + + // this function is meaningless for an empty blockchain...granted it should never be empty + if(h == 0) + return; + + // add size of last <count> blocks to vector <sz> (or less, if blockchain size < count) + size_t start_offset = h - std::min<size_t>(h, count); + for(size_t i = start_offset; i < h; i++) + { + sz.push_back(m_db->get_block_size(i)); + } +} +//------------------------------------------------------------------ +uint64_t Blockchain::get_current_cumulative_blocksize_limit() const +{ + LOG_PRINT_L3("Blockchain::" << __func__); + return m_current_block_cumul_sz_limit; +} +//------------------------------------------------------------------ +//TODO: This function only needed minor modification to work with BlockchainDB, +// and *works*. As such, to reduce the number of things that might break +// in moving to BlockchainDB, this function will remain otherwise +// unchanged for the time being. +// +// This function makes a new block for a miner to mine the hash for +// +// FIXME: this codebase references #if defined(DEBUG_CREATE_BLOCK_TEMPLATE) +// in a lot of places. That flag is not referenced in any of the code +// nor any of the makefiles, howeve. Need to look into whether or not it's +// necessary at all. +bool Blockchain::create_block_template(block& b, const account_public_address& miner_address, difficulty_type& diffic, uint64_t& height, const blobdata& ex_nonce) const +{ + LOG_PRINT_L3("Blockchain::" << __func__); + size_t median_size; + uint64_t already_generated_coins; + + CRITICAL_REGION_BEGIN(m_blockchain_lock); + b.major_version = CURRENT_BLOCK_MAJOR_VERSION; + b.minor_version = CURRENT_BLOCK_MINOR_VERSION; + b.prev_id = get_tail_id(); + b.timestamp = time(NULL); + + height = m_db->height(); + diffic = get_difficulty_for_next_block(); + CHECK_AND_ASSERT_MES(diffic, false, "difficulty owverhead."); + + median_size = m_current_block_cumul_sz_limit / 2; + already_generated_coins = m_db->get_block_already_generated_coins(height - 1); + + CRITICAL_REGION_END(); + + size_t txs_size; + uint64_t fee; + if (!m_tx_pool.fill_block_template(b, median_size, already_generated_coins, txs_size, fee)) { + return false; + } +#if defined(DEBUG_CREATE_BLOCK_TEMPLATE) + size_t real_txs_size = 0; + uint64_t real_fee = 0; + CRITICAL_REGION_BEGIN(m_tx_pool.m_transactions_lock); + BOOST_FOREACH(crypto::hash &cur_hash, b.tx_hashes) { + auto cur_res = m_tx_pool.m_transactions.find(cur_hash); + if (cur_res == m_tx_pool.m_transactions.end()) { + LOG_ERROR("Creating block template: error: transaction not found"); + continue; + } + tx_memory_pool::tx_details &cur_tx = cur_res->second; + real_txs_size += cur_tx.blob_size; + real_fee += cur_tx.fee; + if (cur_tx.blob_size != get_object_blobsize(cur_tx.tx)) { + LOG_ERROR("Creating block template: error: invalid transaction size"); + } + uint64_t inputs_amount; + if (!get_inputs_money_amount(cur_tx.tx, inputs_amount)) { + LOG_ERROR("Creating block template: error: cannot get inputs amount"); + } else if (cur_tx.fee != inputs_amount - get_outs_money_amount(cur_tx.tx)) { + LOG_ERROR("Creating block template: error: invalid fee"); + } + } + if (txs_size != real_txs_size) { + LOG_ERROR("Creating block template: error: wrongly calculated transaction size"); + } + if (fee != real_fee) { + LOG_ERROR("Creating block template: error: wrongly calculated fee"); + } + CRITICAL_REGION_END(); + LOG_PRINT_L1("Creating block template: height " << height << + ", median size " << median_size << + ", already generated coins " << already_generated_coins << + ", transaction size " << txs_size << + ", fee " << fee); +#endif + + /* + two-phase miner transaction generation: we don't know exact block size until we prepare block, but we don't know reward until we know + block size, so first miner transaction generated with fake amount of money, and with phase we know think we know expected block size + */ + //make blocks coin-base tx looks close to real coinbase tx to get truthful blob size + bool r = construct_miner_tx(height, median_size, already_generated_coins, txs_size, fee, miner_address, b.miner_tx, ex_nonce, 11); + CHECK_AND_ASSERT_MES(r, false, "Failed to construc miner tx, first chance"); + size_t cumulative_size = txs_size + get_object_blobsize(b.miner_tx); +#if defined(DEBUG_CREATE_BLOCK_TEMPLATE) + LOG_PRINT_L1("Creating block template: miner tx size " << get_object_blobsize(b.miner_tx) << + ", cumulative size " << cumulative_size); +#endif + for (size_t try_count = 0; try_count != 10; ++try_count) { + r = construct_miner_tx(height, median_size, already_generated_coins, cumulative_size, fee, miner_address, b.miner_tx, ex_nonce, 11); + + CHECK_AND_ASSERT_MES(r, false, "Failed to construc miner tx, second chance"); + size_t coinbase_blob_size = get_object_blobsize(b.miner_tx); + if (coinbase_blob_size > cumulative_size - txs_size) { + cumulative_size = txs_size + coinbase_blob_size; +#if defined(DEBUG_CREATE_BLOCK_TEMPLATE) + LOG_PRINT_L1("Creating block template: miner tx size " << coinbase_blob_size << + ", cumulative size " << cumulative_size << " is greater then before"); +#endif + continue; + } + + if (coinbase_blob_size < cumulative_size - txs_size) { + size_t delta = cumulative_size - txs_size - coinbase_blob_size; +#if defined(DEBUG_CREATE_BLOCK_TEMPLATE) + LOG_PRINT_L1("Creating block template: miner tx size " << coinbase_blob_size << + ", cumulative size " << txs_size + coinbase_blob_size << + " is less then before, adding " << delta << " zero bytes"); +#endif + b.miner_tx.extra.insert(b.miner_tx.extra.end(), delta, 0); + //here could be 1 byte difference, because of extra field counter is varint, and it can become from 1-byte len to 2-bytes len. + if (cumulative_size != txs_size + get_object_blobsize(b.miner_tx)) { + CHECK_AND_ASSERT_MES(cumulative_size + 1 == txs_size + get_object_blobsize(b.miner_tx), false, "unexpected case: cumulative_size=" << cumulative_size << " + 1 is not equal txs_cumulative_size=" << txs_size << " + get_object_blobsize(b.miner_tx)=" << get_object_blobsize(b.miner_tx)); + b.miner_tx.extra.resize(b.miner_tx.extra.size() - 1); + if (cumulative_size != txs_size + get_object_blobsize(b.miner_tx)) { + //fuck, not lucky, -1 makes varint-counter size smaller, in that case we continue to grow with cumulative_size + LOG_PRINT_RED("Miner tx creation has no luck with delta_extra size = " << delta << " and " << delta - 1 , LOG_LEVEL_2); + cumulative_size += delta - 1; + continue; + } + LOG_PRINT_GREEN("Setting extra for block: " << b.miner_tx.extra.size() << ", try_count=" << try_count, LOG_LEVEL_1); + } + } + CHECK_AND_ASSERT_MES(cumulative_size == txs_size + get_object_blobsize(b.miner_tx), false, "unexpected case: cumulative_size=" << cumulative_size << " is not equal txs_cumulative_size=" << txs_size << " + get_object_blobsize(b.miner_tx)=" << get_object_blobsize(b.miner_tx)); +#if defined(DEBUG_CREATE_BLOCK_TEMPLATE) + LOG_PRINT_L1("Creating block template: miner tx size " << coinbase_blob_size << + ", cumulative size " << cumulative_size << " is now good"); +#endif + return true; + } + LOG_ERROR("Failed to create_block_template with " << 10 << " tries"); + return false; +} +//------------------------------------------------------------------ +// for an alternate chain, get the timestamps from the main chain to complete +// the needed number of timestamps for the BLOCKCHAIN_TIMESTAMP_CHECK_WINDOW. +bool Blockchain::complete_timestamps_vector(uint64_t start_top_height, std::vector<uint64_t>& timestamps) +{ + LOG_PRINT_L3("Blockchain::" << __func__); + + if(timestamps.size() >= BLOCKCHAIN_TIMESTAMP_CHECK_WINDOW) + return true; + + CRITICAL_REGION_LOCAL(m_blockchain_lock); + size_t need_elements = BLOCKCHAIN_TIMESTAMP_CHECK_WINDOW - timestamps.size(); + CHECK_AND_ASSERT_MES(start_top_height < m_db->height(), false, "internal error: passed start_height not < " << " m_db->height() -- " << start_top_height << " >= " << m_db->height()); + size_t stop_offset = start_top_height > need_elements ? start_top_height - need_elements : 0; + while (start_top_height != stop_offset) + { + timestamps.push_back(m_db->get_block_timestamp(start_top_height)); + --start_top_height; + } + return true; +} +//------------------------------------------------------------------ +// If a block is to be added and its parent block is not the current +// main chain top block, then we need to see if we know about its parent block. +// If its parent block is part of a known forked chain, then we need to see +// if that chain is long enough to become the main chain and re-org accordingly +// if so. If not, we need to hang on to the block in case it becomes part of +// a long forked chain eventually. +bool Blockchain::handle_alternative_block(const block& b, const crypto::hash& id, block_verification_context& bvc) +{ + LOG_PRINT_L3("Blockchain::" << __func__); + CRITICAL_REGION_LOCAL(m_blockchain_lock); + + uint64_t block_height = get_block_height(b); + if(0 == block_height) + { + LOG_PRINT_L1("Block with id: " << epee::string_tools::pod_to_hex(id) << " (as alternative), but miner tx says height is 0."); + bvc.m_verifivation_failed = true; + return false; + } + // this basically says if the blockchain is smaller than the first + // checkpoint then alternate blocks are allowed. Alternatively, if the + // last checkpoint *before* the end of the current chain is also before + // the block to be added, then this is fine. + if (!m_checkpoints.is_alternative_block_allowed(get_current_blockchain_height(), block_height)) + { + LOG_PRINT_RED_L1("Block with id: " << id + << std::endl << " can't be accepted for alternative chain, block height: " << block_height + << std::endl << " blockchain height: " << get_current_blockchain_height()); + bvc.m_verifivation_failed = true; + return false; + } + + //block is not related with head of main chain + //first of all - look in alternative chains container + auto it_prev = m_alternative_chains.find(b.prev_id); + bool parent_in_main = m_db->block_exists(b.prev_id); + if(it_prev != m_alternative_chains.end() || parent_in_main) + { + //we have new block in alternative chain + + //build alternative subchain, front -> mainchain, back -> alternative head + blocks_ext_by_hash::iterator alt_it = it_prev; //m_alternative_chains.find() + std::list<blocks_ext_by_hash::iterator> alt_chain; + std::vector<uint64_t> timestamps; + while(alt_it != m_alternative_chains.end()) + { + alt_chain.push_front(alt_it); + timestamps.push_back(alt_it->second.bl.timestamp); + alt_it = m_alternative_chains.find(alt_it->second.bl.prev_id); + } + + // if block to be added connects to known blocks that aren't part of the + // main chain -- that is, if we're adding on to an alternate chain + if(alt_chain.size()) + { + // make sure alt chain doesn't somehow start past the end of the main chain + CHECK_AND_ASSERT_MES(m_db->height() > alt_chain.front()->second.height, false, "main blockchain wrong height"); + + // make sure that the blockchain contains the block that should connect + // this alternate chain with it. + if (!m_db->block_exists(alt_chain.front()->second.bl.prev_id)) + { + LOG_PRINT_L1("alternate chain does not appear to connect to main chain..."); + return false; + } + + // make sure block connects correctly to the main chain + auto h = m_db->get_block_hash_from_height(alt_chain.front()->second.height - 1); + CHECK_AND_ASSERT_MES(h == alt_chain.front()->second.bl.prev_id, false, "alternative chain has wrong connection to main chain"); + complete_timestamps_vector(m_db->get_block_height(alt_chain.front()->second.bl.prev_id), timestamps); + } + // if block not associated with known alternate chain + else + { + // if block parent is not part of main chain or an alternate chain, + // we ignore it + CHECK_AND_ASSERT_MES(parent_in_main, false, "internal error: broken imperative condition it_main_prev != m_blocks_index.end()"); + + complete_timestamps_vector(m_db->get_block_height(b.prev_id), timestamps); + } + + // verify that the block's timestamp is within the acceptable range + // (not earlier than the median of the last X blocks) + if(!check_block_timestamp(timestamps, b)) + { + LOG_PRINT_RED_L1("Block with id: " << id + << std::endl << " for alternative chain, has invalid timestamp: " << b.timestamp); + bvc.m_verifivation_failed = true; + return false; + } + + // FIXME: consider moving away from block_extended_info at some point + block_extended_info bei = boost::value_initialized<block_extended_info>(); + bei.bl = b; + bei.height = alt_chain.size() ? it_prev->second.height + 1 : m_db->get_block_height(b.prev_id) + 1; + + bool is_a_checkpoint; + if(!m_checkpoints.check_block(bei.height, id, is_a_checkpoint)) + { + LOG_ERROR("CHECKPOINT VALIDATION FAILED"); + bvc.m_verifivation_failed = true; + return false; + } + + // Check the block's hash against the difficulty target for its alt chain + m_is_in_checkpoint_zone = false; + difficulty_type current_diff = get_next_difficulty_for_alternative_chain(alt_chain, bei); + CHECK_AND_ASSERT_MES(current_diff, false, "!!!!!!! DIFFICULTY OVERHEAD !!!!!!!"); + crypto::hash proof_of_work = null_hash; + get_block_longhash(bei.bl, proof_of_work, bei.height); + if(!check_hash(proof_of_work, current_diff)) + { + LOG_PRINT_RED_L1("Block with id: " << id + << std::endl << " for alternative chain, does not have enough proof of work: " << proof_of_work + << std::endl << " expected difficulty: " << current_diff); + bvc.m_verifivation_failed = true; + return false; + } + + if(!prevalidate_miner_transaction(b, bei.height)) + { + LOG_PRINT_RED_L1("Block with id: " << epee::string_tools::pod_to_hex(id) + << " (as alternative) has incorrect miner transaction."); + bvc.m_verifivation_failed = true; + return false; + + } + + // FIXME: + // this brings up an interesting point: consider allowing to get block + // difficulty both by height OR by hash, not just height. + difficulty_type main_chain_cumulative_difficulty = m_db->get_block_cumulative_difficulty(m_db->height() - 1); + if (alt_chain.size()) + { + bei.cumulative_difficulty = it_prev->second.cumulative_difficulty; + } + else + { + // passed-in block's previous block's cumulative difficulty, found on the main chain + bei.cumulative_difficulty = m_db->get_block_cumulative_difficulty(m_db->get_block_height(b.prev_id)); + } + bei.cumulative_difficulty += current_diff; + + // add block to alternate blocks storage, + // as well as the current "alt chain" container + auto i_res = m_alternative_chains.insert(blocks_ext_by_hash::value_type(id, bei)); + CHECK_AND_ASSERT_MES(i_res.second, false, "insertion of new alternative block returned as it already exist"); + alt_chain.push_back(i_res.first); + + // FIXME: is it even possible for a checkpoint to show up not on the main chain? + if(is_a_checkpoint) + { + //do reorganize! + LOG_PRINT_GREEN("###### REORGANIZE on height: " << alt_chain.front()->second.height << " of " << m_db->height() - 1 << + ", checkpoint is found in alternative chain on height " << bei.height, LOG_LEVEL_0); + + bool r = switch_to_alternative_blockchain(alt_chain, true); + + bvc.m_added_to_main_chain = r; + bvc.m_verifivation_failed = !r; + + return r; + } + else if(main_chain_cumulative_difficulty < bei.cumulative_difficulty) //check if difficulty bigger then in main chain + { + //do reorganize! + LOG_PRINT_GREEN("###### REORGANIZE on height: " + << alt_chain.front()->second.height << " of " << m_db->height() - 1 + << " with cum_difficulty " << m_db->get_block_cumulative_difficulty(m_db->height() - 1) + << std::endl << " alternative blockchain size: " << alt_chain.size() + << " with cum_difficulty " << bei.cumulative_difficulty, LOG_LEVEL_0 + ); + + bool r = switch_to_alternative_blockchain(alt_chain, false); + if(r) bvc.m_added_to_main_chain = true; + else bvc.m_verifivation_failed = true; + return r; + } + else + { + LOG_PRINT_BLUE("----- BLOCK ADDED AS ALTERNATIVE ON HEIGHT " << bei.height + << std::endl << "id:\t" << id + << std::endl << "PoW:\t" << proof_of_work + << std::endl << "difficulty:\t" << current_diff, LOG_LEVEL_0); + return true; + } + } + else + { + //block orphaned + bvc.m_marked_as_orphaned = true; + LOG_PRINT_RED_L1("Block recognized as orphaned and rejected, id = " << id); + } + + return true; +} +//------------------------------------------------------------------ +bool Blockchain::get_blocks(uint64_t start_offset, size_t count, std::list<block>& blocks, std::list<transaction>& txs) const +{ + LOG_PRINT_L3("Blockchain::" << __func__); + CRITICAL_REGION_LOCAL(m_blockchain_lock); + if(start_offset > m_db->height()) + return false; + + if (!get_blocks(start_offset, count, blocks)) + { + return false; + } + + for(const block& blk : blocks) + { + std::list<crypto::hash> missed_ids; + get_transactions(blk.tx_hashes, txs, missed_ids); + CHECK_AND_ASSERT_MES(!missed_ids.size(), false, "has missed transactions in own block in main blockchain"); + } + + return true; +} +//------------------------------------------------------------------ +bool Blockchain::get_blocks(uint64_t start_offset, size_t count, std::list<block>& blocks) const +{ + LOG_PRINT_L3("Blockchain::" << __func__); + CRITICAL_REGION_LOCAL(m_blockchain_lock); + if(start_offset > m_db->height()) + return false; + + for(size_t i = start_offset; i < start_offset + count && i <= m_db->height();i++) + { + blocks.push_back(m_db->get_block_from_height(i)); + } + return true; +} +//------------------------------------------------------------------ +//TODO: This function *looks* like it won't need to be rewritten +// to use BlockchainDB, as it calls other functions that were, +// but it warrants some looking into later. +bool Blockchain::handle_get_objects(NOTIFY_REQUEST_GET_OBJECTS::request& arg, NOTIFY_RESPONSE_GET_OBJECTS::request& rsp) +{ + LOG_PRINT_L3("Blockchain::" << __func__); + CRITICAL_REGION_LOCAL(m_blockchain_lock); + rsp.current_blockchain_height = get_current_blockchain_height(); + std::list<block> blocks; + get_blocks(arg.blocks, blocks, rsp.missed_ids); + + BOOST_FOREACH(const auto& bl, blocks) + { + std::list<crypto::hash> missed_tx_id; + std::list<transaction> txs; + get_transactions(bl.tx_hashes, txs, rsp.missed_ids); + CHECK_AND_ASSERT_MES(!missed_tx_id.size(), false, "Internal error: has missed missed_tx_id.size()=" << missed_tx_id.size() + << std::endl << "for block id = " << get_block_hash(bl)); + rsp.blocks.push_back(block_complete_entry()); + block_complete_entry& e = rsp.blocks.back(); + //pack block + e.block = t_serializable_object_to_blob(bl); + //pack transactions + BOOST_FOREACH(transaction& tx, txs) + e.txs.push_back(t_serializable_object_to_blob(tx)); + + } + //get another transactions, if need + std::list<transaction> txs; + get_transactions(arg.txs, txs, rsp.missed_ids); + //pack aside transactions + BOOST_FOREACH(const auto& tx, txs) + rsp.txs.push_back(t_serializable_object_to_blob(tx)); + + return true; +} +//------------------------------------------------------------------ +bool Blockchain::get_alternative_blocks(std::list<block>& blocks) const +{ + LOG_PRINT_L3("Blockchain::" << __func__); + CRITICAL_REGION_LOCAL(m_blockchain_lock); + + BOOST_FOREACH(const auto& alt_bl, m_alternative_chains) + { + blocks.push_back(alt_bl.second.bl); + } + return true; +} +//------------------------------------------------------------------ +size_t Blockchain::get_alternative_blocks_count() const +{ + LOG_PRINT_L3("Blockchain::" << __func__); + CRITICAL_REGION_LOCAL(m_blockchain_lock); + return m_alternative_chains.size(); +} +//------------------------------------------------------------------ +// This function adds the output specified by <amount, i> to the result_outs container +// unlocked and other such checks should be done by here. +void Blockchain::add_out_to_get_random_outs(COMMAND_RPC_GET_RANDOM_OUTPUTS_FOR_AMOUNTS::outs_for_amount& result_outs, uint64_t amount, size_t i) const +{ + LOG_PRINT_L3("Blockchain::" << __func__); + CRITICAL_REGION_LOCAL(m_blockchain_lock); + + COMMAND_RPC_GET_RANDOM_OUTPUTS_FOR_AMOUNTS::out_entry& oen = *result_outs.outs.insert(result_outs.outs.end(), COMMAND_RPC_GET_RANDOM_OUTPUTS_FOR_AMOUNTS::out_entry()); + oen.global_amount_index = i; + oen.out_key = m_db->get_output_key(amount, i); +} +//------------------------------------------------------------------ +// This function takes an RPC request for mixins and creates an RPC response +// with the requested mixins. +// TODO: figure out why this returns boolean / if we should be returning false +// in some cases +bool Blockchain::get_random_outs_for_amounts(const COMMAND_RPC_GET_RANDOM_OUTPUTS_FOR_AMOUNTS::request& req, COMMAND_RPC_GET_RANDOM_OUTPUTS_FOR_AMOUNTS::response& res) const +{ + LOG_PRINT_L3("Blockchain::" << __func__); + srand(static_cast<unsigned int>(time(NULL))); + CRITICAL_REGION_LOCAL(m_blockchain_lock); + + // for each amount that we need to get mixins for, get <n> random outputs + // from BlockchainDB where <n> is req.outs_count (number of mixins). + for (uint64_t amount : req.amounts) + { + // create outs_for_amount struct and populate amount field + COMMAND_RPC_GET_RANDOM_OUTPUTS_FOR_AMOUNTS::outs_for_amount& result_outs = *res.outs.insert(res.outs.end(), COMMAND_RPC_GET_RANDOM_OUTPUTS_FOR_AMOUNTS::outs_for_amount()); + result_outs.amount = amount; + + std::unordered_set<uint64_t> seen_indices; + + // if there aren't enough outputs to mix with (or just enough), + // use all of them. Eventually this should become impossible. + if (m_db->get_num_outputs(amount) <= req.outs_count) + { + for (uint64_t i = 0; i < m_db->get_num_outputs(amount); i++) + { + // get tx_hash, tx_out_index from DB + tx_out_index toi = m_db->get_output_tx_and_index(amount, i); + + // if tx is unlocked, add output to result_outs + if (is_tx_spendtime_unlocked(m_db->get_tx_unlock_time(toi.first))) + { + add_out_to_get_random_outs(result_outs, amount, i); + } + + } + } + else + { + // while we still need more mixins + auto num_outs = m_db->get_num_outputs(amount); + while (result_outs.outs.size() < req.outs_count) + { + // if we've gone through every possible output, we've gotten all we can + if (seen_indices.size() == num_outs) + { + break; + } + + // get a random output index from the DB. If we've already seen it, + // return to the top of the loop and try again, otherwise add it to the + // list of output indices we've seen. + uint64_t i = m_db->get_random_output(amount); + if (seen_indices.count(i)) + { + continue; + } + seen_indices.emplace(i); + + // get tx_hash, tx_out_index from DB + tx_out_index toi = m_db->get_output_tx_and_index(amount, i); + + // if the output's transaction is unlocked, add the output's index to + // our list. + if (is_tx_spendtime_unlocked(m_db->get_tx_unlock_time(toi.first))) + { + add_out_to_get_random_outs(result_outs, amount, i); + } + } + } + } + return true; +} +//------------------------------------------------------------------ +// This function takes a list of block hashes from another node +// on the network to find where the split point is between us and them. +// This is used to see what to send another node that needs to sync. +bool Blockchain::find_blockchain_supplement(const std::list<crypto::hash>& qblock_ids, uint64_t& starter_offset) const +{ + LOG_PRINT_L3("Blockchain::" << __func__); + CRITICAL_REGION_LOCAL(m_blockchain_lock); + + // make sure the request includes at least the genesis block, otherwise + // how can we expect to sync from the client that the block list came from? + if(!qblock_ids.size() /*|| !req.m_total_height*/) + { + LOG_PRINT_L1("Client sent wrong NOTIFY_REQUEST_CHAIN: m_block_ids.size()=" << qblock_ids.size() << /*", m_height=" << req.m_total_height <<*/ ", dropping connection"); + return false; + } + + // make sure that the last block in the request's block list matches + // the genesis block + auto gen_hash = m_db->get_block_hash_from_height(0); + if(qblock_ids.back() != gen_hash) + { + LOG_PRINT_L1("Client sent wrong NOTIFY_REQUEST_CHAIN: genesis block missmatch: " << std::endl << "id: " + << qblock_ids.back() << ", " << std::endl << "expected: " << gen_hash + << "," << std::endl << " dropping connection"); + return false; + } + + // Find the first block the foreign chain has that we also have. + // Assume qblock_ids is in reverse-chronological order. + auto bl_it = qblock_ids.begin(); + uint64_t split_height = 0; + for(; bl_it != qblock_ids.end(); bl_it++) + { + try + { + split_height = m_db->get_block_height(*bl_it); + break; + } + catch (const BLOCK_DNE& e) + { + continue; + } + catch (const std::exception& e) + { + LOG_PRINT_L1("Non-critical error trying to find block by hash in BlockchainDB, hash: " << *bl_it); + return false; + } + } + + // this should be impossible, as we checked that we share the genesis block, + // but just in case... + if(bl_it == qblock_ids.end()) + { + LOG_PRINT_L1("Internal error handling connection, can't find split point"); + return false; + } + + // if split_height remains 0, we didn't have any but the genesis block in common + // which is only fine if the blocks just have the genesis block + if(split_height == 0 && qblock_ids.size() > 1) + { + LOG_ERROR("Ours and foreign blockchain have only genesis block in common... o.O"); + return false; + } + + //we start to put block ids INCLUDING last known id, just to make other side be sure + starter_offset = split_height; + return true; +} +//------------------------------------------------------------------ +uint64_t Blockchain::block_difficulty(uint64_t i) const +{ + LOG_PRINT_L3("Blockchain::" << __func__); + CRITICAL_REGION_LOCAL(m_blockchain_lock); + try + { + return m_db->get_block_difficulty(i); + } + catch (const BLOCK_DNE& e) + { + LOG_PRINT_L0("Attempted to get block difficulty for height above blockchain height"); + } + return 0; +} +//------------------------------------------------------------------ +template<class t_ids_container, class t_blocks_container, class t_missed_container> +bool Blockchain::get_blocks(const t_ids_container& block_ids, t_blocks_container& blocks, t_missed_container& missed_bs) const +{ + LOG_PRINT_L3("Blockchain::" << __func__); + CRITICAL_REGION_LOCAL(m_blockchain_lock); + + for (const auto& block_hash : block_ids) + { + try + { + blocks.push_back(m_db->get_block(block_hash)); + } + catch (const BLOCK_DNE& e) + { + missed_bs.push_back(block_hash); + } + catch (const std::exception& e) + { + return false; + } + } + return true; +} +//------------------------------------------------------------------ +template<class t_ids_container, class t_tx_container, class t_missed_container> +bool Blockchain::get_transactions(const t_ids_container& txs_ids, t_tx_container& txs, t_missed_container& missed_txs) const +{ + LOG_PRINT_L3("Blockchain::" << __func__); + CRITICAL_REGION_LOCAL(m_blockchain_lock); + + for (const auto& tx_hash : txs_ids) + { + try + { + txs.push_back(m_db->get_tx(tx_hash)); + } + catch (const TX_DNE& e) + { + missed_txs.push_back(tx_hash); + } + //FIXME: is this the correct way to handle this? + catch (const std::exception& e) + { + return false; + } + } + return true; +} +//------------------------------------------------------------------ +void Blockchain::print_blockchain(uint64_t start_index, uint64_t end_index) +{ + LOG_PRINT_L3("Blockchain::" << __func__); + std::stringstream ss; + CRITICAL_REGION_LOCAL(m_blockchain_lock); + auto h = m_db->height(); + if(start_index > h) + { + LOG_PRINT_L1("Wrong starter index set: " << start_index << ", expected max index " << h); + return; + } + + for(size_t i = start_index; i <= h && i != end_index; i++) + { + ss << "height " << i + << ", timestamp " << m_db->get_block_timestamp(i) + << ", cumul_dif " << m_db->get_block_cumulative_difficulty(i) + << ", size " << m_db->get_block_size(i) + << "\nid\t\t" << m_db->get_block_hash_from_height(i) + << "\ndifficulty\t\t" << m_db->get_block_difficulty(i) + << ", nonce " << m_db->get_block_from_height(i).nonce + << ", tx_count " << m_db->get_block_from_height(i).tx_hashes.size() + << std::endl; + } + LOG_PRINT_L1("Current blockchain:" << std::endl << ss.str()); + LOG_PRINT_L0("Blockchain printed with log level 1"); +} +//------------------------------------------------------------------ +void Blockchain::print_blockchain_index() +{ + LOG_PRINT_L3("Blockchain::" << __func__); + std::stringstream ss; + CRITICAL_REGION_LOCAL(m_blockchain_lock); + auto height = m_db->height(); + if (height != 0) + { + for(uint64_t i = 0; i <= height; i++) + { + ss << "height: " << i << ", hash: " << m_db->get_block_hash_from_height(i); + } + } + + LOG_PRINT_L0("Current blockchain index:" << std::endl + << ss.str() + ); +} +//------------------------------------------------------------------ +//TODO: remove this function and references to it +void Blockchain::print_blockchain_outs(const std::string& file) +{ + LOG_PRINT_L3("Blockchain::" << __func__); + return; +} +//------------------------------------------------------------------ +// Find the split point between us and foreign blockchain and return +// (by reference) the most recent common block hash along with up to +// BLOCKS_IDS_SYNCHRONIZING_DEFAULT_COUNT additional (more recent) hashes. +bool Blockchain::find_blockchain_supplement(const std::list<crypto::hash>& qblock_ids, NOTIFY_RESPONSE_CHAIN_ENTRY::request& resp) const +{ + LOG_PRINT_L3("Blockchain::" << __func__); + CRITICAL_REGION_LOCAL(m_blockchain_lock); + + // if we can't find the split point, return false + if(!find_blockchain_supplement(qblock_ids, resp.start_height)) + { + return false; + } + + resp.total_height = get_current_blockchain_height(); + size_t count = 0; + for(size_t i = resp.start_height; i < resp.total_height && count < BLOCKS_IDS_SYNCHRONIZING_DEFAULT_COUNT; i++, count++) + { + resp.m_block_ids.push_back(m_db->get_block_hash_from_height(i)); + } + return true; +} +//------------------------------------------------------------------ +//FIXME: change argument to std::vector, low priority +// find split point between ours and foreign blockchain (or start at +// blockchain height <req_start_block>), and return up to max_count FULL +// blocks by reference. +bool Blockchain::find_blockchain_supplement(const uint64_t req_start_block, const std::list<crypto::hash>& qblock_ids, std::list<std::pair<block, std::list<transaction> > >& blocks, uint64_t& total_height, uint64_t& start_height, size_t max_count) const +{ + LOG_PRINT_L3("Blockchain::" << __func__); + CRITICAL_REGION_LOCAL(m_blockchain_lock); + + // if a specific start height has been requested + if(req_start_block > 0) + { + // if requested height is higher than our chain, return false -- we can't help + if (req_start_block >= m_db->height()) + { + return false; + } + start_height = req_start_block; + } + else + { + if(!find_blockchain_supplement(qblock_ids, start_height)) + { + return false; + } + } + + total_height = get_current_blockchain_height(); + size_t count = 0; + for(size_t i = start_height; i < total_height && count < max_count; i++, count++) + { + blocks.resize(blocks.size()+1); + blocks.back().first = m_db->get_block_from_height(i); + std::list<crypto::hash> mis; + get_transactions(blocks.back().first.tx_hashes, blocks.back().second, mis); + CHECK_AND_ASSERT_MES(!mis.size(), false, "internal error, transaction from block not found"); + } + return true; +} +//------------------------------------------------------------------ +bool Blockchain::add_block_as_invalid(const block& bl, const crypto::hash& h) +{ + LOG_PRINT_L3("Blockchain::" << __func__); + block_extended_info bei = AUTO_VAL_INIT(bei); + bei.bl = bl; + return add_block_as_invalid(bei, h); +} +//------------------------------------------------------------------ +bool Blockchain::add_block_as_invalid(const block_extended_info& bei, const crypto::hash& h) +{ + LOG_PRINT_L3("Blockchain::" << __func__); + CRITICAL_REGION_LOCAL(m_blockchain_lock); + auto i_res = m_invalid_blocks.insert(std::map<crypto::hash, block_extended_info>::value_type(h, bei)); + CHECK_AND_ASSERT_MES(i_res.second, false, "at insertion invalid by tx returned status existed"); + LOG_PRINT_L1("BLOCK ADDED AS INVALID: " << h << std::endl << ", prev_id=" << bei.bl.prev_id << ", m_invalid_blocks count=" << m_invalid_blocks.size()); + return true; +} +//------------------------------------------------------------------ +bool Blockchain::have_block(const crypto::hash& id) const +{ + LOG_PRINT_L3("Blockchain::" << __func__); + CRITICAL_REGION_LOCAL(m_blockchain_lock); + + if(m_db->block_exists(id)) + { + LOG_PRINT_L3("block exists in main chain"); + return true; + } + + if(m_alternative_chains.count(id)) + { + LOG_PRINT_L3("block found in m_alternative_chains"); + return true; + } + + if(m_invalid_blocks.count(id)) + { + LOG_PRINT_L3("block found in m_invalid_blocks"); + return true; + } + + return false; +} +//------------------------------------------------------------------ +bool Blockchain::handle_block_to_main_chain(const block& bl, block_verification_context& bvc) +{ + LOG_PRINT_L3("Blockchain::" << __func__); + crypto::hash id = get_block_hash(bl); + return handle_block_to_main_chain(bl, id, bvc); +} +//------------------------------------------------------------------ +size_t Blockchain::get_total_transactions() const +{ + LOG_PRINT_L3("Blockchain::" << __func__); + CRITICAL_REGION_LOCAL(m_blockchain_lock); + return m_db->get_tx_count(); +} +//------------------------------------------------------------------ +// This function checks each input in the transaction <tx> to make sure it +// has not been used already, and adds its key to the container <keys_this_block>. +// +// This container should be managed by the code that validates blocks so we don't +// have to store the used keys in a given block in the permanent storage only to +// remove them later if the block fails validation. +bool Blockchain::check_for_double_spend(const transaction& tx, key_images_container& keys_this_block) const +{ + LOG_PRINT_L3("Blockchain::" << __func__); + CRITICAL_REGION_LOCAL(m_blockchain_lock); + struct add_transaction_input_visitor: public boost::static_visitor<bool> + { + key_images_container& m_spent_keys; + BlockchainDB* m_db; + add_transaction_input_visitor(key_images_container& spent_keys, BlockchainDB* db):m_spent_keys(spent_keys), m_db(db) + {} + bool operator()(const txin_to_key& in) const + { + const crypto::key_image& ki = in.k_image; + + // attempt to insert the newly-spent key into the container of + // keys spent this block. If this fails, the key was spent already + // in this block, return false to flag that a double spend was detected. + // + // if the insert into the block-wide spent keys container succeeds, + // check the blockchain-wide spent keys container and make sure the + // key wasn't used in another block already. + auto r = m_spent_keys.insert(ki); + if(!r.second || m_db->has_key_image(ki)) + { + //double spend detected + return false; + } + + // if no double-spend detected, return true + return true; + } + + bool operator()(const txin_gen& tx) const{return true;} + bool operator()(const txin_to_script& tx) const{return false;} + bool operator()(const txin_to_scripthash& tx) const{return false;} + }; + + for (const txin_v& in : tx.vin) + { + if(!boost::apply_visitor(add_transaction_input_visitor(keys_this_block, m_db), in)) + { + LOG_ERROR("Double spend detected!"); + return false; + } + } + + return true; +} +//------------------------------------------------------------------ +bool Blockchain::get_tx_outputs_gindexs(const crypto::hash& tx_id, std::vector<uint64_t>& indexs) const +{ + LOG_PRINT_L3("Blockchain::" << __func__); + CRITICAL_REGION_LOCAL(m_blockchain_lock); + if (!m_db->tx_exists(tx_id)) + { + LOG_PRINT_RED_L1("warning: get_tx_outputs_gindexs failed to find transaction with id = " << tx_id); + return false; + } + + // get amount output indexes, currently referred to in parts as "output global indices", but they are actually specific to amounts + indexs = m_db->get_tx_amount_output_indices(tx_id); + CHECK_AND_ASSERT_MES(indexs.size(), false, "internal error: global indexes for transaction " << tx_id << " is empty"); + + return true; +} +//------------------------------------------------------------------ +// This function overloads its sister function with +// an extra value (hash of highest block that holds an output used as input) +// as a return-by-reference. +bool Blockchain::check_tx_inputs(const transaction& tx, uint64_t& max_used_block_height, crypto::hash& max_used_block_id) const +{ + LOG_PRINT_L3("Blockchain::" << __func__); + CRITICAL_REGION_LOCAL(m_blockchain_lock); + bool res = check_tx_inputs(tx, &max_used_block_height); + if(!res) return false; + CHECK_AND_ASSERT_MES(max_used_block_height < m_db->height(), false, "internal error: max used block index=" << max_used_block_height << " is not less then blockchain size = " << m_db->height()); + max_used_block_id = m_db->get_block_hash_from_height(max_used_block_height); + return true; +} +//------------------------------------------------------------------ +bool Blockchain::have_tx_keyimges_as_spent(const transaction &tx) const +{ + LOG_PRINT_L3("Blockchain::" << __func__); + BOOST_FOREACH(const txin_v& in, tx.vin) + { + CHECKED_GET_SPECIFIC_VARIANT(in, const txin_to_key, in_to_key, true); + if(have_tx_keyimg_as_spent(in_to_key.k_image)) + return true; + } + return false; +} +//------------------------------------------------------------------ +// This function validates transaction inputs and their keys. Previously +// it also performed double spend checking, but that has been moved to its +// own function. +bool Blockchain::check_tx_inputs(const transaction& tx, uint64_t* pmax_used_block_height) const +{ + LOG_PRINT_L3("Blockchain::" << __func__); + size_t sig_index = 0; + if(pmax_used_block_height) + *pmax_used_block_height = 0; + + crypto::hash tx_prefix_hash = get_transaction_prefix_hash(tx); + + for (const auto& txin : tx.vin) + { + // make sure output being spent is of type txin_to_key, rather than + // e.g. txin_gen, which is only used for miner transactions + CHECK_AND_ASSERT_MES(txin.type() == typeid(txin_to_key), false, "wrong type id in tx input at Blockchain::check_tx_inputs"); + const txin_to_key& in_to_key = boost::get<txin_to_key>(txin); + + // make sure tx output has key offset(s) (is signed to be used) + CHECK_AND_ASSERT_MES(in_to_key.key_offsets.size(), false, "empty in_to_key.key_offsets in transaction with id " << get_transaction_hash(tx)); + + // basically, make sure number of inputs == number of signatures + CHECK_AND_ASSERT_MES(sig_index < tx.signatures.size(), false, "wrong transaction: not signature entry for input with index= " << sig_index); + + // make sure that output being spent matches up correctly with the + // signature spending it. + if(!check_tx_input(in_to_key, tx_prefix_hash, tx.signatures[sig_index], pmax_used_block_height)) + { + LOG_PRINT_L1("Failed to check ring signature for tx " << get_transaction_hash(tx) << " vin key with k_image: " << in_to_key.k_image << " sig_index: " << sig_index); + if (pmax_used_block_height) // a default value of NULL is used when called from Blockchain::handle_block_to_main_chain() + { + LOG_PRINT_L1(" *pmax_used_block_height: " << *pmax_used_block_height); + } + return false; + } + + sig_index++; + } + + return true; +} +//------------------------------------------------------------------ +// This function checks to see if a tx is unlocked. unlock_time is either +// a block index or a unix time. +bool Blockchain::is_tx_spendtime_unlocked(uint64_t unlock_time) const +{ + LOG_PRINT_L3("Blockchain::" << __func__); + if(unlock_time < CRYPTONOTE_MAX_BLOCK_NUMBER) + { + //interpret as block index + if(get_current_blockchain_height() + CRYPTONOTE_LOCKED_TX_ALLOWED_DELTA_BLOCKS >= unlock_time) + return true; + else + return false; + }else + { + //interpret as time + uint64_t current_time = static_cast<uint64_t>(time(NULL)); + if(current_time + CRYPTONOTE_LOCKED_TX_ALLOWED_DELTA_SECONDS >= unlock_time) + return true; + else + return false; + } + return false; +} +//------------------------------------------------------------------ +// This function locates all outputs associated with a given input (mixins) +// and validates that they exist and are usable. It also checks the ring +// signature for each input. +bool Blockchain::check_tx_input(const txin_to_key& txin, const crypto::hash& tx_prefix_hash, const std::vector<crypto::signature>& sig, uint64_t* pmax_related_block_height) const +{ + LOG_PRINT_L3("Blockchain::" << __func__); + CRITICAL_REGION_LOCAL(m_blockchain_lock); + + struct outputs_visitor + { + std::vector<const crypto::public_key *>& m_p_output_keys; + std::vector<crypto::public_key >& m_output_keys; + const Blockchain& m_bch; + outputs_visitor(std::vector<crypto::public_key >& output_keys, std::vector<const crypto::public_key *>& p_output_keys, const Blockchain& bch) : m_output_keys(output_keys), m_p_output_keys(p_output_keys), m_bch(bch) + {} + bool handle_output(const transaction& tx, const tx_out& out) + { + //check tx unlock time + if(!m_bch.is_tx_spendtime_unlocked(tx.unlock_time)) + { + LOG_PRINT_L1("One of outputs for one of inputs has wrong tx.unlock_time = " << tx.unlock_time); + return false; + } + + if(out.target.type() != typeid(txout_to_key)) + { + LOG_PRINT_L1("Output has wrong type id, which=" << out.target.which()); + return false; + } + + m_output_keys.push_back(boost::get<txout_to_key>(out.target).key); + return true; + } + }; + + //check ring signature + std::vector<crypto::public_key> output_keys; + std::vector<const crypto::public_key *> p_output_keys; + outputs_visitor vi(output_keys, p_output_keys, *this); + if(!scan_outputkeys_for_indexes(txin, vi, pmax_related_block_height)) + { + LOG_PRINT_L1("Failed to get output keys for tx with amount = " << print_money(txin.amount) << " and count indexes " << txin.key_offsets.size()); + return false; + } + + for (auto& k : output_keys) + { + p_output_keys.push_back(&k); + } + + if(txin.key_offsets.size() != output_keys.size()) + { + LOG_PRINT_L1("Output keys for tx with amount = " << txin.amount << " and count indexes " << txin.key_offsets.size() << " returned wrong keys count " << output_keys.size()); + return false; + } + CHECK_AND_ASSERT_MES(sig.size() == output_keys.size(), false, "internal error: tx signatures count=" << sig.size() << " mismatch with outputs keys count for inputs=" << output_keys.size()); + if(m_is_in_checkpoint_zone) + return true; + return crypto::check_ring_signature(tx_prefix_hash, txin.k_image, p_output_keys, sig.data()); +} +//------------------------------------------------------------------ +//TODO: Is this intended to do something else? Need to look into the todo there. +uint64_t Blockchain::get_adjusted_time() const +{ + LOG_PRINT_L3("Blockchain::" << __func__); + //TODO: add collecting median time + return time(NULL); +} +//------------------------------------------------------------------ +//TODO: revisit, has changed a bit on upstream +bool Blockchain::check_block_timestamp(std::vector<uint64_t>& timestamps, const block& b) const +{ + LOG_PRINT_L3("Blockchain::" << __func__); + uint64_t median_ts = epee::misc_utils::median(timestamps); + + if(b.timestamp < median_ts) + { + LOG_PRINT_L1("Timestamp of block with id: " << get_block_hash(b) << ", " << b.timestamp << ", less than median of last " << BLOCKCHAIN_TIMESTAMP_CHECK_WINDOW << " blocks, " << median_ts); + return false; + } + + return true; +} +//------------------------------------------------------------------ +// This function grabs the timestamps from the most recent <n> blocks, +// where n = BLOCKCHAIN_TIMESTAMP_CHECK_WINDOW. If there are not those many +// blocks in the blockchain, the timestap is assumed to be valid. If there +// are, this function returns: +// true if the block's timestamp is not less than the timestamp of the +// median of the selected blocks +// false otherwise +bool Blockchain::check_block_timestamp(const block& b) const +{ + LOG_PRINT_L3("Blockchain::" << __func__); + if(b.timestamp > get_adjusted_time() + CRYPTONOTE_BLOCK_FUTURE_TIME_LIMIT) + { + LOG_PRINT_L1("Timestamp of block with id: " << get_block_hash(b) << ", " << b.timestamp << ", bigger than adjusted time + 2 hours"); + return false; + } + + // if not enough blocks, no proper median yet, return true + if(m_db->height() < BLOCKCHAIN_TIMESTAMP_CHECK_WINDOW + 1) + { + return true; + } + + std::vector<uint64_t> timestamps; + auto h = m_db->height(); + + // need most recent 60 blocks, get index of first of those + // using +1 because BlockchainDB::height() returns the index of the top block, + // not the size of the blockchain (0-indexed) + size_t offset = h - BLOCKCHAIN_TIMESTAMP_CHECK_WINDOW - 1; + for(;offset < h; ++offset) + { + timestamps.push_back(m_db->get_block_timestamp(offset)); + } + + return check_block_timestamp(timestamps, b); +} +//------------------------------------------------------------------ +// Needs to validate the block and acquire each transaction from the +// transaction mem_pool, then pass the block and transactions to +// m_db->add_block() +bool Blockchain::handle_block_to_main_chain(const block& bl, const crypto::hash& id, block_verification_context& bvc) +{ + LOG_PRINT_L3("Blockchain::" << __func__); + + TIME_MEASURE_START(block_processing_time); + CRITICAL_REGION_LOCAL(m_blockchain_lock); + if(bl.prev_id != get_tail_id()) + { + LOG_PRINT_L1("Block with id: " << id << std::endl + << "has wrong prev_id: " << bl.prev_id << std::endl + << "expected: " << get_tail_id()); + return false; + } + + // make sure block timestamp is not less than the median timestamp + // of a set number of the most recent blocks. + if(!check_block_timestamp(bl)) + { + LOG_PRINT_L1("Block with id: " << id << std::endl + << "has invalid timestamp: " << bl.timestamp); + bvc.m_verifivation_failed = true; + return false; + } + + //check proof of work + TIME_MEASURE_START(target_calculating_time); + + // get the target difficulty for the block. + // the calculation can overflow, among other failure cases, + // so we need to check the return type. + // FIXME: get_difficulty_for_next_block can also assert, look into + // changing this to throwing exceptions instead so we can clean up. + difficulty_type current_diffic = get_difficulty_for_next_block(); + CHECK_AND_ASSERT_MES(current_diffic, false, "!!!!!!!!! difficulty overhead !!!!!!!!!"); + + TIME_MEASURE_FINISH(target_calculating_time); + + TIME_MEASURE_START(longhash_calculating_time); + + crypto::hash proof_of_work = null_hash; + + // Formerly the code below contained an if loop with the following condition + // !m_checkpoints.is_in_checkpoint_zone(get_current_blockchain_height()) + // however, this caused the daemon to not bother checking PoW for blocks + // before checkpoints, which is very dangerous behaviour. We moved the PoW + // validation out of the next chunk of code to make sure that we correctly + // check PoW now. + // FIXME: height parameter is not used...should it be used or should it not + // be a parameter? + proof_of_work = get_block_longhash(bl, m_db->height()); + + // validate proof_of_work versus difficulty target + if(!check_hash(proof_of_work, current_diffic)) + { + LOG_PRINT_L1("Block with id: " << id << std::endl + << "does not have enough proof of work: " << proof_of_work << std::endl + << "unexpected difficulty: " << current_diffic ); + bvc.m_verifivation_failed = true; + return false; + } + + // If we're at a checkpoint, ensure that our hardcoded checkpoint hash + // is correct. + if(m_checkpoints.is_in_checkpoint_zone(get_current_blockchain_height())) + { + if(!m_checkpoints.check_block(get_current_blockchain_height(), id)) + { + LOG_ERROR("CHECKPOINT VALIDATION FAILED"); + bvc.m_verifivation_failed = true; + return false; + } + } + + TIME_MEASURE_FINISH(longhash_calculating_time); + + // sanity check basic miner tx properties + if(!prevalidate_miner_transaction(bl, m_db->height())) + { + LOG_PRINT_L1("Block with id: " << id + << " failed to pass prevalidation"); + bvc.m_verifivation_failed = true; + return false; + } + + size_t coinbase_blob_size = get_object_blobsize(bl.miner_tx); + size_t cumulative_block_size = coinbase_blob_size; + + std::vector<transaction> txs; + key_images_container keys; + + uint64_t fee_summary = 0; + + // Iterate over the block's transaction hashes, grabbing each + // from the tx_pool and validating them. Each is then added + // to txs. Keys spent in each are added to <keys> by the double spend check. + for (const crypto::hash& tx_id : bl.tx_hashes) + { + transaction tx; + size_t blob_size = 0; + uint64_t fee = 0; + + if (m_db->tx_exists(tx_id)) + { + LOG_PRINT_L1("Block with id: " << id << " attempting to add transaction already in blockchain with id: " << tx_id); + bvc.m_verifivation_failed = true; + break; + } + + // get transaction with hash <tx_id> from tx_pool + if(!m_tx_pool.take_tx(tx_id, tx, blob_size, fee)) + { + LOG_PRINT_L1("Block with id: " << id << " has at least one unknown transaction with id: " << tx_id); + bvc.m_verifivation_failed = true; + break; + } + + // add the transaction to the temp list of transactions, so we can either + // store the list of transactions all at once or return the ones we've + // taken from the tx_pool back to it if the block fails verification. + txs.push_back(tx); + + // validate that transaction inputs and the keys spending them are correct. + if(!check_tx_inputs(tx)) + { + LOG_PRINT_L1("Block with id: " << id << " has at least one transaction (id: " << tx_id << ") with wrong inputs."); + + //TODO: why is this done? make sure that keeping invalid blocks makes sense. + add_block_as_invalid(bl, id); + LOG_PRINT_L1("Block with id " << id << " added as invalid becouse of wrong inputs in transactions"); + bvc.m_verifivation_failed = true; + break; + } + + if (!check_for_double_spend(tx, keys)) + { + LOG_PRINT_L0("Double spend detected in transaction (id: " << tx_id); + bvc.m_verifivation_failed = true; + break; + } + + fee_summary += fee; + cumulative_block_size += blob_size; + } + + uint64_t base_reward = 0; + uint64_t already_generated_coins = m_db->height() ? m_db->get_block_already_generated_coins(m_db->height() - 1) : 0; + if(!validate_miner_transaction(bl, cumulative_block_size, fee_summary, base_reward, already_generated_coins)) + { + LOG_PRINT_L1("Block with id: " << id + << " has incorrect miner transaction"); + bvc.m_verifivation_failed = true; + } + + + block_extended_info bei = boost::value_initialized<block_extended_info>(); + size_t block_size; + difficulty_type cumulative_difficulty; + + // populate various metadata about the block to be stored alongside it. + block_size = cumulative_block_size; + cumulative_difficulty = current_diffic; + already_generated_coins = already_generated_coins + base_reward; + if(m_db->height()) + cumulative_difficulty += m_db->get_block_cumulative_difficulty(m_db->height() - 1); + + update_next_cumulative_size_limit(); + + TIME_MEASURE_FINISH(block_processing_time); + + uint64_t new_height = 0; + bool add_success = true; + if (!bvc.m_verifivation_failed) + { + try + { + new_height = m_db->add_block(bl, block_size, cumulative_difficulty, already_generated_coins, txs); + } + catch (const std::exception& e) + { + //TODO: figure out the best way to deal with this failure + LOG_ERROR("Error adding block with hash: " << id << " to blockchain, what = " << e.what()); + add_success = false; + } + } + + // if we failed for any reason to verify the block, return taken + // transactions to the tx_pool. + if (bvc.m_verifivation_failed || !add_success) + { + // return taken transactions to transaction pool + for (auto& tx : txs) + { + cryptonote::tx_verification_context tvc = AUTO_VAL_INIT(tvc); + if (!m_tx_pool.add_tx(tx, tvc, true)) + { + LOG_PRINT_L0("Failed to return taken transaction with hash: " << get_transaction_hash(tx) << " to tx_pool"); + } + } + return false; + } + + LOG_PRINT_L1("+++++ BLOCK SUCCESSFULLY ADDED" << std::endl << "id:\t" << id + << std::endl << "PoW:\t" << proof_of_work + << std::endl << "HEIGHT " << new_height << ", difficulty:\t" << current_diffic + << std::endl << "block reward: " << print_money(fee_summary + base_reward) << "(" << print_money(base_reward) << " + " << print_money(fee_summary) + << "), coinbase_blob_size: " << coinbase_blob_size << ", cumulative size: " << cumulative_block_size + << ", " << block_processing_time << "("<< target_calculating_time << "/" << longhash_calculating_time << ")ms"); + + bvc.m_added_to_main_chain = true; + + // appears to be a NOP *and* is called elsewhere. wat? + m_tx_pool.on_blockchain_inc(new_height, id); + + return true; +} +//------------------------------------------------------------------ +bool Blockchain::update_next_cumulative_size_limit() +{ + LOG_PRINT_L3("Blockchain::" << __func__); + std::vector<size_t> sz; + get_last_n_blocks_sizes(sz, CRYPTONOTE_REWARD_BLOCKS_WINDOW); + + uint64_t median = epee::misc_utils::median(sz); + if(median <= CRYPTONOTE_BLOCK_GRANTED_FULL_REWARD_ZONE) + median = CRYPTONOTE_BLOCK_GRANTED_FULL_REWARD_ZONE; + + m_current_block_cumul_sz_limit = median*2; + return true; +} +//------------------------------------------------------------------ +bool Blockchain::add_new_block(const block& bl_, block_verification_context& bvc) +{ + LOG_PRINT_L3("Blockchain::" << __func__); + //copy block here to let modify block.target + block bl = bl_; + crypto::hash id = get_block_hash(bl); + CRITICAL_REGION_LOCAL(m_tx_pool);//to avoid deadlock lets lock tx_pool for whole add/reorganize process + CRITICAL_REGION_LOCAL1(m_blockchain_lock); + if(have_block(id)) + { + LOG_PRINT_L3("block with id = " << id << " already exists"); + bvc.m_already_exists = true; + return false; + } + + //check that block refers to chain tail + if(!(bl.prev_id == get_tail_id())) + { + //chain switching or wrong block + bvc.m_added_to_main_chain = false; + return handle_alternative_block(bl, id, bvc); + //never relay alternative blocks + } + + return handle_block_to_main_chain(bl, id, bvc); +} +//------------------------------------------------------------------ +void Blockchain::check_against_checkpoints(const checkpoints& points, bool enforce) +{ + const auto& pts = points.get_points(); + + for (const auto& pt : pts) + { + // if the checkpoint is for a block we don't have yet, move on + if (pt.first >= m_db->height()) + { + continue; + } + + if (!points.check_block(pt.first, m_db->get_block_hash_from_height(pt.first))) + { + // if asked to enforce checkpoints, roll back to a couple of blocks before the checkpoint + if (enforce) + { + LOG_ERROR("Local blockchain failed to pass a checkpoint, rolling back!"); + std::list<block> empty; + rollback_blockchain_switching(empty, pt.first - 2); + } + else + { + LOG_ERROR("WARNING: local blockchain failed to pass a MoneroPulse checkpoint, and you could be on a fork. You should either sync up from scratch, OR download a fresh blockchain bootstrap, OR enable checkpoint enforcing with the --enforce-dns-checkpointing command-line option"); + } + } + } +} +//------------------------------------------------------------------ +// returns false if any of the checkpoints loading returns false. +// That should happen only if a checkpoint is added that conflicts +// with an existing checkpoint. +bool Blockchain::update_checkpoints(const std::string& file_path, bool check_dns) +{ + if (!cryptonote::load_checkpoints_from_json(m_checkpoints, file_path)) + { + return false; + } + + // if we're checking both dns and json, load checkpoints from dns. + // if we're not hard-enforcing dns checkpoints, handle accordingly + if (m_enforce_dns_checkpoints && check_dns) + { + if (!cryptonote::load_checkpoints_from_dns(m_checkpoints)) + { + return false; + } + } + else if (check_dns) + { + checkpoints dns_points; + cryptonote::load_checkpoints_from_dns(dns_points); + if (m_checkpoints.check_for_conflicts(dns_points)) + { + check_against_checkpoints(dns_points, false); + } + else + { + LOG_PRINT_L0("One or more checkpoints fetched from DNS conflicted with existing checkpoints!"); + } + } + + check_against_checkpoints(m_checkpoints, true); + + return true; +} +//------------------------------------------------------------------ +void Blockchain::set_enforce_dns_checkpoints(bool enforce_checkpoints) +{ + m_enforce_dns_checkpoints = enforce_checkpoints; +} diff --git a/src/cryptonote_core/blockchain.h b/src/cryptonote_core/blockchain.h new file mode 100644 index 000000000..dc98a56a4 --- /dev/null +++ b/src/cryptonote_core/blockchain.h @@ -0,0 +1,231 @@ +// Copyright (c) 2014, The Monero Project +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without modification, are +// permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of +// conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list +// of conditions and the following disclaimer in the documentation and/or other +// materials provided with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be +// used to endorse or promote products derived from this software without specific +// prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF +// THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Parts of this file are originally copyright (c) 2012-2013 The Cryptonote developers + +#pragma once +#include <boost/serialization/serialization.hpp> +#include <boost/serialization/version.hpp> +#include <boost/serialization/list.hpp> +#include <boost/multi_index_container.hpp> +#include <boost/multi_index/global_fun.hpp> +#include <boost/multi_index/hashed_index.hpp> +#include <boost/multi_index/member.hpp> +#include <boost/foreach.hpp> +#include <atomic> + +#include "syncobj.h" +#include "string_tools.h" +#include "cryptonote_basic.h" +#include "common/util.h" +#include "cryptonote_protocol/cryptonote_protocol_defs.h" +#include "rpc/core_rpc_server_commands_defs.h" +#include "difficulty.h" +#include "cryptonote_core/cryptonote_format_utils.h" +#include "verification_context.h" +#include "crypto/hash.h" +#include "checkpoints.h" +#include "blockchain_db/blockchain_db.h" + +namespace cryptonote +{ + class tx_memory_pool; + + /************************************************************************/ + /* */ + /************************************************************************/ + class Blockchain + { + public: + struct transaction_chain_entry + { + transaction tx; + uint64_t m_keeper_block_height; + size_t m_blob_size; + std::vector<uint64_t> m_global_output_indexes; + }; + + struct block_extended_info + { + block bl; + uint64_t height; + size_t block_cumulative_size; + difficulty_type cumulative_difficulty; + uint64_t already_generated_coins; + }; + + Blockchain(tx_memory_pool& tx_pool); + + bool init(const std::string& config_folder, const bool testnet = false, const int db_flags = 0); + bool deinit(); + + void set_checkpoints(checkpoints&& chk_pts) { m_checkpoints = chk_pts; } + + //bool push_new_block(); + bool get_blocks(uint64_t start_offset, size_t count, std::list<block>& blocks, std::list<transaction>& txs) const; + bool get_blocks(uint64_t start_offset, size_t count, std::list<block>& blocks) const; + bool get_alternative_blocks(std::list<block>& blocks) const; + size_t get_alternative_blocks_count() const; + crypto::hash get_block_id_by_height(uint64_t height) const; + bool get_block_by_hash(const crypto::hash &h, block &blk) const; + void get_all_known_block_ids(std::list<crypto::hash> &main, std::list<crypto::hash> &alt, std::list<crypto::hash> &invalid) const; + + template<class archive_t> + void serialize(archive_t & ar, const unsigned int version); + + bool have_tx(const crypto::hash &id) const; + bool have_tx_keyimges_as_spent(const transaction &tx) const; + bool have_tx_keyimg_as_spent(const crypto::key_image &key_im) const; + + template<class visitor_t> + bool scan_outputkeys_for_indexes(const txin_to_key& tx_in_to_key, visitor_t& vis, uint64_t* pmax_related_block_height = NULL) const; + + uint64_t get_current_blockchain_height() const; + crypto::hash get_tail_id() const; + crypto::hash get_tail_id(uint64_t& height) const; + difficulty_type get_difficulty_for_next_block() const; + bool add_new_block(const block& bl_, block_verification_context& bvc); + bool reset_and_set_genesis_block(const block& b); + bool create_block_template(block& b, const account_public_address& miner_address, difficulty_type& di, uint64_t& height, const blobdata& ex_nonce) const; + bool have_block(const crypto::hash& id) const; + size_t get_total_transactions() const; + bool get_short_chain_history(std::list<crypto::hash>& ids) const; + bool find_blockchain_supplement(const std::list<crypto::hash>& qblock_ids, NOTIFY_RESPONSE_CHAIN_ENTRY::request& resp) const; + bool find_blockchain_supplement(const std::list<crypto::hash>& qblock_ids, uint64_t& starter_offset) const; + bool find_blockchain_supplement(const uint64_t req_start_block, const std::list<crypto::hash>& qblock_ids, std::list<std::pair<block, std::list<transaction> > >& blocks, uint64_t& total_height, uint64_t& start_height, size_t max_count) const; + bool handle_get_objects(NOTIFY_REQUEST_GET_OBJECTS::request& arg, NOTIFY_RESPONSE_GET_OBJECTS::request& rsp); + bool handle_get_objects(const COMMAND_RPC_GET_RANDOM_OUTPUTS_FOR_AMOUNTS::request& req, COMMAND_RPC_GET_RANDOM_OUTPUTS_FOR_AMOUNTS::response& res); + bool get_random_outs_for_amounts(const COMMAND_RPC_GET_RANDOM_OUTPUTS_FOR_AMOUNTS::request& req, COMMAND_RPC_GET_RANDOM_OUTPUTS_FOR_AMOUNTS::response& res) const; + bool get_tx_outputs_gindexs(const crypto::hash& tx_id, std::vector<uint64_t>& indexs) const; + bool store_blockchain(); + bool check_tx_input(const txin_to_key& txin, const crypto::hash& tx_prefix_hash, const std::vector<crypto::signature>& sig, uint64_t* pmax_related_block_height = NULL) const; + bool check_tx_inputs(const transaction& tx, uint64_t* pmax_used_block_height = NULL) const; + bool check_tx_inputs(const transaction& tx, uint64_t& pmax_used_block_height, crypto::hash& max_used_block_id) const; + uint64_t get_current_cumulative_blocksize_limit() const; + bool is_storing_blockchain()const{return m_is_blockchain_storing;} + uint64_t block_difficulty(uint64_t i) const; + + template<class t_ids_container, class t_blocks_container, class t_missed_container> + bool get_blocks(const t_ids_container& block_ids, t_blocks_container& blocks, t_missed_container& missed_bs) const; + + template<class t_ids_container, class t_tx_container, class t_missed_container> + bool get_transactions(const t_ids_container& txs_ids, t_tx_container& txs, t_missed_container& missed_txs) const; + + //debug functions + void print_blockchain(uint64_t start_index, uint64_t end_index); + void print_blockchain_index(); + void print_blockchain_outs(const std::string& file); + + void check_against_checkpoints(const checkpoints& points, bool enforce); + void set_enforce_dns_checkpoints(bool enforce); + bool update_checkpoints(const std::string& file_path, bool check_dns); + + BlockchainDB& get_db() + { + return *m_db; + } + + private: + typedef std::unordered_map<crypto::hash, size_t> blocks_by_id_index; + typedef std::unordered_map<crypto::hash, transaction_chain_entry> transactions_container; + typedef std::unordered_set<crypto::key_image> key_images_container; + typedef std::vector<block_extended_info> blocks_container; + typedef std::unordered_map<crypto::hash, block_extended_info> blocks_ext_by_hash; + typedef std::unordered_map<crypto::hash, block> blocks_by_hash; + typedef std::map<uint64_t, std::vector<std::pair<crypto::hash, size_t>>> outputs_container; //crypto::hash - tx hash, size_t - index of out in transaction + + BlockchainDB* m_db; + + tx_memory_pool& m_tx_pool; + mutable epee::critical_section m_blockchain_lock; // TODO: add here reader/writer lock + + // main chain + blocks_container m_blocks; // height -> block_extended_info + blocks_by_id_index m_blocks_index; // crypto::hash -> height + transactions_container m_transactions; + key_images_container m_spent_keys; + size_t m_current_block_cumul_sz_limit; + + + // all alternative chains + blocks_ext_by_hash m_alternative_chains; // crypto::hash -> block_extended_info + + // some invalid blocks + blocks_ext_by_hash m_invalid_blocks; // crypto::hash -> block_extended_info + outputs_container m_outputs; + + + std::string m_config_folder; + checkpoints m_checkpoints; + std::atomic<bool> m_is_in_checkpoint_zone; + std::atomic<bool> m_is_blockchain_storing; + bool m_testnet; + bool m_enforce_dns_checkpoints; + + bool switch_to_alternative_blockchain(std::list<blocks_ext_by_hash::iterator>& alt_chain, bool discard_disconnected_chain); + block pop_block_from_blockchain(); + bool purge_transaction_from_blockchain(const crypto::hash& tx_id); + bool purge_transaction_keyimages_from_blockchain(const transaction& tx, bool strict_check); + + bool handle_block_to_main_chain(const block& bl, block_verification_context& bvc); + bool handle_block_to_main_chain(const block& bl, const crypto::hash& id, block_verification_context& bvc); + bool handle_alternative_block(const block& b, const crypto::hash& id, block_verification_context& bvc); + difficulty_type get_next_difficulty_for_alternative_chain(const std::list<blocks_ext_by_hash::iterator>& alt_chain, block_extended_info& bei) const; + bool prevalidate_miner_transaction(const block& b, uint64_t height); + bool validate_miner_transaction(const block& b, size_t cumulative_block_size, uint64_t fee, uint64_t& base_reward, uint64_t already_generated_coins); + bool validate_transaction(const block& b, uint64_t height, const transaction& tx); + bool rollback_blockchain_switching(std::list<block>& original_chain, uint64_t rollback_height); + bool add_transaction_from_block(const transaction& tx, const crypto::hash& tx_id, const crypto::hash& bl_id, uint64_t bl_height); + bool push_transaction_to_global_outs_index(const transaction& tx, const crypto::hash& tx_id, std::vector<uint64_t>& global_indexes); + bool pop_transaction_from_global_index(const transaction& tx, const crypto::hash& tx_id); + void get_last_n_blocks_sizes(std::vector<size_t>& sz, size_t count) const; + void add_out_to_get_random_outs(COMMAND_RPC_GET_RANDOM_OUTPUTS_FOR_AMOUNTS::outs_for_amount& result_outs, uint64_t amount, size_t i) const; + bool is_tx_spendtime_unlocked(uint64_t unlock_time) const; + bool add_block_as_invalid(const block& bl, const crypto::hash& h); + bool add_block_as_invalid(const block_extended_info& bei, const crypto::hash& h); + bool check_block_timestamp(const block& b) const; + bool check_block_timestamp(std::vector<uint64_t>& timestamps, const block& b) const; + uint64_t get_adjusted_time() const; + bool complete_timestamps_vector(uint64_t start_height, std::vector<uint64_t>& timestamps); + bool update_next_cumulative_size_limit(); + + bool check_for_double_spend(const transaction& tx, key_images_container& keys_this_block) const; + }; + + + /************************************************************************/ + /* */ + /************************************************************************/ + + #define CURRENT_BLOCKCHAIN_STORAGE_ARCHIVE_VER 12 + + //------------------------------------------------------------------ + +} // namespace cryptonote + +BOOST_CLASS_VERSION(cryptonote::Blockchain, CURRENT_BLOCKCHAIN_STORAGE_ARCHIVE_VER) diff --git a/src/cryptonote_core/blockchain_storage.cpp b/src/cryptonote_core/blockchain_storage.cpp index 72dd35df7..76438020b 100644 --- a/src/cryptonote_core/blockchain_storage.cpp +++ b/src/cryptonote_core/blockchain_storage.cpp @@ -55,19 +55,19 @@ using namespace cryptonote; DISABLE_VS_WARNINGS(4267) //------------------------------------------------------------------ -bool blockchain_storage::have_tx(const crypto::hash &id) +bool blockchain_storage::have_tx(const crypto::hash &id) const { CRITICAL_REGION_LOCAL(m_blockchain_lock); return m_transactions.find(id) != m_transactions.end(); } //------------------------------------------------------------------ -bool blockchain_storage::have_tx_keyimg_as_spent(const crypto::key_image &key_im) +bool blockchain_storage::have_tx_keyimg_as_spent(const crypto::key_image &key_im) const { CRITICAL_REGION_LOCAL(m_blockchain_lock); return m_spent_keys.find(key_im) != m_spent_keys.end(); } //------------------------------------------------------------------ -transaction *blockchain_storage::get_tx(const crypto::hash &id) +const transaction *blockchain_storage::get_tx(const crypto::hash &id) const { CRITICAL_REGION_LOCAL(m_blockchain_lock); auto it = m_transactions.find(id); @@ -77,7 +77,7 @@ transaction *blockchain_storage::get_tx(const crypto::hash &id) return &it->second.tx; } //------------------------------------------------------------------ -uint64_t blockchain_storage::get_current_blockchain_height() +uint64_t blockchain_storage::get_current_blockchain_height() const { CRITICAL_REGION_LOCAL(m_blockchain_lock); return m_blocks.size(); @@ -114,24 +114,14 @@ bool blockchain_storage::init(const std::string& config_folder, bool testnet) else { LOG_PRINT_L0("Can't load blockchain storage from file, generating genesis block."); - block bl = boost::value_initialized<block>(); - block_verification_context bvc = boost::value_initialized<block_verification_context>(); - if (testnet) - { - generate_genesis_block(bl, config::testnet::GENESIS_TX, config::testnet::GENESIS_NONCE); - } - else - { - generate_genesis_block(bl, config::GENESIS_TX, config::GENESIS_NONCE); - } - add_new_block(bl, bvc); - CHECK_AND_ASSERT_MES(!bvc.m_verifivation_failed && bvc.m_added_to_main_chain, false, "Failed to add genesis block to blockchain"); + if (!store_genesis_block(testnet, true)) + return false; } if(!m_blocks.size()) { LOG_PRINT_L0("Blockchain not loaded, generating genesis block."); - if (!store_genesis_block(testnet)) { + if (!store_genesis_block(testnet, false)) { return false; } } else { @@ -160,7 +150,7 @@ bool blockchain_storage::init(const std::string& config_folder, bool testnet) return true; } //------------------------------------------------------------------ -bool blockchain_storage::store_genesis_block(bool testnet) { +bool blockchain_storage::store_genesis_block(bool testnet, bool check_added) { block bl = ::boost::value_initialized<block>(); block_verification_context bvc = boost::value_initialized<block_verification_context>(); @@ -174,7 +164,7 @@ bool blockchain_storage::store_genesis_block(bool testnet) { } add_new_block(bl, bvc); - CHECK_AND_ASSERT_MES(!bvc.m_verifivation_failed, false, "Failed to add genesis block to blockchain"); + CHECK_AND_ASSERT_MES(!bvc.m_verifivation_failed && (bvc.m_added_to_main_chain || !check_added), false, "Failed to add genesis block to blockchain"); return true; } //------------------------------------------------------------------ @@ -232,7 +222,7 @@ bool blockchain_storage::pop_block_from_blockchain() m_blocks_index.erase(bl_ind); //pop block from core m_blocks.pop_back(); - m_tx_pool.on_blockchain_dec(m_blocks.size()-1, get_tail_id()); + m_tx_pool->on_blockchain_dec(m_blocks.size()-1, get_tail_id()); return true; } //------------------------------------------------------------------ @@ -308,7 +298,7 @@ bool blockchain_storage::purge_transaction_from_blockchain(const crypto::hash& t if(!is_coinbase(tx)) { cryptonote::tx_verification_context tvc = AUTO_VAL_INIT(tvc); - bool r = m_tx_pool.add_tx(tx, tvc, true); + bool r = m_tx_pool->add_tx(tx, tvc, true); CHECK_AND_ASSERT_MES(r, false, "purge_block_data_from_blockchain: failed to add transaction to transaction pool"); } @@ -334,14 +324,14 @@ bool blockchain_storage::purge_block_data_from_blockchain(const block& bl, size_ return res; } //------------------------------------------------------------------ -crypto::hash blockchain_storage::get_tail_id(uint64_t& height) +crypto::hash blockchain_storage::get_tail_id(uint64_t& height) const { CRITICAL_REGION_LOCAL(m_blockchain_lock); height = get_current_blockchain_height()-1; return get_tail_id(); } //------------------------------------------------------------------ -crypto::hash blockchain_storage::get_tail_id() +crypto::hash blockchain_storage::get_tail_id() const { CRITICAL_REGION_LOCAL(m_blockchain_lock); crypto::hash id = null_hash; @@ -352,7 +342,7 @@ crypto::hash blockchain_storage::get_tail_id() return id; } //------------------------------------------------------------------ -bool blockchain_storage::get_short_chain_history(std::list<crypto::hash>& ids) +bool blockchain_storage::get_short_chain_history(std::list<crypto::hash>& ids) const { CRITICAL_REGION_LOCAL(m_blockchain_lock); size_t i = 0; @@ -382,7 +372,7 @@ bool blockchain_storage::get_short_chain_history(std::list<crypto::hash>& ids) return true; } //------------------------------------------------------------------ -crypto::hash blockchain_storage::get_block_id_by_height(uint64_t height) +crypto::hash blockchain_storage::get_block_id_by_height(uint64_t height) const { CRITICAL_REGION_LOCAL(m_blockchain_lock); if(height >= m_blocks.size()) @@ -391,7 +381,7 @@ crypto::hash blockchain_storage::get_block_id_by_height(uint64_t height) return get_block_hash(m_blocks[height].bl); } //------------------------------------------------------------------ -bool blockchain_storage::get_block_by_hash(const crypto::hash &h, block &blk) { +bool blockchain_storage::get_block_by_hash(const crypto::hash &h, block &blk) const { CRITICAL_REGION_LOCAL(m_blockchain_lock); // try to find block in main chain @@ -411,20 +401,20 @@ bool blockchain_storage::get_block_by_hash(const crypto::hash &h, block &blk) { return false; } //------------------------------------------------------------------ -void blockchain_storage::get_all_known_block_ids(std::list<crypto::hash> &main, std::list<crypto::hash> &alt, std::list<crypto::hash> &invalid) { +void blockchain_storage::get_all_known_block_ids(std::list<crypto::hash> &main, std::list<crypto::hash> &alt, std::list<crypto::hash> &invalid) const { CRITICAL_REGION_LOCAL(m_blockchain_lock); - BOOST_FOREACH(blocks_by_id_index::value_type &v, m_blocks_index) + BOOST_FOREACH(const blocks_by_id_index::value_type &v, m_blocks_index) main.push_back(v.first); - BOOST_FOREACH(blocks_ext_by_hash::value_type &v, m_alternative_chains) + BOOST_FOREACH(const blocks_ext_by_hash::value_type &v, m_alternative_chains) alt.push_back(v.first); - BOOST_FOREACH(blocks_ext_by_hash::value_type &v, m_invalid_blocks) + BOOST_FOREACH(const blocks_ext_by_hash::value_type &v, m_invalid_blocks) invalid.push_back(v.first); } //------------------------------------------------------------------ -difficulty_type blockchain_storage::get_difficulty_for_next_block() +difficulty_type blockchain_storage::get_difficulty_for_next_block() const { CRITICAL_REGION_LOCAL(m_blockchain_lock); std::vector<uint64_t> timestamps; @@ -536,7 +526,7 @@ bool blockchain_storage::switch_to_alternative_blockchain(std::list<blocks_ext_b return true; } //------------------------------------------------------------------ -difficulty_type blockchain_storage::get_next_difficulty_for_alternative_chain(const std::list<blocks_ext_by_hash::iterator>& alt_chain, block_extended_info& bei) +difficulty_type blockchain_storage::get_next_difficulty_for_alternative_chain(const std::list<blocks_ext_by_hash::iterator>& alt_chain, block_extended_info& bei) const { std::vector<uint64_t> timestamps; std::vector<difficulty_type> commulative_difficulties; @@ -581,7 +571,7 @@ difficulty_type blockchain_storage::get_next_difficulty_for_alternative_chain(co return next_difficulty(timestamps, commulative_difficulties); } //------------------------------------------------------------------ -bool blockchain_storage::prevalidate_miner_transaction(const block& b, uint64_t height) +bool blockchain_storage::prevalidate_miner_transaction(const block& b, uint64_t height) const { CHECK_AND_ASSERT_MES(b.miner_tx.vin.size() == 1, false, "coinbase transaction in the block has no inputs"); CHECK_AND_ASSERT_MES(b.miner_tx.vin[0].type() == typeid(txin_gen), false, "coinbase transaction in the block has the wrong type"); @@ -604,7 +594,7 @@ bool blockchain_storage::prevalidate_miner_transaction(const block& b, uint64_t return true; } //------------------------------------------------------------------ -bool blockchain_storage::validate_miner_transaction(const block& b, size_t cumulative_block_size, uint64_t fee, uint64_t& base_reward, uint64_t already_generated_coins) +bool blockchain_storage::validate_miner_transaction(const block& b, size_t cumulative_block_size, uint64_t fee, uint64_t& base_reward, uint64_t already_generated_coins) const { //validate reward uint64_t money_in_use = 0; @@ -631,7 +621,7 @@ bool blockchain_storage::validate_miner_transaction(const block& b, size_t cumul return true; } //------------------------------------------------------------------ -bool blockchain_storage::get_backward_blocks_sizes(size_t from_height, std::vector<size_t>& sz, size_t count) +bool blockchain_storage::get_backward_blocks_sizes(size_t from_height, std::vector<size_t>& sz, size_t count) const { CRITICAL_REGION_LOCAL(m_blockchain_lock); CHECK_AND_ASSERT_MES(from_height < m_blocks.size(), false, "Internal error: get_backward_blocks_sizes called with from_height=" << from_height << ", blockchain height = " << m_blocks.size()); @@ -643,7 +633,7 @@ bool blockchain_storage::get_backward_blocks_sizes(size_t from_height, std::vect return true; } //------------------------------------------------------------------ -bool blockchain_storage::get_last_n_blocks_sizes(std::vector<size_t>& sz, size_t count) +bool blockchain_storage::get_last_n_blocks_sizes(std::vector<size_t>& sz, size_t count) const { CRITICAL_REGION_LOCAL(m_blockchain_lock); if(!m_blocks.size()) @@ -651,12 +641,12 @@ bool blockchain_storage::get_last_n_blocks_sizes(std::vector<size_t>& sz, size_t return get_backward_blocks_sizes(m_blocks.size() -1, sz, count); } //------------------------------------------------------------------ -uint64_t blockchain_storage::get_current_comulative_blocksize_limit() +uint64_t blockchain_storage::get_current_cumulative_blocksize_limit() const { return m_current_block_cumul_sz_limit; } //------------------------------------------------------------------ -bool blockchain_storage::create_block_template(block& b, const account_public_address& miner_address, difficulty_type& diffic, uint64_t& height, const blobdata& ex_nonce) +bool blockchain_storage::create_block_template(block& b, const account_public_address& miner_address, difficulty_type& diffic, uint64_t& height, const blobdata& ex_nonce) const { size_t median_size; uint64_t already_generated_coins; @@ -677,16 +667,16 @@ bool blockchain_storage::create_block_template(block& b, const account_public_ad size_t txs_size; uint64_t fee; - if (!m_tx_pool.fill_block_template(b, median_size, already_generated_coins, txs_size, fee)) { + if (!m_tx_pool->fill_block_template(b, median_size, already_generated_coins, txs_size, fee)) { return false; } #if defined(DEBUG_CREATE_BLOCK_TEMPLATE) size_t real_txs_size = 0; uint64_t real_fee = 0; - CRITICAL_REGION_BEGIN(m_tx_pool.m_transactions_lock); + CRITICAL_REGION_BEGIN(m_tx_pool->m_transactions_lock); BOOST_FOREACH(crypto::hash &cur_hash, b.tx_hashes) { - auto cur_res = m_tx_pool.m_transactions.find(cur_hash); - if (cur_res == m_tx_pool.m_transactions.end()) { + auto cur_res = m_tx_pool->m_transactions.find(cur_hash); + if (cur_res == m_tx_pool->m_transactions.end()) { LOG_ERROR("Creating block template: error: transaction not found"); continue; } @@ -775,7 +765,7 @@ bool blockchain_storage::create_block_template(block& b, const account_public_ad return false; } //------------------------------------------------------------------ -bool blockchain_storage::complete_timestamps_vector(uint64_t start_top_height, std::vector<uint64_t>& timestamps) +bool blockchain_storage::complete_timestamps_vector(uint64_t start_top_height, std::vector<uint64_t>& timestamps) const { if(timestamps.size() >= BLOCKCHAIN_TIMESTAMP_CHECK_WINDOW) @@ -940,7 +930,7 @@ bool blockchain_storage::handle_alternative_block(const block& b, const crypto:: return true; } //------------------------------------------------------------------ -bool blockchain_storage::get_blocks(uint64_t start_offset, size_t count, std::list<block>& blocks, std::list<transaction>& txs) +bool blockchain_storage::get_blocks(uint64_t start_offset, size_t count, std::list<block>& blocks, std::list<transaction>& txs) const { CRITICAL_REGION_LOCAL(m_blockchain_lock); if(start_offset >= m_blocks.size()) @@ -956,7 +946,7 @@ bool blockchain_storage::get_blocks(uint64_t start_offset, size_t count, std::li return true; } //------------------------------------------------------------------ -bool blockchain_storage::get_blocks(uint64_t start_offset, size_t count, std::list<block>& blocks) +bool blockchain_storage::get_blocks(uint64_t start_offset, size_t count, std::list<block>& blocks) const { CRITICAL_REGION_LOCAL(m_blockchain_lock); if(start_offset >= m_blocks.size()) @@ -1000,7 +990,7 @@ bool blockchain_storage::handle_get_objects(NOTIFY_REQUEST_GET_OBJECTS::request& return true; } //------------------------------------------------------------------ -bool blockchain_storage::get_alternative_blocks(std::list<block>& blocks) +bool blockchain_storage::get_alternative_blocks(std::list<block>& blocks) const { CRITICAL_REGION_LOCAL(m_blockchain_lock); @@ -1011,21 +1001,21 @@ bool blockchain_storage::get_alternative_blocks(std::list<block>& blocks) return true; } //------------------------------------------------------------------ -size_t blockchain_storage::get_alternative_blocks_count() +size_t blockchain_storage::get_alternative_blocks_count() const { CRITICAL_REGION_LOCAL(m_blockchain_lock); return m_alternative_chains.size(); } //------------------------------------------------------------------ -bool blockchain_storage::add_out_to_get_random_outs(std::vector<std::pair<crypto::hash, size_t> >& amount_outs, COMMAND_RPC_GET_RANDOM_OUTPUTS_FOR_AMOUNTS::outs_for_amount& result_outs, uint64_t amount, size_t i) +bool blockchain_storage::add_out_to_get_random_outs(const std::vector<std::pair<crypto::hash, size_t> >& amount_outs, COMMAND_RPC_GET_RANDOM_OUTPUTS_FOR_AMOUNTS::outs_for_amount& result_outs, uint64_t amount, size_t i) const { CRITICAL_REGION_LOCAL(m_blockchain_lock); - transactions_container::iterator tx_it = m_transactions.find(amount_outs[i].first); + transactions_container::const_iterator tx_it = m_transactions.find(amount_outs[i].first); CHECK_AND_ASSERT_MES(tx_it != m_transactions.end(), false, "internal error: transaction with id " << amount_outs[i].first << ENDL << ", used in mounts global index for amount=" << amount << ": i=" << i << "not found in transactions index"); CHECK_AND_ASSERT_MES(tx_it->second.tx.vout.size() > amount_outs[i].second, false, "internal error: in global outs index, transaction out index=" << amount_outs[i].second << " more than transaction outputs = " << tx_it->second.tx.vout.size() << ", for tx id = " << amount_outs[i].first); - transaction& tx = tx_it->second.tx; + const transaction& tx = tx_it->second.tx; CHECK_AND_ASSERT_MES(tx.vout[amount_outs[i].second].target.type() == typeid(txout_to_key), false, "unknown tx out type"); //check if transaction is unlocked @@ -1038,7 +1028,7 @@ bool blockchain_storage::add_out_to_get_random_outs(std::vector<std::pair<crypto return true; } //------------------------------------------------------------------ -size_t blockchain_storage::find_end_of_allowed_index(const std::vector<std::pair<crypto::hash, size_t> >& amount_outs) +size_t blockchain_storage::find_end_of_allowed_index(const std::vector<std::pair<crypto::hash, size_t> >& amount_outs) const { CRITICAL_REGION_LOCAL(m_blockchain_lock); if(!amount_outs.size()) @@ -1047,7 +1037,7 @@ size_t blockchain_storage::find_end_of_allowed_index(const std::vector<std::pair do { --i; - transactions_container::iterator it = m_transactions.find(amount_outs[i].first); + transactions_container::const_iterator it = m_transactions.find(amount_outs[i].first); CHECK_AND_ASSERT_MES(it != m_transactions.end(), 0, "internal error: failed to find transaction from outputs index with tx_id=" << amount_outs[i].first); if(it->second.m_keeper_block_height + CRYPTONOTE_MINED_MONEY_UNLOCK_WINDOW <= get_current_blockchain_height() ) return i+1; @@ -1055,7 +1045,7 @@ size_t blockchain_storage::find_end_of_allowed_index(const std::vector<std::pair return 0; } //------------------------------------------------------------------ -bool blockchain_storage::get_random_outs_for_amounts(const COMMAND_RPC_GET_RANDOM_OUTPUTS_FOR_AMOUNTS::request& req, COMMAND_RPC_GET_RANDOM_OUTPUTS_FOR_AMOUNTS::response& res) +bool blockchain_storage::get_random_outs_for_amounts(const COMMAND_RPC_GET_RANDOM_OUTPUTS_FOR_AMOUNTS::request& req, COMMAND_RPC_GET_RANDOM_OUTPUTS_FOR_AMOUNTS::response& res) const { CRITICAL_REGION_LOCAL(m_blockchain_lock); BOOST_FOREACH(uint64_t amount, req.amounts) @@ -1068,7 +1058,7 @@ bool blockchain_storage::get_random_outs_for_amounts(const COMMAND_RPC_GET_RANDO LOG_PRINT_L1("COMMAND_RPC_GET_RANDOM_OUTPUTS_FOR_AMOUNTS: not outs for amount " << amount << ", wallet should use some real outs when it lookup for some mix, so, at least one out for this amount should exist"); continue;//actually this is strange situation, wallet should use some real outs when it lookup for some mix, so, at least one out for this amount should exist } - std::vector<std::pair<crypto::hash, size_t> >& amount_outs = it->second; + const std::vector<std::pair<crypto::hash, size_t> >& amount_outs = it->second; //it is not good idea to use top fresh outs, because it increases possibility of transaction canceling on split //lets find upper bound of not fresh outs size_t up_index_limit = find_end_of_allowed_index(amount_outs); @@ -1097,7 +1087,7 @@ bool blockchain_storage::get_random_outs_for_amounts(const COMMAND_RPC_GET_RANDO return true; } //------------------------------------------------------------------ -bool blockchain_storage::find_blockchain_supplement(const std::list<crypto::hash>& qblock_ids, uint64_t& starter_offset) +bool blockchain_storage::find_blockchain_supplement(const std::list<crypto::hash>& qblock_ids, uint64_t& starter_offset) const { CRITICAL_REGION_LOCAL(m_blockchain_lock); @@ -1144,7 +1134,7 @@ bool blockchain_storage::find_blockchain_supplement(const std::list<crypto::hash return true; } //------------------------------------------------------------------ -uint64_t blockchain_storage::block_difficulty(size_t i) +uint64_t blockchain_storage::block_difficulty(size_t i) const { CRITICAL_REGION_LOCAL(m_blockchain_lock); CHECK_AND_ASSERT_MES(i < m_blocks.size(), false, "wrong block index i = " << i << " at blockchain_storage::block_difficulty()"); @@ -1207,7 +1197,7 @@ void blockchain_storage::print_blockchain_outs(const std::string& file) } } //------------------------------------------------------------------ -bool blockchain_storage::find_blockchain_supplement(const std::list<crypto::hash>& qblock_ids, NOTIFY_RESPONSE_CHAIN_ENTRY::request& resp) +bool blockchain_storage::find_blockchain_supplement(const std::list<crypto::hash>& qblock_ids, NOTIFY_RESPONSE_CHAIN_ENTRY::request& resp) const { CRITICAL_REGION_LOCAL(m_blockchain_lock); if(!find_blockchain_supplement(qblock_ids, resp.start_height)) @@ -1220,7 +1210,7 @@ bool blockchain_storage::find_blockchain_supplement(const std::list<crypto::hash return true; } //------------------------------------------------------------------ -bool blockchain_storage::find_blockchain_supplement(const uint64_t req_start_block, const std::list<crypto::hash>& qblock_ids, std::list<std::pair<block, std::list<transaction> > >& blocks, uint64_t& total_height, uint64_t& start_height, size_t max_count) +bool blockchain_storage::find_blockchain_supplement(const uint64_t req_start_block, const std::list<crypto::hash>& qblock_ids, std::list<std::pair<block, std::list<transaction> > >& blocks, uint64_t& total_height, uint64_t& start_height, size_t max_count) const { CRITICAL_REGION_LOCAL(m_blockchain_lock); if(req_start_block > 0) { @@ -1259,7 +1249,7 @@ bool blockchain_storage::add_block_as_invalid(const block_extended_info& bei, co return true; } //------------------------------------------------------------------ -bool blockchain_storage::have_block(const crypto::hash& id) +bool blockchain_storage::have_block(const crypto::hash& id) const { CRITICAL_REGION_LOCAL(m_blockchain_lock); if(m_blocks_index.count(id)) @@ -1297,13 +1287,13 @@ bool blockchain_storage::push_transaction_to_global_outs_index(const transaction return true; } //------------------------------------------------------------------ -size_t blockchain_storage::get_total_transactions() +size_t blockchain_storage::get_total_transactions() const { CRITICAL_REGION_LOCAL(m_blockchain_lock); return m_transactions.size(); } //------------------------------------------------------------------ -bool blockchain_storage::get_outs(uint64_t amount, std::list<crypto::public_key>& pkeys) +bool blockchain_storage::get_outs(uint64_t amount, std::list<crypto::public_key>& pkeys) const { CRITICAL_REGION_LOCAL(m_blockchain_lock); auto it = m_outputs.find(amount); @@ -1393,7 +1383,7 @@ bool blockchain_storage::add_transaction_from_block(const transaction& tx, const return true; } //------------------------------------------------------------------ -bool blockchain_storage::get_tx_outputs_gindexs(const crypto::hash& tx_id, std::vector<uint64_t>& indexs) +bool blockchain_storage::get_tx_outputs_gindexs(const crypto::hash& tx_id, std::vector<uint64_t>& indexs) const { CRITICAL_REGION_LOCAL(m_blockchain_lock); auto it = m_transactions.find(tx_id); @@ -1408,7 +1398,7 @@ bool blockchain_storage::get_tx_outputs_gindexs(const crypto::hash& tx_id, std:: return true; } //------------------------------------------------------------------ -bool blockchain_storage::check_tx_inputs(const transaction& tx, uint64_t& max_used_block_height, crypto::hash& max_used_block_id) +bool blockchain_storage::check_tx_inputs(const transaction& tx, uint64_t& max_used_block_height, crypto::hash& max_used_block_id) const { CRITICAL_REGION_LOCAL(m_blockchain_lock); bool res = check_tx_inputs(tx, &max_used_block_height); @@ -1418,7 +1408,7 @@ bool blockchain_storage::check_tx_inputs(const transaction& tx, uint64_t& max_us return true; } //------------------------------------------------------------------ -bool blockchain_storage::have_tx_keyimges_as_spent(const transaction &tx) +bool blockchain_storage::have_tx_keyimges_as_spent(const transaction &tx) const { BOOST_FOREACH(const txin_v& in, tx.vin) { @@ -1429,13 +1419,13 @@ bool blockchain_storage::have_tx_keyimges_as_spent(const transaction &tx) return false; } //------------------------------------------------------------------ -bool blockchain_storage::check_tx_inputs(const transaction& tx, uint64_t* pmax_used_block_height) +bool blockchain_storage::check_tx_inputs(const transaction& tx, uint64_t* pmax_used_block_height) const { crypto::hash tx_prefix_hash = get_transaction_prefix_hash(tx); return check_tx_inputs(tx, tx_prefix_hash, pmax_used_block_height); } //------------------------------------------------------------------ -bool blockchain_storage::check_tx_inputs(const transaction& tx, const crypto::hash& tx_prefix_hash, uint64_t* pmax_used_block_height) +bool blockchain_storage::check_tx_inputs(const transaction& tx, const crypto::hash& tx_prefix_hash, uint64_t* pmax_used_block_height) const { size_t sig_index = 0; if(pmax_used_block_height) @@ -1467,7 +1457,7 @@ bool blockchain_storage::check_tx_inputs(const transaction& tx, const crypto::ha return true; } //------------------------------------------------------------------ -bool blockchain_storage::is_tx_spendtime_unlocked(uint64_t unlock_time) +bool blockchain_storage::is_tx_spendtime_unlocked(uint64_t unlock_time) const { if(unlock_time < CRYPTONOTE_MAX_BLOCK_NUMBER) { @@ -1488,15 +1478,15 @@ bool blockchain_storage::is_tx_spendtime_unlocked(uint64_t unlock_time) return false; } //------------------------------------------------------------------ -bool blockchain_storage::check_tx_input(const txin_to_key& txin, const crypto::hash& tx_prefix_hash, const std::vector<crypto::signature>& sig, uint64_t* pmax_related_block_height) +bool blockchain_storage::check_tx_input(const txin_to_key& txin, const crypto::hash& tx_prefix_hash, const std::vector<crypto::signature>& sig, uint64_t* pmax_related_block_height) const { CRITICAL_REGION_LOCAL(m_blockchain_lock); struct outputs_visitor { std::vector<const crypto::public_key *>& m_results_collector; - blockchain_storage& m_bch; - outputs_visitor(std::vector<const crypto::public_key *>& results_collector, blockchain_storage& bch):m_results_collector(results_collector), m_bch(bch) + const blockchain_storage& m_bch; + outputs_visitor(std::vector<const crypto::public_key *>& results_collector, const blockchain_storage& bch):m_results_collector(results_collector), m_bch(bch) {} bool handle_output(const transaction& tx, const tx_out& out) { @@ -1538,13 +1528,13 @@ bool blockchain_storage::check_tx_input(const txin_to_key& txin, const crypto::h return crypto::check_ring_signature(tx_prefix_hash, txin.k_image, output_keys, sig.data()); } //------------------------------------------------------------------ -uint64_t blockchain_storage::get_adjusted_time() +uint64_t blockchain_storage::get_adjusted_time() const { //TODO: add collecting median time return time(NULL); } //------------------------------------------------------------------ -bool blockchain_storage::check_block_timestamp_main(const block& b) +bool blockchain_storage::check_block_timestamp_main(const block& b) const { if(b.timestamp > get_adjusted_time() + CRYPTONOTE_BLOCK_FUTURE_TIME_LIMIT) { @@ -1560,7 +1550,7 @@ bool blockchain_storage::check_block_timestamp_main(const block& b) return check_block_timestamp(std::move(timestamps), b); } //------------------------------------------------------------------ -bool blockchain_storage::check_block_timestamp(std::vector<uint64_t> timestamps, const block& b) +bool blockchain_storage::check_block_timestamp(std::vector<uint64_t> timestamps, const block& b) const { if(timestamps.size() < BLOCKCHAIN_TIMESTAMP_CHECK_WINDOW) return true; @@ -1659,7 +1649,7 @@ bool blockchain_storage::handle_block_to_main_chain(const block& bl, const crypt transaction tx; size_t blob_size = 0; uint64_t fee = 0; - if(!m_tx_pool.take_tx(tx_id, tx, blob_size, fee)) + if(!m_tx_pool->take_tx(tx_id, tx, blob_size, fee)) { LOG_PRINT_L1("Block with id: " << id << "has at least one unknown transaction with id: " << tx_id); purge_block_data_from_blockchain(bl, tx_processed_count); @@ -1671,7 +1661,7 @@ bool blockchain_storage::handle_block_to_main_chain(const block& bl, const crypt { LOG_PRINT_L1("Block with id: " << id << "has at least one transaction (id: " << tx_id << ") with wrong inputs."); cryptonote::tx_verification_context tvc = AUTO_VAL_INIT(tvc); - bool add_res = m_tx_pool.add_tx(tx, tvc, true); + bool add_res = m_tx_pool->add_tx(tx, tvc, true); CHECK_AND_ASSERT_MES2(add_res, "handle_block_to_main_chain: failed to add transaction back to transaction pool"); purge_block_data_from_blockchain(bl, tx_processed_count); add_block_as_invalid(bl, id); @@ -1684,7 +1674,7 @@ bool blockchain_storage::handle_block_to_main_chain(const block& bl, const crypt { LOG_PRINT_L1("Block with id: " << id << " failed to add transaction to blockchain storage"); cryptonote::tx_verification_context tvc = AUTO_VAL_INIT(tvc); - bool add_res = m_tx_pool.add_tx(tx, tvc, true); + bool add_res = m_tx_pool->add_tx(tx, tvc, true); CHECK_AND_ASSERT_MES2(add_res, "handle_block_to_main_chain: failed to add transaction back to transaction pool"); purge_block_data_from_blockchain(bl, tx_processed_count); bvc.m_verifivation_failed = true; @@ -1746,7 +1736,7 @@ bool blockchain_storage::handle_block_to_main_chain(const block& bl, const crypt /*if(!m_orphanes_reorganize_in_work) review_orphaned_blocks_with_new_block_id(id, true);*/ - m_tx_pool.on_blockchain_inc(bei.height, id); + m_tx_pool->on_blockchain_inc(bei.height, id); //LOG_PRINT_L0("BLOCK: " << ENDL << "" << dump_obj_as_json(bei.bl)); return true; } @@ -1769,7 +1759,7 @@ bool blockchain_storage::add_new_block(const block& bl_, block_verification_cont //copy block here to let modify block.target block bl = bl_; crypto::hash id = get_block_hash(bl); - CRITICAL_REGION_LOCAL(m_tx_pool);//to avoid deadlock lets lock tx_pool for whole add/reorganize process + CRITICAL_REGION_LOCAL(*m_tx_pool);//to avoid deadlock lets lock tx_pool for whole add/reorganize process CRITICAL_REGION_LOCAL1(m_blockchain_lock); if(have_block(id)) { @@ -1790,7 +1780,7 @@ bool blockchain_storage::add_new_block(const block& bl_, block_verification_cont return handle_block_to_main_chain(bl, id, bvc); } //------------------------------------------------------------------ -void blockchain_storage::check_against_checkpoints(checkpoints& points, bool enforce) +void blockchain_storage::check_against_checkpoints(const checkpoints& points, bool enforce) { const auto& pts = points.get_points(); diff --git a/src/cryptonote_core/blockchain_storage.h b/src/cryptonote_core/blockchain_storage.h index 55f54d13b..25e6b2453 100644 --- a/src/cryptonote_core/blockchain_storage.h +++ b/src/cryptonote_core/blockchain_storage.h @@ -78,7 +78,7 @@ namespace cryptonote uint64_t already_generated_coins; }; - blockchain_storage(tx_memory_pool& tx_pool):m_tx_pool(tx_pool), m_current_block_cumul_sz_limit(0), m_is_in_checkpoint_zone(false), m_is_blockchain_storing(false), m_enforce_dns_checkpoints(false) + blockchain_storage(tx_memory_pool* tx_pool):m_tx_pool(tx_pool), m_current_block_cumul_sz_limit(0), m_is_in_checkpoint_zone(false), m_is_blockchain_storing(false), m_enforce_dns_checkpoints(false) {}; bool init() { return init(tools::get_default_data_dir(), true); } @@ -88,55 +88,55 @@ namespace cryptonote void set_checkpoints(checkpoints&& chk_pts) { m_checkpoints = chk_pts; } //bool push_new_block(); - bool get_blocks(uint64_t start_offset, size_t count, std::list<block>& blocks, std::list<transaction>& txs); - bool get_blocks(uint64_t start_offset, size_t count, std::list<block>& blocks); - bool get_alternative_blocks(std::list<block>& blocks); - size_t get_alternative_blocks_count(); - crypto::hash get_block_id_by_height(uint64_t height); - bool get_block_by_hash(const crypto::hash &h, block &blk); - void get_all_known_block_ids(std::list<crypto::hash> &main, std::list<crypto::hash> &alt, std::list<crypto::hash> &invalid); + bool get_blocks(uint64_t start_offset, size_t count, std::list<block>& blocks, std::list<transaction>& txs) const; + bool get_blocks(uint64_t start_offset, size_t count, std::list<block>& blocks) const; + bool get_alternative_blocks(std::list<block>& blocks) const; + size_t get_alternative_blocks_count() const; + crypto::hash get_block_id_by_height(uint64_t height) const; + bool get_block_by_hash(const crypto::hash &h, block &blk) const; + void get_all_known_block_ids(std::list<crypto::hash> &main, std::list<crypto::hash> &alt, std::list<crypto::hash> &invalid) const; template<class archive_t> void serialize(archive_t & ar, const unsigned int version); - bool have_tx(const crypto::hash &id); - bool have_tx_keyimges_as_spent(const transaction &tx); - bool have_tx_keyimg_as_spent(const crypto::key_image &key_im); - transaction *get_tx(const crypto::hash &id); + bool have_tx(const crypto::hash &id) const; + bool have_tx_keyimges_as_spent(const transaction &tx) const; + bool have_tx_keyimg_as_spent(const crypto::key_image &key_im) const; + const transaction *get_tx(const crypto::hash &id) const; template<class visitor_t> - bool scan_outputkeys_for_indexes(const txin_to_key& tx_in_to_key, visitor_t& vis, uint64_t* pmax_related_block_height = NULL); + bool scan_outputkeys_for_indexes(const txin_to_key& tx_in_to_key, visitor_t& vis, uint64_t* pmax_related_block_height = NULL) const; - uint64_t get_current_blockchain_height(); - crypto::hash get_tail_id(); - crypto::hash get_tail_id(uint64_t& height); - difficulty_type get_difficulty_for_next_block(); + uint64_t get_current_blockchain_height() const; + crypto::hash get_tail_id() const; + crypto::hash get_tail_id(uint64_t& height) const; + difficulty_type get_difficulty_for_next_block() const; bool add_new_block(const block& bl_, block_verification_context& bvc); bool reset_and_set_genesis_block(const block& b); - bool create_block_template(block& b, const account_public_address& miner_address, difficulty_type& di, uint64_t& height, const blobdata& ex_nonce); - bool have_block(const crypto::hash& id); - size_t get_total_transactions(); - bool get_outs(uint64_t amount, std::list<crypto::public_key>& pkeys); - bool get_short_chain_history(std::list<crypto::hash>& ids); - bool find_blockchain_supplement(const std::list<crypto::hash>& qblock_ids, NOTIFY_RESPONSE_CHAIN_ENTRY::request& resp); - bool find_blockchain_supplement(const std::list<crypto::hash>& qblock_ids, uint64_t& starter_offset); - bool find_blockchain_supplement(const uint64_t req_start_block, const std::list<crypto::hash>& qblock_ids, std::list<std::pair<block, std::list<transaction> > >& blocks, uint64_t& total_height, uint64_t& start_height, size_t max_count); + bool create_block_template(block& b, const account_public_address& miner_address, difficulty_type& di, uint64_t& height, const blobdata& ex_nonce) const; + bool have_block(const crypto::hash& id) const; + size_t get_total_transactions() const; + bool get_outs(uint64_t amount, std::list<crypto::public_key>& pkeys) const; + bool get_short_chain_history(std::list<crypto::hash>& ids) const; + bool find_blockchain_supplement(const std::list<crypto::hash>& qblock_ids, NOTIFY_RESPONSE_CHAIN_ENTRY::request& resp) const; + bool find_blockchain_supplement(const std::list<crypto::hash>& qblock_ids, uint64_t& starter_offset) const; + bool find_blockchain_supplement(const uint64_t req_start_block, const std::list<crypto::hash>& qblock_ids, std::list<std::pair<block, std::list<transaction> > >& blocks, uint64_t& total_height, uint64_t& start_height, size_t max_count) const; bool handle_get_objects(NOTIFY_REQUEST_GET_OBJECTS::request& arg, NOTIFY_RESPONSE_GET_OBJECTS::request& rsp); bool handle_get_objects(const COMMAND_RPC_GET_RANDOM_OUTPUTS_FOR_AMOUNTS::request& req, COMMAND_RPC_GET_RANDOM_OUTPUTS_FOR_AMOUNTS::response& res); - bool get_random_outs_for_amounts(const COMMAND_RPC_GET_RANDOM_OUTPUTS_FOR_AMOUNTS::request& req, COMMAND_RPC_GET_RANDOM_OUTPUTS_FOR_AMOUNTS::response& res); - bool get_backward_blocks_sizes(size_t from_height, std::vector<size_t>& sz, size_t count); - bool get_tx_outputs_gindexs(const crypto::hash& tx_id, std::vector<uint64_t>& indexs); + bool get_random_outs_for_amounts(const COMMAND_RPC_GET_RANDOM_OUTPUTS_FOR_AMOUNTS::request& req, COMMAND_RPC_GET_RANDOM_OUTPUTS_FOR_AMOUNTS::response& res) const; + bool get_backward_blocks_sizes(size_t from_height, std::vector<size_t>& sz, size_t count) const; + bool get_tx_outputs_gindexs(const crypto::hash& tx_id, std::vector<uint64_t>& indexs) const; bool store_blockchain(); - bool check_tx_input(const txin_to_key& txin, const crypto::hash& tx_prefix_hash, const std::vector<crypto::signature>& sig, uint64_t* pmax_related_block_height = NULL); - bool check_tx_inputs(const transaction& tx, const crypto::hash& tx_prefix_hash, uint64_t* pmax_used_block_height = NULL); - bool check_tx_inputs(const transaction& tx, uint64_t* pmax_used_block_height = NULL); - bool check_tx_inputs(const transaction& tx, uint64_t& pmax_used_block_height, crypto::hash& max_used_block_id); - uint64_t get_current_comulative_blocksize_limit(); - bool is_storing_blockchain(){return m_is_blockchain_storing;} - uint64_t block_difficulty(size_t i); + bool check_tx_input(const txin_to_key& txin, const crypto::hash& tx_prefix_hash, const std::vector<crypto::signature>& sig, uint64_t* pmax_related_block_height = NULL) const; + bool check_tx_inputs(const transaction& tx, const crypto::hash& tx_prefix_hash, uint64_t* pmax_used_block_height = NULL) const; + bool check_tx_inputs(const transaction& tx, uint64_t* pmax_used_block_height = NULL) const; + bool check_tx_inputs(const transaction& tx, uint64_t& pmax_used_block_height, crypto::hash& max_used_block_id) const; + uint64_t get_current_cumulative_blocksize_limit() const; + bool is_storing_blockchain()const{return m_is_blockchain_storing;} + uint64_t block_difficulty(size_t i) const; template<class t_ids_container, class t_blocks_container, class t_missed_container> - bool get_blocks(const t_ids_container& block_ids, t_blocks_container& blocks, t_missed_container& missed_bs) + bool get_blocks(const t_ids_container& block_ids, t_blocks_container& blocks, t_missed_container& missed_bs) const { CRITICAL_REGION_LOCAL(m_blockchain_lock); @@ -156,7 +156,7 @@ namespace cryptonote } template<class t_ids_container, class t_tx_container, class t_missed_container> - bool get_transactions(const t_ids_container& txs_ids, t_tx_container& txs, t_missed_container& missed_txs) + bool get_transactions(const t_ids_container& txs_ids, t_tx_container& txs, t_missed_container& missed_txs) const { CRITICAL_REGION_LOCAL(m_blockchain_lock); @@ -166,7 +166,7 @@ namespace cryptonote if(it == m_transactions.end()) { transaction tx; - if(!m_tx_pool.get_transaction(tx_id, tx)) + if(!m_tx_pool->get_transaction(tx_id, tx)) missed_txs.push_back(tx_id); else txs.push_back(tx); @@ -180,10 +180,15 @@ namespace cryptonote void print_blockchain(uint64_t start_index, uint64_t end_index); void print_blockchain_index(); void print_blockchain_outs(const std::string& file); - void check_against_checkpoints(checkpoints& points, bool enforce); + void check_against_checkpoints(const checkpoints& points, bool enforce); bool update_checkpoints(const std::string& file_path, bool check_dns); void set_enforce_dns_checkpoints(bool enforce_checkpoints); + block get_block(uint64_t height) const { return m_blocks[height].bl; } + size_t get_block_size(uint64_t height) const { return m_blocks[height].block_cumulative_size; } + difficulty_type get_block_cumulative_difficulty(uint64_t height) const { return m_blocks[height].cumulative_difficulty; } + uint64_t get_block_coins_generated(uint64_t height) const { return m_blocks[height].already_generated_coins; } + private: typedef std::unordered_map<crypto::hash, size_t> blocks_by_id_index; typedef std::unordered_map<crypto::hash, transaction_chain_entry> transactions_container; @@ -193,8 +198,8 @@ namespace cryptonote typedef std::unordered_map<crypto::hash, block> blocks_by_hash; typedef std::map<uint64_t, std::vector<std::pair<crypto::hash, size_t>>> outputs_container; //crypto::hash - tx hash, size_t - index of out in transaction - tx_memory_pool& m_tx_pool; - epee::critical_section m_blockchain_lock; // TODO: add here reader/writer lock + tx_memory_pool* m_tx_pool; + mutable epee::critical_section m_blockchain_lock; // TODO: add here reader/writer lock // main chain blocks_container m_blocks; // height -> block_extended_info @@ -229,26 +234,26 @@ namespace cryptonote bool handle_block_to_main_chain(const block& bl, block_verification_context& bvc); bool handle_block_to_main_chain(const block& bl, const crypto::hash& id, block_verification_context& bvc); bool handle_alternative_block(const block& b, const crypto::hash& id, block_verification_context& bvc); - difficulty_type get_next_difficulty_for_alternative_chain(const std::list<blocks_ext_by_hash::iterator>& alt_chain, block_extended_info& bei); - bool prevalidate_miner_transaction(const block& b, uint64_t height); - bool validate_miner_transaction(const block& b, size_t cumulative_block_size, uint64_t fee, uint64_t& base_reward, uint64_t already_generated_coins); - bool validate_transaction(const block& b, uint64_t height, const transaction& tx); + difficulty_type get_next_difficulty_for_alternative_chain(const std::list<blocks_ext_by_hash::iterator>& alt_chain, block_extended_info& bei) const; + bool prevalidate_miner_transaction(const block& b, uint64_t height) const; + bool validate_miner_transaction(const block& b, size_t cumulative_block_size, uint64_t fee, uint64_t& base_reward, uint64_t already_generated_coins) const; + bool validate_transaction(const block& b, uint64_t height, const transaction& tx) const; bool rollback_blockchain_switching(std::list<block>& original_chain, size_t rollback_height); bool add_transaction_from_block(const transaction& tx, const crypto::hash& tx_id, const crypto::hash& bl_id, uint64_t bl_height); bool push_transaction_to_global_outs_index(const transaction& tx, const crypto::hash& tx_id, std::vector<uint64_t>& global_indexes); bool pop_transaction_from_global_index(const transaction& tx, const crypto::hash& tx_id); - bool get_last_n_blocks_sizes(std::vector<size_t>& sz, size_t count); - bool add_out_to_get_random_outs(std::vector<std::pair<crypto::hash, size_t> >& amount_outs, COMMAND_RPC_GET_RANDOM_OUTPUTS_FOR_AMOUNTS::outs_for_amount& result_outs, uint64_t amount, size_t i); - bool is_tx_spendtime_unlocked(uint64_t unlock_time); + bool get_last_n_blocks_sizes(std::vector<size_t>& sz, size_t count) const; + bool add_out_to_get_random_outs(const std::vector<std::pair<crypto::hash, size_t> >& amount_outs, COMMAND_RPC_GET_RANDOM_OUTPUTS_FOR_AMOUNTS::outs_for_amount& result_outs, uint64_t amount, size_t i) const; + bool is_tx_spendtime_unlocked(uint64_t unlock_time) const; bool add_block_as_invalid(const block& bl, const crypto::hash& h); bool add_block_as_invalid(const block_extended_info& bei, const crypto::hash& h); - size_t find_end_of_allowed_index(const std::vector<std::pair<crypto::hash, size_t> >& amount_outs); - bool check_block_timestamp_main(const block& b); - bool check_block_timestamp(std::vector<uint64_t> timestamps, const block& b); - uint64_t get_adjusted_time(); - bool complete_timestamps_vector(uint64_t start_height, std::vector<uint64_t>& timestamps); + size_t find_end_of_allowed_index(const std::vector<std::pair<crypto::hash, size_t> >& amount_outs) const; + bool check_block_timestamp_main(const block& b) const; + bool check_block_timestamp(std::vector<uint64_t> timestamps, const block& b) const; + uint64_t get_adjusted_time() const; + bool complete_timestamps_vector(uint64_t start_height, std::vector<uint64_t>& timestamps) const; bool update_next_comulative_size_limit(); - bool store_genesis_block(bool testnet); + bool store_genesis_block(bool testnet, bool check_added = false); }; @@ -316,7 +321,7 @@ namespace cryptonote //------------------------------------------------------------------ template<class visitor_t> - bool blockchain_storage::scan_outputkeys_for_indexes(const txin_to_key& tx_in_to_key, visitor_t& vis, uint64_t* pmax_related_block_height) + bool blockchain_storage::scan_outputkeys_for_indexes(const txin_to_key& tx_in_to_key, visitor_t& vis, uint64_t* pmax_related_block_height) const { CRITICAL_REGION_LOCAL(m_blockchain_lock); auto it = m_outputs.find(tx_in_to_key.amount); @@ -326,7 +331,7 @@ namespace cryptonote std::vector<uint64_t> absolute_offsets = relative_output_offsets_to_absolute(tx_in_to_key.key_offsets); - std::vector<std::pair<crypto::hash, size_t> >& amount_outs_vec = it->second; + const std::vector<std::pair<crypto::hash, size_t> >& amount_outs_vec = it->second; size_t count = 0; BOOST_FOREACH(uint64_t i, absolute_offsets) { @@ -335,7 +340,7 @@ namespace cryptonote LOG_PRINT_L0("Wrong index in transaction inputs: " << i << ", expected maximum " << amount_outs_vec.size() - 1); return false; } - transactions_container::iterator tx_it = m_transactions.find(amount_outs_vec[i].first); + transactions_container::const_iterator tx_it = m_transactions.find(amount_outs_vec[i].first); CHECK_AND_ASSERT_MES(tx_it != m_transactions.end(), false, "Wrong transaction id in output indexes: " << epee::string_tools::pod_to_hex(amount_outs_vec[i].first)); CHECK_AND_ASSERT_MES(amount_outs_vec[i].second < tx_it->second.tx.vout.size(), false, "Wrong index in transaction outputs: " << amount_outs_vec[i].second << ", expected less then " << tx_it->second.tx.vout.size()); diff --git a/src/cryptonote_core/checkpoints.cpp b/src/cryptonote_core/checkpoints.cpp index 73668ab36..e4223afb5 100644 --- a/src/cryptonote_core/checkpoints.cpp +++ b/src/cryptonote_core/checkpoints.cpp @@ -84,6 +84,10 @@ namespace cryptonote return check_block(height, h, ignored); } //--------------------------------------------------------------------------- + // this basically says if the blockchain is smaller than the first + // checkpoint then alternate blocks are allowed. Alternatively, if the + // last checkpoint *before* the end of the current chain is also before + // the block to be added, then this is fine. bool checkpoints::is_alternative_block_allowed(uint64_t blockchain_height, uint64_t block_height) const { if (0 == block_height) @@ -99,7 +103,7 @@ namespace cryptonote return checkpoint_height < block_height; } //--------------------------------------------------------------------------- - uint64_t checkpoints::get_max_height() + uint64_t checkpoints::get_max_height() const { std::map< uint64_t, crypto::hash >::const_iterator highest = std::max_element( m_points.begin(), m_points.end(), @@ -108,18 +112,18 @@ namespace cryptonote return highest->first; } //--------------------------------------------------------------------------- - const std::map<uint64_t, crypto::hash>& checkpoints::get_points() + const std::map<uint64_t, crypto::hash>& checkpoints::get_points() const { return m_points; } - bool checkpoints::check_for_conflicts(checkpoints& other) + bool checkpoints::check_for_conflicts(const checkpoints& other) const { for (auto& pt : other.get_points()) { if (m_points.count(pt.first)) { - CHECK_AND_ASSERT_MES(pt.second == m_points[pt.first], false, "Checkpoint at given height already exists, and hash for new checkpoint was different!"); + CHECK_AND_ASSERT_MES(pt.second == m_points.at(pt.first), false, "Checkpoint at given height already exists, and hash for new checkpoint was different!"); } } return true; diff --git a/src/cryptonote_core/checkpoints.h b/src/cryptonote_core/checkpoints.h index a39ce1964..55d765b71 100644 --- a/src/cryptonote_core/checkpoints.h +++ b/src/cryptonote_core/checkpoints.h @@ -45,9 +45,9 @@ namespace cryptonote bool check_block(uint64_t height, const crypto::hash& h) const; bool check_block(uint64_t height, const crypto::hash& h, bool& is_a_checkpoint) const; bool is_alternative_block_allowed(uint64_t blockchain_height, uint64_t block_height) const; - uint64_t get_max_height(); - const std::map<uint64_t, crypto::hash>& get_points(); - bool check_for_conflicts(checkpoints& other); + uint64_t get_max_height() const; + const std::map<uint64_t, crypto::hash>& get_points() const; + bool check_for_conflicts(const checkpoints& other) const; private: std::map<uint64_t, crypto::hash> m_points; }; diff --git a/src/cryptonote_core/cryptonote_basic.h b/src/cryptonote_core/cryptonote_basic.h index f50a19f9e..2be76c0de 100644 --- a/src/cryptonote_core/cryptonote_basic.h +++ b/src/cryptonote_core/cryptonote_basic.h @@ -50,6 +50,8 @@ #include "misc_language.h" #include "tx_extra.h" +#define DB_MEMORY 1 +#define DB_LMDB 2 namespace cryptonote { diff --git a/src/cryptonote_core/cryptonote_core.cpp b/src/cryptonote_core/cryptonote_core.cpp index 11127290e..f9b2b19ff 100644 --- a/src/cryptonote_core/cryptonote_core.cpp +++ b/src/cryptonote_core/cryptonote_core.cpp @@ -53,7 +53,11 @@ namespace cryptonote //----------------------------------------------------------------------------------------------- core::core(i_cryptonote_protocol* pprotocol): m_mempool(m_blockchain_storage), +#if BLOCKCHAIN_DB == DB_LMDB m_blockchain_storage(m_mempool), +#else + m_blockchain_storage(&m_mempool), +#endif m_miner(this), m_miner_address(boost::value_initialized<account_public_address>()), m_starter_message_showed(false), @@ -319,9 +323,9 @@ namespace cryptonote return false; } - if(!keeped_by_block && get_object_blobsize(tx) >= m_blockchain_storage.get_current_comulative_blocksize_limit() - CRYPTONOTE_COINBASE_BLOB_RESERVED_SIZE) + if(!keeped_by_block && get_object_blobsize(tx) >= m_blockchain_storage.get_current_cumulative_blocksize_limit() - CRYPTONOTE_COINBASE_BLOB_RESERVED_SIZE) { - LOG_PRINT_RED_L1("tx is too large " << get_object_blobsize(tx) << ", expected not bigger than " << m_blockchain_storage.get_current_comulative_blocksize_limit() - CRYPTONOTE_COINBASE_BLOB_RESERVED_SIZE); + LOG_PRINT_RED_L1("tx is too large " << get_object_blobsize(tx) << ", expected not bigger than " << m_blockchain_storage.get_current_cumulative_blocksize_limit() - CRYPTONOTE_COINBASE_BLOB_RESERVED_SIZE); return false; } @@ -606,7 +610,11 @@ namespace cryptonote m_starter_message_showed = true; } +#if BLOCKCHAIN_DB == DB_LMDB + m_store_blockchain_interval.do_call(boost::bind(&Blockchain::store_blockchain, &m_blockchain_storage)); +#else m_store_blockchain_interval.do_call(boost::bind(&blockchain_storage::store_blockchain, &m_blockchain_storage)); +#endif m_miner.on_idle(); m_mempool.on_idle(); return true; diff --git a/src/cryptonote_core/cryptonote_core.h b/src/cryptonote_core/cryptonote_core.h index 1921ef14d..a50f46a36 100644 --- a/src/cryptonote_core/cryptonote_core.h +++ b/src/cryptonote_core/cryptonote_core.h @@ -39,7 +39,11 @@ #include "cryptonote_protocol/cryptonote_protocol_handler_common.h" #include "storages/portable_storage_template_helper.h" #include "tx_pool.h" +#if BLOCKCHAIN_DB == DB_LMDB +#include "blockchain.h" +#else #include "blockchain_storage.h" +#endif #include "miner.h" #include "connection_context.h" #include "cryptonote_core/cryptonote_stat_info.h" @@ -112,7 +116,11 @@ namespace cryptonote bool get_random_outs_for_amounts(const COMMAND_RPC_GET_RANDOM_OUTPUTS_FOR_AMOUNTS::request& req, COMMAND_RPC_GET_RANDOM_OUTPUTS_FOR_AMOUNTS::response& res); void pause_mine(); void resume_mine(); +#if BLOCKCHAIN_DB == DB_LMDB + Blockchain& get_blockchain_storage(){return m_blockchain_storage;} +#else blockchain_storage& get_blockchain_storage(){return m_blockchain_storage;} +#endif //debug functions void print_blockchain(uint64_t start_index, uint64_t end_index); void print_blockchain_index(); @@ -151,7 +159,11 @@ namespace cryptonote tx_memory_pool m_mempool; +#if BLOCKCHAIN_DB == DB_LMDB + Blockchain m_blockchain_storage; +#else blockchain_storage m_blockchain_storage; +#endif i_cryptonote_protocol* m_pprotocol; epee::critical_section m_incoming_tx_lock; //m_miner and m_miner_addres are probably temporary here diff --git a/src/cryptonote_core/tx_pool.cpp b/src/cryptonote_core/tx_pool.cpp index 691d16492..03ced2c2e 100644 --- a/src/cryptonote_core/tx_pool.cpp +++ b/src/cryptonote_core/tx_pool.cpp @@ -37,7 +37,11 @@ #include "cryptonote_format_utils.h" #include "cryptonote_boost_serialization.h" #include "cryptonote_config.h" +#if BLOCKCHAIN_DB == DB_LMDB +#include "blockchain.h" +#else #include "blockchain_storage.h" +#endif #include "common/boost_serialization_helper.h" #include "common/int-util.h" #include "misc_language.h" @@ -52,12 +56,19 @@ namespace cryptonote { size_t const TRANSACTION_SIZE_LIMIT = (((CRYPTONOTE_BLOCK_GRANTED_FULL_REWARD_ZONE * 125) / 100) - CRYPTONOTE_COINBASE_BLOB_RESERVED_SIZE); } - //--------------------------------------------------------------------------------- +#if BLOCKCHAIN_DB == DB_LMDB + //--------------------------------------------------------------------------------- + tx_memory_pool::tx_memory_pool(Blockchain& bchs): m_blockchain(bchs) + { + + } +#else tx_memory_pool::tx_memory_pool(blockchain_storage& bchs): m_blockchain(bchs) { } +#endif //--------------------------------------------------------------------------------- bool tx_memory_pool::add_tx(const transaction &tx, /*const crypto::hash& tx_prefix_hash,*/ const crypto::hash &id, size_t blob_size, tx_verification_context& tvc, bool kept_by_block) { diff --git a/src/cryptonote_core/tx_pool.h b/src/cryptonote_core/tx_pool.h index 9c1c2b1aa..b867a1a7d 100644 --- a/src/cryptonote_core/tx_pool.h +++ b/src/cryptonote_core/tx_pool.h @@ -44,10 +44,13 @@ #include "verification_context.h" #include "crypto/hash.h" - namespace cryptonote { +#if BLOCKCHAIN_DB == DB_LMDB + class Blockchain; +#else class blockchain_storage; +#endif /************************************************************************/ /* */ /************************************************************************/ @@ -55,7 +58,11 @@ namespace cryptonote class tx_memory_pool: boost::noncopyable { public: +#if BLOCKCHAIN_DB == DB_LMDB + tx_memory_pool(Blockchain& bchs); +#else tx_memory_pool(blockchain_storage& bchs); +#endif bool add_tx(const transaction &tx, const crypto::hash &id, size_t blob_size, tx_verification_context& tvc, bool keeped_by_block); bool add_tx(const transaction &tx, tx_verification_context& tvc, bool keeped_by_block); //gets tx and remove it from pool @@ -127,7 +134,11 @@ namespace cryptonote //transactions_container m_alternative_transactions; std::string m_config_folder; +#if BLOCKCHAIN_DB == DB_LMDB + Blockchain& m_blockchain; +#else blockchain_storage& m_blockchain; +#endif /************************************************************************/ /* */ /************************************************************************/ @@ -170,9 +181,12 @@ namespace cryptonote uint64_t operator()(const txin_to_scripthash& tx) const {return 0;} }; +#if BLOCKCHAIN_DB == DB_LMDB +#else #if defined(DEBUG_CREATE_BLOCK_TEMPLATE) friend class blockchain_storage; #endif +#endif }; } diff --git a/src/daemon/CMakeLists.txt b/src/daemon/CMakeLists.txt index 4de8b82b8..afb65b5f6 100644 --- a/src/daemon/CMakeLists.txt +++ b/src/daemon/CMakeLists.txt @@ -74,6 +74,7 @@ bitmonero_add_executable(daemon target_link_libraries(daemon LINK_PRIVATE rpc + blockchain_db cryptonote_core crypto common diff --git a/tests/unit_tests/BlockchainDB.cpp b/tests/unit_tests/BlockchainDB.cpp new file mode 100644 index 000000000..4d39d7da8 --- /dev/null +++ b/tests/unit_tests/BlockchainDB.cpp @@ -0,0 +1,306 @@ +// Copyright (c) 2014, The Monero Project +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without modification, are +// permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of +// conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list +// of conditions and the following disclaimer in the documentation and/or other +// materials provided with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be +// used to endorse or promote products derived from this software without specific +// prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF +// THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <boost/filesystem.hpp> +#include <boost/algorithm/string/predicate.hpp> +#include <cstdio> +#include <iostream> + +#include "gtest/gtest.h" + +#include "blockchain_db/blockchain_db.h" +#include "blockchain_db/lmdb/db_lmdb.h" +#include "cryptonote_core/cryptonote_format_utils.h" + +using namespace cryptonote; +using epee::string_tools::pod_to_hex; + +#define ASSERT_HASH_EQ(a,b) ASSERT_EQ(pod_to_hex(a), pod_to_hex(b)) + +namespace { // anonymous namespace + +const std::vector<std::string> t_blocks = + { + "0100d5adc49a053b8818b2b6023cd2d532c6774e164a8fcacd603651cb3ea0cb7f9340b28ec016b4bc4ca301aa0101ff6e08acbb2702eab03067870349139bee7eab2ca2e030a6bb73d4f68ab6a3b6ca937214054cdac0843d028bbe23b57ea9bae53f12da93bb57bf8a2e40598d9fccd10c2921576e987d93cd80b4891302468738e391f07c4f2b356f7957160968e0bfef6e907c3cee2d8c23cbf04b089680c6868f01025a0f41f063e195a966051e3a29e17130a9ce97d48f55285b9bb04bdd55a09ae78088aca3cf0202d0f26169290450fe17e08974789c3458910b4db18361cdc564f8f2d0bdd2cf568090cad2c60e02d6f3483ec45505cc3be841046c7a12bf953ac973939bc7b727e54258e1881d4d80e08d84ddcb0102dae6dfb16d3e28aaaf43e00170b90606b36f35f38f8a3dceb5ee18199dd8f17c80c0caf384a30202385d7e57a4daba4cdd9e550a92dcc188838386e7581f13f09de796cbed4716a42101c052492a077abf41996b50c1b2e67fd7288bcd8c55cdc657b4e22d0804371f6901beb76a82ea17400cd6d7f595f70e1667d2018ed8f5a78d1ce07484222618c3cd" + , "0100f9adc49a057d3113f562eac36f14afa08c22ae20bbbf8cffa31a4466d24850732cb96f80e9762365ee01ab0101ff6f08cc953502be76deb845c431f2ed9a4862457654b914003693b8cd672abc935f0d97b16380c08db7010291819f2873e3efbae65ecd5a736f5e8a26318b591c21e39a03fb536520ac63ba80dac40902439a10fde02e39e48e0b31e57cc084a07eedbefb8cbea0143aedd0442b189caa80c6868f010227b84449de4cd7a48cbdce8974baf0b6646e03384e32055e705c243a86bef8a58088aca3cf0202fa7bd15e4e7e884307ab130bb9d50e33c5fcea6546042a26f948efd5952459ee8090cad2c60e028695583dbb8f8faab87e3ef3f88fa827db097bbf51761d91924f5c5b74c6631780e08d84ddcb010279d2f247b54690e3b491e488acff16014a825fd740c23988a25df7c4670c1f2580c0caf384a302022599dfa3f8788b66295051d85937816e1c320cdb347a0fba5219e3fe60c83b2421010576509c5672025d28fd5d3f38efce24e1f9aaf65dd3056b2504e6e2b7f19f7800" + }; + +const std::vector<size_t> t_sizes = + { + 1122 + , 347 + }; + +const std::vector<difficulty_type> t_diffs = + { + 4003674 + , 4051757 + }; + +const std::vector<uint64_t> t_coins = + { + 1952630229575370 + , 1970220553446486 + }; + +const std::vector<std::vector<std::string>> t_transactions = + { + { + "0100010280e08d84ddcb0106010401110701f254220bb50d901a5523eaed438af5d43f8c6d0e54ba0632eb539884f6b7c02008c0a8a50402f9c7cf807ae74e56f4ec84db2bd93cfb02c2249b38e306f5b54b6e05d00d543b8095f52a02b6abb84e00f47f0a72e37b6b29392d906a38468404c57db3dbc5e8dd306a27a880d293ad0302cfc40a86723e7d459e90e45d47818dc0e81a1f451ace5137a4af8110a89a35ea80b4c4c321026b19c796338607d5a2c1ba240a167134142d72d1640ef07902da64fed0b10cfc8088aca3cf02021f6f655254fee84161118b32e7b6f8c31de5eb88aa00c29a8f57c0d1f95a24dd80d0b8e1981a023321af593163cea2ae37168ab926efd87f195756e3b723e886bdb7e618f751c480a094a58d1d0295ed2b08d1cf44482ae0060a5dcc4b7d810a85dea8c62e274f73862f3d59f8ed80a0e5b9c2910102dc50f2f28d7ceecd9a1147f7106c8d5b4e08b2ec77150f52dd7130ee4f5f50d42101d34f90ac861d0ee9fe3891656a234ea86a8a93bf51a237db65baa00d3f4aa196a9e1d89bc06b40e94ea9a26059efc7ba5b2de7ef7c139831ca62f3fe0bb252008f8c7ee810d3e1e06313edf2db362fc39431755779466b635f12f9f32e44470a3e85e08a28fcd90633efc94aa4ae39153dfaf661089d045521343a3d63e8da08d7916753c66aaebd4eefcfe8e58e5b3d266b752c9ca110749fa33fce7c44270386fcf2bed4f03dd5dadb2dc1fd4c505419f8217b9eaec07521f0d8963e104603c926745039cf38d31de6ed95ace8e8a451f5a36f818c151f517546d55ac0f500e54d07b30ea7452f2e93fa4f60bdb30d71a0a97f97eb121e662006780fbf69002228224a96bff37893d47ec3707b17383906c0cd7d9e7412b3e6c8ccf1419b093c06c26f96e3453b424713cdc5c9575f81cda4e157052df11f4c40809edf420f88a3dd1f7909bbf77c8b184a933389094a88e480e900bcdbf6d1824742ee520fc0032e7d892a2b099b8c6edfd1123ce58a34458ee20cad676a7f7cfd80a28f0cb0888af88838310db372986bdcf9bfcae2324480ca7360d22bff21fb569a530e" + } + , { + } + }; + +// if the return type (blobdata for now) of block_to_blob ever changes +// from std::string, this might break. +bool compare_blocks(const block& a, const block& b) +{ + auto hash_a = pod_to_hex(get_block_hash(a)); + auto hash_b = pod_to_hex(get_block_hash(b)); + + return hash_a == hash_b; +} + +void print_block(const block& blk, const std::string& prefix = "") +{ + std::cerr << prefix << ": " << std::endl + << "\thash - " << pod_to_hex(get_block_hash(blk)) << std::endl + << "\tparent - " << pod_to_hex(blk.prev_id) << std::endl + << "\ttimestamp - " << blk.timestamp << std::endl + ; +} + +// if the return type (blobdata for now) of tx_to_blob ever changes +// from std::string, this might break. +bool compare_txs(const transaction& a, const transaction& b) +{ + auto ab = tx_to_blob(a); + auto bb = tx_to_blob(b); + + return ab == bb; +} + +// convert hex string to string that has values based on that hex +// thankfully should automatically ignore null-terminator. +std::string h2b(const std::string& s) +{ + bool upper = true; + std::string result; + unsigned char val = 0; + for (char c : s) + { + if (upper) + { + val = 0; + if (c <= 'f' && c >= 'a') + { + val = ((c - 'a') + 10) << 4; + } + else + { + val = (c - '0') << 4; + } + } + else + { + if (c <= 'f' && c >= 'a') + { + val |= (c - 'a') + 10; + } + else + { + val |= c - '0'; + } + result += (char)val; + } + upper = !upper; + } + return result; +} + +template <typename T> +class BlockchainDBTest : public testing::Test +{ +protected: + BlockchainDBTest() : m_db(new T()) + { + for (auto& i : t_blocks) + { + block bl; + blobdata bd = h2b(i); + parse_and_validate_block_from_blob(bd, bl); + m_blocks.push_back(bl); + } + for (auto& i : t_transactions) + { + std::vector<transaction> txs; + for (auto& j : i) + { + transaction tx; + blobdata bd = h2b(j); + parse_and_validate_tx_from_blob(bd, tx); + txs.push_back(tx); + } + m_txs.push_back(txs); + } + } + + ~BlockchainDBTest() { + auto files = m_db->get_filenames(); + delete m_db; + remove_files(files); + } + + BlockchainDB* m_db; + std::string m_prefix; + std::vector<block> m_blocks; + std::vector<std::vector<transaction> > m_txs; + + void remove_files(const std::vector<std::string>& files) + { + // remove each file the db created, making sure it starts with fname. + for (auto& f : files) + { + if (boost::starts_with(f, m_prefix)) + { + boost::filesystem::remove(f); + } + else + { + std::cerr << "File created by test not to be removed (for safety): " << f << std::endl; + } + } + + // remove directory if it still exists + boost::filesystem::remove(m_prefix); + } + + void set_prefix(const std::string& prefix) + { + m_prefix = prefix; + } +}; + +using testing::Types; + +typedef Types<BlockchainLMDB> implementations; + +TYPED_TEST_CASE(BlockchainDBTest, implementations); + +TYPED_TEST(BlockchainDBTest, OpenAndClose) +{ + std::string fname(tmpnam(NULL)); + + this->set_prefix(fname); + + // make sure open does not throw + ASSERT_NO_THROW(this->m_db->open(fname)); + + // make sure open when already open DOES throw + ASSERT_THROW(this->m_db->open(fname), DB_OPEN_FAILURE); + + ASSERT_NO_THROW(this->m_db->close()); +} + +TYPED_TEST(BlockchainDBTest, AddBlock) +{ + std::string fname(tmpnam(NULL)); + + // make sure open does not throw + ASSERT_NO_THROW(this->m_db->open(fname)); + + // adding a block with no parent in the blockchain should throw. + // note: this shouldn't be possible, but is a good (and cheap) failsafe. + // + // TODO: need at least one more block to make this reasonable, as the + // BlockchainDB implementation should not check for parent if + // no blocks have been added yet (because genesis has no parent). + //ASSERT_THROW(this->m_db->add_block(this->m_blocks[1], t_sizes[1], t_diffs[1], t_coins[1], this->m_txs[1]), BLOCK_PARENT_DNE); + + ASSERT_NO_THROW(this->m_db->add_block(this->m_blocks[0], t_sizes[0], t_diffs[0], t_coins[0], this->m_txs[0])); + ASSERT_NO_THROW(this->m_db->add_block(this->m_blocks[1], t_sizes[1], t_diffs[1], t_coins[1], this->m_txs[1])); + + block b; + ASSERT_TRUE(this->m_db->block_exists(get_block_hash(this->m_blocks[0]))); + ASSERT_NO_THROW(b = this->m_db->get_block(get_block_hash(this->m_blocks[0]))); + + ASSERT_TRUE(compare_blocks(this->m_blocks[0], b)); + + ASSERT_NO_THROW(b = this->m_db->get_block_from_height(0)); + + ASSERT_TRUE(compare_blocks(this->m_blocks[0], b)); + + // assert that we can't add the same block twice + ASSERT_THROW(this->m_db->add_block(this->m_blocks[0], t_sizes[0], t_diffs[0], t_coins[0], this->m_txs[0]), BLOCK_EXISTS); + + for (auto& h : this->m_blocks[0].tx_hashes) + { + transaction tx; + ASSERT_TRUE(this->m_db->tx_exists(h)); + ASSERT_NO_THROW(tx = this->m_db->get_tx(h)); + + ASSERT_HASH_EQ(h, get_transaction_hash(tx)); + } +} + +TYPED_TEST(BlockchainDBTest, RetrieveBlockData) +{ + std::string fname(tmpnam(NULL)); + + // make sure open does not throw + ASSERT_NO_THROW(this->m_db->open(fname)); + + ASSERT_NO_THROW(this->m_db->add_block(this->m_blocks[0], t_sizes[0], t_diffs[0], t_coins[0], this->m_txs[0])); + + ASSERT_EQ(t_sizes[0], this->m_db->get_block_size(0)); + ASSERT_EQ(t_diffs[0], this->m_db->get_block_cumulative_difficulty(0)); + ASSERT_EQ(t_diffs[0], this->m_db->get_block_difficulty(0)); + ASSERT_EQ(t_coins[0], this->m_db->get_block_already_generated_coins(0)); + + ASSERT_NO_THROW(this->m_db->add_block(this->m_blocks[1], t_sizes[1], t_diffs[1], t_coins[1], this->m_txs[1])); + ASSERT_EQ(t_diffs[1] - t_diffs[0], this->m_db->get_block_difficulty(1)); + + ASSERT_HASH_EQ(get_block_hash(this->m_blocks[0]), this->m_db->get_block_hash_from_height(0)); + + std::vector<block> blks; + ASSERT_NO_THROW(blks = this->m_db->get_blocks_range(0, 1)); + ASSERT_EQ(2, blks.size()); + + ASSERT_HASH_EQ(get_block_hash(this->m_blocks[0]), get_block_hash(blks[0])); + ASSERT_HASH_EQ(get_block_hash(this->m_blocks[1]), get_block_hash(blks[1])); + + std::vector<crypto::hash> hashes; + ASSERT_NO_THROW(hashes = this->m_db->get_hashes_range(0, 1)); + ASSERT_EQ(2, hashes.size()); + + ASSERT_HASH_EQ(get_block_hash(this->m_blocks[0]), hashes[0]); + ASSERT_HASH_EQ(get_block_hash(this->m_blocks[1]), hashes[1]); +} + +} // anonymous namespace diff --git a/tests/unit_tests/CMakeLists.txt b/tests/unit_tests/CMakeLists.txt index c480a312d..54f25431d 100644 --- a/tests/unit_tests/CMakeLists.txt +++ b/tests/unit_tests/CMakeLists.txt @@ -29,6 +29,7 @@ set(unit_tests_sources address_from_url.cpp base58.cpp + BlockchainDB.cpp block_reward.cpp chacha8.cpp checkpoints.cpp |