diff options
Diffstat (limited to '')
240 files changed, 42513 insertions, 0 deletions
diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 00000000..1bacbd6f --- /dev/null +++ b/AUTHORS @@ -0,0 +1,18 @@ + +Authors of LZMA Utils +--------------------- + +Igor Pavlov + * designed LZMA as an algorithm; + * wrote an implementation known as LZMA SDK, which is part of + the bigger 7-Zip project. + +Ville Koskinen + * wrote the first version of the gzip-like lzma command line + utility (C++) + * helped a lot with the documentation. + +Lasse Collin + * ported LZMA SDK to C and zlib-like API (liblzma); + * rewrote the command line tool again to use liblzma and pthreads. + diff --git a/COPYING b/COPYING new file mode 100644 index 00000000..a17989dc --- /dev/null +++ b/COPYING @@ -0,0 +1,24 @@ + +LZMA Utils Licenses +------------------- + + Different licenses apply to different files in this package. Here + is a rough summary of which licenses apply to which parts of this + package (but check the individual files to be sure!): + - Everything under src/liblzma/check is public domain. + - Everything else under the src directory is under the GNU LGPL + 2.1 or (at your option) any later version. + - Outside the src directory, there are some files that are under + the GNU GPL 2 or (at your option) any later version, or under + the GNU GPL 3 or (at your option) any later version. + - Most documentation files are under an all-permissive license. + + The following license texts are included in the following files + in this package: + - COPYING.LGPLv2.1: GNU Lesser General Public License version 2.1 + - COPYING.GPLv2: GNU General Public License version 2 + - COPYING.GPLv3: GNU General Public License version 3 + + If you have questions, don't hesitate to ask the copyright holder(s) + for more information. 
+ diff --git a/COPYING.GPLv2 b/COPYING.GPLv2 new file mode 100644 index 00000000..d511905c --- /dev/null +++ b/COPYING.GPLv2 @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. 
+ + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. 
(Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. 
+ + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/COPYING.GPLv3 b/COPYING.GPLv3 new file mode 100644 index 00000000..94a9ed02 --- /dev/null +++ b/COPYING.GPLv3 @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/> + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. 
Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. 
To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. 
If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. 
For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. 
Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. 
This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. 
+ + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. 
+ + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. 
+ + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. 
If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. 
If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. 
+ + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. 
For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + <program> Copyright (C) <year> <name of author> + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<http://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. 
If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<http://www.gnu.org/philosophy/why-not-lgpl.html>. diff --git a/COPYING.LGPLv2.1 b/COPYING.LGPLv2.1 new file mode 100644 index 00000000..5ab7695a --- /dev/null +++ b/COPYING.LGPLv2.1 @@ -0,0 +1,504 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. 
+ + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. 
We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. 
+ + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. 
+ + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. 
+ + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. 
+ + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. + + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. 
If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. (It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. 
+ + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. 
However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. 
+ +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. 
If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms of the +ordinary General Public License). + + To apply these terms, attach the following notices to the library. It is +safest to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + <one line to give the library's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James Random Hacker. + + <signature of Ty Coon>, 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! + + diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 00000000..c6de9baf --- /dev/null +++ b/ChangeLog @@ -0,0 +1,2 @@ +See the commit log in the git repository: +git://ctrl.tukaani.org/lzma-utils.git diff --git a/Doxyfile.in b/Doxyfile.in new file mode 100644 index 00000000..8ca611b6 --- /dev/null +++ b/Doxyfile.in @@ -0,0 +1,1229 @@ +# Doxyfile 1.4.7 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project +# +# All text after a hash (#) is considered a comment and will be ignored +# The format is: +# TAG = value [value, ...] +# For lists items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (" ") + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded +# by quotes) that should identify the project. + +PROJECT_NAME = "@PACKAGE_NAME@" + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. 
+# This could be handy for archiving the generated documentation or +# if some version control system is used. + +PROJECT_NUMBER = "@PACKAGE_VERSION@" + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) +# base path where the generated documentation will be put. +# If a relative path is entered, it will be relative to the location +# where doxygen was started. If left blank the current directory will be used. + +OUTPUT_DIRECTORY = doc + +# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create +# 4096 sub-directories (in 2 levels) under the output directory of each output +# format and will distribute the generated files over these directories. +# Enabling this option can be useful when feeding doxygen a huge amount of +# source files, where putting all generated files in the same directory would +# otherwise cause performance problems for the file system. + +CREATE_SUBDIRS = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. +# The default language is English, other supported languages are: +# Brazilian, Catalan, Chinese, Chinese-Traditional, Croatian, Czech, Danish, +# Dutch, Finnish, French, German, Greek, Hungarian, Italian, Japanese, +# Japanese-en (Japanese with English messages), Korean, Korean-en, Norwegian, +# Polish, Portuguese, Romanian, Russian, Serbian, Slovak, Slovene, Spanish, +# Swedish, and Ukrainian. + +OUTPUT_LANGUAGE = English + +# This tag can be used to specify the encoding used in the generated output. +# The encoding is not always determined by the language that is chosen, +# but also whether or not the output is meant for Windows or non-Windows users. 
+# In case there is a difference, setting the USE_WINDOWS_ENCODING tag to YES +# forces the Windows encoding (this is the default for the Windows binary), +# whereas setting the tag to NO uses a Unix-style encoding (the default for +# all platforms other than Windows). + +USE_WINDOWS_ENCODING = NO + +# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will +# include brief member descriptions after the members that are listed in +# the file and class documentation (similar to JavaDoc). +# Set to NO to disable this. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend +# the brief description of a member or function before the detailed description. +# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. + +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator +# that is used to form the text in various listings. Each string +# in this list, if found as the leading text of the brief description, will be +# stripped from the text and the result after processing the whole list, is +# used as the annotated text. Otherwise, the brief description is used as-is. +# If left blank, the following values are used ("$name" is automatically +# replaced with the name of the entity): "The $name class" "The $name widget" +# "The $name file" "is" "provides" "specifies" "contains" +# "represents" "a" "an" "the" + +ABBREVIATE_BRIEF = + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# Doxygen will generate a detailed section even if there is only a brief +# description. + +ALWAYS_DETAILED_SEC = YES + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. 
Constructors, destructors and assignment +# operators of the base classes will not be shown. + +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full +# path before file names in the file list and in the header files. If set +# to NO the shortest path that makes the file name unique will be used. + +FULL_PATH_NAMES = YES + +# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag +# can be used to strip a user-defined part of the path. Stripping is +# only done if one of the specified strings matches the left-hand part of +# the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the +# path to strip. + +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of +# the path mentioned in the documentation of a class, which tells +# the reader which header file to include in order to use a class. +# If left blank only the name of the header file containing the class +# definition is used. Otherwise one should specify the include paths that +# are normally passed to the compiler using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter +# (but less readable) file names. This can be useful if your file system +# doesn't support long names like on DOS, Mac, or CD-ROM. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen +# will interpret the first line (until the first dot) of a JavaDoc-style +# comment as the brief description. If set to NO, the JavaDoc +# comments will behave just like the Qt-style comments (thus requiring an +# explicit @brief command for a brief description). + +JAVADOC_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen +# treat a multi-line C++ special comment block (i.e. a block of //! or /// +# comments) as a brief description.
This used to be the default behaviour. +# The new default is to treat a multi-line C++ comment block as a detailed +# description. Set this tag to YES if you prefer the old behaviour instead. + +MULTILINE_CPP_IS_BRIEF = NO + +# If the DETAILS_AT_TOP tag is set to YES then Doxygen +# will output the detailed description near the top, like JavaDoc. +# If set to NO, the detailed description appears after the member +# documentation. + +DETAILS_AT_TOP = NO + +# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented +# member inherits the documentation from any documented member that it +# re-implements. + +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce +# a new page for each member. If set to NO, the documentation of a member will +# be part of the file/class/namespace that contains it. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. +# Doxygen uses this value to replace tabs by spaces in code fragments. + +TAB_SIZE = 8 + +# This tag can be used to specify a number of aliases that acts +# as commands in the documentation. An alias has the form "name=value". +# For example adding "sideeffect=\par Side Effects:\n" will allow you to +# put the command \sideeffect (or @sideeffect) in the documentation, which +# will result in a user-defined paragraph with heading "Side Effects:". +# You can put \n's in the value part of an alias to insert newlines. + +ALIASES = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C +# sources only. Doxygen will then generate output that is more tailored for C. +# For instance, some of the names that are used will be different. The list +# of all members will be omitted, etc. 
+ +OPTIMIZE_OUTPUT_FOR_C = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES, then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. + +DISTRIBUTE_GROUP_DOC = NO + +# Set the SUBGROUPING tag to YES (the default) to allow class member groups of +# the same type (for instance a group of public functions) to be put as a +# subgroup of that type (e.g. under the Public Functions section). Set it to +# NO to prevent subgrouping. Alternatively, this can be done per class using +# the \nosubgrouping command. + +SUBGROUPING = YES + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in +# documentation are documented, even if no documentation was available. +# Private class members and static file members will be hidden unless +# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES + +EXTRACT_ALL = NO + +# If the EXTRACT_PRIVATE tag is set to YES all private members of a class +# will be included in the documentation. + +EXTRACT_PRIVATE = YES + +# If the EXTRACT_STATIC tag is set to YES all static members of a file +# will be included in the documentation. + +EXTRACT_STATIC = YES + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) +# defined locally in source files will be included in the documentation. +# If set to NO only classes defined in header files are included. + +EXTRACT_LOCAL_CLASSES = YES + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all +# undocumented members of documented classes, files or namespaces. 
+# If set to NO (the default) these members will be included in the +# various overviews, but no documentation section is generated. +# This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. +# If set to NO (the default) these classes will be included in the various +# overviews. This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all +# friend (class|struct|union) declarations. +# If set to NO (the default) these declarations will be included in the +# documentation. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any +# documentation blocks found inside the body of a function. +# If set to NO (the default) these blocks will be appended to the +# function's detailed documentation block. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation +# that is typed after a \internal command is included. If the tag is set +# to NO (the default) then the documentation will be excluded. +# Set it to YES to include the internal documentation. + +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate +# file names in lower-case letters. If set to YES upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. + +CASE_SENSE_NAMES = YES + +# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen +# will show members with their full class and namespace scopes in the +# documentation. If set to YES the scope will be hidden. 
+ +HIDE_SCOPE_NAMES = NO + +# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen +# will put a list of the files that are included by a file in the documentation +# of that file. + +SHOW_INCLUDE_FILES = YES + +# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] +# is inserted in the documentation for inline members. + +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen +# will sort the (detailed) documentation of file and class members +# alphabetically by member name. If set to NO the members will appear in +# declaration order. + +SORT_MEMBER_DOCS = NO + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the +# brief documentation of file, namespace and class members alphabetically +# by member name. If set to NO (the default) the members will appear in +# declaration order. + +SORT_BRIEF_DOCS = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be +# sorted by fully-qualified names, including namespaces. If set to +# NO (the default), the class list will be sorted only by class name, +# not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the +# alphabetical list. + +SORT_BY_SCOPE_NAME = NO + +# The GENERATE_TODOLIST tag can be used to enable (YES) or +# disable (NO) the todo list. This list is created by putting \todo +# commands in the documentation. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable (YES) or +# disable (NO) the test list. This list is created by putting \test +# commands in the documentation. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable (YES) or +# disable (NO) the bug list. This list is created by putting \bug +# commands in the documentation. 
+ +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or +# disable (NO) the deprecated list. This list is created by putting +# \deprecated commands in the documentation. + +GENERATE_DEPRECATEDLIST= YES + +# The ENABLED_SECTIONS tag can be used to enable conditional +# documentation sections, marked by \if sectionname ... \endif. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines +# the initial value of a variable or define consists of for it to appear in +# the documentation. If the initializer consists of more lines than specified +# here it will be hidden. Use a value of 0 to hide initializers completely. +# The appearance of the initializer of individual variables and defines in the +# documentation can be controlled using \showinitializer or \hideinitializer +# command in the documentation regardless of this setting. + +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated +# at the bottom of the documentation of classes and structs. If set to YES the +# list will mention the files that were used to generate the documentation. + +SHOW_USED_FILES = YES + +# If the sources in your project are distributed over multiple directories +# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy +# in the documentation. The default is NO. + +SHOW_DIRECTORIES = NO + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from the +# version control system). Doxygen will invoke the program by executing (via +# popen()) the command <command> <input-file>, where <command> is the value of +# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file +# provided by doxygen. Whatever the program writes to standard output +# is used as the file version. See the manual for examples. 
+ +FILE_VERSION_FILTER = + +#--------------------------------------------------------------------------- +# configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated +# by doxygen. Possible values are YES and NO. If left blank NO is used. + +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated by doxygen. Possible values are YES and NO. If left blank +# NO is used. + +WARNINGS = YES + +# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings +# for undocumented members. If EXTRACT_ALL is set to YES then this flag will +# automatically be disabled. + +WARN_IF_UNDOCUMENTED = YES + +# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some +# parameters in a documented function, or documenting parameters that +# don't exist or using markup commands wrongly. + +WARN_IF_DOC_ERROR = YES + +# This WARN_NO_PARAMDOC option can be enabled to get warnings for +# functions that are documented, but have no documentation for their parameters +# or return value. If set to NO (the default) doxygen will only warn about +# wrong or incomplete parameter documentation, but not about the absence of +# documentation. + +WARN_NO_PARAMDOC = NO + +# The WARN_FORMAT tag determines the format of the warning messages that +# doxygen can produce. The string should contain the $file, $line, and $text +# tags, which will be replaced by the file and line number from which the +# warning originated and the warning text.
Optionally the format may contain +# $version, which will be replaced by the version of the file (if it could +# be obtained via FILE_VERSION_FILTER) + +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning +# and error messages should be written. If left blank the output is written +# to stderr. + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag can be used to specify the files and/or directories that contain +# documented source files. You may enter file names like "myfile.cpp" or +# directories like "/usr/src/myproject". Separate the files or directories +# with spaces. + +INPUT = @top_srcdir@/src + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank the following patterns are tested: +# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx +# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py + +FILE_PATTERNS = *.h *.c + +# The RECURSIVE tag can be used to specify whether or not subdirectories +# should be searched for input files as well. Possible values are YES and NO. +# If left blank NO is used. + +RECURSIVE = YES + +# The EXCLUDE tag can be used to specify files and/or directories that should +# be excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. + +EXCLUDE = + +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or +# directories that are symbolic links (a Unix filesystem feature) are excluded +# from the input.
+ +EXCLUDE_SYMLINKS = YES + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. Note that the wildcards are matched +# against the file with absolute path, so to exclude all test directories +# for example use the pattern */test/* + +EXCLUDE_PATTERNS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or +# directories that contain example code fragments that are included (see +# the \include command). + +EXAMPLE_PATH = + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank all files are included. + +EXAMPLE_PATTERNS = + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude +# commands irrespective of the value of the RECURSIVE tag. +# Possible values are YES and NO. If left blank NO is used. + +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or +# directories that contain images that are included in the documentation (see +# the \image command). + +IMAGE_PATH = + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command <filter> <input-file>, where <filter> +# is the value of the INPUT_FILTER tag, and <input-file> is the name of an +# input file. Doxygen will then use the output that the filter program writes +# to standard output. If FILTER_PATTERNS is specified, this tag will be +# ignored. + +INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis.
Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. The filters are a list of the form: +# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further +# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER +# is applied to all files. + +FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER) will be used to filter the input files when producing source +# files to browse (i.e. when SOURCE_BROWSER is set to YES). + +FILTER_SOURCE_FILES = NO + +#--------------------------------------------------------------------------- +# configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will +# be generated. Documented entities will be cross-referenced with these sources. +# Note: To get rid of all source code in the generated output, make sure also +# VERBATIM_HEADERS is set to NO. + +SOURCE_BROWSER = NO + +# Setting the INLINE_SOURCES tag to YES will include the body +# of functions and classes directly in the documentation. + +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct +# doxygen to hide any special comment blocks from generated source code +# fragments. Normal C and C++ comments will always remain visible. + +STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES (the default) +# then for each documented function all documented +# functions referencing it will be listed. + +REFERENCED_BY_RELATION = YES + +# If the REFERENCES_RELATION tag is set to YES (the default) +# then for each documented function all documented entities +# called/used by that function will be listed. 
+ +REFERENCES_RELATION = YES + +# If the REFERENCES_LINK_SOURCE tag is set to YES (the default) +# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from +# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will +# link to the source code. Otherwise they will link to the documentation. + +REFERENCES_LINK_SOURCE = YES + +# If the USE_HTAGS tag is set to YES then the references to source code +# will point to the HTML generated by the htags(1) tool instead of doxygen +# built-in source browser. The htags tool is part of GNU's global source +# tagging system (see http://www.gnu.org/software/global/global.html). You +# will need version 4.8.6 or higher. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen +# will generate a verbatim copy of the header file for each class for +# which an include is specified. Set to NO to disable this. + +VERBATIM_HEADERS = NO + +#--------------------------------------------------------------------------- +# configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index +# of all compounds will be generated. Enable this if the project +# contains a lot of classes, structs, unions or interfaces. + +ALPHABETICAL_INDEX = NO + +# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then +# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns +# in which this list will be split (can be a number in the range [1..20]) + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all +# classes will be put under the same header in the alphabetical index. +# The IGNORE_PREFIX tag can be used to specify one or more prefixes that +# should be ignored while generating the index headers.
+ +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES (the default) Doxygen will +# generate HTML output. + +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `html' will be used as the default path. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for +# each generated HTML page (for example: .htm,.php,.asp). If it is left blank +# doxygen will generate files with .html extension. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a personal HTML header for +# each generated HTML page. If it is left blank doxygen will generate a +# standard header. + +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a personal HTML footer for +# each generated HTML page. If it is left blank doxygen will generate a +# standard footer. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading +# style sheet that is used by each HTML page. It can be used to +# fine-tune the look of the HTML output. If the tag is left blank doxygen +# will generate a default style sheet. Note that doxygen will try to copy +# the style sheet file to the HTML output directory, so don't put your own +# stylesheet in the HTML output directory as well, or it will be erased! + +HTML_STYLESHEET = + +# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, +# files or namespaces will be aligned in HTML using tables. If set to +# NO a bullet list will be used. 
+ +HTML_ALIGN_MEMBERS = YES + +# If the GENERATE_HTMLHELP tag is set to YES, additional index files +# will be generated that can be used as input for tools like the +# Microsoft HTML help workshop to generate a compressed HTML help file (.chm) +# of the generated HTML documentation. + +GENERATE_HTMLHELP = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can +# be used to specify the file name of the resulting .chm file. You +# can add a path in front of the file if the result should not be +# written to the html output directory. + +CHM_FILE = + +# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can +# be used to specify the location (absolute path including file name) of +# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run +# the HTML help compiler on the generated index.hhp. + +HHC_LOCATION = + +# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag +# controls if a separate .chi index file is generated (YES) or that +# it should be included in the master .chm file (NO). + +GENERATE_CHI = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag +# controls whether a binary table of contents is generated (YES) or a +# normal table of contents (NO) in the .chm file. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members +# to the contents of the HTML help documentation and to the tree view. + +TOC_EXPAND = NO + +# The DISABLE_INDEX tag can be used to turn on/off the condensed index at +# top of each HTML page. The value NO (the default) enables the index and +# the value YES disables it. + +DISABLE_INDEX = NO + +# This tag can be used to set the number of enum values (range [1..20]) +# that doxygen will group on one line in the generated HTML documentation. 
+ +ENUM_VALUES_PER_LINE = 4 + +# If the GENERATE_TREEVIEW tag is set to YES, a side panel will be +# generated containing a tree-like index structure (just like the one that +# is generated for HTML Help). For this to work a browser that supports +# JavaScript, DHTML, CSS and frames is required (for instance Mozilla 1.0+, +# Netscape 6.0+, Internet explorer 5.0+, or Konqueror). Windows users are +# probably better off using the HTML help feature. + +GENERATE_TREEVIEW = NO + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be +# used to set the initial width (in pixels) of the frame in which the tree +# is shown. + +TREEVIEW_WIDTH = 250 + +#--------------------------------------------------------------------------- +# configuration options related to the LaTeX output +#--------------------------------------------------------------------------- + +# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will +# generate Latex output. + +GENERATE_LATEX = YES + +# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `latex' will be used as the default path. + +LATEX_OUTPUT = latex + +# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be +# invoked. If left blank `latex' will be used as the default command name. + +LATEX_CMD_NAME = latex + +# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to +# generate index for LaTeX. If left blank `makeindex' will be used as the +# default command name. + +MAKEINDEX_CMD_NAME = makeindex + +# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact +# LaTeX documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_LATEX = NO + +# The PAPER_TYPE tag can be used to set the paper type that is used +# by the printer. 
Possible values are: a4, a4wide, letter, legal and +# executive. If left blank a4wide will be used. + +PAPER_TYPE = a4wide + +# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX +# packages that should be included in the LaTeX output. + +EXTRA_PACKAGES = + +# The LATEX_HEADER tag can be used to specify a personal LaTeX header for +# the generated latex document. The header should contain everything until +# the first chapter. If it is left blank doxygen will generate a +# standard header. Notice: only use this tag if you know what you are doing! + +LATEX_HEADER = + +# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated +# is prepared for conversion to pdf (using ps2pdf). The pdf file will +# contain links (just like the HTML output) instead of page references. +# This makes the output suitable for online browsing using a pdf viewer. + +PDF_HYPERLINKS = NO + +# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of +# plain latex in the generated Makefile. Set this option to YES to get a +# higher quality PDF documentation. + +USE_PDFLATEX = YES + +# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode +# command to the generated LaTeX files. This will instruct LaTeX to keep +# running if errors occur, instead of asking the user for help. +# This option is also used when generating formulas in HTML. + +LATEX_BATCHMODE = NO + +# If LATEX_HIDE_INDICES is set to YES then doxygen will not +# include the index chapters (such as File Index, Compound Index, etc.) +# in the output. + +LATEX_HIDE_INDICES = NO + +#--------------------------------------------------------------------------- +# configuration options related to the RTF output +#--------------------------------------------------------------------------- + +# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output +# The RTF output is optimized for Word 97 and may not look very pretty with +# other RTF readers or editors.
+ +GENERATE_RTF = NO + +# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `rtf' will be used as the default path. + +RTF_OUTPUT = rtf + +# If the COMPACT_RTF tag is set to YES Doxygen generates more compact +# RTF documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_RTF = NO + +# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated +# will contain hyperlink fields. The RTF file will +# contain links (just like the HTML output) instead of page references. +# This makes the output suitable for online browsing using WORD or other +# programs which support those fields. +# Note: wordpad (write) and others do not support links. + +RTF_HYPERLINKS = NO + +# Load stylesheet definitions from file. Syntax is similar to doxygen's +# config file, i.e. a series of assignments. You only have to provide +# replacements, missing definitions are set to their default value. + +RTF_STYLESHEET_FILE = + +# Set optional variables used in the generation of an rtf document. +# Syntax is similar to doxygen's config file. + +RTF_EXTENSIONS_FILE = + +#--------------------------------------------------------------------------- +# configuration options related to the man page output +#--------------------------------------------------------------------------- + +# If the GENERATE_MAN tag is set to YES (the default) Doxygen will +# generate man pages + +GENERATE_MAN = NO + +# The MAN_OUTPUT tag is used to specify where the man pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `man' will be used as the default path. 
+ +MAN_OUTPUT = man + +# The MAN_EXTENSION tag determines the extension that is added to +# the generated man pages (default is the subroutine's section .3) + +MAN_EXTENSION = .3 + +# If the MAN_LINKS tag is set to YES and Doxygen generates man output, +# then it will generate one additional man file for each entity +# documented in the real man page(s). These additional files +# only source the real man page, but without them the man command +# would be unable to find the correct page. The default is NO. + +MAN_LINKS = NO + +#--------------------------------------------------------------------------- +# configuration options related to the XML output +#--------------------------------------------------------------------------- + +# If the GENERATE_XML tag is set to YES Doxygen will +# generate an XML file that captures the structure of +# the code including all documentation. + +GENERATE_XML = NO + +# The XML_OUTPUT tag is used to specify where the XML pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `xml' will be used as the default path. + +XML_OUTPUT = xml + +# The XML_SCHEMA tag can be used to specify an XML schema, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_SCHEMA = + +# The XML_DTD tag can be used to specify an XML DTD, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_DTD = + +# If the XML_PROGRAMLISTING tag is set to YES Doxygen will +# dump the program listings (including syntax highlighting +# and cross-referencing information) to the XML output. Note that +# enabling this will significantly increase the size of the XML output. 
+ +XML_PROGRAMLISTING = YES + +#--------------------------------------------------------------------------- +# configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- + +# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will +# generate an AutoGen Definitions (see autogen.sf.net) file +# that captures the structure of the code including all +# documentation. Note that this feature is still experimental +# and incomplete at the moment. + +GENERATE_AUTOGEN_DEF = NO + +#--------------------------------------------------------------------------- +# configuration options related to the Perl module output +#--------------------------------------------------------------------------- + +# If the GENERATE_PERLMOD tag is set to YES Doxygen will +# generate a Perl module file that captures the structure of +# the code including all documentation. Note that this +# feature is still experimental and incomplete at the +# moment. + +GENERATE_PERLMOD = NO + +# If the PERLMOD_LATEX tag is set to YES Doxygen will generate +# the necessary Makefile rules, Perl scripts and LaTeX code to be able +# to generate PDF and DVI output from the Perl module output. + +PERLMOD_LATEX = NO + +# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be +# nicely formatted so it can be parsed by a human reader. This is useful +# if you want to understand what is going on. On the other hand, if this +# tag is set to NO the size of the Perl module output will be much smaller +# and Perl will parse it just the same. + +PERLMOD_PRETTY = YES + +# The names of the make variables in the generated doxyrules.make file +# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. +# This is useful so different doxyrules.make files included by the same +# Makefile don't overwrite each other's variables. 
+ +PERLMOD_MAKEVAR_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- + +# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will +# evaluate all C-preprocessor directives found in the sources and include +# files. + +ENABLE_PREPROCESSING = YES + +# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro +# names in the source code. If set to NO (the default) only conditional +# compilation will be performed. Macro expansion can be done in a controlled +# way by setting EXPAND_ONLY_PREDEF to YES. + +MACRO_EXPANSION = NO + +# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES +# then the macro expansion is limited to the macros specified with the +# PREDEFINED and EXPAND_AS_DEFINED tags. + +EXPAND_ONLY_PREDEF = NO + +# If the SEARCH_INCLUDES tag is set to YES (the default) the include files +# in the INCLUDE_PATH (see below) will be searched if a #include is found. + +SEARCH_INCLUDES = YES + +# The INCLUDE_PATH tag can be used to specify one or more directories that +# contain include files that are not input files but should be processed by +# the preprocessor. + +INCLUDE_PATH = + +# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard +# patterns (like *.h and *.hpp) to filter out the header-files in the +# directories. If left blank, the patterns specified with FILE_PATTERNS will +# be used. + +INCLUDE_FILE_PATTERNS = + +# The PREDEFINED tag can be used to specify one or more macro names that +# are defined before the preprocessor is started (similar to the -D option of +# gcc). The argument of the tag is a list of macros of the form: name +# or name=definition (no spaces). If the definition and the = are +# omitted =1 is assumed.
To prevent a macro definition from being +# undefined via #undef or recursively expanded use the := operator +# instead of the = operator. + +PREDEFINED = + +# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then +# this tag can be used to specify a list of macro names that should be expanded. +# The macro definition that is found in the sources will be used. +# Use the PREDEFINED tag if you want to use a different macro definition. + +EXPAND_AS_DEFINED = + +# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then +# doxygen's preprocessor will remove all function-like macros that are alone +# on a line, have an all uppercase name, and do not end with a semicolon. Such +# function macros are typically used for boiler-plate code, and will confuse +# the parser if not removed. + +SKIP_FUNCTION_MACROS = YES + +#--------------------------------------------------------------------------- +# Configuration::additions related to external references +#--------------------------------------------------------------------------- + +# The TAGFILES option can be used to specify one or more tagfiles. +# Optionally an initial location of the external documentation +# can be added for each tagfile. The format of a tag file without +# this location is as follows: +# TAGFILES = file1 file2 ... +# Adding location for the tag files is done as follows: +# TAGFILES = file1=loc1 "file2 = loc2" ... +# where "loc1" and "loc2" can be relative or absolute paths or +# URLs. If a location is present for each tag, the installdox tool +# does not have to be run to correct the links. +# Note that each tag file must have a unique name +# (where the name does NOT include the path) +# If a tag file is not located in the directory in which doxygen +# is run, you must also specify the path to the tagfile here. + +TAGFILES = + +# When a file name is specified after GENERATE_TAGFILE, doxygen will create +# a tag file that is based on the input files it reads. 
+ +GENERATE_TAGFILE = + +# If the ALLEXTERNALS tag is set to YES all external classes will be listed +# in the class index. If set to NO only the inherited external classes +# will be listed. + +ALLEXTERNALS = NO + +# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed +# in the modules index. If set to NO, only the current project's groups will +# be listed. + +EXTERNAL_GROUPS = YES + +# The PERL_PATH should be the absolute path and name of the perl script +# interpreter (i.e. the result of `which perl'). + +PERL_PATH = /usr/bin/perl + +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- + +# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will +# generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base +# or super classes. Setting the tag to NO turns the diagrams off. Note that +# this option is superseded by the HAVE_DOT option below. This is only a +# fallback. It is recommended to install and use dot, since it yields more +# powerful graphs. + +CLASS_DIAGRAMS = YES + +# If set to YES, the inheritance and collaboration graphs will hide +# inheritance and usage relations if the target is undocumented +# or is not a class. + +HIDE_UNDOC_RELATIONS = YES + +# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is +# available from the path. This tool is part of Graphviz, a graph visualization +# toolkit from AT&T and Lucent Bell Labs. The other options in this section +# have no effect if this option is set to NO (the default) + +HAVE_DOT = NO + +# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect inheritance relations. Setting this tag to YES will force the +# CLASS_DIAGRAMS tag to NO.
+ +CLASS_GRAPH = YES + +# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect implementation dependencies (inheritance, containment, and +# class references variables) of the class with other documented classes. + +COLLABORATION_GRAPH = YES + +# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for groups, showing the direct groups dependencies + +GROUP_GRAPHS = YES + +# If the UML_LOOK tag is set to YES doxygen will generate inheritance and +# collaboration diagrams in a style similar to the OMG's Unified Modeling +# Language. + +UML_LOOK = NO + +# If set to YES, the inheritance and collaboration graphs will show the +# relations between templates and their instances. + +TEMPLATE_RELATIONS = NO + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT +# tags are set to YES then doxygen will generate a graph for each documented +# file showing the direct and indirect include dependencies of the file with +# other documented files. + +INCLUDE_GRAPH = YES + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and +# HAVE_DOT tags are set to YES then doxygen will generate a graph for each +# documented header file showing the documented files that directly or +# indirectly include this file. + +INCLUDED_BY_GRAPH = YES + +# If the CALL_GRAPH and HAVE_DOT tags are set to YES then doxygen will +# generate a call dependency graph for every global function or class method. +# Note that enabling this option will significantly increase the time of a run. +# So in most cases it will be better to enable call graphs for selected +# functions only using the \callgraph command. + +CALL_GRAPH = NO + +# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then doxygen will +# generate a caller dependency graph for every global function or class method. 
+# Note that enabling this option will significantly increase the time of a run. +# So in most cases it will be better to enable caller graphs for selected +# functions only using the \callergraph command. + +CALLER_GRAPH = NO + +# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen +# will generate a graphical hierarchy of all classes instead of a textual one. + +GRAPHICAL_HIERARCHY = YES + +# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES +# then doxygen will show the dependencies a directory has on other directories +# in a graphical way. The dependency relations are determined by the #include +# relations between the files in the directories. + +DIRECTORY_GRAPH = YES + +# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images +# generated by dot. Possible values are png, jpg, or gif +# If left blank png will be used. + +DOT_IMAGE_FORMAT = png + +# The tag DOT_PATH can be used to specify the path where the dot tool can be +# found. If left blank, it is assumed the dot tool can be found in the path. + +DOT_PATH = + +# The DOTFILE_DIRS tag can be used to specify one or more directories that +# contain dot files that are included in the documentation (see the +# \dotfile command). + +DOTFILE_DIRS = + +# The MAX_DOT_GRAPH_WIDTH tag can be used to set the maximum allowed width +# (in pixels) of the graphs generated by dot. If a graph becomes larger than +# this value, doxygen will try to truncate the graph, so that it fits within +# the specified constraint. Beware that most browsers cannot cope with very +# large images. + +MAX_DOT_GRAPH_WIDTH = 1024 + +# The MAX_DOT_GRAPH_HEIGHT tag can be used to set the maximum allowed height +# (in pixels) of the graphs generated by dot. If a graph becomes larger than +# this value, doxygen will try to truncate the graph, so that it fits within +# the specified constraint. Beware that most browsers cannot cope with very +# large images.
+ +MAX_DOT_GRAPH_HEIGHT = 1024 + +# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the +# graphs generated by dot. A depth value of 3 means that only nodes reachable +# from the root by following a path via at most 3 edges will be shown. Nodes +# that lie further from the root node will be omitted. Note that setting this +# option to 1 or 2 may greatly reduce the computation time needed for large +# code bases. Also note that a graph may be further truncated if the graph's +# image dimensions are not sufficient to fit the graph (see MAX_DOT_GRAPH_WIDTH +# and MAX_DOT_GRAPH_HEIGHT). If 0 is used for the depth value (the default), +# the graph is not depth-constrained. + +MAX_DOT_GRAPH_DEPTH = 0 + +# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent +# background. This is disabled by default, which results in a white background. +# Warning: Depending on the platform used, enabling this option may lead to +# badly anti-aliased labels on the edges of a graph (i.e. they become hard to +# read). + +DOT_TRANSPARENT = NO + +# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output +# files in one run (i.e. multiple -o and -T options on the command line). This +# makes dot run faster, but since only newer versions of dot (>1.8.10) +# support this, this feature is disabled by default. + +DOT_MULTI_TARGETS = NO + +# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will +# generate a legend page explaining the meaning of the various boxes and +# arrows in the dot generated graphs. + +GENERATE_LEGEND = YES + +# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will +# remove the intermediate dot files that are used to generate +# the various graphs.
+ +DOT_CLEANUP = YES + +#--------------------------------------------------------------------------- +# Configuration::additions related to the search engine +#--------------------------------------------------------------------------- + +# The SEARCHENGINE tag specifies whether or not a search engine should be +# used. If set to NO the values of all tags below this one will be ignored. + +SEARCHENGINE = NO diff --git a/Makefile.am b/Makefile.am new file mode 100644 index 00000000..df3d07d9 --- /dev/null +++ b/Makefile.am @@ -0,0 +1,38 @@ +## +## Copyright (C) 2007 Lasse Collin +## +## This library is free software; you can redistribute it and/or +## modify it under the terms of the GNU Lesser General Public +## License as published by the Free Software Foundation; either +## version 2.1 of the License, or (at your option) any later version. +## +## This library is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## Lesser General Public License for more details. +## + +SUBDIRS = + +if COND_GNULIB +SUBDIRS += lib +endif + +SUBDIRS += src po tests + + +EXTRA_DIST = \ + m4 \ + extra \ + config.rpath \ + Doxyfile.in \ + FAQ \ + INSTALL.generic \ + COPYING.GPLv2 \ + COPYING.GPLv3 \ + COPYING.LGPLv2.1 \ + README-liblzma \ + README-lzma + +ACLOCAL_AMFLAGS = -I m4 +AUTOMAKE_OPTIONS = foreign @@ -0,0 +1,151 @@ + +LZMA Utils +---------- + +Warning + + This is an early alpha version. Don't trust the files produced by + this version of the software - not even if the software can + uncompress the files properly! This is because the file format + isn't completely frozen yet. + + So please test a lot, but don't use for anything serious yet. + + +Overview + + LZMA is a general-purpose compression algorithm designed by + Igor Pavlov as part of 7-Zip. It provides a high compression ratio + while keeping the decompression speed fast. 
+ + LZMA Utils are an attempt to make LZMA compression easy to use + on free (as in freedom) operating systems. This is achieved by + providing tools and libraries which are similar to use than the + equivalents of the most popular existing compression algorithms. + + LZMA Utils consist of a few relatively separate parts: + * liblzma is an encoder/decoder library with support for several + filters (algorithm implementations). The primary filter is LZMA. + * libzfile enables reading from and writing to gzip, bzip2 and + LZMA compressed and uncompressed files with an API similar to + the standard ANSI-C file I/O. + [ NOTE: libzfile is not implemented yet. ] + * lzma command line tool has almost identical syntax than gzip + and bzip2. It makes LZMA easy for average users, but also + provides advanced options to finetune the compression settings. + * A few shell scripts make diffing and grepping LZMA compressed + files easy. The scripts were adapted from gzip and bzip2. + + +Supported platforms + + LZMA Utils are developed on GNU+Linux, but they should work at + least on *BSDs and Solaris. They probably work on some other + POSIX-like operating systems too. + + If you use GCC to compile LZMA Utils, you need at least version + 3.x.x. GCC version 2.xx.x doesn't support some C99 features used + in LZMA Utils source code, thus GCC 2 won't compile LZMA Utils. + + If you have written patches to make LZMA Utils to work on previously + unsupported platform, please send the patches to me! I will consider + including them to the official version. It's nice to minimize the + need of third-party patching. + + One exception: Don't request or send patches to change the whole + source package to C89. I find C99 substantially nicer to write and + maintain. However, the public library headers must be in C89 to + avoid frustrating those who maintain programs, which are strictly + in C89 or C++. 
+ + +configure options + + If you are not familiar with `configure' scripts, read the file + INSTALL first. + + In most cases, the default --enable/--disable/--with/--without options + are what you want. Don't touch them if you are unsure. + + --disable-encoder + Do not compile the encoder component of liblzma. This + implies --disable-match-finders. If you need only + the decoder, you can decrease the library size + dramatically with this option. + + The default is to build the encoder. + + --disable-decoder + Do not compile the decoder component of liblzma. + + The default is to build the decoder. + + --enable-filters= + liblzma supports several filters. See liblzma-intro.txt + for a little more information about these. + + The default is to build all the filters. + + --enable-match-finders= + liblzma includes two categories of match finders: + hash chains and binary trees. Hash chains (hc3 and hc4) + are quite fast but they don't provide the best compression + ratio. Binary trees (bt2, bt3 and bt4) give excellent + compression ratio, but they are slower and need more + memory than hash chains. + + You need to enable at least one match finder to build the + LZMA filter encoder. Usually hash chains are used only in + the fast mode, while binary trees are used when the best + compression ratio is wanted. + + The default is to build all the match finders. + + --enable-checks= + liblzma supports multiple integrity checks. CRC32 is + mandatory, and cannot be omitted. See liblzma-intro.txt + for more information about usage of the integrity checks. + + --disable-assembler + liblzma includes some assembler optimizations. Currently + there is only assembler code for CRC32 and CRC64 for + 32-bit x86. + + All the assembler code in liblzma is position-independent + code, which is suitable for use in shared libraries and + position-independent executables. + + --enable-small + Omits precomputed tables. This makes liblzma a few KiB + smaller. 
Startup time increases, because the tables need + to be computed first. + + --enable-debug + This enables the assert() macro and possibly some other + run-time consistency checks. It slows down things somewhat, + so you normally don't want to have this enabled. + + --enable-werror + Turns all compiler warnings into errors that abort the + compilation. This may help catching bugs, and should work + on most systems. This has no effect on the resulting + binaries. + + +Static vs. dynamic linking of the command line tools + + By default, the command line tools are linked statically against + liblzma. There are a few reasons: + + - The executable(s) can be in /bin while the shared liblzma can still + be in /usr/lib (if the distro uses such file system hierarchy). + + - It's easier to copy the executables to other systems, since they + depend only on libc. + + - It's slightly faster on some architectures like x86. + + If you don't like this, you can get the command line tools linked + against the shared liblzma by specifying --disable-static to configure. + This disables building static liblzma completely. + @@ -0,0 +1,23 @@ + +Thanks +------ + +Some people have helped more, some less, some don't even know they have +been helpful, but nevertheless everyone's help has been important. :-) +In alphabetical order: + - Mark Adler + - Anders F. Björklund + - İsmail Dönmez + - Jean-loup Gailly + - Per Øyvind Karlsen + - Ville Koskinen + - Jim Meyering + - Igor Pavlov + - Mikko Pouru + - Alexandre Sauvé + - Julian Seward + - Mohammed Adnène Trojette + +Also thanks to all the people who have participated in the Tukaani project +and others who I have forgotten. + @@ -0,0 +1,109 @@ + +LZMA Utils TODO List +-------------------- + +Major missing features + + Memory limits in the command line tool apply only to compression. + + Threading support in the lzma command line tool is still primitive. + It cannot split a file in pieces yet. 
+ + The --list mode isn't implemented in the command line tool. + + Handling of Multi-Block Stream information should be separated + from Stream encoder and decoder. Those would be useful to implement + multi-threaded coding in applications. + + Buffer to buffer coding is not implemented in liblzma. Probably + a naive version should be written first, which would simply wrap + things around lzma_stream. Later, there should be separate buffer + coding functions, that are slightly faster (less memcpy()) and + have smaller memory usage than the functions using lzma_stream. + + libzfile is not implemented. + + LZMA filter doesn't support predefined history buffer. + + +Security + + Search for bugs, especially security related issues. Security is + important in every piece of code in LZMA Utils, but it is extremely + important in the decoder part of liblzma. + + Subblock: If there is LZMA as a Subfilter but without EOPM, can it + trigger infinite loop when Subblock's "Unset Subfilter" flag is hit? + + Similarly, can LZ decoder get stuck in infinite loop if the next + filter in the chain returns LZMA_STREAM_END but the decoded data + doesn't allow finishing the LZ decoding? + + +Reliability + + Create a test suite to be run with "make check". + + Should we use setrlimit() and getrlimit() for memory usage limiting? + + +Performance + + Benchmark the CRC code on non-x86 CPUs. Won't have huge effect on + overall speed, but it would still be nice to know what algorithm + variant is the best on different CPUs. + + +Third party support + + Add support for LZMA to various applications. This naturally requires + cooperating with the authors of the specific applications. + * GNU grep and GNU diffutils: BSD grep already uses zlib directly + instead of ugly shell scripts. It would be nice to get similar + feature into relevant GNU tools. With libzfile, multiple + compression formats would be easy to support. 
+ * kioslave for KDE + * Magic for the `file' command + * GNU Midnight Commander + * GNU Texinfo + * The `man' command + * Package managers + + Test the patches already written. The patches should be sent to + upstream developers _once_ LZMA Utils APIs are stable enough (so + people don't need to fix those patches all the time). + + Mandriva has quite a few patches. Some of them are OK, some need + adapting for new LZMA Utils. + + +Documentation + + Revise the man page of lzma command line tool. + + If the Doxygen docs aren't enough, write good Texinfo manual for + liblzma. It's been a long time since I've even tried to build the Doxygen + docs, so they may look quite bad at the moment. + + Document LZMA as an algorithm. It would be great to have detailed + description of the algorithm in English. Many people think that + reading the source code is not the optimal way to learn how LZMA + works. + + +Other + + Some things return LZMA_PROG_ERROR with invalid options, some + LZMA_HEADER_ERROR. These must be checked carefully and made so + that LZMA_HEADER_ERROR is used only when the given option could + make sense in a future version of liblzma. + + lzma_restrict vs. restrict + + Usage of LZMA_RUN vs. LZMA_FINISH with Metadata coders. + + Port the Deflate implementation from 7-Zip into liblzma. 7-Zip's + Deflate compresses better than zlib, gzip or Info-ZIP. I don't + know if Deflate will be included in .lzma format (probably not), + but it's still useful once we also add support for .gz file format. + diff --git a/autogen.sh b/autogen.sh new file mode 100755 index 00000000..beddf735 --- /dev/null +++ b/autogen.sh @@ -0,0 +1,38 @@ +#!/bin/sh + +set -e -x + +# autopoint copies all kinds of crap even though we have told in +# configure.ac that we don't want the intl directory. It is able +# to omit the intl directory but still copies the m4 files needed +# only by the stuff in the non-existing intl directory. 
+autopoint -f +rm -f \ + codeset.m4 \ + glibc2.m4 \ + glibc21.m4 \ + intdiv0.m4 \ + intl.m4 \ + intldir.m4 \ + intmax.m4 \ + inttypes-pri.m4 \ + inttypes_h.m4 \ + lcmessage.m4 \ + lock.m4 \ + longdouble.m4 \ + longlong.m4 \ + printf-posix.m4 \ + size_max.m4 \ + stdint_h.m4 \ + uintmax_t.m4 \ + ulonglong.m4 \ + visibility.m4 \ + wchar_t.m4 \ + wint_t.m4 \ + xsize.m4 + +aclocal -I m4 +libtoolize -c -f || glibtoolize -c -f +autoconf +autoheader +automake -acf --foreign diff --git a/configure.ac b/configure.ac new file mode 100644 index 00000000..3f9ad531 --- /dev/null +++ b/configure.ac @@ -0,0 +1,611 @@ +# -*- Autoconf -*- +# Process this file with autoconf to produce a configure script. + +############################################################################### +# +# Copyright (C) 2007 Lasse Collin +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +############################################################################### + +# NOTE: Don't add useless checks. autoscan detects this and that, but don't +# let it confuse you. For example, we don't care about checking for behavior +# of malloc(), stat(), or lstat(), since we don't use those functions in +# a way that would cause the problems the autoconf macros check. + +AC_PREREQ(2.61) + +# [LZMA] instead of [LZMA utils] since I prefer to have lzma-version.tar.gz +# instead of lzma-utils-version.tar.gz. 
+AC_INIT([LZMA], [4.42.2alpha], [lasse.collin@tukaani.org]) + +AC_CONFIG_SRCDIR([src/liblzma/common/common.h]) +AC_CONFIG_HEADER([config.h]) + +echo +echo "LZMA Utils $PACKAGE_VERSION" + +echo +echo "System type:" +# This is needed to know if assembler optimizations can be used. +AC_CANONICAL_HOST + +echo +echo "Configure options:" + +# Enable/disable debugging code: +AC_MSG_CHECKING([if debugging code should be compiled]) +AC_ARG_ENABLE(debug, AC_HELP_STRING([--enable-debug], [Enable debugging code.]), + [], enable_debug=no) +if test "x$enable_debug" = xyes; then + CFLAGS="-g $CFLAGS" + AC_MSG_RESULT([yes]) +else + AC_DEFINE(NDEBUG, 1, [Define to disable debugging code.]) + AC_MSG_RESULT([no]) +fi + +# Enable/disable the encoder components: +AC_MSG_CHECKING([if encoder components should be built]) +AC_ARG_ENABLE(encoder, AC_HELP_STRING([--disable-encoder], + [Do not build the encoder components.]), + [], enable_encoder=yes) +if test "x$enable_encoder" = xyes; then + AC_DEFINE([HAVE_ENCODER], 1, + [Define to 1 if encoder components are enabled.]) + AC_MSG_RESULT([yes]) +else + AC_MSG_RESULT([no]) +fi +AM_CONDITIONAL(COND_MAIN_ENCODER, test "x$enable_encoder" = xyes) + +# Enable/disable the decoder components: +AC_MSG_CHECKING([if decoder components should be built]) +AC_ARG_ENABLE(decoder, AC_HELP_STRING([--disable-decoder], + [Do not build the decoder components.]), + [], enable_decoder=yes) +if test "x$enable_decoder" = xyes; then + AC_DEFINE([HAVE_DECODER], 1, + [Define to 1 if decoder components are enabled.]) + AC_MSG_RESULT([yes]) +else + AC_MSG_RESULT([no]) + if test "x$enable_encoder" = xno; then + AC_MSG_ERROR([Do not disable both encoder and decoder.]) + fi +fi +AM_CONDITIONAL(COND_MAIN_DECODER, test "x$enable_decoder" = xyes) + +# Filters +AC_MSG_CHECKING([which filters to build]) +AC_ARG_ENABLE(filters, AC_HELP_STRING([--enable-filters=], + [Comma-separated list of filters to build. Default=all. + Filters used in encoding are needed also in decoding. 
+ Available filters: copy subblock x86 powerpc ia64 + arm armthumb sparc delta lzma]), + [], [enable_filters=copy,subblock,x86,powerpc,ia64,arm,armthumb,sparc,delta,lzma]) +enable_filters=`echo "$enable_filters" | sed 's/,/ /g'` +enable_filters_copy=no +enable_filters_subblock=no +enable_filters_x86=no +enable_filters_powerpc=no +enable_filters_ia64=no +enable_filters_arm=no +enable_filters_armthumb=no +enable_filters_sparc=no +enable_filters_delta=no +enable_filters_lzma=no +enable_simple_filters=no +if test "x$enable_filters" = xno || test "x$enable_filters" = x; then + AC_MSG_RESULT([]) + AC_MSG_ERROR([Please enable at least one filter.]) +else + for arg in $enable_filters + do + case $arg in + copy) + enable_filters_copy=yes + AC_DEFINE([HAVE_FILTER_COPY], 1, + [Define to 1 if support for the + Copy filter is enabled.]) + ;; + subblock) + enable_filters_subblock=yes + AC_DEFINE([HAVE_FILTER_SUBBLOCK], 1, + [Define to 1 if support for the + Subblock filter is enabled.]) + ;; + x86) + enable_filters_x86=yes + enable_simple_filters=yes + AC_DEFINE([HAVE_FILTER_X86], 1, + [Define to 1 if support for the + x86 (BCJ) filter is enabled.]) + ;; + powerpc) + enable_filters_powerpc=yes + enable_simple_filters=yes + AC_DEFINE([HAVE_FILTER_POWERPC], 1, + [Define to 1 if support for the + PowerPC filter is enabled.]) + ;; + ia64) + enable_filters_ia64=yes + enable_simple_filters=yes + AC_DEFINE([HAVE_FILTER_IA64], 1, + [Define to 1 if support for the + IA64 filter is enabled.]) + ;; + arm) + enable_filters_arm=yes + enable_simple_filters=yes + AC_DEFINE([HAVE_FILTER_ARM], 1, + [Define to 1 if support for the + ARM filter is enabled.]) + ;; + armthumb) + enable_filters_armthumb=yes + enable_simple_filters=yes + AC_DEFINE([HAVE_FILTER_ARMTHUMB], 1, + [Define to 1 if support for the + ARMThumb filter is enabled.]) + ;; + sparc) + enable_filters_sparc=yes + enable_simple_filters=yes + AC_DEFINE([HAVE_FILTER_SPARC], 1, + [Define to 1 if support for the + SPARC filter is 
enabled.]) + ;; + delta) + enable_filters_delta=yes + AC_DEFINE([HAVE_FILTER_DELTA], 1, + [Define to 1 if support for the + Delta filter is enabled.]) + ;; + lzma) + enable_filters_lzma=yes + AC_DEFINE([HAVE_FILTER_LZMA], 1, + [Define to 1 if support for the + LZMA filter is enabled.]) + ;; + *) + AC_MSG_RESULT([]) + AC_MSG_ERROR([unknown filter: $arg]) + ;; + esac + done + AC_MSG_RESULT([$enable_filters]) +fi +if test "x$enable_simple_filters" = xyes ; then + AC_DEFINE([HAVE_FILTER_SIMPLE], 1, [Define to 1 if support for any + of the so called simple filters is enabled.]) +fi +AM_CONDITIONAL(COND_FILTER_COPY, test "x$enable_filters_copy" = xyes) +AM_CONDITIONAL(COND_FILTER_SUBBLOCK, test "x$enable_filters_subblock" = xyes) +AM_CONDITIONAL(COND_FILTER_X86, test "x$enable_filters_x86" = xyes) +AM_CONDITIONAL(COND_FILTER_POWERPC, test "x$enable_filters_powerpc" = xyes) +AM_CONDITIONAL(COND_FILTER_IA64, test "x$enable_filters_ia64" = xyes) +AM_CONDITIONAL(COND_FILTER_ARM, test "x$enable_filters_arm" = xyes) +AM_CONDITIONAL(COND_FILTER_ARMTHUMB, test "x$enable_filters_armthumb" = xyes) +AM_CONDITIONAL(COND_FILTER_SPARC, test "x$enable_filters_sparc" = xyes) +AM_CONDITIONAL(COND_FILTER_DELTA, test "x$enable_filters_delta" = xyes) +AM_CONDITIONAL(COND_FILTER_LZMA, test "x$enable_filters_lzma" = xyes) +AM_CONDITIONAL(COND_MAIN_SIMPLE, test "x$enable_simple_filters" = xyes) + +# Which match finders should be enabled: +AC_MSG_CHECKING([which match finders to build]) +AC_ARG_ENABLE(match-finders, AC_HELP_STRING([--enable-match-finders=], + [Comma-separated list of match finders to build. Default=all. + At least one match finder is required for encoding with + the LZMA filter. 
+ Available match finders: hc3 hc4 bt2 bt3 bt4]), [], + [enable_match_finders=hc3,hc4,bt2,bt3,bt4]) +enable_match_finders=`echo "$enable_match_finders" | sed 's/,/ /g'` +enable_match_finders_hc3=no +enable_match_finders_hc4=no +enable_match_finders_bt2=no +enable_match_finders_bt3=no +enable_match_finders_bt4=no +if test "x$enable_encoder" = xyes && test "x$enable_filters_lzma" = xyes ; then + for arg in $enable_match_finders + do + case $arg in + hc3) enable_match_finders_hc3=yes ;; + hc4) enable_match_finders_hc4=yes ;; + bt2) enable_match_finders_bt2=yes ;; + bt3) enable_match_finders_bt3=yes ;; + bt4) enable_match_finders_bt4=yes ;; + *) + AC_MSG_RESULT([]) + AC_MSG_ERROR([unknown match finder: $arg]) + ;; + esac + done + AC_MSG_RESULT([$enable_match_finders]) +else + AC_MSG_RESULT([(none because not building the LZMA encoder)]) +fi +AM_CONDITIONAL(COND_MF_HC3, test "x$enable_match_finders_hc3" = xyes) +AM_CONDITIONAL(COND_MF_HC4, test "x$enable_match_finders_hc4" = xyes) +AM_CONDITIONAL(COND_MF_BT2, test "x$enable_match_finders_bt2" = xyes) +AM_CONDITIONAL(COND_MF_BT3, test "x$enable_match_finders_bt3" = xyes) +AM_CONDITIONAL(COND_MF_BT4, test "x$enable_match_finders_bt4" = xyes) + +# Which integrity checks to build +AC_MSG_CHECKING([which integrity checks to build]) +AC_ARG_ENABLE(checks, AC_HELP_STRING([--enable-checks=], + [Comma-separated list of integrity checks to build. + Default=all. 
Available integrity checks: crc32 crc64 sha256]), + [], [enable_checks=crc32,crc64,sha256]) +enable_checks=`echo "$enable_checks" | sed 's/,/ /g'` +enable_checks_crc32=no +enable_checks_crc64=no +enable_checks_sha256=no +if test "x$enable_checks" = xno || test "x$enable_checks" = x; then + AC_MSG_RESULT([(none)]) +else + for arg in $enable_checks + do + case $arg in + crc32) + enable_checks_crc32=yes + AC_DEFINE([HAVE_CHECK_CRC32], 1, + [Define to 1 if CRC32 support + is enabled.]) + ;; + crc64) + enable_checks_crc64=yes + AC_DEFINE([HAVE_CHECK_CRC64], 1, + [Define to 1 if CRC64 support + is enabled.]) + ;; + sha256) + enable_checks_sha256=yes + AC_DEFINE([HAVE_CHECK_SHA256], 1, + [Define to 1 if SHA256 support + is enabled.]) + ;; + *) + AC_MSG_RESULT([]) + AC_MSG_ERROR([unknown integrity check: $arg]) + ;; + esac + done + AC_MSG_RESULT([$enable_checks]) +fi +if test "x$enable_checks_crc32" = xno ; then + AC_MSG_ERROR([For now, the CRC32 check must always be enabled.]) +fi +AM_CONDITIONAL(COND_CHECK_CRC32, test "x$enable_checks_crc32" = xyes) +AM_CONDITIONAL(COND_CHECK_CRC64, test "x$enable_checks_crc64" = xyes) +AM_CONDITIONAL(COND_CHECK_SHA256, test "x$enable_checks_sha256" = xyes) + +# Assembler optimizations +AC_MSG_CHECKING([if assembler optimizations should be used]) +AC_ARG_ENABLE(assembler, AC_HELP_STRING([--disable-assembler], + [Do not use assembler optimizations even if such exist + for the architecture.]), + [], [enable_assembler=yes]) +if test "x$enable_assembler" = xyes; then + case $host_cpu in + i?86) enable_assembler=x86 ;; + *) enable_assembler=no ;; + esac +fi +case $enable_assembler in + x86|no) ;; + *) + AC_MSG_RESULT([]) + AC_MSG_ERROR([--enable-assembler accepts only \`yes', \`no', or \`x86'.]) + ;; +esac +AC_MSG_RESULT([$enable_assembler]) +AM_CONDITIONAL(COND_ASM_X86, test "x$enable_assembler" = xx86) + +# Size optimization +AC_MSG_CHECKING([if small size is preferred over speed]) +AC_ARG_ENABLE(small, AC_HELP_STRING([--enable-small], + 
[Omit precomputed tables to make liblzma a few kilobytes + smaller. This will increase startup time of applications + slightly, because the tables need to be computed first.]), + [], [enable_small=no]) +if test "x$enable_small" = xyes; then + AC_DEFINE([HAVE_SMALL], 1, [Define to 1 if optimizing for size.]) +elif test "x$enable_small" != xno; then + AC_MSG_RESULT([]) + AC_MSG_ERROR([--enable-small accepts only \`yes' or \`no']) +fi +AC_MSG_RESULT([$enable_small]) +AM_CONDITIONAL(COND_SMALL, test "x$enable_small" = xyes) + +echo +echo "Initializing Automake:" + +# There's no C++ or Fortran in LZMA Utils: +CXX=no +F77=no + +AM_INIT_AUTOMAKE + +AC_USE_SYSTEM_EXTENSIONS + +############################################################################### +# Checks for programs. +############################################################################### + +AM_PROG_CC_C_O +AM_PROG_AS +AC_PROG_LN_S + +echo +echo "Threading support:" +ACX_PTHREAD +CC="$PTHREAD_CC" + +echo +echo "Initializing Libtool:" +AC_PROG_LIBTOOL + + +############################################################################### +# Checks for libraries. +############################################################################### + +echo +echo "Initializing gettext:" +AM_GNU_GETTEXT_VERSION([0.16.1]) +AM_GNU_GETTEXT([external]) + +############################################################################### +# Checks for header files. +############################################################################### + +echo +echo "System headers and functions:" + +# There is currently no workarounds in this package if some of +# these headers are missing. 
+AC_CHECK_HEADERS([fcntl.h limits.h sys/time.h], + [], + [AC_MSG_ERROR([Required header file(s) are missing.])]) + +# If any of these headers are missing, things should still work correctly: +AC_CHECK_HEADERS([assert.h errno.h byteswap.h sys/param.h sys/sysctl.h], + [], [], [ +#ifdef HAVE_SYS_PARAM_H +# include <sys/param.h> +#endif +]) + + +############################################################################### +# Checks for typedefs, structures, and compiler characteristics. +############################################################################### + +AC_HEADER_STDBOOL +AC_C_INLINE +AC_C_RESTRICT + +# The command line tool can copy high resolution timestamps if such +# information is available in struct stat. Otherwise one second accuracy +# is used. Most systems seem to have st_xtim but BSDs have st_xtimespec. +AC_CHECK_MEMBERS([struct stat.st_atim.tv_nsec, struct stat.st_mtim.tv_nsec, + struct stat.st_atimespec.tv_nsec, struct stat.st_mtimespec.tv_nsec]) + +# It is very unlikely that you want to build liblzma without +# large file support. +AC_SYS_LARGEFILE + +# At the moment, the only endian-dependent part should be the integrity checks. +AC_C_BIGENDIAN + + +############################################################################### +# Checks for library functions. +############################################################################### + +# Gnulib replacements as needed +gl_GETOPT + +# Functions that are not mandatory i.e. we have alternatives for them +# or we can just drop some functionality: +AC_CHECK_FUNCS([memcpy memmove memset futimes futimesat]) + +# Check how to find out the amount of physical memory in the system. The +# lzma command line tool uses this to automatically limit its memory usage. +# - sysconf() gives all the needed info on GNU+Linux and Solaris. +# - BSDs use sysctl(). 
+AC_MSG_CHECKING([how to detect the amount of physical memory]) +AC_COMPILE_IFELSE([ +#include <unistd.h> +int +main() +{ + long i; + i = sysconf(_SC_PAGESIZE); + i = sysconf(_SC_PHYS_PAGES); + return 0; +} +], [ + AC_DEFINE([HAVE_PHYSMEM_SYSCONF], 1, + [Define to 1 if the amount of physical memory can be detected + with sysconf(_SC_PAGESIZE) and sysconf(_SC_PHYS_PAGES).]) + AC_MSG_RESULT([sysconf]) +], [ +AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ +#include <sys/types.h> +#ifdef HAVE_SYS_PARAM_H +# include <sys/param.h> +#endif +#include <sys/sysctl.h> +int +main() +{ + int name[2] = { CTL_HW, HW_PHYSMEM }; + unsigned long mem; + size_t mem_ptr_size = sizeof(mem); + sysctl(name, 2, &mem, &mem_ptr_size, NULL, NULL); + return 0; +} +]])], [ + AC_DEFINE([HAVE_PHYSMEM_SYSCTL], 1, + [Define to 1 if the amount of physical memory can be detected + with sysctl().]) + AC_MSG_RESULT([sysctl]) +], [ + AC_MSG_RESULT([unknown]) +])]) + +# Check how to find out the number of available CPU cores in the system. +# sysconf(_SC_NPROCESSORS_ONLN) works on most systems, except that BSDs +# use sysctl(). 
+AC_MSG_CHECKING([how to detect the number of available CPU cores]) +AC_COMPILE_IFELSE([ +#include <unistd.h> +int +main() +{ + long i; + i = sysconf(_SC_NPROCESSORS_ONLN); + return 0; +} +], [ + AC_DEFINE([HAVE_NCPU_SYSCONF], 1, + [Define to 1 if the number of available CPU cores can be + detected with sysconf(_SC_NPROCESSORS_ONLN).]) + AC_MSG_RESULT([sysconf]) +], [ +AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ +#include <sys/types.h> +#ifdef HAVE_SYS_PARAM_H +# include <sys/param.h> +#endif +#include <sys/sysctl.h> +int +main() +{ + int name[2] = { CTL_HW, HW_NCPU }; + int cpus; + size_t cpus_size = sizeof(cpus); + sysctl(name, 2, &cpus, &cpus_size, NULL, NULL); + return 0; +} +]])], [ + AC_DEFINE([HAVE_NCPU_SYSCTL], 1, + [Define to 1 if the number of available CPU cores can be + detected with sysctl().]) + AC_MSG_RESULT([sysctl]) +], [ + AC_MSG_RESULT([unknown]) +])]) + + +############################################################################### +# If using GCC, set some additional CFLAGS: +############################################################################### + +Wno_uninitialized=no + +if test -n "$GCC" ; then + echo + echo "GCC extensions:" + gl_VISIBILITY + if test -n "$CFLAG_VISIBILITY" ; then + CFLAGS="$CFLAG_VISIBILITY $CFLAGS" + fi + + # -Wno-uninitialized is needed with -Werror with SHA256 code + # to omit a bogus warning. + AC_MSG_CHECKING([if $CC accepts -Wno-uninitialized]) + OLD_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -Wno-uninitialized" + AC_COMPILE_IFELSE([void foo(void) { }], [Wno_uninitialized=yes]) + CFLAGS="$OLD_CFLAGS" + AC_MSG_RESULT([$Wno_uninitialized]) + + # Enable as much warnings as possible. These commented warnings won't + # work for LZMA Utils though: + # * -Wunreachable-code breaks several assert(0) cases, which are + # backed up with "return LZMA_PROG_ERROR". + # * -Wcast-qual would break various things where we need a non-const + # pointer although we don't modify anything through it. 
+ # * -Wcast-align breaks optimized CRC32 and CRC64 implementation + # on some architectures (not on x86), where this warning is bogus, + # because we take care of correct alignment. + for NEW_FLAG in -Wextra -Wformat=2 -Winit-self -Wstrict-aliasing=2 \ + -Wfloat-equal -Wshadow -Wunsafe-loop-optimizations \ + -Wpointer-arith -Wbad-function-cast -Wwrite-strings \ + -Waggregate-return -Wstrict-prototypes \ + -Wold-style-definition -Wmissing-prototypes \ + -Wmissing-declarations -Wmissing-noreturn \ + -Wredundant-decls -Winline -Wdisabled-optimization + do + AC_MSG_CHECKING([if $CC accepts $NEW_FLAG]) + OLD_CFLAGS="$CFLAGS" + CFLAGS="$NEW_FLAG $CFLAGS" + AC_COMPILE_IFELSE([void foo(void) { }], [ + AC_MSG_RESULT([yes]) + ], [ + CFLAGS="$OLD_CFLAGS" + AC_MSG_RESULT([no]) + ]) + done + + AC_ARG_ENABLE([werror], + AC_HELP_STRING([--enable-werror], [Enable -Werror to abort + compilation on all compiler warnings.]), + [], [enable_werror=no]) + if test "x$enable_werror" = "xyes"; then + CFLAGS="-Werror $CFLAGS" + fi + + # IIRC these work with all GCC versions that support -std=c99: + CFLAGS="-std=c99 -pedantic -Wall $CFLAGS" +fi + +AM_CONDITIONAL([COND_WNO_UNINITIALIZED], test "x$Wno_uninitialized" = "xyes") + + +############################################################################### +# Create the makefiles and config.h +############################################################################### + +echo + +# Don't build the lib directory at all if we don't need any replacement +# functions. 
+AM_CONDITIONAL([COND_GNULIB], test -n "$LIBOBJS") + +AC_CONFIG_FILES([ + Doxyfile + Makefile + po/Makefile.in + lib/Makefile + src/Makefile + src/liblzma/lzma.pc + src/liblzma/Makefile + src/liblzma/api/Makefile + src/liblzma/common/Makefile + src/liblzma/check/Makefile + src/liblzma/lz/Makefile + src/liblzma/lzma/Makefile + src/liblzma/simple/Makefile + src/liblzma/subblock/Makefile + src/liblzma/rangecoder/Makefile + src/lzma/Makefile + src/lzmadec/Makefile + src/scripts/Makefile + tests/Makefile +]) + +AC_OUTPUT diff --git a/doc/bugs.txt b/doc/bugs.txt new file mode 100644 index 00000000..55579343 --- /dev/null +++ b/doc/bugs.txt @@ -0,0 +1,46 @@ + +Reporting bugs +-------------- + + Naturally it is easiest for me if you already know what causes the + unexpected behavior. Even better if you have a patch to propose. + However, quite often the reason for unexpected behavior is unknown, + so below are a few things to do before sending a bug report. + + In case of a crash (usually segmentation violation): + + 1. Try to create a small example of how to reproduce the issue. + + 2. If you are writing an application using liblzma or libzfile, + double check that you are using the libraries correctly (for + example, that you didn't forget to call lzma_init()). If it is + the command line tool included in LZMA Utils that is crashing, + ignore this step. + + 3. Compile LZMA Utils with debugging code using configure switch + `--enable-debug'. If you are using GCC as the compiler, use + CFLAGS='-O0 -ggdb'. Don't strip the resulting binaries. + + 4. Turn on core dumps. The exact command depends on your shell; + for example in GNU bash it is done with `ulimit -c unlimited', + and in tcsh with `limit coredumpsize unlimited'. + + 5. Try to reproduce the suspected bug. If you get `assertion failed' + message, be sure to include the complete message in your bug + report. 
If the application leaves a coredump, get a backtrace + using gdb: + $ gdb /path/to/app-binary # Loads the app to the debugger. + (gdb) core core # Opens the coredump. + (gdb) bt # Prints the backtrace. Copy & paste to bug report. + (gdb) quit # Quits gdb. + + Send your bug report to Lasse Collin <lasse.collin@tukaani.org>. Don't + send the core dump file or the actual executables. If you have a small + example file(s) (total size less than 100 KiB), please include it/them + as an attachment. + + Do NOT complain about problems with LZMA Utils to Igor Pavlov. + Although the code of LZMA Utils is derived from his code, there are + a lot of changes, which may have introduced bugs not present in + the original version. + diff --git a/doc/faq.txt b/doc/faq.txt new file mode 100644 index 00000000..d01cf91b --- /dev/null +++ b/doc/faq.txt @@ -0,0 +1,247 @@ + +LZMA Utils FAQ +-------------- + + Copyright (C) 2007 Lasse Collin + + Copying and distribution of this file, with or without modification, + are permitted in any medium without royalty provided the copyright + notice and this notice are preserved. + + +Q: What are LZMA, LZMA Utils, lzma, .lzma, liblzma, LZMA SDK, LZMA_Alone, + 7-Zip and p7zip? + +A: LZMA stands for Lempel-Ziv-Markov chain-Algorithm. LZMA is the name + of the compression algorithm designed by Igor Pavlov. He is the author + of 7-Zip, which is a great LGPL'd compression tool for Microsoft + Windows operating systems. In addition to 7-Zip itself, also LZMA SDK + is available on the website of 7-Zip. LZMA SDK contains LZMA + implementations in C++, Java and C#. The C++ version is the original + implementation which is used also in 7-Zip itself. + + Excluding the unrar plugin, 7-Zip is free software (free as in + freedom). Thanks to this, it was possible to port it to POSIX + platforms. The port was done and is maintained by myspace (TODO: + myspace's real name?). p7zip is a port of 7-Zip's command line version; + p7zip doesn't include the 7-Zip's GUI. 
+ + In the POSIX world, users are used to gzip and bzip2 command line tools. + Developers know APIs of zlib and libbzip2. LZMA Utils try to ease + adoption of LZMA on free operating systems by providing a compression + library and a set of command line tools. The library is called liblzma. + It provides a zlib-like API making it easy to adopt LZMA compression in + existing applications. The main command line tool is known as lzma, + whose command line syntax is very similar to that of gzip and bzip2. + + The original command line tool from LZMA SDK (lzma.exe) was found in + a directory called LZMA_Alone in the LZMA SDK. It used a simple header + format in .lzma files. This format was also used by LZMA Utils up to + and including 4.32.x. In LZMA Utils documentation, LZMA_Alone refers + to both the file format and the command line tool from LZMA SDK. + + Because of various limitations of the LZMA_Alone file format, a new + file format was developed. Extending some existing format such as .gz + used by gzip was considered, but these formats were found to be too + limited. The filename suffix for the new .lzma format is `.lzma'. The + same suffix is also used for files in the LZMA_Alone format. To make + the transition to the new format as transparent as possible, LZMA Utils + support both the new and old formats transparently. + + 7-Zip and LZMA SDK: <http://7-zip.org/> + p7zip: <http://p7zip.sourceforge.net/> + LZMA Utils: <http://tukaani.org/lzma/> + + +Q: What LZMA implementations are available? + +A: LZMA SDK contains implementations in C++, Java and C#. The C++ version + is the original implementation which is part of 7-Zip. LZMA SDK + also contains a small LZMA decoder in C. + + A port of LZMA SDK to Pascal was made by Alan Birtles + <http://www.birtles.org.uk/programming/>. It should work with + multiple Pascal programming language implementations. + + LZMA Utils includes liblzma, which is directly based on LZMA SDK.
+ liblzma is written in C (C99, not C89). In contrast to C++ callback + API used by LZMA SDK, liblzma uses zlib-like stateful C API. I do not + want to comment whether both/former/latter/neither API(s) are good or + bad. The only reason to implement a zlib-like API was, that many + developers are already familiar with zlib, and very many applications + already use zlib. Having a similar API makes it easier to include LZMA + support in existing applications. + + See also <http://en.wikipedia.org/wiki/LZMA#External_links>. + + +Q: Which file formats are supported by LZMA Utils? + +A: Even when the raw LZMA stream is always the same, it can be wrapped + in different container formats. The preferred format is the new .lzma + format. It has magic bytes (the first six bytes: 0xFF 'L' 'Z' 'M' + 'A' 0x00). The format supports chaining up to seven filters, + splitting data into multiple blocks for easier multi-threading and rough + random-access reading. The file integrity is verified using CRC32, + CRC64, or SHA256, and by verifying the uncompressed size of the file. + + LZMA SDK includes a tool called LZMA_Alone. It uses a + primitive header which includes only the mandatory stream information + required by the LZMA decoder. This format can be both read and + written by liblzma and the command line tool (use --format=alone to + create such files). + + .7z is the native archive format used by 7-Zip. This format is not + supported by liblzma, and probably will never be supported. You + should use e.g. p7zip to extract .7z files. + + It is possible to implement custom file formats by using raw filter + mode in liblzma. In this mode the application needs to store the filter + properties and provide them to liblzma before starting to uncompress + the data. + + +Q: How can I identify files containing LZMA compressed data? + +A: The preferred filename suffix for .lzma files is `.lzma'. `.tar.lzma' + may be abbreviated to `.tlz'.
The same suffixes are used for files in + LZMA_Alone format. In practice this should be no problem since tools + included in LZMA Utils support both formats transparently. + + Checking the magic bytes is an easy way to detect files in the new .lzma + format (the first six bytes: 0xFF 'L' 'Z' 'M' 'A' 0x00). The "file" + command version FIXME contains magic strings for this format. + + The old LZMA_Alone format has no magic bytes. Its header cannot contain + arbitrary bytes, thus it is possible to make a guess. Unfortunately the + guessing is usually too hard to be reliable, so don't try it unless you + are desperate. + + +Q: Does the lzma command line tool support sparse files? + +A: Sparse files can (of course) be compressed like normal files, but + uncompression will not restore sparseness of the file. Use an archiver + tool to take care of sparseness before compressing the data with lzma. + + The reason for this is that archiver tools handle files, while + compression tools handle streams or buffers. Being a sparse file is + a property of the file on the disk, not a property of the stream or + buffer. + + +Q: Can I recover parts of a broken LZMA file (e.g. corrupted CD-R)? + +A: With LZMA_Alone and single-block .lzma files, you can uncompress the + file until you hit the first broken byte. The data after the broken + position is lost. LZMA relies on the uncompression history, and if + bytes are missing in the middle of the file, it is impossible to + reliably continue after the broken section. + + With multi-block .lzma files it may be possible to locate the next + block in the file and continue decoding there. A limited recovery + tool for this kind of situation is planned. + + +Q: Is LZMA patented? + +A: No, the authors are not aware of any patents that could affect LZMA. + However, due to the nature of software patents, the authors cannot + guarantee that LZMA isn't affected by any third party patent.
+ + +Q: Where can I find documentation about how LZMA works as an algorithm? + +A: Read the source code, Luke. There is no documentation about LZMA + internals. It is possible that Igor Pavlov is the only person on + the Earth that completely knows and understands the algorithm. + + You could begin by downloading LZMA SDK, and start reading from + the LZMA decoder to get some idea about the bitstream format. + Before you begin, you should know the basics of LZ77 and + range coding algorithms. LZMA is based on LZ77, but LZMA is + *a lot* more complex. Range coding is used to compress the + final bitstream like Huffman coding is used in Deflate. + + +Q: What are filters? + +A: In the context of .lzma files, a filter means an implementation of a + compression algorithm. The primary filter is LZMA, which is why + the names of the tools contain the letters LZMA. + + liblzma and the new .lzma format support also other filters than LZMA. + There are different types of filters, which are suitable for different + types of data. Thus, to select the optimal filter and settings, the + type of the input data being compressed needs to be known. + + Some filters are most useful when combined with another filter like + LZMA. These filters increase redundancy in the data, without changing + the size of the data, by taking advantage of properties specific to + the data being compressed. + + So far, all the filters are always reversible. That is, no matter what + data you pass to a filter encoder, it can be always defiltered back to + the original form. Because of this, it is safe to compress for example + a software package that contains other file types than executables + using a filter specific to the architecture of the package being + compressed. + + The old LZMA_Alone format supports only the LZMA filter. + + +Q: I cannot find BCJ and BCJ2 filters. Don't they exist in liblzma? + +A: BCJ filter is called "x86" in liblzma.
BCJ2 is not included, + because it requires using more than one encoded output stream. + + +Q: Can I use LZMA in proprietary, non-free applications? + +A: liblzma is under the GNU LGPL version 2.1 or (at your option) any + later version. To summarise (*NOTE* This summary is not legally + binding, that is, it doesn't give you any extra permissions compared + to the LGPL. Read the GNU LGPL carefully for the exact license + conditions.): + * All the changes made to the library itself must be published + under the same license. + * End users must be able to replace the used liblzma. The easiest way + to ensure this is to link dynamically against liblzma so users + can replace the shared library file if they want. + * You must make it clear to your users that your application uses + liblzma, and that liblzma is free software under the GNU LGPL. + A copy of GNU LGPL must be included. + + LZMA SDK contains a special exception which allows linking *unmodified* + code statically with a non-free application. This exception does *not* + apply to liblzma. + + As an alternative, you can support the development of LZMA and 7-Zip + by buying a proprietary license from Igor Pavlov. See the homepage of + LZMA SDK <http://7-zip.org/sdk.html> for more information. Note that + having a proprietary license from Igor Pavlov doesn't allow you to use + liblzma in a way that contradicts the GNU LGPL, because liblzma + contains code that is not copyrighted by Igor Pavlov. Please contact + both Lasse Collin and Igor Pavlov if the license conditions of liblzma + are not suitable for you. + + +Q: I would like to help. What can I do? + +A: See the TODO file. Please contact Lasse Collin before starting to do + anything, because it is possible that someone else is already working + on the same thing. + + +Q: How can I contact the authors? + +A: Lasse Collin is the maintainer of LZMA Utils. You can contact him + via IRC (Larhzu on #tukaani at Freenode or IRCnet).
Email + should work too, <lasse.collin@tukaani.org>. + + Igor Pavlov is the father of LZMA. He is the author of 7-Zip + and LZMA SDK. <http://7-zip.org/> + + NOTE: Please don't bother Igor Pavlov with questions specific + to LZMA Utils. + diff --git a/doc/file-format.txt b/doc/file-format.txt new file mode 100644 index 00000000..4a90a67d --- /dev/null +++ b/doc/file-format.txt @@ -0,0 +1,1861 @@ + +The .lzma File Format +--------------------- + + 0. Preface + 0.1. Copyright Notices + 0.2. Changes + 1. Conventions + 1.1. Byte and Its Representation + 1.2. Multibyte Integers + 2. Stream + 2.1. Stream Types + 2.1.1. Single-Block Stream + 2.1.2. Multi-Block Stream + 2.2. Stream Header + 2.2.1. Header Magic Bytes + 2.2.2. Stream Flags + 2.2.3. CRC32 + 3. Block + 3.1. Block Header + 3.1.1. Block Flags + 3.1.2. Compressed Size + 3.1.3. Uncompressed Size + 3.1.4. List of Filter Flags + 3.1.4.1. Misc + 3.1.4.2. External ID + 3.1.4.3. External Size of Properties + 3.1.4.4. Filter Properties + 3.1.5. CRC32 + 3.1.6. Header Padding + 3.2. Compressed Data + 3.3. Block Footer + 3.3.1. Check + 3.3.2. Stream Footer + 3.3.2.1. Uncompressed Size + 3.3.2.2. Backward Size + 3.3.2.3. Stream Flags + 3.3.2.4. Footer Magic Bytes + 3.3.3. Footer Padding + 4. Filters + 4.1. Detecting when All Data Has Been Decoded + 4.1.1. With Uncompressed Size + 4.1.2. With End of Input + 4.1.3. With End of Payload Marker + 4.2. Alignment + 4.3. Filters + 4.3.1. Copy + 4.3.2. Subblock + 4.3.2.1. Format of the Encoded Output + 4.3.3. Delta + 4.3.3.1. Format of the Encoded Output + 4.3.4. LZMA + 4.3.4.1. LZMA Properties + 4.3.4.2. Dictionary Flags + 4.3.5. Branch/Call/Jump Filters for Executables + 5. Metadata + 5.1. Metadata Flags + 5.2. Size of Header Metadata Block + 5.3. Total Size + 5.4. Uncompressed Size + 5.5. Index + 5.5.1. Number of Data Blocks + 5.5.2. Total Sizes + 5.5.3. Uncompressed Sizes + 5.6. Extra + 5.6.1. 0x00: Dummy/Padding + 5.6.2. 0x01: OpenPGP Signature + 5.6.3. 
0x02: Filter Information + 5.6.4. 0x03: Comment + 5.6.5. 0x04: List of Checks + 5.6.6. 0x05: Original Filename + 5.6.7. 0x07: Modification Time + 5.6.8. 0x09: High-Resolution Modification Time + 5.6.9. 0x0B: MIME Type + 5.6.10. 0x0D: Homepage URL + 6. Custom Filter and Extra Record IDs + 6.1. Reserved Custom Filter ID Ranges + 7. Cyclic Redundancy Checks + 8. References + 8.1. Normative References + 8.2. Informative References + + +0. Preface + + This document describes the .lzma file format (filename suffix + `.lzma', MIME type `application/x-lzma'). It is intended that + this format replace the format used by the LZMA_Alone tool + included in LZMA SDK up to and including version 4.43. + + IMPORTANT: The version described in this document is a + draft, NOT a final, official version. Changes + are possible. + + +0.1. Copyright Notices + + Copyright (C) 2006, 2007 Lasse Collin <lasse.collin@tukaani.org> + Copyright (C) 2006 Ville Koskinen <w-ber@iki.fi> + + Copying and distribution of this file, with or without + modification, are permitted in any medium without royalty + provided the copyright notice and this notice are preserved. + Modified versions must be marked as such. + + All source code examples given in this document are put into + the public domain by the authors of this document. + + Thanks for helping with this document goes to Igor Pavlov, + Mark Adler and Mikko Pouru. + + +0.2. Changes + + Last modified: 2007-12-02 22:40+0200 + + (A changelog will be kept once the first official version + is made.) + + +1. Conventions + + The keywords `must', `must not', `required', `should', + `should not', `recommended', `may', and `optional' in this + document are to be interpreted as described in [RFC-2119]. + These words are not capitalized in this document. + + Indicating a warning means displaying a message, returning + appropriate exit status, or something else to let the user + know that something worth warning occurred. 
The operation + should still finish if a warning is indicated. + + Indicating an error means displaying a message, returning + appropriate exit status, or something else to let the user + know that something prevented successfully finishing the + operation. The operation must be aborted once an error has + been indicated. + + +1.1. Byte and Its Representation + + In this document, a byte is always 8 bits. + + A `nul byte' has all bits unset. That is, the value of a nul + byte is 0x00. + + To represent byte blocks, this document uses notation that + is similar to the notation used in [RFC-1952]: + + +-------+ + | Foo | One byte. + +-------+ + + +---+---+ + | Foo | Two bytes; that is, some of the vertical bars + +---+---+ can be missing. + + +=======+ + | Foo | Zero or more bytes. + +=======+ + + In this document, a boxed byte or a byte sequence declared + using this notation is called `a field'. The example field + above would be called `the Foo field' or plain `Foo'. + + +1.2. Multibyte Integers + + Multibyte integers of static length, such as CRC values, + are stored in little endian byte order (least significant + byte first). + + When smaller values are more likely than bigger values (e.g. + file sizes), multibyte integers are encoded in a simple + variable-length representation: + - Numbers in the range [0, 127] are copied as is, and take + one byte of space. + - Bigger numbers will occupy two or more bytes. The lowest + seven bits of every byte are used for data; the highest + (eighth) bit indicates either that + 0) the byte is in the middle of the byte sequence, or + 1) the byte is the first or the last byte. + + For now, the value of the variable-length integers is limited + to 63 bits, which limits the encoded size of the integer to + nine bytes. These limits may be increased in future if needed. + + Note that the encoding is not as optimal as it could be.
For + example, it is possible to encode the number 42 using any + number of bytes between one and nine. This is convenient + for non-streamed encoders, that write Compressed Size or + Uncompressed Size fields to the Block Header (see Section 3.1) + after the Compressed Data field is written to the disk. + + In several situations, the decoder needs to compare that two + fields contain identical information. When comparing fields + using the encoding described in this Section, the decoder must + consider two fields identical if their decoded values are + identical; it does not matter if the encoded variable-length + representations differ. + + The following C code illustrates encoding and decoding 63-bit + variables; the highest bit of uint64_t must be unset. The + functions return the number of bytes occupied by the integer + (1-9), or zero on error. + + #include <sys/types.h> + #include <inttypes.h> + + size_t + encode(uint8_t buf[static 9], uint64_t num) + { + if (num >= (UINT64_C(1) << (9 * 7))) + return 0; + if (num <= 0x7F) { + buf[0] = num; + return 1; + } + buf[0] = (num & 0x7F) | 0x80; + num >>= 7; + size_t i = 1; + while (num >= 0x80) { + buf[i++] = num & 0x7F; + num >>= 7; + } + buf[i++] = num | 0x80; + return i; + } + + size_t + decode(const uint8_t buf[], size_t size_max, uint64_t *num) + { + if (size_max == 0) + return 0; + if (size_max > 9) + size_max = 9; + *num = buf[0] & 0x7F; + if (!(buf[0] & 0x80)) + return 1; + size_t i = 1; + do { + if (i == size_max) + return 0; + *num |= (uint64_t)(buf[i] & 0x7F) << (7 * i); + } while (!(buf[i++] & 0x80)); + return i; + } + + size_t + decode_reverse(const uint8_t buf[], size_t size_max, + uint64_t *num) + { + if (size_max == 0) + return 0; + const size_t end = size_max > 9 ? 
size_max - 9 : 0; + size_t i = size_max - 1; + *num = buf[i] & 0x7F; + if (!(buf[i] & 0x80)) + return 1; + do { + if (i-- == end) + return 0; + *num <<= 7; + *num |= buf[i] & 0x7F; + } while (!(buf[i] & 0x80)); + return size_max - i; + } + + +2. Stream + + +========+========+========+ + | Stream | Stream | Stream | ... + +========+========+========+ + + A file contains usually only one Stream. However, it is + possible to concatenate multiple Streams together with no + additional processing. It is up to the implementation to + decide if the decoder will continue decoding from the next + Stream once the end of the first Stream has been reached. + + +2.1. Stream Types + + There are two types of Streams: Single-Block Streams and + Multi-Block Streams. Decoders conforming to this specification + must support at least Single-Block Streams. Supporting + Multi-Block Streams is optional. If the decoder supports only + Single-Block Streams, the documentation of the decoder should + mention this fact clearly. + + +2.1.1. Single-Block Stream + + +===============+============+ + | Stream Header | Data Block | + +===============+============+ + + As the name says, a Single-Block Stream has exactly one Block. + The Block must be a Data Block; Metadata Blocks are not allowed + in Single-Block Streams. + + +2.1.2. Multi-Block Stream + + +===============+=======================+ + | Stream Header | Header Metadata Block | + +===============+=======================+ + + +============+ +============+=======================+ + ---> | Data Block | ... | Data Block | Footer Metadata Block | + +============+ +============+=======================+ + + Notes: + - Stream Header is mandatory. + - Header Metadata Block is optional. + - Each Multi-Block Stream has at least one Data Block. The + maximum number of Data Blocks is not limited. + - Footer Metadata Block is mandatory. + + +2.2. 
Stream Header + + +---+---+---+---+---+---+--------------+--+--+--+--+ + | Header Magic Bytes | Stream Flags | CRC32 | + +---+---+---+---+---+---+--------------+--+--+--+--+ + + +2.2.1. Header Magic Bytes + + The first six (6) bytes of the Stream are so-called Header + Magic Bytes. They can be used to identify the file type. + + Using a C array and ASCII: + const uint8_t HEADER_MAGIC[6] + = { 0xFF, 'L', 'Z', 'M', 'A', 0x00 }; + + In plain hexadecimal: + FF 4C 5A 4D 41 00 + + Notes: + - The first byte (0xFF) was chosen so that the files cannot + be erroneously detected as being in LZMA_Alone format, in + which the first byte is in the range [0x00, 0xE0]. + - The sixth byte (0x00) was chosen to prevent applications + from misdetecting the file as a text file. + + +2.2.2. Stream Flags + + Bit(s) Mask Description + 0-2 0x07 Type of Check (see Section 3.3.1): + ID Size Check name + 0x00 0 bytes None + 0x01 4 bytes CRC32 + 0x02 4 bytes (Reserved) + 0x03 8 bytes CRC64 + 0x04 16 bytes (Reserved) + 0x05 32 bytes SHA-256 + 0x06 32 bytes (Reserved) + 0x07 64 bytes (Reserved) + 3 0x08 The CRC32 field is present in Block Headers. + 4 0x10 If unset, this is a Single-Block Stream; if set, + this is a Multi-Block Stream. + 5-7 0xE0 Reserved for future use; must be zero for now. + + Implementations must support at least the Check IDs 0x00 (None) + and 0x01 (CRC32). Supporting other Check IDs is optional. If an + unsupported Check is used, the decoder must indicate a warning + or error. + + If any reserved bit is set, the decoder must indicate an error. + It is possible that there is a new field present which the + decoder is not aware of, and can thus parse the Stream Header + incorrectly. + + +2.2.3. CRC32 + + The CRC32 is calculated from the Stream Flags field. It is + stored as an unsigned 32-bit little endian integer. If the + calculated value does not match the stored one, the decoder + must indicate an error.
+ + Note that this field is always present; the bit in Stream Flags + controls only presence of CRC32 in Block Headers. + + +3. Block + + +==============+=================+==============+ + | Block Header | Compressed Data | Block Footer | + +==============+=================+==============+ + + There are two types of Blocks: + - Data Blocks hold the actual compressed data. + - Metadata Blocks hold the Index, Extra, and a few other + non-data fields (see Section 5). + + The type of the Block is indicated by the corresponding bit + in the Block Flags field (see Section 3.1.1). + + +3.1. Block Header + + +------+------+=================+===================+ + | Block Flags | Compressed Size | Uncompressed Size | + +------+------+=================+===================+ + + +======================+--+--+--+--+================+ + ---> | List of Filter Flags | CRC32 | Header Padding | + +======================+--+--+--+--+================+ + + +3.1.1. Block Flags + + The first byte of the Block Flags field is a bit field: + + Bit(s) Mask Description + 0-2 0x07 Number of filters (0-7) + 3 0x08 Use End of Payload Marker (even if + Uncompressed Size is stored to Block Header). + 4 0x10 The Compressed Size field is present. + 5 0x20 The Uncompressed Size field is present. + 6 0x40 Reserved for future use; must be zero for now. + 7 0x80 This is a Metadata Block. + + The second byte of the Block Flags field is also a bit field: + + Bit(s) Mask Description + 0-4 0x1F Size of the Header Padding field (0-31 bytes) + 5-7 0xE0 Reserved for future use; must be zero for now. + + The decoder must indicate an error if End of Payload Marker + is not used and Uncompressed Size is not stored to the Block + Header. Because of this, the first byte of Block Flags can + never be a nul byte. This is useful when detecting beginning + of the Block after Footer Padding (see Section 3.3.3). + + If any reserved bit is set, the decoder must indicate an error. 
+ It is possible that there is a new field present which the + decoder is not aware of, and can thus parse the Block Header + incorrectly. + + +3.1.2. Compressed Size + + This field is present only if the appropriate bit is set in + the Block Flags field (see Section 3.1.1). + + This field contains the size of the Compressed Data field. + The size is stored using the encoding described in Section 1.2. + If the Compressed Size does not match the real size of the + Compressed Data field, the decoder must indicate an error. + + Having the Compressed Size field in the Block Header can be + useful for multithreaded decoding when seeking is not possible. + If the Blocks are small enough, the decoder can read multiple + Blocks into its internal buffer, and decode the Blocks in + parallel. + + Compressed Size can also be useful when seeking forwards to + a specific location in streamed mode: the decoder can quickly + skip over irrelevant Blocks, without decoding them. + + +3.1.3. Uncompressed Size + + This field is present only if the appropriate bit is set in + the Block Flags field (see Section 3.1.1). + + The Uncompressed Size field contains the size of the Block + after uncompressing. + + Storing Uncompressed Size serves several purposes: + - The decoder will know when all of the data has been + decoded without an explicit End of Payload Marker. + - The decoder knows how much memory it needs to allocate + for a temporary buffer in multithreaded mode. + - Simple error detection: wrong size indicates a broken file. + - Sometimes it is useful to know the file size without + uncompressing the file. + + It should be noted that the only reliable way to find out what + the real uncompressed size is is to uncompress the Block, + because the Block Header and Metadata Block fields may contain + (intentionally or unintentionally) invalid information. + + Uncompressed Size is stored using the encoding described in + Section 1.2. 
If the Uncompressed Size does not match the + real uncompressed size, the decoder must indicate an error. + + +3.1.4. List of Filter Flags + + +================+================+ +================+ + | Filter 0 Flags | Filter 1 Flags | ... | Filter n Flags | + +================+================+ +================+ + + The number of Filter Flags fields is stored in the Block Flags + field (see Section 3.1.1). As a special case, if the number of + Filter Flags fields is zero, it is equivalent to having the + Copy filter as the only filter. + + The format of each Filter Flags field is as follows: + + +------+=============+=============================+ + | Misc | External ID | External Size of Properties | + +------+=============+=============================+ + + +===================+ + ---> | Filter Properties | + +===================+ + + The list of officially defined Filter IDs and the formats of + their Filter Properties are described in Section 4.3. + + +3.1.4.1. Misc + + To save space, the most commonly used Filter IDs and the + Size of Filter Properties are encoded in a single byte. + Depending on the contents of the Misc field, Filter ID is + the value of the Misc or External ID field. + + Value Filter ID Size of Filter Properties + 0x00 - 0x1F Misc 0 bytes + 0x20 - 0x3F Misc 1 byte + 0x40 - 0x5F Misc 2 bytes + 0x60 - 0x7F Misc 3 bytes + 0x80 - 0x9F Misc 4 bytes + 0xA0 - 0xBF Misc 5 bytes + 0xC0 - 0xDF Misc 6 bytes + 0xE0 - 0xFE External ID 0-30 bytes + 0xFF External ID External Size of Properties + + The following code demonstrates parsing the Misc field and, + when needed, the External ID and External Size of Properties + fields. + + uint64_t id; + uint64_t properties_size; + uint8_t misc = read_byte(); + + if (misc >= 0xE0) { + id = read_variable_length_integer(); + + if (misc == 0xFF) + properties_size = read_variable_length_integer(); + else + properties_size = misc - 0xE0; + + } else { + id = misc; + properties_size = misc / 0x20; + } + + +3.1.4.2. 
External ID + + This field is present only if the Misc field contains a value + that indicates usage of External ID. The External ID is stored + using the encoding described in Section 1.2. + + +3.1.4.3. External Size of Properties + + This field is present only if the Misc field contains a value + that indicates usage of External Size of Properties. The size + of Filter Properties is stored using the encoding described in + Section 1.2. + + +3.1.4.4. Filter Properties + + Size of this field depends on the Misc field (Section 3.1.4.1) + and, if present, External Size of Properties field (Section + 3.1.4.3). The format of this field depends on the selected + filter; see Section 4.3 for details. + + +3.1.5. CRC32 + + This field is present only if the appropriate bit is set in + the Stream Flags field (see Section 2.2.2). + + The CRC32 is calculated over everything in the Block Header + field except the Header Padding field and the CRC32 field + itself. It is stored as an unsigned 32-bit little endian + integer. If the calculated value does not match the stored + one, the decoder must indicate an error. + + +3.1.6. Header Padding + + This field contains as many nul bytes as indicated by the value + stored in the Block Flags field. If the Header Padding field + contains any non-nul bytes, the decoder must indicate an error. + + The intent of the Header Padding field is to allow alignment + of Compressed Data. The usefulness of alignment is described + in Section 4.2. + + +3.2. Compressed Data + + The format of Compressed Data depends on Block Flags and List + of Filter Flags. Excluding the descriptions of the simplest + filters in Section 4, the format of the filter-specific encoded + data is out of scope of this document. + + Note a special case: if End of Payload Marker (see Section + 3.1.1) is not used and Uncompressed Size is zero, the size + of the Compressed Data field is always zero. + + +3.3.
Block Footer + + +=======+===============+================+ + | Check | Stream Footer | Footer Padding | + +=======+===============+================+ + + +3.3.1. Check + + The type and size of the Check field depends on which bits + are set in the Stream Flags field (see Section 2.2.2). + + The Check, when used, is calculated from the original + uncompressed data. If the calculated Check does not match the + stored one, the decoder must indicate an error. If the selected + type of Check is not supported by the decoder, it must indicate + a warning or error. + + +3.3.2. Stream Footer + + +===================+===============+--------------+ + | Uncompressed Size | Backward Size | Stream Flags | + +===================+===============+--------------+ + + +----------+---------+ + ---> | Footer Magic Bytes | + +----------+---------+ + + Stream Footer is present only in + - Data Block of a Single-Block Stream; and + - Footer Metadata Block of a Multi-Block Stream. + + The Stream Footer field is placed inside Block Footer, because + no padding is allowed between Check and Stream Footer. + + +3.3.2.1. Uncompressed Size + + This field is present only in the Data Block of a Single-Block + Stream if Uncompressed Size is not stored to the Block Header + (see Section 3.1.1). Without the Uncompressed Size field in + Stream Footer it would not be possible to quickly find out + the Uncompressed Size of the Stream in all cases. + + Uncompressed Size is stored using the encoding described in + Section 1.2. If the stored value does not match the real + uncompressed size of the Single-Block Stream, the decoder must + indicate an error. + + +3.3.2.2. Backward Size + + This field contains the total size of the Block Header, + Compressed Data, Check, and Uncompressed Size fields. The + value is stored using the encoding described in Section 1.2. + If the Backward Size does not match the real total size of + the appropriate fields, the decoder must indicate an error. 
+ + Implementations reading the Stream backwards should notice + that the value in this field can never be zero. + + +3.3.2.3. Stream Flags + + This is a copy of the Stream Flags field from the Stream + Header. The information stored to Stream Flags is needed + when parsing the Stream backwards. + + +3.3.2.4. Footer Magic Bytes + + As the last step of the decoding process, the decoder must + verify the existence of Footer Magic Bytes. If they are not + found, an error must be indicated. + + Using a C array and ASCII: + const uint8_t FOOTER_MAGIC[2] = { 'Y', 'Z' }; + + In hexadecimal: + 59 5A + + The primary reason to have Footer Magic Bytes is to make + it easier to detect incomplete files quickly, without + uncompressing. If the file does not end with Footer Magic Bytes + (excluding Footer Padding described in Section 3.3.3), it + cannot be undamaged, unless someone has intentionally appended + garbage after the end of the Stream. (Appending garbage at the + end of the file does not prevent uncompressing the file, but + may give a warning or error depending on the decoder + implementation.) + + +3.3.3. Footer Padding + + In certain situations it is convenient to be able to pad + Blocks or Streams to be multiples of, for example, 512 bytes. + Footer Padding makes this possible. Note that this is in no + way required to enforce alignment in the way described in + Section 4.2; the Header Padding field is enough for that. + + When Footer Padding is used, it must contain only nul bytes. + Any non-nul byte should be considered as the beginning of + a new Block or Stream. + + The possibility of Padding should be taken into account when + designing an application that wants to find out information + about a Stream by parsing Footer Metadata Block. + + Support for Padding was inspired by a related note in + [GNU-tar]. + + +4. Filters + + The Block Flags field defines how many filters are used. 
When + more than one filter is used, the filters are chained; that is, + the output of one filter is the input of another filter. The + following figure illustrates the direction of data flow. + + v Uncompressed Data ^ + | Filter 0 | + Encoder | Filter 1 | Decoder + | ... | + | Filter n | + v Compressed Data ^ + + The filters are independent from each other, except that they + must cooperate a little to make it possible, in all cases, to + detect when all of the data has been decoded. In addition, the + filters should cooperate in the encoder to keep the alignment + optimal. + + +4.1. Detecting when All Data Has Been Decoded + + There must be a way for the decoder to detect when all of the + Compressed Data has been decoded. This is simple when only + one filter is used, but a bit more complex when multiple + filters are chained. + + This file format supports three methods to detect when all of + the data has been decoded: + - Uncompressed size + - End of Input + - End of Payload Marker + + In both encoder and decoder, filters are initialized starting + from the first filter in the chain. For each filter, one of + these three methods is used. + + +4.1.1. With Uncompressed Size + + This method is the only method supported by all filters. + It must be used when uncompressed size is known by the + filter-specific encoder or decoder. In practice this means + that Uncompressed Size has been stored to the Block Header. + + In case of the first filter in the chain, the uncompressed size + given to the filter-specific encoder or decoder equals the + Uncompressed Size stored in the Block Header. For the rest of + the filters in the chain, uncompressed size is the size of the + output data of the previous filter in the chain. + + Note that when Use End of Payload Marker bit is set in Block + Flags, Uncompressed Size is considered to be unknown even if + it was present in the Block Header. 
Thus, if End of Payload + Marker is used, uncompressed size of all of the filters in + the chain is unknown, and can never be used to detect when + all of the data has been decoded. + + Once the correct number of bytes has been written out, the + filter-specific decoder indicates to its caller that all of + the data has been decoded. If the filter-specific decoder + detects End of Input or End of Payload Marker before the + correct number of bytes is decoded, the decoder must indicate + an error. + + +4.1.2. With End of Input + + Most filters will know that all of the data has been decoded + when the End of Input data has been reached. Once the filter + knows that it has received the input data in its entirety, + it finishes its job, and indicates to its caller that all of + the data has been decoded. The filter-specific decoder must + indicate an error if it detects End of Payload Marker. + + Note that this method can work only when the filter is not + the last filter in the chain, because only another filter + can indicate the End of Input data. In practice this means, + that a filter later in the chain must support embedding + End of Payload Marker. + + When a filter that cannot embed End of Payload Marker is the + last filter in the chain, Subblock filter is appended to the + chain as an implicit filter. In the simplest case, this occurs + when no filters are specified, and Uncompressed Size is unknown + or the End of Payload Marker bit is set in Block Flags. + + +4.1.3. With End of Payload Marker + + End of Payload Marker is a filter-specific bit sequence that + indicates the end of data. It is supported by only a few + filters. It is used when uncompressed size is unknown, and + the filter + - doesn't support End of Input; or + - is the last filter in the chain. + + End of Payload Marker is embedded at the end of the encoded + data by the filter-specific encoder. 
When the filter-specific + decoder detects the embedded End of Payload Marker, the decoder + knows that all of the data has been decoded. Then it finishes + its job, and indicates to its caller that all of the data has + been decoded. If the filter-specific decoder detects End of + Input before End of Payload Marker, the decoder must indicate + an error. + + If the filter supports both End of Input and End of Payload + Marker, the former is used, unless the filter is the last + filter in the chain. + + +4.2. Alignment + + Some filters give better compression ratio or are faster + when the input or output data is aligned. For optimal results, + the encoder should try to enforce proper alignment when + possible. Not enforcing alignment in the encoder is not + an error. Thus, the decoder must be able to handle files with + suboptimal alignment. + + Alignment of uncompressed input data is usually the job of + the application producing the data. For example, to get the + best results, an archiver tool should make sure that all + PowerPC executable files in the archive stream start at + offsets that are multiples of four bytes. + + Some filters, for example LZMA, can be configured to take + advantage of specified alignment of input data. Note that + taking advantage of aligned input can be beneficial also when + a filter is not the first filter in the chain. For example, + if you compress PowerPC executables, you may want to use the + PowerPC filter and chain that with the LZMA filter. Because not + only the input but also the output alignment of the PowerPC + filter is four bytes, it is now beneficial to set LZMA settings + so that the LZMA encoder can take advantage of its + four-byte-aligned input data. + + The output of the last filter in the chain is stored to the + Compressed Data field. 
Aligning Compressed Data appropriately + can increase + - speed, if the filtered data is handled multiple bytes at + a time by the filter-specific encoder and decoder, + because accessing aligned data in computer memory is + usually faster; and + - compression ratio, if the output data is later compressed + with an external compression tool. + + Compressed Data in a Stream can be aligned by using the Header + Padding field in the Block Header. + + +4.3. Filters + +4.3.1. Copy + + This is a dummy filter that simply copies all data from input + to output unmodified. + + Filter ID: 0x00 + Size of Filter Properties: 0 bytes + Changes size of data: No + + Detecting when all of the data has been decoded: + Uncompressed size: Yes + End of Payload Marker: No + End of Input: Yes + + Preferred alignment: + Input data: 1 byte + Output data: 1 byte + + +4.3.2. Subblock + + The Subblock filter can be used to + - embed End of Payload Marker when the otherwise last + filter in the chain does not support embedding it; and + - apply additional filters in the middle of a Block. + + Filter ID: 0x01 + Size of Filter Properties: 0 bytes + Changes size of data: Yes, unpredictably + + Detecting when all of the data has been decoded: + Uncompressed size: Yes + End of Payload Marker: Yes + End of Input: Yes + + Preferred alignment: + Input data: 1 byte + Output data: Freely adjustable + + +4.3.2.1. Format of the Encoded Output + + The encoded data from the Subblock filter consist of zero or + more Subblocks: + + +==========+==========+ + | Subblock | Subblock | ... + +==========+==========+ + + Each Subblock contains two fields: + + +----------------+===============+ + | Subblock Flags | Subblock Data | + +----------------+===============+ + + Subblock Flags is a bitfield: + + Bits Mask Description + 0-3 0x0F The interpretation of these bits depend on + the Subblock Type: + - 0x20 Bits 0-3 for Size + - 0x30 Bits 0-3 for Repeat Count + - Other These bits must be zero. 
+ 4-7 0xF0 Subblock Type: + - 0x00: Padding + - 0x10: End of Payload Marker + - 0x20: Data + - 0x30: Repeating Data + - 0x40: Set Subfilter + - 0x50: Unset Subfilter + If some other value is detected, the decoder + must indicate an error. + + The format of the Subblock Data field depends on Subblock Type. + + Subblocks with the Subblock Type 0x00 (Padding) don't have a + Subblock Data field. These Subblocks can be useful for fixing + alignment. There can be a maximum of 31 consecutive Subblocks + with this Subblock Type; if there are more, the decoder must + indicate an error. + + Subblock with the Subblock Type 0x10 (End of Payload Marker) + doesn't have a Subblock Data field. The decoder must indicate + an error if this Subblock Type is detected when Subfilter is + enabled, or when the Subblock filter is not supposed to embed + the End of Payload Marker. + + Subblocks with the Subblock Type 0x20 (Data) contain the rest + of the Size, which is followed by Size + 1 bytes in the Data + field (that is, Data can never be empty): + + +------+------+------+======+ + | Bits 4-27 for Size | Data | + +------+------+------+======+ + + Subblocks with the Subblock Type 0x30 (Repeating Data) contain + the rest of the Repeat Count, the Size of the Data, and finally + the actual Data to be repeated: + + +---------+---------+--------+------+======+ + | Bits 4-27 for Repeat Count | Size | Data | + +---------+---------+--------+------+======+ + + The size of the Data field is Size + 1. It is repeated Repeat + Count + 1 times. That is, the minimum size of Data is one byte; + the maximum size of Data is 256 bytes. The minimum number of + repeats is one; the maximum number of repeats is 2^28. + + If Subfilter is not used, the Data field of Subblock Types 0x20 + and 0x30 is the output of the decoded Subblock filter. If + Subfilter is used, Data is the input of the Subfilter, and the + decoded output of the Subfilter is the decoded output of the + Subblock filter. 
+ + Subblocks with the Subblock Type 0x40 (Set Subfilter) contain + a Filter Flags field in Subblock Data: + + +==============+ + | Filter Flags | + +==============+ + + It is an error to set the Subfilter to Filter ID 0x00 (Copy) + or 0x01 (Subblock). All the other Filter IDs are allowed. + The decoder must indicate an error if this Subblock Type is + detected when a Subfilter is already enabled. + + Subblocks with the Subblock Type 0x50 (Unset Subfilter) don't + have a Subblock Data field. There must be at least one Subblock + with Subblock Type 0x20 or 0x30 between Subblocks with Subblock + Type 0x40 and 0x50; if there isn't, the decoder must indicate + an error. + + Subblock Types 0x40 and 0x50 are always used as a pair: If the + Subblock filter has been enabled with Subblock Type 0x40, it + must always be disabled later with Subblock Type 0x50. + Disabling must be done even if the Subfilter used End of + Payload Marker; after the Subfilter has detected End of Payload + Marker, the next Subblock that is not Padding must unset the + Subfilter. + + When the Subblock filter is used as an implicit filter to embed + End of Payload marker, the Subblock Types 0x40 and 0x50 (Set or + Unset Subfilter) must not be used. The decoder must indicate an + error if it detects any of these Subblock Types in an implicit + Subblock filter. + + The following code illustrates the basic structure of a + Subblock decoder. 
+ + uint32_t consecutive_padding = 0; + bool got_output_with_subfilter = false; + + while (true) { + uint32_t size; + uint32_t repeat; + uint8_t flags = read_byte(); + + if (flags != 0) + consecutive_padding = 0; + + switch (flags >> 4) { + case 0: + // Padding + if (flags & 0x0F) + return DATA_ERROR; + if (++consecutive_padding == 32) + return DATA_ERROR; + break; + + case 1: + // End of Payload Marker + if (flags & 0x0F) + return DATA_ERROR; + if (subfilter_enabled || !allow_eopm) + return DATA_ERROR; + break; + + case 2: + // Data + size = flags & 0x0F; + for (size_t i = 4; i < 28; i += 8) + size |= (uint32_t)(read_byte()) << i; + + // If any output is produced, this will + // set got_output_with_subfilter to true. + copy_data(size); + break; + + case 3: + // Repeating Data + repeat = flags & 0x0F; + for (size_t i = 4; i < 28; i += 8) + repeat |= (uint32_t)(read_byte()) << i; + size = read_byte(); + + // If any output is produced, this will + // set got_output_with_subfilter to true. + copy_repeating_data(size, repeat); + break; + + case 4: + // Set Subfilter + if (flags & 0x0F) + return DATA_ERROR; + if (subfilter_enabled) + return DATA_ERROR; + got_output_with_subfilter = false; + set_subfilter(); + break; + + case 5: + // Unset Subfilter + if (flags & 0x0F) + return DATA_ERROR; + if (!subfilter_enabled) + return DATA_ERROR; + if (!got_output_with_subfilter) + return DATA_ERROR; + unset_subfilter(); + break; + + default: + return DATA_ERROR; + } + } + + +4.3.3. Delta + + The Delta filter may increase compression ratio when the value + of the next byte correlates with the value of an earlier byte + at specified distance. 
+ + Filter ID: 0x20 + Size of Filter Properties: 1 byte + Changes size of data: No + + Detecting when all of the data has been decoded: + Uncompressed size: Yes + End of Payload Marker: No + End of Input: Yes + + Preferred alignment: + Input data: 1 byte + Output data: Same as the original input data + + The Properties byte indicates the delta distance, which can be + 1-256 bytes backwards from the current byte: 0x00 indicates + distance of 1 byte and 0xFF distance of 256 bytes. + + +4.3.3.1. Format of the Encoded Output + + The code below illustrates both encoding and decoding with + the Delta filter. + + // Distance is in the range [1, 256]. + const unsigned int distance = get_properties_byte() + 1; + uint8_t pos = 0; + uint8_t delta[256]; + + memset(delta, 0, sizeof(delta)); + + while (1) { + const int byte = read_byte(); + if (byte == EOF) + break; + + uint8_t tmp = delta[(uint8_t)(distance + pos)]; + if (is_encoder) { + tmp = (uint8_t)(byte) - tmp; + delta[pos] = (uint8_t)(byte); + } else { + tmp = (uint8_t)(byte) + tmp; + delta[pos] = tmp; + } + + write_byte(tmp); + --pos; + } + + +4.3.4. LZMA + + LZMA (Lempel-Ziv-Markov chain-Algorithm) is a general-purpose + compression algorithm with high compression ratio and fast + decompression. LZMA is based on LZ77 and range coding algorithms. + + Filter ID: 0x40 + Size of Filter Properties: 2 bytes + Changes size of data: Yes, unpredictably + + Detecting when all of the data has been decoded: + Uncompressed size: Yes + End of Payload Marker: Yes + End of Input: No + + Preferred alignment: + Input data: Adjustable to 1/2/4/8/16 byte(s) + Output data: 1 byte + + At the time of writing, there is no other documentation about + how LZMA works than the source code in LZMA SDK. Once such + documentation gets written, it will probably be published as + a separate document, because including the documentation here + would lengthen this document considerably. 
+ + The format of the Filter Properties field is as follows: + + +-----------------+------------------+ + | LZMA Properties | Dictionary Flags | + +-----------------+------------------+ + + +4.3.4.1. LZMA Properties + + The LZMA Properties bits contain three properties. An + abbreviation is given in parentheses, followed by the value + range of the property. The field consists of + + 1) the number of literal context bits (lc, [0, 8]); + 2) the number of literal position bits (lp, [0, 4]); and + 3) the number of position bits (pb, [0, 4]). + + They are encoded using the following formula: + + LZMA Properties = (pb * 5 + lp) * 9 + lc + + The following C code illustrates a straightforward way to + decode the properties: + + uint8_t lc, lp, pb; + uint8_t prop = get_lzma_properties() & 0xFF; + if (prop > (4 * 5 + 4) * 9 + 8) + return LZMA_PROPERTIES_ERROR; + + pb = prop / (9 * 5); + prop -= pb * 9 * 5; + lp = prop / 9; + lc = prop - lp * 9; + + +4.3.4.2. Dictionary Flags + + Currently the lowest six bits of the Dictionary Flags field + are in use: + + Bits Mask Description + 0-5 0x3F Dictionary Size + 6-7 0xC0 Reserved for future use; must be zero for now. + + Dictionary Size is encoded with one-bit mantissa and five-bit + exponent. To avoid wasting space, one-byte dictionary has its + own special value. + + Raw value Mantissa Exponent Dictionary size + 0 1 0 1 byte + 1 2 0 2 bytes + 2 3 0 3 bytes + 3 2 1 4 bytes + 4 3 1 6 bytes + 5 2 2 8 bytes + 6 3 2 12 bytes + 7 2 3 16 bytes + 8 3 3 24 bytes + 9 2 4 32 bytes + ... ... ... ... + 61 2 30 2 GiB + 62 3 30 3 GiB + 63 2 31 4 GiB (*) + + (*) The real maximum size of the dictionary is one byte + less than 4 GiB, because the distance of 4 GiB is + reserved for End of Payload Marker. 
+ + Instead of having a table in the decoder, the dictionary size + can be decoded using the following C code: + + uint64_t dictionary_size; + const uint8_t bits = get_dictionary_flags() & 0x3F; + if (bits == 0) { + dictionary_size = 1; + } else { + dictionary_size = 2 | ((bits + 1) & 1); + dictionary_size = dictionary_size << ((bits - 1) / 2); + } + + +4.3.5. Branch/Call/Jump Filters for Executables + + These filters convert relative branch, call, and jump + instructions to their absolute counterparts in executable + files. This conversion increases redundancy and thus + compression ratio. + + Size of Filter Properties: 0 or 4 bytes + Changes size of data: No + + Detecting when all of the data has been decoded: + Uncompressed size: Yes + End of Payload Marker: No + End of Input: Yes + + Below is the list of filters in this category. The alignment + is the same for both input and output data. + + Filter ID Alignment Description + 0x04 1 byte x86 filter (BCJ) + 0x05 4 bytes PowerPC (big endian) filter + 0x06 16 bytes IA64 filter + 0x07 4 bytes ARM (little endian) filter + 0x08 2 bytes ARM Thumb (little endian) filter + 0x09 4 bytes SPARC filter + + If the size of Filter Properties is four bytes, the Filter + Properties field contains the start offset used for address + conversions. It is stored as an unsigned 32-bit little endian + integer. If the size of Filter Properties is zero, the start + offset is zero. + + Setting the start offset may be useful if an executable has + multiple sections, and there are many cross-section calls. + Taking advantage of this feature usually requires usage of + the Subblock filter. + + +5. Metadata + + Metadata is stored in Metadata Blocks, which can be in the + beginning or at the end of a Multi-Block Stream. Because of + Blocks, it is possible to compress Metadata in the same way + as the actual data is compressed. This Section describes the + format of the data stored in Metadata Blocks. 
+ + +----------------+===============================+ + | Metadata Flags | Size of Header Metadata Block | + +----------------+===============================+ + + +============+===================+=======+=======+ + ---> | Total Size | Uncompressed Size | Index | Extra | + +============+===================+=======+=======+ + + Stream must be parseable backwards. That is, there must be + a way to locate the beginning of the Stream by starting from + the end of the Stream. Thus, the Footer Metadata Block must + contain the Total Size field or the Index field. If the Stream + has Header Metadata Block, also the Size of Header Metadata + Block field must be present in Footer Metadata Block. + + It must be possible to quickly locate the Blocks in + non-streamed mode. Thus, the Index field must be present + at least in one Metadata Block. + + If the above conditions are not met, the decoder must indicate + an error. + + There should be no additional data after the last field. If + there is, the decoder should indicate an error. + + +5.1. Metadata Flags + + This field describes which fields are present in a Metadata + Block: + + Bit(s) Mask Description + 0 0x01 Size of Header Metadata Block is present. + 1 0x02 Total Size is present. + 2 0x04 Uncompressed Size is present. + 3 0x08 Index is present. + 4-6 0x70 Reserved for future use; must be zero for now. + 7 0x80 Extra is present. + + If any reserved bit is set, the decoder must indicate an error. + It is possible that there is a new field present which the + decoder is not aware of, and can thus parse the Metadata + incorrectly. + + +5.2. Size of Header Metadata Block + + This field is present only if the appropriate bit is set in + the Metadata Flags field (see Section 5.1). + + Size of Header Metadata Block is needed to make it possible to + parse the Stream backwards. The size is stored using the + encoding described in Section 1.2. The decoder must verify + that the value stored in this field is non-zero. 
In Footer + Metadata Block, the decoder must also verify that the stored + size matches the real size of Header Metadata Block. In the + Header Metadata Block, the value of this field is ignored as + long as it is not zero. + + +5.3. Total Size + + This field is present only if the appropriate bit is set in the + Metadata Flags field (see Section 5.1). + + This field contains the total size of the Data Blocks in the + Stream. Total Size is stored using the encoding described in + Section 1.2. If the stored value does not match the real total + size of the Data Blocks, the decoder must indicate an error. + The value of this field must be non-zero. + + Total Size can be used to quickly locate the beginning or end + of the Stream. This can be useful for example when doing + random-access reading, and the Index field is not in the + Metadata Block currently being read. + + It is useless to have both Total Size and Index in the same + Metadata Block, because Total Size can be calculated from the + Index field. + + +5.4. Uncompressed Size + + This field is present only if the appropriate bit is set in the + Metadata Flags field (see Section 5.1). + + This field contains the total uncompressed size of the Data + Blocks in the Stream. Uncompressed Size is stored using the + encoding described in Section 1.2. If the stored value does not + match the real uncompressed size of the Data Blocks, the + decoder must indicate an error. + + It is useless to have both Uncompressed Size and Index in + the same Metadata Block, because Uncompressed Size can be + calculated from the Index field. + + +5.5. Index + + +=======================+=============+====================+ + | Number of Data Blocks | Total Sizes | Uncompressed Sizes | + +=======================+=============+====================+ + + Index serves several purposes. 
Using it, one can + - verify that all Blocks in a Stream have been processed; + - find out the Uncompressed Size of a Stream; and + - quickly access the beginning of any Block (random access). + + +5.5.1. Number of Data Blocks + + This field contains the number of Data Blocks in the Stream. + The value is stored using the encoding described in Section + 1.2. If the decoder has decoded all the Data Blocks of the + Stream, and then notices that the Number of Data Blocks doesn't + match the real number of Data Blocks, the decoder must + indicate an error. The value of this field must be non-zero. + + +5.5.2. Total Sizes + + +============+============+ + | Total Size | Total Size | ... + +============+============+ + + This field lists the Total Sizes of every Data Block in the + Stream. There are as many Total Size fields as indicated by + the Number of Data Blocks field. + + Total Size is the size of Block Header, Compressed Data, and + Block Footer. It is stored using the encoding described in + Section 1.2. If the Total Sizes do not match the real sizes + of respective Blocks, the decoder should indicate an error. + All the Total Size fields must have a non-zero value. + + +5.5.3. Uncompressed Sizes + + +===================+===================+ + | Uncompressed Size | Uncompressed Size | ... + +===================+===================+ + + This field lists the Uncompressed Sizes of every Data Block + in the Stream. There are as many Uncompressed Size fields as + indicated by the Number of Data Blocks field. + + Uncompressed Sizes are stored using the encoding described + in Section 1.2. If the Uncompressed Sizes do not match the + real sizes of respective Blocks, the decoder should indicate + an error. + + +5.6. Extra + + This field is present only if the appropriate bit is set in the + Metadata Flags field (see Section 5.1). Note that the bit does + not indicate that there is any data in the Extra field; it only + indicates that Extra may be non-empty. 
+ + The Extra field contains only information that is not required + to properly uncompress the Stream or to do random-access + reading. Supporting the Extra field is optional. In case the + decoder doesn't support the Extra field, it should silently + ignore it. + + Extra consists of zero or more Records: + + +========+========+ + | Record | Record | ... + +========+========+ + + Excluding Records with Record ID 0x00, each Record contains + three fields: + + +===========+==============+======+ + | Record ID | Size of Data | Data | + +===========+==============+======+ + + The Record ID and Size of Data are stored using the encoding + described in Section 1.2. Data can be binary or UTF-8 + [RFC-3629] strings. Non-UTF-8 strings should be avoided. + Because the Size of Data is known, there is no need to + terminate strings with a nul byte, although doing so should + not be considered an error. + + The Record IDs are divided in two categories: + - Safe-to-Copy Records may be preserved as is when the + Stream is modified in ways that don't change the actual + uncompressed data. Examples of such operations include + recompressing and adding, modifying, or deleting unrelated + Extra Records. + - Unsafe-to-Copy Records should be removed (and possibly + recreated) when any kind of changes are made to the Stream. + + When the actual uncompressed data is modified, all Records + should be removed (and possibly recreated), unless the + application knows that the Data stored to the Record(s) is + still valid. + + The following subsections describe the standard Record IDs and + the format of their Data fields. Safe-to-Copy Records have an + odd ID, while Unsafe-to-Copy Records have an even ID. + + +5.6.1. 0x00: Dummy/Padding + + This Record is special, because it doesn't have the Size of + Data or Data fields. + + Dummy Records can be used, for example, to fill Metadata Block + when a few bytes of extra space has been reserved for it. There + can be any number of Dummy Records. 
+ + +5.6.2. 0x01: OpenPGP Signature + + OpenPGP signature is computed from uncompressed data. The + signature can be used to verify that the contents of a Stream + has been created by a trustworthy source. + + If the decoder supports decoding concatenated Streams, it + must indicate an error when verifying OpenPGP signatures if + there is more than one Stream. + + OpenPGP format is documented in [RFC-2440]. + + +5.6.3. 0x02: Filter Information + + The Filter Information Record contains information about the + filters used in the Stream. This field can be used to quickly + - display which filters are used in each Block; + - check if all the required filters are supported by the + current decoder version; and + - check how much memory is required to decode each Block. + + The format of the Filter Information field is as follows: + + +=================+=================+ + | Block 0 Filters | Block 1 Filters | ... + +=================+=================+ + + There can be at most as many Block Filters fields as + there are Data Blocks in the Stream. The format of the Block + Filters field is as follows: + + +------------------+======================+============+ + | Block Properties | List of Filter Flags | Subfilters | + +------------------+======================+============+ + + Block Properties is a bitfield: + + Bit(s) Mask Description + 0-2 0x07 Number of filters (0-7) + 3 0x08 End of Payload Marker is used. + 4 0x10 The Subfilters field is present. + 5-7 0xE0 Reserved for future use; must be zero for now. + + The contents of the List of Filter Flags field must match the + List of Filter Flags field in the respective Block Header. + + The Subfilters field may be present only if the List of Filter + Flags contains a Filter Flags field for a Subblock filter. 
The + format of the Subfilters field is as follows: + + +======================+=========================+ + | Number of Subfilters | List of Subfilter Flags | + +======================+=========================+ + + The value stored in the Number of Subfilters field is stored + using the encoding described in Section 1.2. The List of + Subfilter Flags field contains as many Filter Flags fields + as indicated by the Number of Subfilters field. These Filter + Flags fields list some or all the Subfilters used via the + Subblock filter. The order of the listed Subfilters is not + significant. + + Decoders supporting this Record should indicate a warning or + error if this Record contains Filter Flags that are not + actually used by the respective Blocks. + + +5.6.4. 0x03: Comment + + Free-form comment is stored in UTF-8 [RFC-3629] encoding. + + The beginning of a new line should be indicated using the + ASCII Line Feed character (0x0A). When the Line Feed character + is not the native way to indicate new line in the underlying + operating system, the encoder and decoder should convert the + newline characters to and from Line Feeds. + + +5.6.5. 0x04: List of Checks + + +=======+=======+ + | Check | Check | ... + +=======+=======+ + + There are as many Check fields as there are Blocks in the + Stream. The size of Check fields depend on Stream Flags + (see Section 2.2.2). + + Decoders supporting this Record should indicate a warning or + error if the Checks don't match the respective Blocks. + + +5.6.6. 0x05: Original Filename + + Original filename is stored in UTF-8 [RFC-3629] encoding. + + The filename must not include any path, only the filename + itself. Special care must be taken to prevent directory + traversal vulnerabilities. + + When files are moved between different operating systems, it + is possible that filename valid in the source system is not + valid in the target system. It is implementation defined how + the decoder handles this kind of situations. 
+ + +5.6.7. 0x07: Modification Time + + Modification time is stored as POSIX time, as an unsigned + little endian integer. The number of bits depends on the + Size of Data field. Note that the usage of unsigned integer + limits the earliest representable time to 1970-01-01T00:00:00. + + +5.6.8. 0x09: High-Resolution Modification Time + + This Record extends the `0x07: Modification Time' Record with + subsecond time information. There are two supported formats + of this field, which can be distinguished by looking at the + Size of Data field. + + Size Data + 3 [0; 9,999,999] times 100 nanoseconds + 4 [0; 999,999,999] nanoseconds + + The value is stored as an unsigned 24-bit or 32-bit little + endian integer. + + +5.6.9. 0x0B: MIME Type + + MIME type of the uncompressed Stream. This can be used to + detect the content type. [IANA-MIME] + + +5.6.10. 0x0D: Homepage URL + + This field can be used, for example, when distributing software + packages (sources or binaries). The field would indicate the + homepage of the program. + + For details on how to encode URLs, see [RFC-1738]. + + +6. Custom Filter and Extra Record IDs + + If a developer wants to use custom Filter or Extra Record IDs, + he has two choices. The first choice is to contact Lasse Collin + and ask him to allocate a range of IDs for the developer. + + The second choice is to generate a 40-bit random integer, + which the developer can use as his personal Developer ID. + To minimize the risk of collisions, Developer ID has to be + a randomly generated integer, not manually selected "hex word". + The following command, which works on many free operating + systems, can be used to generate Developer ID: + + dd if=/dev/urandom bs=5 count=1 | hexdump + + The developer can then use his Developer ID to create unique + (well, hopefully unique) Filter and Extra Record IDs. 
+ + Bits Mask Description + 0-15 0x0000_0000_0000_FFFF Filter or Extra Record ID + 16-55 0x00FF_FFFF_FFFF_0000 Developer ID + 56-62 0x7F00_0000_0000_0000 Static prefix: 0x7F + + The resulting 63-bit integer will use 9 bytes of space when + stored using the encoding described in Section 1.2. To get + a shorter ID, see the beginning of this Section how to + request a custom ID range. + + Note that Filter and Metadata Record IDs are in their own + namespaces. That is, you can use the same ID value as Filter ID + and Metadata Record ID, and the meanings of the IDs do not need + to be related to each other. + + +6.1. Reserved Custom Filter ID Ranges + + Range Description + 0x0000_0000 - 0x0000_00DF IDs fitting into the Misc field + 0x0002_0000 - 0x0007_FFFF Reserved to ease .7z compatibility + 0x0200_0000 - 0x07FF_FFFF Reserved to ease .7z compatibility + + +7. Cyclic Redundancy Checks + + There are several incompatible variations to calculate CRC32 + and CRC64. For simplicity and clarity, complete examples are + provided to calculate the checks as they are used in this file + format. Implementations may use different code as long as it + gives identical results. + + The program below reads data from standard input, calculates + the CRC32 and CRC64 values, and prints the calculated values + as big endian hexadecimal strings to standard output. 
+ + #include <sys/types.h> + #include <inttypes.h> + #include <stdio.h> + + uint32_t crc32_table[256]; + uint64_t crc64_table[256]; + + void + init(void) + { + static const uint32_t poly32 = UINT32_C(0xEDB88320); + static const uint64_t poly64 + = UINT64_C(0xC96C5795D7870F42); + + for (size_t i = 0; i < 256; ++i) { + uint32_t crc32 = i; + uint64_t crc64 = i; + + for (size_t j = 0; j < 8; ++j) { + if (crc32 & 1) + crc32 = (crc32 >> 1) ^ poly32; + else + crc32 >>= 1; + + if (crc64 & 1) + crc64 = (crc64 >> 1) ^ poly64; + else + crc64 >>= 1; + } + + crc32_table[i] = crc32; + crc64_table[i] = crc64; + } + } + + uint32_t + crc32(const uint8_t *buf, size_t size, uint32_t crc) + { + crc = ~crc; + for (size_t i = 0; i < size; ++i) + crc = crc32_table[buf[i] ^ (crc & 0xFF)] + ^ (crc >> 8); + return ~crc; + } + + uint64_t + crc64(const uint8_t *buf, size_t size, uint64_t crc) + { + crc = ~crc; + for (size_t i = 0; i < size; ++i) + crc = crc64_table[buf[i] ^ (crc & 0xFF)] + ^ (crc >> 8); + return ~crc; + } + + int + main() + { + init(); + + uint32_t value32 = 0; + uint64_t value64 = 0; + uint64_t total_size = 0; + uint8_t buf[8192]; + + while (1) { + const size_t buf_size = fread(buf, 1, 8192, stdin); + if (buf_size == 0) + break; + + total_size += buf_size; + value32 = crc32(buf, buf_size, value32); + value64 = crc64(buf, buf_size, value64); + } + + printf("Bytes: %" PRIu64 "\n", total_size); + printf("CRC-32: 0x%08" PRIX32 "\n", value32); + printf("CRC-64: 0x%016" PRIX64 "\n", value64); + + return 0; + } + + +8. References + +8.1. 
Normative References + + [RFC-1738] + Uniform Resource Locators (URL) + http://www.ietf.org/rfc/rfc1738.txt + + [RFC-2119] + Key words for use in RFCs to Indicate Requirement Levels + http://www.ietf.org/rfc/rfc2119.txt + + [RFC-2440] + OpenPGP Message Format + http://www.ietf.org/rfc/rfc2440.txt + + [RFC-3629] + UTF-8, a transformation format of ISO 10646 + http://www.ietf.org/rfc/rfc3629.txt + + [IANA-MIME] + MIME Media Types + http://www.iana.org/assignments/media-types/ + + +8.2. Informative References + + LZMA SDK - The original LZMA implementation + http://7-zip.org/sdk.html + + LZMA Utils - LZMA adapted to POSIX-like systems + http://tukaani.org/lzma/ + + [RFC-1952] + GZIP file format specification version 4.3 + http://www.ietf.org/rfc/rfc1952.txt + - Notation of byte boxes in section `2.1. Overall conventions' + + [GNU-tar] + GNU tar 1.16.1 manual + http://www.gnu.org/software/tar/manual/html_node/Blocking-Factor.html + - Node 9.4.2 `Blocking Factor', paragraph that begins + `gzip will complain about trailing garbage' + - Note that this URL points to the latest version of the + manual, and may some day not contain the note which is in + 1.16.1. For the exact version of the manual, download GNU + tar 1.16.1: ftp://ftp.gnu.org/pub/gnu/tar/tar-1.16.1.tar.gz + diff --git a/doc/history.txt b/doc/history.txt new file mode 100644 index 00000000..55293062 --- /dev/null +++ b/doc/history.txt @@ -0,0 +1,140 @@ + +LZMA Utils history +------------------ + +Tukaani distribution + + In 2005, there was a small group working on Tukaani distribution, which + was a Slackware fork. One of the project goals was to fit the distro on + a single 700 MiB ISO-9660 image. Using LZMA instead of gzip helped a + lot. Roughly speaking, one could fit data that took 1000 MiB in gzipped + form into 700 MiB with LZMA. Naturally compression ratio varied across + packages, but this was what we got on average. 
+ + Slackware packages have traditionally had .tgz as the filename suffix, + which is an abbreviation of .tar.gz. A logical naming for LZMA + compressed packages was .tlz, being an abbreviation of .tar.lzma. + + At the end of the year 2007, there's no distribution under the Tukaani + project anymore. Development of LZMA Utils still continues. Still, + there are .tlz packages around, because at least Vector Linux (a + Slackware based distribution) uses LZMA for its packages. + + First versions of the modified pkgtools used the LZMA_Alone tool from + Igor Pavlov's LZMA SDK as is. It was fine, because users wouldn't need + to interact with LZMA_Alone directly. But people soon wanted to use + LZMA for other files too, and the interface of LZMA_Alone wasn't + comfortable for those used to gzip and bzip2. + + +First steps of LZMA Utils + + The first version of LZMA Utils (4.22.0) included a shell script called + lzmash. It was a wrapper that had a gzip-like command line interface. It + used the LZMA_Alone tool from LZMA SDK to do all the real work. zgrep, + zdiff, and related scripts from gzip were adapted to work with LZMA and + were part of the first LZMA Utils release too. + + LZMA Utils 4.22.0 included also lzmadec, which was a small (less than + 10 KiB) decoder-only command line tool. It was written on top of the + decoder-only C code found from the LZMA SDK. lzmadec was convenient in + situations where LZMA_Alone (a few hundred KiB) would be too big. + + lzmash and lzmadec were written by Lasse Collin. + + +Second generation + + The lzmash script was an ugly and not very secure hack. The last + version of LZMA Utils to use lzmash was 4.27.1. + + LZMA Utils 4.32.0beta1 introduced a new lzma command line tool written + by Ville Koskinen. It was written in C++, and used the encoder and + decoder from C++ LZMA SDK with little modifications. This tool replaced + both the lzmash script and the LZMA_Alone command line tool in LZMA + Utils. 
+ + Introducing this new tool caused some temporary incompatibilities, + because LZMA_Alone executable was simply named lzma like the new + command line tool, but they had completely different command line + interface. The file format was still the same. + + Lasse wrote liblzmadec, which was a small decoder-only library based on + the C code found from LZMA SDK. liblzmadec had API similar to zlib, + although there were some significant differences, which made it + non-trivial to use it in some applications designed for zlib and + libbzip2. + + The lzmadec command line tool was converted to use liblzmadec. + + Alexandre Sauvé helped convert the build system to use GNU Autotools. + This made it easier to test for certain less portable features needed + by the new command line tool. + + Since the new command line tool never got completely finished (for + example, it didn't support LZMA_OPT environment variable), the intent + was to not call 4.32.x stable. Similarly, liblzmadec wasn't polished, + but appeared to work well enough, so some people started using it too. + + Because the development of the third generation of LZMA Utils was + delayed considerably (roughly two years), the 4.32.x branch had to be + kept maintained. It got some bug fixes now and then, and finally it was + decided to call it stable, although most of the missing features were + never added. + + +File format problems + + The file format used by LZMA_Alone was primitive. It was designed with + embedded systems in mind, and thus provided only minimal set of + features. The two biggest problems for non-embedded use were lack of + magic bytes and integrity check. + + Igor and Lasse started developing a new file format with some help from + Ville Koskinen, Mark Adler and Mikko Pouru. Designing the new format + took quite a long time. It was mostly because Lasse was quite slow at + getting things done due to personal reasons. + + Near the end of the year 2007 the new format was practically finished. 
+ Compared to LZMA_Alone format and the .gz format used by gzip, the new + .lzma format is quite complex as a whole. This means that tools having + *full* support for the new format would be larger and more complex than + the tools supporting only the old LZMA_Alone format. + + For the situations where the full support for the .lzma format wouldn't + be required (embedded systems, operating system kernels), the new + format has a well-defined subset, which is easy to support with small + amount of code. It wouldn't be as small as an implementation using the + LZMA_Alone format, but the difference shouldn't be significant. + + The new .lzma format allows dividing the data in multiple independent + blocks, which can be compressed and uncompressed independently. This + makes multi-threading possible with algorithms that aren't inherently + parallel (such as LZMA). There's also a central index of the sizes of + the blocks, which makes it possible to do limited random-access reading + with granularity of the block size. + + The new .lzma format uses the same filename suffix that was used for + LZMA_Alone files. The advantage is that users using the new tools won't + notice the change to the new format. The disadvantage is that the old + tools won't work with the new files. + + +Third generation + + LZMA Utils 4.42.0alphas drop the rest of the C++ LZMA SDK. The LZMA and + other included filters (algorithm implementations) are still directly + based on LZMA SDK, but ported to C. + + liblzma is now the core of LZMA Utils. It has zlib-like API, which + doesn't suffer from the problems of the API of liblzmadec. liblzma + supports not only LZMA, but several other filters, which together + can improve compression ratio even further with certain file types. + + The lzma and lzmadec command line tools have been rewritten. They use + liblzma to do the actual compressing or uncompressing. + + The development of LZMA Utils 4.42.x is still in alpha stage. 
Several + features are still missing or don't fully work yet. Documentation is + also very minimal. + diff --git a/doc/liblzma-advanced.txt b/doc/liblzma-advanced.txt new file mode 100644 index 00000000..d829a33a --- /dev/null +++ b/doc/liblzma-advanced.txt @@ -0,0 +1,324 @@ + +Advanced features of liblzma +---------------------------- + +0. Introduction + + Most developers need only the basic features of liblzma. These + features allow single-threaded encoding and decoding of .lzma files + in streamed mode. + + In some cases developers want more. The .lzma file format is + designed to allow multi-threaded encoding and decoding and limited + random-access reading. These features are possible in non-streamed + mode and limitedly also in streamed mode. + + To take advantage of these features, the application needs a custom + .lzma file format handler. liblzma provides a set of tools to ease + this task, but it's still quite a bit of work to get a good custom + .lzma handler done. + + +1. Where to begin + + Start by reading the .lzma file format specification. Understanding + the basics of the .lzma file structure is required to implement a + custom .lzma file handler and to understand the rest of this document. + + +2. The basic components + +2.1. Stream Header and tail + + Stream Header begins the .lzma Stream and Stream tail ends it. Stream + Header is defined in the file format specification, but Stream tail + isn't (thus I write "tail" with a lower-case letter). Stream tail is + simply the Stream Flags and the Footer Magic Bytes fields together. + It was done this way in liblzma, because the Block coders take care + of the rest of the stuff in the Stream Footer. + + For now, the size of Stream Header is fixed to 11 bytes. The header + <lzma/stream_flags.h> defines LZMA_STREAM_HEADER_SIZE, which you + should use instead of a hardcoded number. Similarly, Stream tail + is fixed to 3 bytes, and there is a constant LZMA_STREAM_TAIL_SIZE. 
+ + It is possible, that a future version of the .lzma format will have + variable-sized Stream Header and tail. As of writing, this seems so + unlikely though, that it was considered simplest to just use a + constant instead of providing functions to get and store the sizes + of the Stream Header and tail. + + +2.x. Stream tail + + For now, the size of Stream tail is fixed to 3 bytes. The header + <lzma/stream_flags.h> defines LZMA_STREAM_TAIL_SIZE, which you + should use instead of a hardcoded number. + + +3. Keeping track of size information + + The lzma_info_* functions found from <lzma/info.h> should ease the + task of keeping track of sizes of the Blocks and also the Stream + as a whole. Using these functions is strongly recommended, because + there are surprisingly many situations where an error can occur, + and these functions check for possible errors every time some new + information becomes available. + + If you find lzma_info_* functions lacking something that you would + find useful, please contact the author. + + +3.1. Start offset of the Stream + + If you are storing the .lzma Stream inside another file format, or + for some other reason are placing the .lzma Stream to somewhere + else than to the beginning of the file, you should tell the starting + offset of the Stream using lzma_info_start_offset_set(). + + The start offset of the Stream is used for two distinct purposes. + First, knowing the start offset of the Stream allows + lzma_info_alignment_get() to correctly calculate the alignment of + every Block. This information is given to the Block encoder, which + will calculate the size of Header Padding so that Compressed Data + is aligned at an optimal offset. + + Another use for start offset of the Stream is in random-access + reading. 
If you set the start offset of the Stream, lzma_info_locate() + will be able to calculate the offset relative to the beginning of the + file containing the Stream (instead of offset relative to the + beginning of the Stream). + + +3.2. Size of Stream Header + + While the size of Stream Header is constant (11 bytes) in the current + version of the .lzma file format, this may change in future. + + +3.3. Size of Header Metadata Block + + This information is needed when doing random-access reading, and + to verify the value of this field stored in Footer Metadata Block. + + +3.4. Total Size of the Data Blocks + + +3.5. Uncompressed Size of Data Blocks + + +3.6. Index + + + + +x. Alignment + + There are a few slightly different types of alignment issues when + working with .lzma files. + + The .lzma format doesn't strictly require any kind of alignment. + However, if the encoder carefully optimizes the alignment in all + situations, it can improve compression ratio, speed of the encoder + and decoder, and slightly help if the files get damaged and need + recovery. + + Alignment has the most significant effect compression ratio FIXME + + +x.1. Compression ratio + + Some filters take advantage of the alignment of the input data. + To get the best compression ratio, make sure that you feed these + filters correctly aligned data. + + Some filters (e.g. LZMA) don't necessarily mind too much if the + input doesn't match the preferred alignment. With these filters + the penalty in compression ratio depends on the specific type of + data being compressed. + + Other filters (e.g. PowerPC executable filter) won't work at all + with data that is improperly aligned. While the data can still + be de-filtered back to its original form, the benefit of the + filtering (better compression ratio) is completely lost, because + these filters expect certain patterns at properly aligned offsets. + The compression ratio may even be worse with incorrectly aligned input + than without the filter. 
+ + +x.1.1. Inter-filter alignment + + When there are multiple filters chained, checking the alignment can + be useful not only with the input of the first filter and output of + the last filter, but also between the filters. + + Inter-filter alignment is important especially with the Subblock filter. + + +x.1.2. Further compression with external tools + + This is a relatively rare situation in practice, but still worth + understanding. + + Let's say that there are several SPARC executables, which are each + filtered to separate .lzma files using only the SPARC filter. If + Uncompressed Size is written to the Block Header, the size of Block + Header may vary between the .lzma files. If no Padding is used in + the Block Header to correct the alignment, the starting offset of + the Compressed Data field will be differently aligned in different + .lzma files. + + All these .lzma files are archived into a single .tar archive. Due + to the nature of the .tar format, every file is aligned inside the + archive to an offset that is a multiple of 512 bytes. + + The .tar archive is compressed into a new .lzma file using the LZMA + filter with options, that prefer input alignment of four bytes. Now + if the independent .lzma files don't have the same alignment of + the Compressed Data fields, the LZMA filter will be unable to take + advantage of the input alignment between the files in the .tar + archive, which reduces compression ratio. + + Thus, even if you have only single Block per file, it can be good for + compression ratio to align the Compressed Data to optimal offset. + + +x.2. Speed + + Most modern computers are faster when multi-byte data is located + at aligned offsets in RAM. Proper alignment of the Compressed Data + fields can slightly increase the speed of some filters. + + +x.3. Recovery + + Aligning every Block Header to start at an offset with big enough + alignment may ease or at least speed up recovery of broken files. + + +y. Typical usage cases + +y.x. 
Parsing the Stream backwards + + You may need to parse the Stream backwards if you need to get + information such as the sizes of the Stream, Index, or Extra. + The basic procedure to do this follows. + + Locate the end of the Stream. If the Stream is stored as is in a + standalone .lzma file, simply seek to the end of the file and start + reading backwards using appropriate buffer size. The file format + specification allows arbitrary amount of Footer Padding (zero or more + NUL bytes), which you skip before trying to decode the Stream tail. + + Once you have located the end of the Stream (a non-NUL byte), make + sure you have at least the last LZMA_STREAM_TAIL_SIZE bytes of the + Stream in a buffer. If there aren't enough bytes left from the file, + the file is too small to contain a valid Stream. Decode the Stream + tail using lzma_stream_tail_decoder(). Store the offset of the first + byte of the Stream tail; you will need it later. + + You may now want to do some internal verifications e.g. if the Check + type is supported by the liblzma build you are using. + + Decode the Backward Size field with lzma_vli_reverse_decode(). The + field is at maximum of LZMA_VLI_BYTES_MAX bytes long. Check that + Backward Size is not zero. Store the offset of the first byte of + the Backward Size; you will need it later. + + Now you know the Total Size of the last Block of the Stream. It's the + value of Backward Size plus the size of the Backward Size field. Note + that you cannot use lzma_vli_size() to calculate the size since there + might be padding; you need to use the real observed size of the + Backward Size field. + + At this point, the operation continues differently for Single-Block + and Multi-Block Streams. + + +y.x.1. Single-Block Stream + + There might be Uncompressed Size field present in the Stream Footer. + You cannot know it for sure unless you have already parsed the Block + Header earlier. 
For security reasons, you probably want to try to + decode the Uncompressed Size field, but you must not indicate any + error if decoding fails. Later you can give the decoded Uncompressed + Size to Block decoder if Uncompressed Size isn't otherwise known; + this prevents it from producing too much output in case of (possibly + intentionally) corrupt file. + + Calculate the start offset of the Stream: + + backward_offset - backward_size - LZMA_STREAM_HEADER_SIZE + + backward_offset is the offset of the first byte of the Backward Size + field. Remember to check for integer overflows, which can occur with + invalid input files. + + Seek to the beginning of the Stream. Decode the Stream Header using + lzma_stream_header_decoder(). Verify that the decoded Stream Flags + match the values found from Stream tail. You can use the + lzma_stream_flags_is_equal() macro for this. + + Decode the Block Header. Verify that it isn't a Metadata Block, since + Single-Block Streams cannot have Metadata. If Uncompressed Size is + present in the Block Header, the value you tried to decode from the + Stream Footer must be ignored, since Uncompressed Size wasn't actually + present there. If Block Header doesn't have Uncompressed Size, and + decoding the Uncompressed Size field from the Stream Footer failed, + the file is corrupt. + + If you were only looking for the Uncompressed Size of the Stream, + you now got that information, and you can stop processing the Stream. + + To decode the Block, the same instructions apply as described in + FIXME. However, because you have some extra known information decoded + from the Stream Footer, you should give this information to the Block + decoder so that it can verify it while decoding: + - If Uncompressed Size is not present in the Block Header, set + lzma_options_block.uncompressed_size to the value you decoded + from the Stream Footer. 
+ - Always set lzma_options_block.total_size to backward_size + + size_of_backward_size (you calculated this sum earlier already). + + +y.x.2. Multi-Block Stream + + Calculate the start offset of the Footer Metadata Block: + + backward_offset - backward_size + + backward_offset is the offset of the first byte of the Backward Size + field. Remember to check for integer overflows, which can occur with + broken input files. + + Decode the Block Header. Verify that it is a Metadata Block. Set + lzma_options_block.total_size to backward_size + size_of_backward_size + (you calculated this sum earlier already). Then decode the Footer + Metadata Block. + + Store the decoded Footer Metadata to lzma_info structure using + lzma_info_set_metadata(). Set also the offset of the Backward Size + field using lzma_info_size_set(). Then you can get the start offset + of the Stream using lzma_info_size_get(). Note that any of these steps + may fail so don't omit error checking. + + Seek to the beginning of the Stream. Decode the Stream Header using + lzma_stream_header_decoder(). Verify that the decoded Stream Flags + match the values found from Stream tail. You can use the + lzma_stream_flags_is_equal() macro for this. + + If you were only looking for the Uncompressed Size of the Stream, + it's possible that you already have it now. If Uncompressed Size (or + whatever information you were looking for) isn't available yet, + continue by decoding also the Header Metadata Block. (If some + information is missing, the Header Metadata Block has to be present.) + + Decoding the Data Blocks goes the same way as described in FIXME. + + +y.x.3. Variations + + If you know the offset of the beginning of the Stream, you may want + to parse the Stream Header before parsing the Stream tail. 
+ diff --git a/doc/liblzma-hacking.txt b/doc/liblzma-hacking.txt new file mode 100644 index 00000000..64390bcb --- /dev/null +++ b/doc/liblzma-hacking.txt @@ -0,0 +1,112 @@ + +Hacking liblzma +--------------- + +0. Preface + + This document gives some overall information about the internals of + liblzma, which should make it easier to start reading and modifying + the code. + + +1. Programming language + + liblzma was written in C99. If you use GCC, this means that you need + at least GCC 3.x.x. GCC 2 isn't and won't be supported. + + Some GCC-specific extensions are used *conditionally*. They aren't + required to build a full-featured library. Don't make the code rely + on any non-standard compiler extensions or even C99 features that + aren't portable between almost-C99 compatible compilers (for example + non-static inlines). + + The public API headers are in C89. This is to avoid frustrating those + who maintain programs, which are strictly in C89 or C++. + + An assumption about sizeof(size_t) is made. If this assumption is + wrong, some porting is probably needed: + + sizeof(uint32_t) <= sizeof(size_t) <= sizeof(uint64_t) + + +2. Internal vs. external API + + + + Input Output + v Application ^ + | liblzma public API | + | Stream coder | + | Block coder | + | Filter coder | + | ... | + v Filter coder ^ + + + Application + `-- liblzma public API + `-- Stream coder + |-- Stream info handler + |-- Stream Header coder + |-- Block Header coder + | `-- Filter Flags coder + |-- Metadata coder + | `-- Block coder + | `-- Filter 0 + | `-- Filter 1 + | ... + |-- Data Block coder + | `-- Filter 0 + | `-- Filter 1 + | ... + `-- Stream tail coder + + + +x. Designing new filters + + All filters must be designed so that the decoder cannot consume + an arbitrary amount of input without producing any decoded output. Failing + to follow this rule makes liblzma vulnerable to DoS attacks if + untrusted files are decoded (usually they are untrusted). 
+ + An example should clarify the reason behind this requirement: There + are two filters in the chain. The decoder of the first filter produces + huge amount of output (many gigabytes or more) with a few bytes of + input, which gets passed to the decoder of the second filter. If the + data passed to the second filter is interpreted as something that + produces no output (e.g. padding), the filter chain as a whole + produces no output and consumes no input for a long period of time. + + The above problem was present in the first versions of the Subblock + filter. A tiny .lzma file could have taken several years to decode + while it wouldn't produce any output at all. The problem was fixed + by adding limits for number of consecutive Padding bytes, and requiring + that some decoded output must be produced between Set Subfilter and + Unset Subfilter. + + +x. Implementing new filters + + If the filter supports embedding End of Payload Marker, make sure that + when your filter detects End of Payload Marker, + - the usage of End of Payload Marker is actually allowed (i.e. End + of Input isn't used); and + - it also checks that there is no more input coming from the next + filter in the chain. + + The second requirement is slightly tricky. It's possible that the next + filter hasn't returned LZMA_STREAM_END yet. It may even need a few + bytes more input before it will do so. You need to give it as much + input as it needs, and verify that it doesn't produce any output. + + Don't call the next filter in the chain after it has returned + LZMA_STREAM_END (except in encoder if action == LZMA_SYNC_FLUSH). + It will result in undefined behavior. + + Be pedantic. If the input data isn't exactly valid, reject it. + + At the moment, liblzma isn't modular. You will need to edit several + files in src/liblzma/common to include support for a new filter. grep + for LZMA_FILTER_LZMA to locate the files needing changes. 
+ diff --git a/doc/liblzma-intro.txt b/doc/liblzma-intro.txt new file mode 100644 index 00000000..9cbd63a9 --- /dev/null +++ b/doc/liblzma-intro.txt @@ -0,0 +1,188 @@ + +Introduction to liblzma +----------------------- + +Writing applications to work with liblzma + + liblzma API is split in several subheaders to improve readability and + maintenance. The subheaders must not be #included directly; simply + use `#include <lzma.h>' instead. + + Those who have used zlib should find liblzma's API easy to use. + To developers who haven't used zlib before, I recommend learning + zlib first, because zlib has excellent documentation. + + While the API is similar to that of zlib, there are some major + differences, which are summarized below. + + For basic stream encoding, zlib has three functions (deflateInit(), + deflate(), and deflateEnd()). Similarly, there are three functions + for stream decoding (inflateInit(), inflate(), and inflateEnd()). + liblzma has only single coding and ending function. Thus, to + encode one may use, for example, lzma_stream_encoder_single(), + lzma_code(), and lzma_end(). Similarly for decoding, one may + use lzma_auto_decoder(), lzma_code(), and lzma_end(). + + zlib has deflateReset() and inflateReset() to reset the stream + structure without reallocating all the memory. In liblzma, all + coder initialization functions are like zlib's reset functions: + the first-time initializations are done with the same functions + as the reinitializations (resetting). + + To make all this work, liblzma needs to know when lzma_stream + doesn't already point to an allocated and initialized coder. + This is achieved by initializing lzma_stream structure with + LZMA_STREAM_INIT (static initialization) or LZMA_STREAM_INIT_VAR + (for example when new lzma_stream has been allocated with malloc()). + This initialization should be done exactly once per lzma_stream + structure to avoid leaking memory. 
Calling lzma_end() will leave + lzma_stream into a state comparable to the state achieved with + LZMA_STREAM_INIT and LZMA_STREAM_INIT_VAR. + + Example probably clarifies a lot. With zlib, compression goes + roughly like this: + + z_stream strm; + deflateInit(&strm, level); + deflate(&strm, Z_RUN); + deflate(&strm, Z_RUN); + ... + deflate(&strm, Z_FINISH); + deflateEnd(&strm) or deflateReset(&strm) + + With liblzma, it's slightly different: + + lzma_stream strm = LZMA_STREAM_INIT; + lzma_stream_encoder_single(&strm, &options); + lzma_code(&strm, LZMA_RUN); + lzma_code(&strm, LZMA_RUN); + ... + lzma_code(&strm, LZMA_FINISH); + lzma_end(&strm) or reinitialize for new coding work + + Reinitialization in the last step can be any function that can + initialize lzma_stream; it doesn't need to be the same function + that was used for the previous initialization. If it is the same + function, liblzma will usually be able to re-use most of the + existing memory allocations (depends on how much the initialization + options change). If you reinitialize with different function, + liblzma will automatically free the memory of the previous coder. + + +File formats + + liblzma supports multiple container formats for the compressed data. + Different initialization functions initialize the lzma_stream to + process different container formats. See the details from the public + header files. + + The following functions are the most commonly used: + + - lzma_stream_encoder_single(): Encodes Single-Block Stream; this + is the recommended format for most purposes. + + - lzma_alone_encoder(): Useful if you need to encode into the + legacy LZMA_Alone format. + + - lzma_auto_decoder(): Decoder that automatically detects the + file format; recommended when you decode compressed files on + disk, because this way compatibility with the legacy LZMA_Alone + format is transparent. 
+ + - lzma_stream_decoder(): Decoder for Single- and Multi-Block + Streams; this is good if you want to accept only .lzma Streams. + + +Filters + + liblzma supports multiple filters (algorithm implementations). The new + .lzma format supports filter-chain having up to seven filters. In the + filter chain, the output of one filter is input of the next filter in + the chain. The legacy LZMA_Alone format supports only one filter, and + that must always be LZMA. + + General-purpose compression: + + LZMA The main algorithm of liblzma (surprise!) + + Branch/Call/Jump filters for executables: + + x86 This filter is known as BCJ in 7-Zip + IA64 IA-64 (Itanium) + PowerPC Big endian PowerPC + ARM + ARM-Thumb + SPARC + + Other filters: + + Copy Dummy filter that simply copies all the data + from input to output. + + Subblock Multi-purpose filter, that can + - embed End of Payload Marker if the previous + filter in the chain doesn't support it; and + - apply Subfilters, which filter only part + of the same compressed Block in the Stream. + + Branch/Call/Jump filters never change the size of the data. They + should usually be used as a pre-filter for some compression filter + like LZMA. + + +Integrity checks + + The .lzma Stream format uses CRC32 as the integrity check for + different file format headers. It is possible to omit CRC32 from + the Block Headers, but not from Stream Header. This is the reason + why CRC32 code cannot be disabled when building liblzma (in addition, + the LZMA encoder uses CRC32 for hashing, so that's another reason). + + The integrity check of the actual data is calculated from the + uncompressed data. This check can be CRC32, CRC64, or SHA256. + It can also be omitted completely, although that usually is not + a good thing to do. There are free IDs left, so support for new + check algorithms can be added later. + + +API and ABI stability + + The API and ABI of liblzma aren't stable yet, although no huge + changes should happen. 
One potential place for change is the + lzma_options_subblock structure. + + In the 4.42.0alpha phase, the shared library version number won't + be updated even if the ABI breaks. I don't want to track the ABI changes + yet. Just rebuild everything when you upgrade liblzma until we get + to the beta stage. + + +Size of the library + + While liblzma isn't huge, it is quite far from the smallest possible + LZMA implementation: the full liblzma binary (with support for all + filters and other features) is way over 100 KiB, but the plain raw + LZMA decoder is only 5-10 KiB. + + To decrease the size of the library, you can omit parts of the library + by passing certain options to the `configure' script. Disabling + everything but the decoders of the required filters will usually give + you a small enough library, but if you need a decoder for example + embedded in the operating system kernel, the code from liblzma probably + isn't suitable as is. + + If you need a minimal implementation supporting .lzma Streams, you + may need to do a partial rewrite. liblzma uses a stateful API like zlib. + That increases the size of the library. Using a callback API or an even + simpler buffer-to-buffer API would allow a smaller implementation. + + LZMA SDK contains a smaller LZMA decoder, written in ANSI C, than + liblzma does, so you may want to take a look at that code. However, + it doesn't (at least not yet) support the new .lzma Stream format. + + +Documentation + + There's no other documentation than the public headers and this + text yet. Real docs will be written some day, I hope. + diff --git a/doc/liblzma-security.txt b/doc/liblzma-security.txt new file mode 100644 index 00000000..487637ed --- /dev/null +++ b/doc/liblzma-security.txt @@ -0,0 +1,219 @@ + +Using liblzma securely +---------------------- + +0. Introduction + + This document discusses how to use liblzma securely.
There are issues + that don't apply to zlib or libbzip2, so reading this document is + strongly recommended even for those who are very familiar with zlib + or libbzip2. + + While making liblzma itself as secure as possible is essential, it's + outside the scope of this document. + + +1. Memory usage + + The memory usage of liblzma varies a lot. + + +1.1. Problem sources + +1.1.1. Block coder + + The memory requirements of the Block encoder depend on the used filters + and their settings. The memory requirements of the Block decoder + depend on which filters and with which filter settings the Block + was encoded. Usually the memory requirements of a decoder are equal to + or less than the requirements of the encoder with the same settings. + + While the typical memory requirement to decode a Block is from a few + hundred kilobytes to tens of megabytes, a maliciously constructed + file can require a lot more RAM to decode. With the current filters, + the maximum amount is about 7 GiB. If you use multi-threaded decoding, + every Block can require this amount of RAM, thus a four-threaded + decoder could suddenly try to allocate 28 GiB of RAM. + + If you don't limit the maximum memory usage in any way, and there are + no resource limits set on the operating system side, one malicious + input file can run the system out of memory, or at least make it swap + badly for a long time. This is exceptionally bad on servers, e.g. an + email server doing virus scanning on incoming messages. + + +1.1.2. Metadata decoder + + Multi-Block .lzma files contain at least one Metadata Block. + Externally the Metadata Blocks are similar to Data Blocks, so all + the issues mentioned about memory usage of Data Blocks apply to + Metadata Blocks too. + + The uncompressed content of Metadata Blocks contains information about + the Stream as a whole, and optionally some Extra Records. The + information about the Stream is kept in liblzma's internal data + structures in RAM.
Extra Records can contain arbitrary data. They are + not interpreted by liblzma, but liblzma will provide them to the + application in uninterpreted form if the application wishes so. + + Usually the Uncompressed Size of a Metadata Block is small. Even in + extreme cases, it shouldn't be much bigger than a few megabytes. Once + the Metadata has been parsed into native data structures in liblzma, + it usually takes a little more memory than in the encoded form. For + all normal files, this is no problem, since the resulting memory usage + won't be too much. + + The problem is that a maliciously constructed Metadata Block can + contain a huge amount of "information", which liblzma will try to store + in its internal data structures. This may cause liblzma to allocate + all the available RAM unless some kind of resource usage limits are + applied. + + Note that the Extra Records in Metadata are always parsed, but + memory is allocated for them only if the application has requested + liblzma to provide the Extra Records to the application. + + +1.2. Solutions + + If you need to decode files from untrusted sources (most people do), + you must limit the memory usage to avoid denial of service (DoS) + conditions caused by malicious input files. + + The first step is to find out how much memory you are allowed to consume + at maximum. This may be a hardcoded constant or derived from the + available RAM; whatever is appropriate in the application. + + The simplest solution is to use setrlimit() if the kernel supports + RLIMIT_AS, which limits the memory usage of the whole process. + For more portable and fine-grained limitting, you can use + memory limitter functions found from <lzma/memlimit.h>. + + +1.2.1. Encoder + + lzma_memory_usage() will give you a rough estimate of the memory + usage of the given filter chain.
To dramatically simplify the internal + implementation, this function doesn't take into account all the small + helper data structures needed in various places; only the structures + with significant memory usage are taken into account. Still, the + accuracy of this function should be well within a mebibyte. + + The Subblock filter is a special case. If a Subfilter has been + specified, it isn't taken into account when lzma_memory_usage() + calculates the memory usage. You need to calculate the memory usage + of the Subfilter separately. + + Keeping track of Blocks in a Multi-Block Stream takes a few dozen + bytes of RAM per Block (size of the lzma_index structure plus overhead + of malloc()). It isn't a good idea to put tens of thousands of Blocks + into a Stream unless you have a very good reason to do so (compressed + dictionary could be an example of such situation). + + Also keep the number and sizes of Extra Records sane. If you produce + the list of Extra Records automatically from some untrusted source, + you should not only validate the content of these Records, but also + their memory usage. + + +1.2.2. Decoder + + A single-threaded decoder should simply use a memory limitter and + indicate an error if it runs out of memory. + + Memory-limitting with multi-threaded decoding is tricky. The simple + solution is to divide the maximum allowed memory usage with the + maximum allowed threads, and give each Block decoder their own + independent lzma_memory_limitter. The drawback is that if one Block + needs notably more RAM than any other Block, the decoder will run out + of memory when in reality there would be plenty of free RAM. + + An attractive alternative would be using shared lzma_memory_limitter. + Depending on the application and the expected type of input, this may + either be the best solution or a source of hard-to-repeat problems. + Consider the following requirements: + - You use at maximum of n threads. 
+ - x(i) is the decoder memory requirement of Block number i + in an expected input Stream. + - The memory limitter is set to a higher value than the sum of the n + highest values of x(i). + + (If you are better at explaining the above conditions, please + contribute your improved version.) + + If the above conditions aren't met, it is possible that the decoding + will fail unpredictably. That is, on the same machine using the same + settings, the decoding may sometimes succeed and sometimes fail. This + is because sometimes threads may run so that the Blocks with the highest + memory usage happen to be decoded at the same time. + + Most .lzma files have all the Blocks encoded with identical settings, + or at least the memory usage won't vary dramatically. That's why most + multi-threaded decoders probably want to use the simple "separate + lzma_memory_limitter for each thread" solution, possibly falling back + to single-threaded mode in case the per-thread memory limits aren't + enough in multi-threaded mode. + +FIXME: Memory usage of Stream info. + +[ + +] + + +2. Huge uncompressed output + +2.1. Data Blocks + + Decoding a tiny .lzma file can produce a huge amount of uncompressed + output. There is an example file of 45 bytes, which decodes to 64 PiB + (that's 2^56 bytes). Uncompressing such a file to disk is likely to + fill even a big disk array. If the data is written to a pipe, it + may not fill the disk, but would still take a very long time to finish. + + To avoid denial of service conditions caused by a huge amount of + uncompressed output, applications using liblzma should use some method + to limit the amount of output produced. The exact method depends on + the application. + + All valid .lzma Streams make it possible to find out the uncompressed + size of the Stream without actually uncompressing the data. This + information is available in at least one of the Metadata Blocks.
+ Once the uncompressed size is parsed, the decoder can verify that + it doesn't exceed certain limits (e.g. available disk space). + + When the uncompressed size is known, the decoder can actively keep + track of the amount of output produced so far, and check that it doesn't + exceed the known uncompressed size. If it does, the file is + known to be corrupt and an error should be indicated without + continuing to decode the rest of the file. + + Unfortunately, finding the uncompressed size beforehand is often + possible only in non-streamed mode, because the needed information + could be in the Footer Metadata Block, which (obviously) is at the + end of the Stream. In purely streamed mode decoding, one may need to + use some rough arbitrary limits to prevent the problems described in + the beginning of this section. + + +2.2. Metadata + + Metadata is stored in Metadata Blocks, which are very similar to + Data Blocks. Thus, the uncompressed size can be huge just like with + Data Blocks. The difference is that the contents of Metadata Blocks + aren't given to the application as is, but parsed by liblzma. Still, + reading through a huge Metadata can take a very long time, effectively + creating a denial of service like piping a decoded Data Block to + another process would do. + + At first it would seem that using a memory limitter would prevent + this issue as a side effect. But it does so only if the application + requests liblzma to allocate the Extra Records and provide them to + the application. If Extra Records aren't requested, they aren't + allocated either. Still, the Extra Records are being read through + to validate that the Metadata is in the proper format. + + The solution is to limit the Uncompressed Size of a Metadata Block + to some relatively large value. This will make liblzma give an + error when the given limit is reached.
+ diff --git a/doc/lzma-intro.txt b/doc/lzma-intro.txt new file mode 100644 index 00000000..bde8a059 --- /dev/null +++ b/doc/lzma-intro.txt @@ -0,0 +1,107 @@ + +Introduction to the lzma command line tool +------------------------------------------ + +Overview + + The lzma command line tool is similar to gzip and bzip2, but for + compressing and uncompressing .lzma files. + + +Supported file formats + + By default, the tool creates files in the new .lzma format. This can + be overridden with the --format=FMT command line option. Use --format=alone + to create files in the old LZMA_Alone format. + + By default, the tool uncompresses both the new .lzma format and + LZMA_Alone format. This is to make it transparent to switch from + the old LZMA_Alone format to the new .lzma format. Since both + formats use the same filename suffix, the average user should never + notice which format was used. + + +Differences to gzip and bzip2 + + Standard input and output + + Both gzip and bzip2 refuse to write compressed data to a terminal and + read compressed data from a terminal. With gzip (but not with bzip2), + this can be overridden with the `--force' option. lzma follows the + behavior of gzip here. + + Usage of the LZMA_OPT environment variable + + gzip and bzip2 read GZIP and BZIP2 environment variables at startup. + These variables may contain extra command line options. + + gzip and bzip2 allow passing not only options, but also the end-of-options + indicator (`--') and filenames via the environment variable. No quoting + is supported with the filenames. + + Here are examples with gzip. bzip2 behaves identically. + + bash$ echo asdf > 'foo bar' + bash$ GZIP='"foo bar"' gzip + gzip: "foo: No such file or directory + gzip: bar": No such file or directory + + bash$ GZIP=-- gzip --help + gzip: --help: No such file or directory + + lzma silently ignores all non-option arguments given via the + environment variable LZMA_OPT.
Like on the command line, everything + after `--' is taken as non-options, and thus ignored in LZMA_OPT. + + bash$ LZMA_OPT='--help' lzma --version # Displays help + bash$ LZMA_OPT='-- --help' lzma --version # Displays version + + +Filter chain presets + + Like in gzip and bzip2, lzma supports numbered presets from 1 to 9 + where 1 is the fastest and 9 the best compression. 1 and 2 are for + fast compressing with small memory usage, 3 to 6 for good compression + ratio with medium memory usage, and 7 to 9 for excellent compression + ratio with higher memory requirements. The default is 7 if memory + usage limit allows. + + In future, there will probably be an option like --preset=NAME, which + will contain more special presets for specific file types. + + It's also possible that there will be some heuristics to select good + filters. For example, the tool could detect when a .tar archive is + being compressed, and enable x86 filter only for those files in the + .tar archive that are ELF or PE executables for x86. + + +Specifying custom filter chains + + Custom filter chains are specified by using long options with the name + of the filters in correct order. For example, to pass the input data to + the x86 filter and the output of that to the LZMA filter, the following + command will do: + + lzma --x86 --lzma filename + + Some filters accept options, which are specified as a comma-separated + list of key=value pairs: + + lzma --delta=distance=4 --lzma=dict=4Mi,lc=8,lp=2 filename + + +Memory usage control + + By default, the command line tool limits memory usage to 1/3 of the + available physical RAM. If no preset or custom filter chain has been + given, the default preset will be used. If the memory limit is too + low for the default preset, the tool will silently switch to lower + preset. + + When a preset or a custom filter chain has been specified and the + memory limit is too low, an error message is displayed and no files + are processed. 
+ + If the decoder hits the memory usage limit, an error is displayed and + no more files are processed. + diff --git a/extra/scanlzma/scanlzma.c b/extra/scanlzma/scanlzma.c new file mode 100644 index 00000000..3612f9df --- /dev/null +++ b/extra/scanlzma/scanlzma.c @@ -0,0 +1,85 @@ +/* + scanlzma, scan for lzma compressed data in stdin and echo it to stdout. + Copyright (C) 2006 Timo Lindfors + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. +*/ + +/* Usage example: + + $ wget http://www.wifi-shop.cz/Files/produkty/wa2204/wa2204av1.4.1.zip + $ unzip wa2204av1.4.1.zip + $ gcc scanlzma.c -o scanlzma -Wall + $ ./scanlzma 0 < WA2204-FW1.4.1/linux-1.4.bin | lzma -c -d | strings | grep -i "copyright" + UpdateDD version 2.5, Copyright (C) 2005 Philipp Benner. + Copyright (C) 2005 Philipp Benner. + Copyright (C) 2005 Philipp Benner. + mawk 1.3%s%s %s, Copyright (C) Michael D. Brennan + # Copyright (C) 1998, 1999, 2001 Henry Spencer. + ... + +*/ + + +/* LZMA compressed file format */ +/* --------------------------- */ +/* Offset Size Description */ +/* 0 1 Special LZMA properties for compressed data */ +/* 1 4 Dictionary size (little endian) */ +/* 5 8 Uncompressed size (little endian). 
-1 means unknown size */ +/* 13 Compressed data */ + +#define BUFSIZE 4096 + +int find_lzma_header(unsigned char *buf) { + return (buf[0] < 0xE1 + && buf[0] == 0x5d + && buf[4] < 0x20 + && (memcmp (buf + 10 , "\x00\x00\x00", 3) == 0 + || (memcmp (buf + 5, "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF", 8) == 0))); +} + +int main(int argc, char *argv[]) { + char buf[BUFSIZE]; + int ret, i, numlzma, blocks=0; + + if (argc != 2) { + printf("usage: %s numlzma < infile | lzma -c -d > outfile\n" + "where numlzma is index of lzma file to extract, starting from zero.\n", + argv[0]); + exit(1); + } + numlzma = atoi(argv[1]); + + for (;;) { + /* Read data. */ + ret = fread(buf, BUFSIZE, 1, stdin); + if (ret != 1) + break; + + /* Scan for signature. */ + for (i = 0; i<BUFSIZE-23; i++) { + if (find_lzma_header(buf+i) && numlzma-- <= 0) { + fwrite(buf+i, (BUFSIZE-i), 1, stdout); + for (;;) { + int ch; + ch = getchar(); + if (ch == EOF) + exit(0); + putchar(ch); + } + + } + } + blocks++; + } + return 1; +} diff --git a/lib/Makefile.am b/lib/Makefile.am new file mode 100644 index 00000000..46e3a4a9 --- /dev/null +++ b/lib/Makefile.am @@ -0,0 +1,40 @@ +## +## Copyright (C) 2004-2007 Free Software Foundation, Inc. +## Copyright (C) 2007 Lasse Collin +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2 of the License, or +## (at your option) any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. +## + +## Not using gnulib-tool, at least for now. Less mess this way. + +## We need two builds of libgnu: one with NLS and one without. +## This is because lzma uses NLS but lzmadec doesn't, while +## both need GNU getopt_long(). 
+noinst_LIBRARIES = libgnu.a libgnu_nls.a + +libgnu_a_SOURCES = +libgnu_a_DEPENDENCIES = $(LIBOBJS) +libgnu_a_LIBADD = $(LIBOBJS) +libgnu_a_CPPFLAGS = -DDISABLE_NLS=1 + +libgnu_nls_a_SOURCES = +libgnu_nls_a_DEPENDENCIES = $(LIBOBJS) +libgnu_nls_a_LIBADD = $(LIBOBJS) + +EXTRA_DIST = gettext.h getopt_.h getopt.c getopt1.c getopt_int.h +BUILT_SOURCES = $(GETOPT_H) +MOSTLYCLEANFILES = getopt.h getopt.h-t + +getopt.h: getopt_.h + { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */'; \ + cat $(srcdir)/getopt_.h; \ + } > $@-t + mv -f $@-t $@ diff --git a/lib/getopt.c b/lib/getopt.c new file mode 100644 index 00000000..3580ad82 --- /dev/null +++ b/lib/getopt.c @@ -0,0 +1,1191 @@ +/* Getopt for GNU. + NOTE: getopt is now part of the C library, so if you don't know what + "Keep this file name-space clean" means, talk to drepper@gnu.org + before changing it! + Copyright (C) 1987,88,89,90,91,92,93,94,95,96,98,99,2000,2001,2002,2003,2004,2006 + Free Software Foundation, Inc. + This file is part of the GNU C Library. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
*/ + +#ifndef _LIBC +# include <config.h> +#endif + +#include "getopt.h" + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#ifdef __VMS +# include <unixlib.h> +#endif + +#ifdef _LIBC +# include <libintl.h> +#else +# include "gettext.h" +# define _(msgid) gettext (msgid) +#endif + +#if defined _LIBC && defined USE_IN_LIBIO +# include <wchar.h> +#endif + +#ifndef attribute_hidden +# define attribute_hidden +#endif + +/* Unlike standard Unix `getopt', functions like `getopt_long' + let the user intersperse the options with the other arguments. + + As `getopt_long' works, it permutes the elements of ARGV so that, + when it is done, all the options precede everything else. Thus + all application programs are extended to handle flexible argument order. + + Using `getopt' or setting the environment variable POSIXLY_CORRECT + disables permutation. + Then the application's behavior is completely standard. + + GNU application programs can use a third alternative mode in which + they can distinguish the relative order of options and other arguments. */ + +#include "getopt_int.h" + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +char *optarg; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns -1, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +/* 1003.2 says this must be 1 before any call. 
*/ +int optind = 1; + +/* Callers store zero here to inhibit the error message + for unrecognized options. */ + +int opterr = 1; + +/* Set to an option character which was unrecognized. + This must be initialized on some systems to avoid linking in the + system's own getopt implementation. */ + +int optopt = '?'; + +/* Keep a global copy of all internal members of getopt_data. */ + +static struct _getopt_data getopt_data; + + +#if defined HAVE_DECL_GETENV && !HAVE_DECL_GETENV +extern char *getenv (); +#endif + +#ifdef _LIBC +/* Stored original parameters. + XXX This is no good solution. We should rather copy the args so + that we can compare them later. But we must not use malloc(3). */ +extern int __libc_argc; +extern char **__libc_argv; + +/* Bash 2.0 gives us an environment variable containing flags + indicating ARGV elements that should not be considered arguments. */ + +# ifdef USE_NONOPTION_FLAGS +/* Defined in getopt_init.c */ +extern char *__getopt_nonoption_flags; +# endif + +# ifdef USE_NONOPTION_FLAGS +# define SWAP_FLAGS(ch1, ch2) \ + if (d->__nonoption_flags_len > 0) \ + { \ + char __tmp = __getopt_nonoption_flags[ch1]; \ + __getopt_nonoption_flags[ch1] = __getopt_nonoption_flags[ch2]; \ + __getopt_nonoption_flags[ch2] = __tmp; \ + } +# else +# define SWAP_FLAGS(ch1, ch2) +# endif +#else /* !_LIBC */ +# define SWAP_FLAGS(ch1, ch2) +#endif /* _LIBC */ + +/* Exchange two adjacent subsequences of ARGV. + One subsequence is elements [first_nonopt,last_nonopt) + which contains all the non-options that have been skipped so far. + The other is elements [last_nonopt,optind), which contains all + the options processed since those non-options were skipped. + + `first_nonopt' and `last_nonopt' are relocated so that they describe + the new indices of the non-options in ARGV after they are moved. 
*/ + +static void +exchange (char **argv, struct _getopt_data *d) +{ + int bottom = d->__first_nonopt; + int middle = d->__last_nonopt; + int top = d->optind; + char *tem; + + /* Exchange the shorter segment with the far end of the longer segment. + That puts the shorter segment into the right place. + It leaves the longer segment in the right place overall, + but it consists of two parts that need to be swapped next. + Here [bottom,middle) is the run of skipped non-options and + [middle,top) is the run of options scanned after them. */ + +#if defined _LIBC && defined USE_NONOPTION_FLAGS + /* First make sure the handling of the `__getopt_nonoption_flags' + string can work normally. Our top argument must be in the range + of the string. */ + if (d->__nonoption_flags_len > 0 && top >= d->__nonoption_flags_max_len) + { + /* We must extend the array. The user plays games with us and + presents new arguments. If the allocation fails, both length + fields are set to zero instead of growing the string. */ + char *new_str = malloc (top + 1); + if (new_str == NULL) + d->__nonoption_flags_len = d->__nonoption_flags_max_len = 0; + else + { + memset (__mempcpy (new_str, __getopt_nonoption_flags, + d->__nonoption_flags_max_len), + '\0', top + 1 - d->__nonoption_flags_max_len); + d->__nonoption_flags_max_len = top + 1; + __getopt_nonoption_flags = new_str; + } + } +#endif + + while (top > middle && middle > bottom) + { + if (top - middle > middle - bottom) + { + /* Bottom segment is the short one. */ + int len = middle - bottom; + register int i; + + /* Swap it with the top part of the top segment. */ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[top - (middle - bottom) + i]; + argv[top - (middle - bottom) + i] = tem; + SWAP_FLAGS (bottom + i, top - (middle - bottom) + i); + } + /* Exclude the moved bottom segment from further swapping. */ + top -= len; + } + else + { + /* Top segment is the short one. */ + int len = top - middle; + register int i; + + /* Swap it with the bottom part of the bottom segment.
*/ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[middle + i]; + argv[middle + i] = tem; + SWAP_FLAGS (bottom + i, middle + i); + } + /* Exclude the moved top segment from further swapping. */ + bottom += len; + } + } + + /* Update records for the slots the non-options now occupy. + The recorded range keeps its length and now ends at optind. */ + + d->__first_nonopt += (d->optind - d->__last_nonopt); + d->__last_nonopt = d->optind; +} + +/* Initialize the internal data when the first call is made. + Sets the skipped-non-option range to empty at D->optind, clears the + saved scan position, records whether POSIX ordering was requested + (by the POSIXLY_CORRECT argument or the environment variable of the + same name), and selects the ordering mode. Returns OPTSTRING, + advanced past a leading `-' or `+' mode character if one was present. */ + +static const char * +_getopt_initialize (int argc, char **argv, const char *optstring, + int posixly_correct, struct _getopt_data *d) +{ + /* Start processing options with ARGV-element 1 (since ARGV-element 0 + is the program name); the sequence of previously skipped + non-option ARGV-elements is empty. */ + + d->__first_nonopt = d->__last_nonopt = d->optind; + + d->__nextchar = NULL; + + d->__posixly_correct = posixly_correct || !!getenv ("POSIXLY_CORRECT"); + + /* Determine how to handle the ordering of options and nonoptions.
*/ + + if (optstring[0] == '-') + { + d->__ordering = RETURN_IN_ORDER; + ++optstring; + } + else if (optstring[0] == '+') + { + d->__ordering = REQUIRE_ORDER; + ++optstring; + } + else if (d->__posixly_correct) + d->__ordering = REQUIRE_ORDER; + else + d->__ordering = PERMUTE; + +#if defined _LIBC && defined USE_NONOPTION_FLAGS + if (!d->__posixly_correct + && argc == __libc_argc && argv == __libc_argv) + { + if (d->__nonoption_flags_max_len == 0) + { + if (__getopt_nonoption_flags == NULL + || __getopt_nonoption_flags[0] == '\0') + d->__nonoption_flags_max_len = -1; + else + { + const char *orig_str = __getopt_nonoption_flags; + int len = d->__nonoption_flags_max_len = strlen (orig_str); + if (d->__nonoption_flags_max_len < argc) + d->__nonoption_flags_max_len = argc; + __getopt_nonoption_flags = + (char *) malloc (d->__nonoption_flags_max_len); + if (__getopt_nonoption_flags == NULL) + d->__nonoption_flags_max_len = -1; + else + memset (__mempcpy (__getopt_nonoption_flags, orig_str, len), + '\0', d->__nonoption_flags_max_len - len); + } + } + d->__nonoption_flags_len = d->__nonoption_flags_max_len; + } + else + d->__nonoption_flags_len = 0; +#endif + + return optstring; +} + +/* Scan elements of ARGV (whose length is ARGC) for option characters + given in OPTSTRING. + + If an element of ARGV starts with '-', and is not exactly "-" or "--", + then it is an option element. The characters of this element + (aside from the initial '-') are option characters. If `getopt' + is called repeatedly, it returns successively each of the option characters + from each of the option elements. + + If `getopt' finds another option character, it returns that character, + updating `optind' and `nextchar' so that the next call to `getopt' can + resume the scan with the following option character or ARGV-element. + + If there are no more option characters, `getopt' returns -1. + Then `optind' is the index in ARGV of the first ARGV-element + that is not an option.
(The ARGV-elements have been permuted + so that those that are not options now come last.) + + OPTSTRING is a string containing the legitimate option characters. + If an option character is seen that is not listed in OPTSTRING, + return '?' after printing an error message. If you set `opterr' to + zero, the error message is suppressed but we still return '?'. + + If a char in OPTSTRING is followed by a colon, that means it wants an arg, + so the following text in the same ARGV-element, or the text of the following + ARGV-element, is returned in `optarg'. Two colons mean an option that + wants an optional arg; if there is text in the current ARGV-element, + it is returned in `optarg', otherwise `optarg' is set to zero. + + If OPTSTRING starts with `-' or `+', it requests different methods of + handling the non-option ARGV-elements. + See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above. + + Long-named options begin with `--' instead of `-'. + Their names may be abbreviated as long as the abbreviation is unique + or is an exact match for some defined option. If they have an + argument, it follows the option name in the same ARGV-element, separated + from the option name by a `=', or else the in next ARGV-element. + When `getopt' finds a long-named option, it returns 0 if that option's + `flag' field is nonzero, the value of the option's `val' field + if the `flag' field is zero. + + LONGOPTS is a vector of `struct option' terminated by an + element containing a name which is zero. + + LONGIND returns the index in LONGOPT of the long-named option found. + It is only valid when a long-named option has been found by the most + recent call. + + If LONG_ONLY is nonzero, '-' as well as '--' can introduce + long-named options. + + If POSIXLY_CORRECT is nonzero, behave as if the POSIXLY_CORRECT + environment variable were set. 
*/ + +int +_getopt_internal_r (int argc, char **argv, const char *optstring, + const struct option *longopts, int *longind, + int long_only, int posixly_correct, struct _getopt_data *d) +{ + int print_errors = d->opterr; + if (optstring[0] == ':') + print_errors = 0; + + if (argc < 1) + return -1; + + d->optarg = NULL; + + if (d->optind == 0 || !d->__initialized) + { + if (d->optind == 0) + d->optind = 1; /* Don't scan ARGV[0], the program name. */ + optstring = _getopt_initialize (argc, argv, optstring, + posixly_correct, d); + d->__initialized = 1; + } + + /* Test whether ARGV[optind] points to a non-option argument. + Either it does not have option syntax, or there is an environment flag + from the shell indicating it is not an option. The later information + is only used when the used in the GNU libc. */ +#if defined _LIBC && defined USE_NONOPTION_FLAGS +# define NONOPTION_P (argv[d->optind][0] != '-' || argv[d->optind][1] == '\0' \ + || (d->optind < d->__nonoption_flags_len \ + && __getopt_nonoption_flags[d->optind] == '1')) +#else +# define NONOPTION_P (argv[d->optind][0] != '-' || argv[d->optind][1] == '\0') +#endif + + if (d->__nextchar == NULL || *d->__nextchar == '\0') + { + /* Advance to the next ARGV-element. */ + + /* Give FIRST_NONOPT & LAST_NONOPT rational values if OPTIND has been + moved back by the user (who may also have changed the arguments). */ + if (d->__last_nonopt > d->optind) + d->__last_nonopt = d->optind; + if (d->__first_nonopt > d->optind) + d->__first_nonopt = d->optind; + + if (d->__ordering == PERMUTE) + { + /* If we have just processed some options following some non-options, + exchange them so that the options come first. */ + + if (d->__first_nonopt != d->__last_nonopt + && d->__last_nonopt != d->optind) + exchange ((char **) argv, d); + else if (d->__last_nonopt != d->optind) + d->__first_nonopt = d->optind; + + /* Skip any additional non-options + and extend the range of non-options previously skipped. 
*/ + + while (d->optind < argc && NONOPTION_P) + d->optind++; + d->__last_nonopt = d->optind; + } + + /* The special ARGV-element `--' means premature end of options. + Skip it like a null option, + then exchange with previous non-options as if it were an option, + then skip everything else like a non-option. */ + + if (d->optind != argc && !strcmp (argv[d->optind], "--")) + { + d->optind++; + + if (d->__first_nonopt != d->__last_nonopt + && d->__last_nonopt != d->optind) + exchange ((char **) argv, d); + else if (d->__first_nonopt == d->__last_nonopt) + d->__first_nonopt = d->optind; + d->__last_nonopt = argc; + + d->optind = argc; + } + + /* If we have done all the ARGV-elements, stop the scan + and back over any non-options that we skipped and permuted. */ + + if (d->optind == argc) + { + /* Set the next-arg-index to point at the non-options + that we previously skipped, so the caller will digest them. */ + if (d->__first_nonopt != d->__last_nonopt) + d->optind = d->__first_nonopt; + return -1; + } + + /* If we have come to a non-option and did not permute it, + either stop the scan or describe it to the caller and pass it by. */ + + if (NONOPTION_P) + { + if (d->__ordering == REQUIRE_ORDER) + return -1; + d->optarg = argv[d->optind++]; + return 1; + } + + /* We have found another option-ARGV-element. + Skip the initial punctuation. */ + + d->__nextchar = (argv[d->optind] + 1 + + (longopts != NULL && argv[d->optind][1] == '-')); + } + + /* Decode the current option-ARGV-element. */ + + /* Check whether the ARGV-element is a long option. + + If long_only and the ARGV-element has the form "-f", where f is + a valid short option, don't consider it an abbreviated form of + a long option that starts with f. Otherwise there would be no + way to give the -f short option. + + On the other hand, if there's a long option "fubar" and + the ARGV-element is "-fu", do consider that an abbreviation of + the long option, just like "--fu", and not "-f" with arg "u". 
+ + This distinction seems to be the most useful approach. */ + + if (longopts != NULL + && (argv[d->optind][1] == '-' + || (long_only && (argv[d->optind][2] + || !strchr (optstring, argv[d->optind][1]))))) + { + char *nameend; + const struct option *p; + const struct option *pfound = NULL; + int exact = 0; + int ambig = 0; + int indfound = -1; + int option_index; + + for (nameend = d->__nextchar; *nameend && *nameend != '='; nameend++) + /* Do nothing. */ ; + + /* Test all long options for either exact match + or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; p++, option_index++) + if (!strncmp (p->name, d->__nextchar, nameend - d->__nextchar)) + { + if ((unsigned int) (nameend - d->__nextchar) + == (unsigned int) strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) + { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else if (long_only + || pfound->has_arg != p->has_arg + || pfound->flag != p->flag + || pfound->val != p->val) + /* Second or later nonexact match found. 
*/ + ambig = 1; + } + + if (ambig && !exact) + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + if (__asprintf (&buf, _("%s: option `%s' is ambiguous\n"), + argv[0], argv[d->optind]) >= 0) + { + _IO_flockfile (stderr); + + int old_flags2 = ((_IO_FILE *) stderr)->_flags2; + ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL; + + __fxprintf (NULL, "%s", buf); + + ((_IO_FILE *) stderr)->_flags2 = old_flags2; + _IO_funlockfile (stderr); + + free (buf); + } +#else + fprintf (stderr, _("%s: option `%s' is ambiguous\n"), + argv[0], argv[d->optind]); +#endif + } + d->__nextchar += strlen (d->__nextchar); + d->optind++; + d->optopt = 0; + return '?'; + } + + if (pfound != NULL) + { + option_index = indfound; + d->optind++; + if (*nameend) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. */ + if (pfound->has_arg) + d->optarg = nameend + 1; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + int n; +#endif + + if (argv[d->optind - 1][1] == '-') + { + /* --option */ +#if defined _LIBC && defined USE_IN_LIBIO + n = __asprintf (&buf, _("\ +%s: option `--%s' doesn't allow an argument\n"), + argv[0], pfound->name); +#else + fprintf (stderr, _("\ +%s: option `--%s' doesn't allow an argument\n"), + argv[0], pfound->name); +#endif + } + else + { + /* +option or -option */ +#if defined _LIBC && defined USE_IN_LIBIO + n = __asprintf (&buf, _("\ +%s: option `%c%s' doesn't allow an argument\n"), + argv[0], argv[d->optind - 1][0], + pfound->name); +#else + fprintf (stderr, _("\ +%s: option `%c%s' doesn't allow an argument\n"), + argv[0], argv[d->optind - 1][0], + pfound->name); +#endif + } + +#if defined _LIBC && defined USE_IN_LIBIO + if (n >= 0) + { + _IO_flockfile (stderr); + + int old_flags2 = ((_IO_FILE *) stderr)->_flags2; + ((_IO_FILE *) stderr)->_flags2 + |= _IO_FLAGS2_NOTCANCEL; + + __fxprintf (NULL, "%s", buf); + + ((_IO_FILE *) stderr)->_flags2 = 
old_flags2; + _IO_funlockfile (stderr); + + free (buf); + } +#endif + } + + d->__nextchar += strlen (d->__nextchar); + + d->optopt = pfound->val; + return '?'; + } + } + else if (pfound->has_arg == 1) + { + if (d->optind < argc) + d->optarg = argv[d->optind++]; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + if (__asprintf (&buf, _("\ +%s: option `%s' requires an argument\n"), + argv[0], argv[d->optind - 1]) >= 0) + { + _IO_flockfile (stderr); + + int old_flags2 = ((_IO_FILE *) stderr)->_flags2; + ((_IO_FILE *) stderr)->_flags2 + |= _IO_FLAGS2_NOTCANCEL; + + __fxprintf (NULL, "%s", buf); + + ((_IO_FILE *) stderr)->_flags2 = old_flags2; + _IO_funlockfile (stderr); + + free (buf); + } +#else + fprintf (stderr, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[d->optind - 1]); +#endif + } + d->__nextchar += strlen (d->__nextchar); + d->optopt = pfound->val; + return optstring[0] == ':' ? ':' : '?'; + } + } + d->__nextchar += strlen (d->__nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + + /* Can't find it as a long option. If this is not getopt_long_only, + or the option starts with '--' or is not a valid short + option, then it's an error. + Otherwise interpret it as a short option. 
*/ + if (!long_only || argv[d->optind][1] == '-' + || strchr (optstring, *d->__nextchar) == NULL) + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + int n; +#endif + + if (argv[d->optind][1] == '-') + { + /* --option */ +#if defined _LIBC && defined USE_IN_LIBIO + n = __asprintf (&buf, _("%s: unrecognized option `--%s'\n"), + argv[0], d->__nextchar); +#else + fprintf (stderr, _("%s: unrecognized option `--%s'\n"), + argv[0], d->__nextchar); +#endif + } + else + { + /* +option or -option */ +#if defined _LIBC && defined USE_IN_LIBIO + n = __asprintf (&buf, _("%s: unrecognized option `%c%s'\n"), + argv[0], argv[d->optind][0], d->__nextchar); +#else + fprintf (stderr, _("%s: unrecognized option `%c%s'\n"), + argv[0], argv[d->optind][0], d->__nextchar); +#endif + } + +#if defined _LIBC && defined USE_IN_LIBIO + if (n >= 0) + { + _IO_flockfile (stderr); + + int old_flags2 = ((_IO_FILE *) stderr)->_flags2; + ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL; + + __fxprintf (NULL, "%s", buf); + + ((_IO_FILE *) stderr)->_flags2 = old_flags2; + _IO_funlockfile (stderr); + + free (buf); + } +#endif + } + d->__nextchar = (char *) ""; + d->optind++; + d->optopt = 0; + return '?'; + } + } + + /* Look at and handle the next short option-character. */ + + { + char c = *d->__nextchar++; + char *temp = strchr (optstring, c); + + /* Increment `optind' when we start to process its last character. */ + if (*d->__nextchar == '\0') + ++d->optind; + + if (temp == NULL || c == ':') + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + int n; +#endif + + if (d->__posixly_correct) + { + /* 1003.2 specifies the format of this message. 
*/ +#if defined _LIBC && defined USE_IN_LIBIO + n = __asprintf (&buf, _("%s: illegal option -- %c\n"), + argv[0], c); +#else + fprintf (stderr, _("%s: illegal option -- %c\n"), argv[0], c); +#endif + } + else + { +#if defined _LIBC && defined USE_IN_LIBIO + n = __asprintf (&buf, _("%s: invalid option -- %c\n"), + argv[0], c); +#else + fprintf (stderr, _("%s: invalid option -- %c\n"), argv[0], c); +#endif + } + +#if defined _LIBC && defined USE_IN_LIBIO + if (n >= 0) + { + _IO_flockfile (stderr); + + int old_flags2 = ((_IO_FILE *) stderr)->_flags2; + ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL; + + __fxprintf (NULL, "%s", buf); + + ((_IO_FILE *) stderr)->_flags2 = old_flags2; + _IO_funlockfile (stderr); + + free (buf); + } +#endif + } + d->optopt = c; + return '?'; + } + /* Convenience. Treat POSIX -W foo same as long option --foo */ + if (temp[0] == 'W' && temp[1] == ';') + { + char *nameend; + const struct option *p; + const struct option *pfound = NULL; + int exact = 0; + int ambig = 0; + int indfound = 0; + int option_index; + + /* This is an option that requires an argument. */ + if (*d->__nextchar != '\0') + { + d->optarg = d->__nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + d->optind++; + } + else if (d->optind == argc) + { + if (print_errors) + { + /* 1003.2 specifies the format of this message. 
*/ +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + if (__asprintf (&buf, + _("%s: option requires an argument -- %c\n"), + argv[0], c) >= 0) + { + _IO_flockfile (stderr); + + int old_flags2 = ((_IO_FILE *) stderr)->_flags2; + ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL; + + __fxprintf (NULL, "%s", buf); + + ((_IO_FILE *) stderr)->_flags2 = old_flags2; + _IO_funlockfile (stderr); + + free (buf); + } +#else + fprintf (stderr, _("%s: option requires an argument -- %c\n"), + argv[0], c); +#endif + } + d->optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + return c; + } + else + /* We already incremented `d->optind' once; + increment it again when taking next ARGV-elt as argument. */ + d->optarg = argv[d->optind++]; + + /* optarg is now the argument, see if it's in the + table of longopts. */ + + for (d->__nextchar = nameend = d->optarg; *nameend && *nameend != '='; + nameend++) + /* Do nothing. */ ; + + /* Test all long options for either exact match + or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; p++, option_index++) + if (!strncmp (p->name, d->__nextchar, nameend - d->__nextchar)) + { + if ((unsigned int) (nameend - d->__nextchar) == strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) + { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else + /* Second or later nonexact match found. 
*/ + ambig = 1; + } + if (ambig && !exact) + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + if (__asprintf (&buf, _("%s: option `-W %s' is ambiguous\n"), + argv[0], argv[d->optind]) >= 0) + { + _IO_flockfile (stderr); + + int old_flags2 = ((_IO_FILE *) stderr)->_flags2; + ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL; + + __fxprintf (NULL, "%s", buf); + + ((_IO_FILE *) stderr)->_flags2 = old_flags2; + _IO_funlockfile (stderr); + + free (buf); + } +#else + fprintf (stderr, _("%s: option `-W %s' is ambiguous\n"), + argv[0], argv[d->optind]); +#endif + } + d->__nextchar += strlen (d->__nextchar); + d->optind++; + return '?'; + } + if (pfound != NULL) + { + option_index = indfound; + if (*nameend) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. */ + if (pfound->has_arg) + d->optarg = nameend + 1; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + if (__asprintf (&buf, _("\ +%s: option `-W %s' doesn't allow an argument\n"), + argv[0], pfound->name) >= 0) + { + _IO_flockfile (stderr); + + int old_flags2 = ((_IO_FILE *) stderr)->_flags2; + ((_IO_FILE *) stderr)->_flags2 + |= _IO_FLAGS2_NOTCANCEL; + + __fxprintf (NULL, "%s", buf); + + ((_IO_FILE *) stderr)->_flags2 = old_flags2; + _IO_funlockfile (stderr); + + free (buf); + } +#else + fprintf (stderr, _("\ +%s: option `-W %s' doesn't allow an argument\n"), + argv[0], pfound->name); +#endif + } + + d->__nextchar += strlen (d->__nextchar); + return '?'; + } + } + else if (pfound->has_arg == 1) + { + if (d->optind < argc) + d->optarg = argv[d->optind++]; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + if (__asprintf (&buf, _("\ +%s: option `%s' requires an argument\n"), + argv[0], argv[d->optind - 1]) >= 0) + { + _IO_flockfile (stderr); + + int old_flags2 = ((_IO_FILE *) stderr)->_flags2; + ((_IO_FILE *) stderr)->_flags2 + |= 
_IO_FLAGS2_NOTCANCEL; + + __fxprintf (NULL, "%s", buf); + + ((_IO_FILE *) stderr)->_flags2 = old_flags2; + _IO_funlockfile (stderr); + + free (buf); + } +#else + fprintf (stderr, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[d->optind - 1]); +#endif + } + d->__nextchar += strlen (d->__nextchar); + return optstring[0] == ':' ? ':' : '?'; + } + } + d->__nextchar += strlen (d->__nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + d->__nextchar = NULL; + return 'W'; /* Let the application handle it. */ + } + if (temp[1] == ':') + { + if (temp[2] == ':') + { + /* This is an option that accepts an argument optionally. */ + if (*d->__nextchar != '\0') + { + d->optarg = d->__nextchar; + d->optind++; + } + else + d->optarg = NULL; + d->__nextchar = NULL; + } + else + { + /* This is an option that requires an argument. */ + if (*d->__nextchar != '\0') + { + d->optarg = d->__nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + d->optind++; + } + else if (d->optind == argc) + { + if (print_errors) + { + /* 1003.2 specifies the format of this message. */ +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + if (__asprintf (&buf, _("\ +%s: option requires an argument -- %c\n"), + argv[0], c) >= 0) + { + _IO_flockfile (stderr); + + int old_flags2 = ((_IO_FILE *) stderr)->_flags2; + ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL; + + __fxprintf (NULL, "%s", buf); + + ((_IO_FILE *) stderr)->_flags2 = old_flags2; + _IO_funlockfile (stderr); + + free (buf); + } +#else + fprintf (stderr, + _("%s: option requires an argument -- %c\n"), + argv[0], c); +#endif + } + d->optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + } + else + /* We already incremented `optind' once; + increment it again when taking next ARGV-elt as argument. 
*/ + d->optarg = argv[d->optind++]; + d->__nextchar = NULL; + } + } + return c; + } +} + +int +_getopt_internal (int argc, char **argv, const char *optstring, + const struct option *longopts, int *longind, + int long_only, int posixly_correct) +{ + int result; + + getopt_data.optind = optind; + getopt_data.opterr = opterr; + + result = _getopt_internal_r (argc, argv, optstring, longopts, longind, + long_only, posixly_correct, &getopt_data); + + optind = getopt_data.optind; + optarg = getopt_data.optarg; + optopt = getopt_data.optopt; + + return result; +} + +/* glibc gets a LSB-compliant getopt. + Standalone applications get a POSIX-compliant getopt. */ +#if _LIBC +enum { POSIXLY_CORRECT = 0 }; +#else +enum { POSIXLY_CORRECT = 1 }; +#endif + +int +getopt (int argc, char *const *argv, const char *optstring) +{ + return _getopt_internal (argc, (char **) argv, optstring, NULL, NULL, 0, + POSIXLY_CORRECT); +} + + +#ifdef TEST + +/* Compile with -DTEST to make an executable for use in testing + the above definition of `getopt'. */ + +int +main (int argc, char **argv) +{ + int c; + int digit_optind = 0; + + while (1) + { + int this_option_optind = optind ? optind : 1; + + c = getopt (argc, argv, "abc:d:0123456789"); + if (c == -1) + break; + + switch (c) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (digit_optind != 0 && digit_optind != this_option_optind) + printf ("digits occur in two different argv-elements.\n"); + digit_optind = this_option_optind; + printf ("option %c\n", c); + break; + + case 'a': + printf ("option a\n"); + break; + + case 'b': + printf ("option b\n"); + break; + + case 'c': + printf ("option c with value `%s'\n", optarg); + break; + + case '?': + break; + + default: + printf ("?? 
getopt returned character code 0%o ??\n", c); + } + } + + if (optind < argc) + { + printf ("non-option ARGV-elements: "); + while (optind < argc) + printf ("%s ", argv[optind++]); + printf ("\n"); + } + + exit (0); +} + +#endif /* TEST */ diff --git a/lib/getopt1.c b/lib/getopt1.c new file mode 100644 index 00000000..cc0746ea --- /dev/null +++ b/lib/getopt1.c @@ -0,0 +1,171 @@ +/* getopt_long and getopt_long_only entry points for GNU getopt. + Copyright (C) 1987,88,89,90,91,92,93,94,96,97,98,2004,2006 + Free Software Foundation, Inc. + This file is part of the GNU C Library. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#ifdef _LIBC +# include <getopt.h> +#else +# include <config.h> +# include "getopt.h" +#endif +#include "getopt_int.h" + +#include <stdio.h> + +/* This needs to come after some library #include + to get __GNU_LIBRARY__ defined. 
*/ +#ifdef __GNU_LIBRARY__ +#include <stdlib.h> +#endif + +#ifndef NULL +#define NULL 0 +#endif +/* Long-option entry point that works on the global getopt state: forwards to _getopt_internal with long_only = 0 and posixly_correct = 0, so only `--' introduces a long option. */ +int +getopt_long (int argc, char *__getopt_argv_const *argv, const char *options, + const struct option *long_options, int *opt_index) +{ + return _getopt_internal (argc, (char **) argv, options, long_options, + opt_index, 0, 0); +} +/* Same as getopt_long, but the scanner state is kept in *D (passed straight to _getopt_internal_r) instead of the global variables. */ +int +_getopt_long_r (int argc, char **argv, const char *options, + const struct option *long_options, int *opt_index, + struct _getopt_data *d) +{ + return _getopt_internal_r (argc, argv, options, long_options, opt_index, + 0, 0, d); +} + +/* Like getopt_long, but '-' as well as '--' can indicate a long option. + If an option that starts with '-' (not '--') doesn't match a long option, + but does match a short option, it is parsed as a short option + instead. Implemented by passing long_only = 1 to _getopt_internal. */ + +int +getopt_long_only (int argc, char *__getopt_argv_const *argv, + const char *options, + const struct option *long_options, int *opt_index) +{ + return _getopt_internal (argc, (char **) argv, options, long_options, + opt_index, 1, 0); +} +/* Same as getopt_long_only, but the scanner state is kept in *D instead of the global variables. */ +int +_getopt_long_only_r (int argc, char **argv, const char *options, + const struct option *long_options, int *opt_index, + struct _getopt_data *d) +{ + return _getopt_internal_r (argc, argv, options, long_options, opt_index, + 1, 0, d); +} + + +#ifdef TEST + +#include <stdio.h> +/* Test driver, built only with -DTEST: parses the command line with getopt_long and prints every option and leftover non-option element it reports. */ +int +main (int argc, char **argv) +{ + int c; + int digit_optind = 0; + + while (1) + { + int this_option_optind = optind ? 
optind : 1; + int option_index = 0; + static struct option long_options[] = + { + {"add", 1, 0, 0}, + {"append", 0, 0, 0}, + {"delete", 1, 0, 0}, + {"verbose", 0, 0, 0}, + {"create", 0, 0, 0}, + {"file", 1, 0, 0}, + {0, 0, 0, 0} + }; + + c = getopt_long (argc, argv, "abc:d:0123456789", + long_options, &option_index); + if (c == -1) + break; + + switch (c) + { + case 0: /* Every table entry has flag == 0 and val == 0, so any long option comes back as 0. */ + printf ("option %s", long_options[option_index].name); + if (optarg) + printf (" with arg %s", optarg); + printf ("\n"); + break; + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (digit_optind != 0 && digit_optind != this_option_optind) + printf ("digits occur in two different argv-elements.\n"); + digit_optind = this_option_optind; + printf ("option %c\n", c); + break; + + case 'a': + printf ("option a\n"); + break; + + case 'b': + printf ("option b\n"); + break; + + case 'c': + printf ("option c with value `%s'\n", optarg); + break; + + case 'd': + printf ("option d with value `%s'\n", optarg); + break; + + case '?': + break; + + default: + printf ("?? getopt returned character code 0%o ??\n", c); + } + } + + if (optind < argc) + { + printf ("non-option ARGV-elements: "); + while (optind < argc) + printf ("%s ", argv[optind++]); + printf ("\n"); + } + + exit (0); +} + +#endif /* TEST */ diff --git a/lib/getopt_.h b/lib/getopt_.h new file mode 100644 index 00000000..615ef9a3 --- /dev/null +++ b/lib/getopt_.h @@ -0,0 +1,226 @@ +/* Declarations for getopt. + Copyright (C) 1989-1994,1996-1999,2001,2003,2004,2005,2006,2007 + Free Software Foundation, Inc. + This file is part of the GNU C Library. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#ifndef _GETOPT_H + +#ifndef __need_getopt +# define _GETOPT_H 1 +#endif + +/* Standalone applications should #define __GETOPT_PREFIX to an + identifier that prefixes the external functions and variables + defined in this header. When this happens, include the + headers that might declare getopt so that they will not cause + confusion if included after this file. Then systematically rename + identifiers so that they do not collide with the system functions + and variables. Renaming avoids problems with some compilers and + linkers. */ +#if defined __GETOPT_PREFIX && !defined __need_getopt +# include <stdlib.h> +# include <stdio.h> +# include <unistd.h> +# undef __need_getopt +# undef getopt +# undef getopt_long +# undef getopt_long_only +# undef optarg +# undef opterr +# undef optind +# undef optopt +# define __GETOPT_CONCAT(x, y) x ## y +# define __GETOPT_XCONCAT(x, y) __GETOPT_CONCAT (x, y) +# define __GETOPT_ID(y) __GETOPT_XCONCAT (__GETOPT_PREFIX, y) +# define getopt __GETOPT_ID (getopt) +# define getopt_long __GETOPT_ID (getopt_long) +# define getopt_long_only __GETOPT_ID (getopt_long_only) +# define optarg __GETOPT_ID (optarg) +# define opterr __GETOPT_ID (opterr) +# define optind __GETOPT_ID (optind) +# define optopt __GETOPT_ID (optopt) +#endif + +/* Standalone applications get correct prototypes for getopt_long and + getopt_long_only; they declare "char **argv". 
libc uses prototypes + with "char *const *argv" that are incorrect because getopt_long and + getopt_long_only can permute argv; this is required for backward + compatibility (e.g., for LSB 2.0.1). + + This used to be `#if defined __GETOPT_PREFIX && !defined __need_getopt', + but it caused redefinition warnings if both unistd.h and getopt.h were + included, since unistd.h includes getopt.h having previously defined + __need_getopt. + + The only place where __getopt_argv_const is used is in definitions + of getopt_long and getopt_long_only below, but these are visible + only if __need_getopt is not defined, so it is quite safe to rewrite + the conditional as follows: +*/ +#if !defined __need_getopt +# if defined __GETOPT_PREFIX +# define __getopt_argv_const /* empty */ +# else +# define __getopt_argv_const const +# endif +#endif + +/* If __GNU_LIBRARY__ is not already defined, either we are being used + standalone, or this is the first header included in the source file. + If we are being used with glibc, we need to include <features.h>, but + that does not exist if we are standalone. So: if __GNU_LIBRARY__ is + not defined, include <ctype.h>, which will pull in <features.h> for us + if it's from glibc. (Why ctype.h? It's guaranteed to exist and it + doesn't flood the namespace with stuff the way some other headers do.) */ +#if !defined __GNU_LIBRARY__ +# include <ctype.h> +#endif + +#ifndef __THROW +# ifndef __GNUC_PREREQ +# define __GNUC_PREREQ(maj, min) (0) +# endif +# if defined __cplusplus && __GNUC_PREREQ (2,8) +# define __THROW throw () +# else +# define __THROW +# endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +extern char *optarg; + +/* Index in ARGV of the next element to be scanned. 
+ This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns -1, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +extern int optind; + +/* Callers store zero here to inhibit the error message `getopt' prints + for unrecognized options. */ + +extern int opterr; + +/* Set to the option character that caused an error: one that was + unrecognized or one whose required argument was missing. */ + +extern int optopt; + +#ifndef __need_getopt +/* Describe the long-named options requested by the application. + The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector + of `struct option' terminated by an element containing a name which is + zero. + + The field `has_arg' is: + no_argument (or 0) if the option does not take an argument, + required_argument (or 1) if the option requires an argument, + optional_argument (or 2) if the option takes an optional argument. + + If the field `flag' is not NULL, it points to a variable that is set + to the value given in the field `val' when the option is found, but + left unchanged if the option is not found. + + To have a long-named option do something other than set an `int' to + a compiled-in constant, such as set a value from `optarg', set the + option's `flag' field to zero and its `val' field to a nonzero + value (the equivalent single-letter option character, if there is + one). For long options that have a zero `flag' field, `getopt' + returns the contents of the `val' field. */ + +struct option +{ + const char *name; /* Option name as typed after the leading dashes; zero in the terminating element. */ + /* has_arg can't be an enum because some compilers complain about + type mismatches in all the code that assumes it is an int. */ + int has_arg; + int *flag; /* If not NULL, `val' is stored here and `getopt' returns 0. */ + int val; /* Value returned, or stored through `flag'. */ +}; + +/* Names for the values of the `has_arg' field of `struct option'. 
*/ + +# define no_argument 0 +# define required_argument 1 +# define optional_argument 2 +#endif /* need getopt */ + + +/* Get definitions and prototypes for functions to process the + arguments in ARGV (ARGC of them, minus the program name) for + options given in OPTS. + + Return the option character from OPTS just read. Return -1 when + there are no more options. For unrecognized options, or options + missing arguments, `optopt' is set to the option letter, and '?' is + returned. + + The OPTS string is a list of characters which are recognized option + letters, optionally followed by colons, specifying that that letter + takes an argument, to be placed in `optarg'. + + If a letter in OPTS is followed by two colons, its argument is + optional. This behavior is specific to the GNU `getopt'. + + The argument `--' causes premature termination of argument + scanning, explicitly telling `getopt' that there are no more + options. + + If OPTS begins with `-', then non-option arguments are treated as + arguments to the option '\1'. This behavior is specific to the GNU + `getopt'. If OPTS begins with `+', or POSIXLY_CORRECT is set in + the environment, then do not permute arguments. */ + +extern int getopt (int ___argc, char *const *___argv, const char *__shortopts) + __THROW; + +#ifndef __need_getopt +extern int getopt_long (int ___argc, char *__getopt_argv_const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind) + __THROW; +extern int getopt_long_only (int ___argc, char *__getopt_argv_const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind) + __THROW; + +#endif + +#ifdef __cplusplus +} +#endif + +/* Make sure we later can get all the definitions and declarations. */ +#undef __need_getopt + +#endif /* getopt.h */ diff --git a/lib/getopt_int.h b/lib/getopt_int.h new file mode 100644 index 00000000..401579fd --- /dev/null +++ b/lib/getopt_int.h @@ -0,0 +1,131 @@ +/* Internal declarations for getopt. 
+ Copyright (C) 1989-1994,1996-1999,2001,2003,2004 + Free Software Foundation, Inc. + This file is part of the GNU C Library. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#ifndef _GETOPT_INT_H +#define _GETOPT_INT_H 1 + +extern int _getopt_internal (int ___argc, char **___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind, + int __long_only, int __posixly_correct); + + +/* Reentrant versions which can handle parsing multiple argument + vectors at the same time. */ + +/* Data type for reentrant functions. Each independent scan needs its + own object, initialized with _GETOPT_DATA_INITIALIZER. */ +struct _getopt_data +{ + /* These have exactly the same meaning as the corresponding global + variables, except that they are used for the reentrant + versions of getopt. */ + int optind; + int opterr; + int optopt; + char *optarg; + + /* Internal members. */ + + /* True if the internal members have been initialized. */ + int __initialized; + + /* The next char to be scanned in the option-element + in which the last option character we returned was found. + This allows us to pick up the scan where we left off. + + If this is zero, or a null string, it means resume the scan + by advancing to the next ARGV-element. */ + char *__nextchar; + + /* Describe how to deal with options that follow non-option ARGV-elements. 
+ + If the caller did not specify anything, + the default is REQUIRE_ORDER if the environment variable + POSIXLY_CORRECT is defined, PERMUTE otherwise. + + REQUIRE_ORDER means don't recognize them as options; + stop option processing when the first non-option is seen. + This is what Unix does. + This mode of operation is selected by either setting the environment + variable POSIXLY_CORRECT, or using `+' as the first character + of the list of option characters, or by calling getopt. + + PERMUTE is the default. We permute the contents of ARGV as we + scan, so that eventually all the non-options are at the end. + This allows options to be given in any order, even with programs + that were not written to expect this. + + RETURN_IN_ORDER is an option available to programs that were + written to expect options and other ARGV-elements in any order + and that care about the ordering of the two. We describe each + non-option ARGV-element as if it were the argument of an option + with character code 1. Using `-' as the first character of the + list of option characters selects this mode of operation. + + The special argument `--' forces an end of option-scanning regardless + of the value of `ordering'. In the case of RETURN_IN_ORDER, only + `--' can cause `getopt' to return -1 with `optind' != ARGC. */ + + enum + { + REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER + } __ordering; + + /* Nonzero if the POSIXLY_CORRECT environment variable is set + or getopt was called. When nonzero, an unknown short option is + reported with the "illegal option" wording instead of + "invalid option". */ + int __posixly_correct; + + + /* Handle permutation of arguments. */ + + /* Describe the part of ARGV that contains non-options that have + been skipped. `first_nonopt' is the index in ARGV of the first + of them; `last_nonopt' is the index after the last of them. 
*/ + + int __first_nonopt; + int __last_nonopt; + +#if defined _LIBC && defined USE_NONOPTION_FLAGS + /* NOTE(review): presumably the allocated and used lengths of the shell-provided non-option flag string; only __nonoption_flags_len is visibly used, as the index bound in NONOPTION_P -- confirm against _getopt_initialize. */ + int __nonoption_flags_max_len; + int __nonoption_flags_len; +# endif +}; + +/* The initializer is necessary to set OPTIND and OPTERR to their + default values and to clear the initialization flag. The two listed + values are `optind' and `opterr'; every member not listed is + zero-initialized, so `__initialized' starts at 0 and `__nextchar' + as a null pointer. */ +#define _GETOPT_DATA_INITIALIZER { 1, 1 } + +extern int _getopt_internal_r (int ___argc, char **___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind, + int __long_only, int __posixly_correct, + struct _getopt_data *__data); + +extern int _getopt_long_r (int ___argc, char **___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind, + struct _getopt_data *__data); + +extern int _getopt_long_only_r (int ___argc, char **___argv, + const char *__shortopts, + const struct option *__longopts, + int *__longind, + struct _getopt_data *__data); + +#endif /* getopt_int.h */ diff --git a/lib/gettext.h b/lib/gettext.h new file mode 100644 index 00000000..b6282e54 --- /dev/null +++ b/lib/gettext.h @@ -0,0 +1,240 @@ +/* Convenience header for conditional use of GNU <libintl.h>. + Copyright (C) 1995-1998, 2000-2002, 2004-2006 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU Library General Public License as published + by the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, + USA. 
#ifndef _LIBGETTEXT_H
#define _LIBGETTEXT_H 1

/* NLS can be disabled through the configure --disable-nls option.
 *
 * Extra hack in LZMA Utils: if DISABLE_NLS is defined, NLS is disabled
 * even if ENABLE_NLS is true. See Makefile.am for more information.
 */
#if ENABLE_NLS && !defined(DISABLE_NLS)

/* Get declarations of GNU message catalog functions.  */
# include <libintl.h>

/* You can set the DEFAULT_TEXT_DOMAIN macro to specify the domain used by
   the gettext() and ngettext() macros.  This is an alternative to calling
   textdomain(), and is useful for libraries.  */
# ifdef DEFAULT_TEXT_DOMAIN
#  undef gettext
#  define gettext(Msgid) \
     dgettext (DEFAULT_TEXT_DOMAIN, Msgid)
#  undef ngettext
#  define ngettext(Msgid1, Msgid2, N) \
     dngettext (DEFAULT_TEXT_DOMAIN, Msgid1, Msgid2, N)
# endif

#else

/* Solaris /usr/include/locale.h includes /usr/include/libintl.h, which
   chokes if dcgettext is defined as a macro.  So include it now, to make
   later inclusions of <locale.h> a NOP.  We don't include <libintl.h>
   as well because people using "gettext.h" will not include <libintl.h>,
   and also including <libintl.h> would fail on SunOS 4, whereas <locale.h>
   is OK.  */
#if defined(__sun)
# include <locale.h>
#endif

/* Many header files from the libstdc++ coming with g++ 3.3 or newer include
   <libintl.h>, which chokes if dcgettext is defined as a macro.  So include
   it now, to make later inclusions of <libintl.h> a NOP.  */
#if defined(__cplusplus) && defined(__GNUG__) && (__GNUC__ >= 3)
# include <cstdlib>
# if (__GLIBC__ >= 2) || _GLIBCXX_HAVE_LIBINTL_H
#  include <libintl.h>
# endif
#endif

/* Disabled NLS.
   The casts to 'const char *' serve the purpose of producing warnings
   for invalid uses of the value returned from these functions.
   On pre-ANSI systems without 'const', the config.h file is supposed to
   contain "#define const".  */
# define gettext(Msgid) ((const char *) (Msgid))
# define dgettext(Domainname, Msgid) ((const char *) (Msgid))
# define dcgettext(Domainname, Msgid, Category) ((const char *) (Msgid))
# define ngettext(Msgid1, Msgid2, N) \
    ((N) == 1 ? (const char *) (Msgid1) : (const char *) (Msgid2))
# define dngettext(Domainname, Msgid1, Msgid2, N) \
    ((N) == 1 ? (const char *) (Msgid1) : (const char *) (Msgid2))
# define dcngettext(Domainname, Msgid1, Msgid2, N, Category) \
    ((N) == 1 ? (const char *) (Msgid1) : (const char *) (Msgid2))
# define textdomain(Domainname) ((const char *) (Domainname))
# define bindtextdomain(Domainname, Dirname) ((const char *) (Dirname))
# define bind_textdomain_codeset(Domainname, Codeset) ((const char *) (Codeset))

#endif

/* A pseudo function call that serves as a marker for the automated
   extraction of messages, but does not call gettext().  The run-time
   translation is done at a different place in the code.
   The argument, String, should be a literal string.  Concatenated strings
   and other string expressions won't work.
   The macro's expansion is not parenthesized, so that it is suitable as
   initializer for static 'char[]' or 'const char[]' variables.  */
#define gettext_noop(String) String

/* The separator between msgctxt and msgid in a .mo file.  */
#define GETTEXT_CONTEXT_GLUE "\004"

/* Pseudo function calls, taking a MSGCTXT and a MSGID instead of just a
   MSGID.  MSGCTXT and MSGID must be string literals.  MSGCTXT should be
   short and rarely need to change.
   The letter 'p' stands for 'particular' or 'special'.  */
#ifdef DEFAULT_TEXT_DOMAIN
# define pgettext(Msgctxt, Msgid) \
   pgettext_aux (DEFAULT_TEXT_DOMAIN, Msgctxt GETTEXT_CONTEXT_GLUE Msgid, Msgid, LC_MESSAGES)
#else
# define pgettext(Msgctxt, Msgid) \
   pgettext_aux (NULL, Msgctxt GETTEXT_CONTEXT_GLUE Msgid, Msgid, LC_MESSAGES)
#endif
#define dpgettext(Domainname, Msgctxt, Msgid) \
  pgettext_aux (Domainname, Msgctxt GETTEXT_CONTEXT_GLUE Msgid, Msgid, LC_MESSAGES)
#define dcpgettext(Domainname, Msgctxt, Msgid, Category) \
  pgettext_aux (Domainname, Msgctxt GETTEXT_CONTEXT_GLUE Msgid, Msgid, Category)
#ifdef DEFAULT_TEXT_DOMAIN
# define npgettext(Msgctxt, Msgid, MsgidPlural, N) \
   npgettext_aux (DEFAULT_TEXT_DOMAIN, Msgctxt GETTEXT_CONTEXT_GLUE Msgid, Msgid, MsgidPlural, N, LC_MESSAGES)
#else
# define npgettext(Msgctxt, Msgid, MsgidPlural, N) \
   npgettext_aux (NULL, Msgctxt GETTEXT_CONTEXT_GLUE Msgid, Msgid, MsgidPlural, N, LC_MESSAGES)
#endif
#define dnpgettext(Domainname, Msgctxt, Msgid, MsgidPlural, N) \
  npgettext_aux (Domainname, Msgctxt GETTEXT_CONTEXT_GLUE Msgid, Msgid, MsgidPlural, N, LC_MESSAGES)
#define dcnpgettext(Domainname, Msgctxt, Msgid, MsgidPlural, N, Category) \
  npgettext_aux (Domainname, Msgctxt GETTEXT_CONTEXT_GLUE Msgid, Msgid, MsgidPlural, N, Category)

/* Look up MSG_CTXT_ID (context, glue byte and msgid already joined by
   the caller) through dcgettext.  When the lookup hands back the very
   pointer it was given, that is taken to mean "no translation", and the
   plain MSGID is returned instead, so that the glued key never reaches
   the caller.  Otherwise the lookup result is returned.  */
static inline const char *
pgettext_aux (const char *domain,
              const char *msg_ctxt_id, const char *msgid,
              int category)
{
  const char *translation = dcgettext (domain, msg_ctxt_id, category);
  if (translation == msg_ctxt_id)
    return msgid;
  else
    return translation;
}

/* Plural-aware variant of pgettext_aux.  The lookup goes through
   dcngettext; if it returns either of the two pointers that were
   passed in (the glued key or MSGID_PLURAL), the result is chosen
   locally: MSGID when N is 1, MSGID_PLURAL otherwise.  */
static inline const char *
npgettext_aux (const char *domain,
               const char *msg_ctxt_id, const char *msgid,
               const char *msgid_plural, unsigned long int n,
               int category)
{
  const char *translation =
    dcngettext (domain, msg_ctxt_id, msgid_plural, n, category);
  if (translation == msg_ctxt_id || translation == msgid_plural)
    return (n == 1 ? msgid : msgid_plural);
  else
    return translation;
}

/* The same thing extended for non-constant arguments.  Here MSGCTXT and MSGID
   can be arbitrary expressions.  But for string literals these macros are
   less efficient than those above.  */

#include <string.h>

#define _LIBGETTEXT_HAVE_VARIABLE_SIZE_ARRAYS 1

#if !_LIBGETTEXT_HAVE_VARIABLE_SIZE_ARRAYS
#include <stdlib.h>
#endif

#define pgettext_expr(Msgctxt, Msgid) \
  dcpgettext_expr (NULL, Msgctxt, Msgid, LC_MESSAGES)
#define dpgettext_expr(Domainname, Msgctxt, Msgid) \
  dcpgettext_expr (Domainname, Msgctxt, Msgid, LC_MESSAGES)

/* Context lookup for run-time strings.  Builds the key
   MSGCTXT "\004" MSGID in a scratch buffer, looks it up with
   dcgettext, and returns the lookup result unless it is the scratch
   buffer itself, in which case MSGID is returned.  MSGCTXT and MSGID
   must be non-null strings (both are passed to strlen).

   The scratch buffer is a variable-length array when
   _LIBGETTEXT_HAVE_VARIABLE_SIZE_ARRAYS is nonzero; otherwise it is a
   1024-byte stack buffer with a malloc fallback for longer keys.  If
   that malloc fails, MSGID is returned untranslated.  */
static inline const char *
dcpgettext_expr (const char *domain,
                 const char *msgctxt, const char *msgid,
                 int category)
{
  size_t msgctxt_len = strlen (msgctxt) + 1;
  size_t msgid_len = strlen (msgid) + 1;
  const char *translation;
#if _LIBGETTEXT_HAVE_VARIABLE_SIZE_ARRAYS
  char msg_ctxt_id[msgctxt_len + msgid_len];
#else
  char buf[1024];
  char *msg_ctxt_id =
    (msgctxt_len + msgid_len <= sizeof (buf)
     ? buf
     : (char *) malloc (msgctxt_len + msgid_len));
  if (msg_ctxt_id != NULL)
#endif
    {
      int found_translation;
      /* msgctxt_len counts the terminating NUL; that slot is reused
         for the glue byte.  msgid is copied including its NUL.  */
      memcpy (msg_ctxt_id, msgctxt, msgctxt_len - 1);
      msg_ctxt_id[msgctxt_len - 1] = '\004';
      memcpy (msg_ctxt_id + msgctxt_len, msgid, msgid_len);
      translation = dcgettext (domain, msg_ctxt_id, category);
      /* Decide this before the buffer can be freed: once free() has
         run, the value of msg_ctxt_id is indeterminate and must not
         be used, not even in a comparison.  */
      found_translation = (translation != msg_ctxt_id);
#if !_LIBGETTEXT_HAVE_VARIABLE_SIZE_ARRAYS
      if (msg_ctxt_id != buf)
        free (msg_ctxt_id);
#endif
      if (found_translation)
        return translation;
    }
  return msgid;
}

#define npgettext_expr(Msgctxt, Msgid, MsgidPlural, N) \
  dcnpgettext_expr (NULL, Msgctxt, Msgid, MsgidPlural, N, LC_MESSAGES)
#define dnpgettext_expr(Domainname, Msgctxt, Msgid, MsgidPlural, N) \
  dcnpgettext_expr (Domainname, Msgctxt, Msgid, MsgidPlural, N, LC_MESSAGES)

/* Plural-aware variant of dcpgettext_expr.  The key is built the same
   way and looked up with dcngettext.  If the lookup returns the
   scratch buffer or MSGID_PLURAL, or if no scratch buffer could be
   allocated, the result is MSGID when N is 1 and MSGID_PLURAL
   otherwise.  */
static inline const char *
dcnpgettext_expr (const char *domain,
                  const char *msgctxt, const char *msgid,
                  const char *msgid_plural, unsigned long int n,
                  int category)
{
  size_t msgctxt_len = strlen (msgctxt) + 1;
  size_t msgid_len = strlen (msgid) + 1;
  const char *translation;
#if _LIBGETTEXT_HAVE_VARIABLE_SIZE_ARRAYS
  char msg_ctxt_id[msgctxt_len + msgid_len];
#else
  char buf[1024];
  char *msg_ctxt_id =
    (msgctxt_len + msgid_len <= sizeof (buf)
     ? buf
     : (char *) malloc (msgctxt_len + msgid_len));
  if (msg_ctxt_id != NULL)
#endif
    {
      int found_translation;
      memcpy (msg_ctxt_id, msgctxt, msgctxt_len - 1);
      msg_ctxt_id[msgctxt_len - 1] = '\004';
      memcpy (msg_ctxt_id + msgctxt_len, msgid, msgid_len);
      translation = dcngettext (domain, msg_ctxt_id, msgid_plural, n, category);
      /* Same ordering rule as in dcpgettext_expr: evaluate the pointer
         comparison while msg_ctxt_id is still a live object.  */
      found_translation = !(translation == msg_ctxt_id
                            || translation == msgid_plural);
#if !_LIBGETTEXT_HAVE_VARIABLE_SIZE_ARRAYS
      if (msg_ctxt_id != buf)
        free (msg_ctxt_id);
#endif
      if (found_translation)
        return translation;
    }
  return (n == 1 ? msgid : msgid_plural);
}

#endif /* _LIBGETTEXT_H */
This macro was based +# on work by SGJ on autoconf scripts for FFTW (http://www.fftw.org/) +# (with help from M. Frigo), as well as ac_pthread and hb_pthread +# macros posted by Alejandro Forero Cuervo to the autoconf macro +# repository. We are also grateful for the helpful feedback of +# numerous users. +# +# LAST MODIFICATION +# +# 2007-07-29 +# +# COPYLEFT +# +# Copyright (c) 2007 Steven G. Johnson <stevenj@alum.mit.edu> +# +# This program is free software: you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# <http://www.gnu.org/licenses/>. +# +# As a special exception, the respective Autoconf Macro's copyright +# owner gives unlimited permission to copy, distribute and modify the +# configure scripts that are the output of Autoconf when processing +# the Macro. You need not follow the terms of the GNU General Public +# License when using or distributing such scripts, even though +# portions of the text of the Macro appear in them. The GNU General +# Public License (GPL) does govern all other use of the material that +# constitutes the Autoconf Macro. +# +# This special exception to the GPL applies to versions of the +# Autoconf Macro released by the Autoconf Macro Archive. When you +# make and distribute a modified version of the Autoconf Macro, you +# may extend this special exception to the GPL to apply to your +# modified version as well. 
# ACX_PTHREAD: probe for a usable POSIX threads setup.  On exit the
# shell variables PTHREAD_LIBS, PTHREAD_CFLAGS and PTHREAD_CC are
# substituted, and acx_pthread_ok is either yes or no.
AC_DEFUN([ACX_PTHREAD], [
AC_REQUIRE([AC_CANONICAL_HOST])
AC_LANG_SAVE
AC_LANG_C
acx_pthread_ok=no

# We used to check for pthread.h first, but this fails if pthread.h
# requires special compiler flags (e.g. on Tru64 or Sequent).
# It gets checked for in the link test anyway.

# First of all, check if the user has set any of the PTHREAD_LIBS,
# etcetera environment variables, and if threads linking works using
# them:
if test x"$PTHREAD_LIBS$PTHREAD_CFLAGS" != x; then
        save_CFLAGS="$CFLAGS"
        CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
        save_LIBS="$LIBS"
        LIBS="$PTHREAD_LIBS $LIBS"
        AC_MSG_CHECKING([for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS])
        AC_TRY_LINK_FUNC(pthread_join, acx_pthread_ok=yes)
        AC_MSG_RESULT($acx_pthread_ok)
        # User-supplied values that do not link are discarded so that
        # the generic search below starts from a clean state.
        if test x"$acx_pthread_ok" = xno; then
                PTHREAD_LIBS=""
                PTHREAD_CFLAGS=""
        fi
        LIBS="$save_LIBS"
        CFLAGS="$save_CFLAGS"
fi

# We must check for the threads library under a number of different
# names; the ordering is very important because some systems
# (e.g. DEC) have both -lpthread and -lpthreads, where one of the
# libraries is broken (non-POSIX).

# Create a list of thread flags to try.  Items starting with a "-" are
# C compiler flags, and other items are library names, except for "none"
# which indicates that we try without any flags at all, and "pthread-config"
# which is a program returning the flags for the Pth emulation library.

acx_pthread_flags="pthreads none -Kthread -kthread lthread -pthread -pthreads -mthreads pthread --thread-safe -mt pthread-config"

# The ordering *is* (sometimes) important.  Some notes on the
# individual items follow:

# pthreads: AIX (must check this before -lpthread)
# none: in case threads are in libc; should be tried before -Kthread and
#       other compiler flags to prevent continual compiler warnings
# -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h)
# -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able)
# lthread: LinuxThreads port on FreeBSD (also preferred to -pthread)
# -pthread: Linux/gcc (kernel threads), BSD/gcc (userland threads)
# -pthreads: Solaris/gcc
# -mthreads: Mingw32/gcc, Lynx/gcc
# -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it
#      doesn't hurt to check since this sometimes defines pthreads too;
#      also defines -D_REENTRANT)
#      ... -mt is also the pthreads flag for HP/aCC
# pthread: Linux, etcetera
# --thread-safe: KAI C++
# pthread-config: use pthread-config program (for GNU Pth library)

case "${host_cpu}-${host_os}" in
        *solaris*)

        # On Solaris (at least, for some versions), libc contains stubbed
        # (non-functional) versions of the pthreads routines, so link-based
        # tests will erroneously succeed.  (We need to link with -pthreads/-mt/
        # -lpthread.)  (The stubs are missing pthread_cleanup_push, or rather
        # a function called by this macro, so we could check for that, but
        # who knows whether they'll stub that too in a future libc.)  So,
        # we'll just look for -pthreads and -lpthread first:

        acx_pthread_flags="-pthreads pthread -mt -pthread $acx_pthread_flags"
        ;;
esac

# The search is skipped entirely when the user-supplied flags above
# already linked.
if test x"$acx_pthread_ok" = xno; then
for flag in $acx_pthread_flags; do

        case $flag in
                none)
                AC_MSG_CHECKING([whether pthreads work without any flags])
                ;;

                -*)
                AC_MSG_CHECKING([whether pthreads work with $flag])
                PTHREAD_CFLAGS="$flag"
                ;;

                # NOTE(review): this item prints no checking message of
                # its own before the shared result line further down;
                # presumably the program probe provides one -- confirm
                # against the generated configure output.
                pthread-config)
                AC_CHECK_PROG(acx_pthread_config, pthread-config, yes, no)
                if test x"$acx_pthread_config" = xno; then continue; fi
                PTHREAD_CFLAGS="`pthread-config --cflags`"
                PTHREAD_LIBS="`pthread-config --ldflags` `pthread-config --libs`"
                ;;

                *)
                AC_MSG_CHECKING([for the pthreads library -l$flag])
                PTHREAD_LIBS="-l$flag"
                ;;
        esac

        save_LIBS="$LIBS"
        save_CFLAGS="$CFLAGS"
        LIBS="$PTHREAD_LIBS $LIBS"
        CFLAGS="$CFLAGS $PTHREAD_CFLAGS"

        # Check for various functions.  We must include pthread.h,
        # since some functions may be macros.  (On the Sequent, we
        # need a special flag -Kthread to make this header compile.)
        # We check for pthread_join because it is in -lpthread on IRIX
        # while pthread_create is in libc.  We check for pthread_attr_init
        # due to DEC craziness with -lpthreads.  We check for
        # pthread_cleanup_push because it is one of the few pthread
        # functions on Solaris that doesn't have a non-functional libc stub.
        # We try pthread_create on general principles.
        AC_TRY_LINK([#include <pthread.h>],
                    [pthread_t th; pthread_join(th, 0);
                     pthread_attr_init(0); pthread_cleanup_push(0, 0);
                     pthread_create(0,0,0,0); pthread_cleanup_pop(0); ],
                    [acx_pthread_ok=yes])

        LIBS="$save_LIBS"
        CFLAGS="$save_CFLAGS"

        AC_MSG_RESULT($acx_pthread_ok)
        if test "x$acx_pthread_ok" = xyes; then
                break;
        fi

        # This candidate failed: clear its flags before trying the
        # next one.
        PTHREAD_LIBS=""
        PTHREAD_CFLAGS=""
done
fi

# Various other checks:
if test "x$acx_pthread_ok" = xyes; then
        save_LIBS="$LIBS"
        LIBS="$PTHREAD_LIBS $LIBS"
        save_CFLAGS="$CFLAGS"
        CFLAGS="$CFLAGS $PTHREAD_CFLAGS"

        # Detect AIX lossage: JOINABLE attribute is called UNDETACHED.
        AC_MSG_CHECKING([for joinable pthread attribute])
        attr_name=unknown
        for attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do
            AC_TRY_LINK([#include <pthread.h>], [int attr=$attr; return attr;],
                        [attr_name=$attr; break])
        done
        AC_MSG_RESULT($attr_name)
        # Note that the value unknown also takes this branch, because
        # it differs from the standard name.
        if test "$attr_name" != PTHREAD_CREATE_JOINABLE; then
            AC_DEFINE_UNQUOTED(PTHREAD_CREATE_JOINABLE, $attr_name,
                               [Define to necessary symbol if this constant
                                uses a non-standard name on your system.])
        fi

        AC_MSG_CHECKING([if more special flags are required for pthreads])
        flag=no
        case "${host_cpu}-${host_os}" in
            *-aix* | *-freebsd* | *-darwin*) flag="-D_THREAD_SAFE";;
            *solaris* | *-osf* | *-hpux*) flag="-D_REENTRANT";;
        esac
        AC_MSG_RESULT(${flag})
        if test "x$flag" != xno; then
            PTHREAD_CFLAGS="$flag $PTHREAD_CFLAGS"
        fi

        LIBS="$save_LIBS"
        CFLAGS="$save_CFLAGS"

        # More AIX lossage: must compile with xlc_r or cc_r
        if test x"$GCC" != xyes; then
          AC_CHECK_PROGS(PTHREAD_CC, xlc_r cc_r, ${CC})
        else
          PTHREAD_CC=$CC
        fi
else
        PTHREAD_CC="$CC"
fi

AC_SUBST(PTHREAD_LIBS)
AC_SUBST(PTHREAD_CFLAGS)
AC_SUBST(PTHREAD_CC)

# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND:
if test x"$acx_pthread_ok" = xyes; then
        ifelse([$1],,AC_DEFINE(HAVE_PTHREAD,1,[Define if you have POSIX threads libraries and header files.]),[$1])
        :
else
        acx_pthread_ok=no
        $2
fi
AC_LANG_RESTORE
])dnl ACX_PTHREAD
+ if test -z "$GETOPT_H"; then + AC_CACHE_CHECK([for working GNU getopt function], [gl_cv_func_gnu_getopt], + [AC_RUN_IFELSE( + [AC_LANG_PROGRAM([#include <getopt.h>], + [[ + char *myargv[3]; + myargv[0] = "conftest"; + myargv[1] = "-+"; + myargv[2] = 0; + return getopt (2, myargv, "+a") != '?'; + ]])], + [gl_cv_func_gnu_getopt=yes], + [gl_cv_func_gnu_getopt=no], + [dnl cross compiling - pessimistically guess based on decls + dnl Solaris 10 getopt doesn't handle `+' as a leading character in an + dnl option string (as of 2005-05-05). + AC_CHECK_DECL([getopt_clip], + [gl_cv_func_gnu_getopt=no], [gl_cv_func_gnu_getopt=yes], + [#include <getopt.h>])])]) + if test "$gl_cv_func_gnu_getopt" = "no"; then + GETOPT_H=getopt.h + fi + fi +]) + +AC_DEFUN([gl_GETOPT_IFELSE], +[ + AC_REQUIRE([gl_GETOPT_CHECK_HEADERS]) + AS_IF([test -n "$GETOPT_H"], [$1], [$2]) +]) + +AC_DEFUN([gl_GETOPT], [gl_GETOPT_IFELSE([gl_GETOPT_SUBSTITUTE])]) + +# Prerequisites of lib/getopt*. +AC_DEFUN([gl_PREREQ_GETOPT], +[ + AC_CHECK_DECLS_ONCE([getenv]) +]) diff --git a/po/LINGUAS b/po/LINGUAS new file mode 100644 index 00000000..e133fade --- /dev/null +++ b/po/LINGUAS @@ -0,0 +1 @@ +fi diff --git a/po/Makevars b/po/Makevars new file mode 100644 index 00000000..dc19bc96 --- /dev/null +++ b/po/Makevars @@ -0,0 +1,46 @@ +# Makefile variables for PO directory in any package using GNU gettext. + +# Usually the message domain is the same as the package name. +DOMAIN = $(PACKAGE) + +# These two variables depend on the location of this directory. +subdir = po +top_builddir = .. + +# These options get passed to xgettext. +XGETTEXT_OPTIONS = --keyword=_ --keyword=N_ + +# This is the copyright holder that gets inserted into the header of the +# $(DOMAIN).pot file. Set this to the copyright holder of the surrounding +# package. (Note that the msgstr strings, extracted from the package's +# sources, belong to the copyright holder of the package.) 
Translators are +# expected to transfer the copyright for their translations to this person +# or entity, or to disclaim their copyright. The empty string stands for +# the public domain; in this case the translators are expected to disclaim +# their copyright. +COPYRIGHT_HOLDER = + +# This is the email address or URL to which the translators shall report +# bugs in the untranslated strings: +# - Strings which are not entire sentences, see the maintainer guidelines +# in the GNU gettext documentation, section 'Preparing Strings'. +# - Strings which use unclear terms or require additional context to be +# understood. +# - Strings which make invalid assumptions about notation of date, time or +# money. +# - Pluralisation problems. +# - Incorrect English spelling. +# - Incorrect formatting. +# It can be your email address, or a mailing list address where translators +# can write to without being subscribed, or the URL of a web page through +# which the translators can contact you. +MSGID_BUGS_ADDRESS = + +# This is the list of locale categories, beyond LC_MESSAGES, for which the +# message catalogs shall be used. It is usually empty. +EXTRA_LOCALE_CATEGORIES = + +# Although you may need slightly wider terminal than 80 chars, it is +# much nicer to edit the output of --help when this is set. +XGETTEXT_OPTIONS += --no-wrap +MSGMERGE += --no-wrap diff --git a/po/POTFILES.in b/po/POTFILES.in new file mode 100644 index 00000000..72f744a9 --- /dev/null +++ b/po/POTFILES.in @@ -0,0 +1,13 @@ +# List of source files which contain translatable strings. +src/lzma/help.c +src/lzma/alloc.c +src/lzma/args.c +src/lzma/error.c +src/lzma/io.c +src/lzma/list.c +src/lzma/main.c +src/lzma/options.c +src/lzma/process.c +src/lzma/suffix.c +src/lzma/util.c +lib/getopt.c diff --git a/po/fi.po b/po/fi.po new file mode 100644 index 00000000..cb19b3e6 --- /dev/null +++ b/po/fi.po @@ -0,0 +1,445 @@ +# translation of fi.po to Finnish +# This file is put in the public domain. 
+# +msgid "" +msgstr "" +"Project-Id-Version: fi\n" +"Report-Msgid-Bugs-To: lasse.collin@tukaani.org\n" +"POT-Creation-Date: 2007-12-01 18:36+0200\n" +"PO-Revision-Date: 2007-08-09 22:14+0300\n" +"Last-Translator: Lasse Collin <lasse.collin@tukaani.org>\n" +"Language-Team: Finnish\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" + +#: src/lzma/help.c:28 +#, c-format +msgid "Try `%s --help' for more information." +msgstr "Lisätietoja saa komennolla \"%s --help\"." + +#: src/lzma/help.c:36 +#, c-format +msgid "" +"Usage: %s [OPTION]... [FILE]...\n" +"Compress or decompress FILEs in the .lzma format.\n" +"\n" +msgstr "" +"Käyttö: %s [VALITSIN]... [TIEDOSTO]...\n" +"Pakkaa tai purkaa .lzma-muotoiset TIEDOSTOt\n" +"\n" + +#: src/lzma/help.c:40 +msgid "Mandatory arguments to long options are mandatory for short options too.\n" +msgstr "Pitkien valitsinten pakolliset argumentit ovat pakollisia myös lyhyille.\n" + +#: src/lzma/help.c:44 +msgid "" +" Operation mode:\n" +"\n" +" -z, --compress force compression\n" +" -d, --decompress force decompression\n" +" -t, --test test compressed file integrity\n" +" -l, --list list block sizes, total sizes, and possible metadata\n" +msgstr "" + +#: src/lzma/help.c:53 +msgid "" +" Operation modifiers:\n" +"\n" +" -k, --keep keep (don't delete) input files\n" +" -f, --force force overwrite of output file and (de)compress links\n" +" -c, --stdout write to standard output and don't delete input files\n" +" -S, --suffix=.SUF use suffix `.SUF' on compressed files instead of `.lzma'\n" +" -F, --format=FMT file format to encode or decode; possible values are\n" +" `auto', `native', `single', `multi', and `alone'\n" +" --files=[FILE] read filenames to process from FILE; if FILE is\n" +" omitted, filenames are read from the standard input;\n" +" filenames must be terminated with the newline character\n" +" --files0=[FILE] like --files but use the nul byte as terminator\n" +msgstr "" + 
+#: src/lzma/help.c:68 +msgid "" +" Compression presets and basic compression options:\n" +"\n" +" -1 .. -2 fast compression\n" +" -3 .. -6 good compression; default is -6 if memory limit allows\n" +" -7 .. -9 excellent compression, but needs a lot of memory\n" +"\n" +" -C, --check=CHECK integrity check type: `crc32', `crc64' (default),\n" +" or `sha256'\n" +msgstr "" + +#: src/lzma/help.c:79 +msgid "" +" Custom filter chain for compression (alternative for using presets):\n" +"\n" +" --lzma=[OPTS] LZMA filter; OPTS is a comma-separated list of zero or\n" +" more of the following options (valid values; default):\n" +" dict=NUM dictionary size in bytes (1 - 1Gi; 8Mi)\n" +" lc=NUM number of literal context bits (0-8; 3)\n" +" lp=NUM number of literal position bits (0-4; 0)\n" +" pb=NUM number of position bits (0-4; 2)\n" +" mode=MODE compression mode (`fast' or `best'; `best')\n" +" fb=NUM number of fast bytes (5-273; 128)\n" +" mf=NAME match finder (hc3, hc4, bt2, bt3, bt4; bt4)\n" +" mfc=NUM match finder cycles; 0=automatic (default)\n" +"\n" +" --x86 x86 filter (sometimes called BCJ filter)\n" +" --powerpc PowerPC (big endian) filter\n" +" --ia64 IA64 (Itanium) filter\n" +" --arm ARM filter\n" +" --armthumb ARM-Thumb filter\n" +" --sparc SPARC filter\n" +"\n" +" --copy No filtering (useful only when specified alone)\n" +" --subblock=[OPTS] Subblock filter; valid OPTS (valid values; default):\n" +" size=NUM number of bytes of data per subblock\n" +" (1 - 256Mi; 4Ki)\n" +" rle=NUM run-length encoder chunk size (0-256; 0)\n" +msgstr "" + +#: src/lzma/help.c:119 +msgid "" +" Resource usage options:\n" +"\n" +" -M, --memory=NUM use roughly NUM bytes of memory at maximum\n" +" -T, --threads=NUM use at maximum of NUM (de)compression threads\n" +msgstr "" + +#: src/lzma/help.c:128 +msgid "" +" Other options:\n" +"\n" +" -q, --quiet suppress warnings; specify twice to suppress errors too\n" +" -v, --verbose be verbose; specify twice for even more verbose\n" +"\n" +" -h, 
--help display this help and exit\n" +" -V, --version display version and license information and exit\n" +msgstr "" + +#: src/lzma/help.c:136 +msgid "With no FILE, or when FILE is -, read standard input.\n" +msgstr "Jos TIEDOSTOa ei ole annettu, tai se on \"-\", luetaan vakiosyötettä.\n" + +#: src/lzma/help.c:142 +msgid "On this system and configuration, the tool will use" +msgstr "Tässä järjestelmässä näillä asetuksilla, tämä työkalu käyttää" + +#: src/lzma/help.c:143 +#, c-format +msgid " * roughly %zu MiB of memory at maximum; and\n" +msgstr " * korkeintaan %zu MiB keskusmuistia (suurpiirteinen rajoitus); ja\n" + +#: src/lzma/help.c:146 +msgid "" +" * at maximum of one thread for (de)compression.\n" +"\n" +msgstr "" +" * korkeintaan yhtä säiettä pakkaukseen tai purkuun.\n" +"\n" + +#: src/lzma/help.c:150 +#, c-format +msgid "Report bugs to <%s> (in English or Finnish).\n" +msgstr "" +"Lähetä raportit ohjelmistovioista (englanniksi tai suomeksi)\n" +"osoitteeseen <%s>.\n" + +#: src/lzma/args.c:126 +msgid "Maximum number of filters is seven" +msgstr "Suotimia voi olla korkeintaan seitsemän" + +#: src/lzma/args.c:189 +#, c-format +msgid "%s: Invalid filename suffix" +msgstr "%s: Virheellinen tiedostonimen pääte" + +#: src/lzma/args.c:323 +#, c-format +msgid "%s: Unknown file format type" +msgstr "%s: Tuntematon tiedostomuodon tyyppi" + +#: src/lzma/args.c:349 +#, c-format +msgid "%s: Unknown integrity check type" +msgstr "%s: Tuntematon eheystarkisteen tyyppi" + +#: src/lzma/args.c:367 +msgid "Only one file can be specified with `--files'or `--files0'." 
+msgstr "" + +#: src/lzma/args.c:417 +msgid "The environment variable LZMA_OPT contains too many arguments" +msgstr "Ympäristömuuttuja LZMA_OPT sisältää liian monta argumenttia" + +#: src/lzma/args.c:483 +msgid "Memory usage limit is too small for any internal filter preset" +msgstr "" + +#: src/lzma/args.c:498 +msgid "Memory usage limit is too small for the given filter setup" +msgstr "Muistinkäyttörajoitus on liian pieni annetuille suodinasetuksille" + +#: src/lzma/error.c:35 +msgid "Operation successful" +msgstr "" + +#: src/lzma/error.c:38 +msgid "Operation finished successfully" +msgstr "" + +#: src/lzma/error.c:41 src/lzma/error.c:153 +msgid "Internal error (bug)" +msgstr "Sisäinen virhe (bugi)" + +#: src/lzma/error.c:44 +msgid "Compressed data is corrupt" +msgstr "Pakattu data on korruptoitunut" + +#: src/lzma/error.c:50 +msgid "Unexpected end of input" +msgstr "" + +#: src/lzma/error.c:53 +msgid "Unsupported options" +msgstr "" + +#: src/lzma/error.c:56 +msgid "Unsupported integrity check type" +msgstr "" + +#: src/lzma/error.c:87 src/lzma/error.c:93 +#, c-format +msgid "Writing to standard output failed: %s" +msgstr "" + +#: src/lzma/io.c:81 +#, c-format +msgid "Cannot get file descriptor of the current directory: %s" +msgstr "" + +#: src/lzma/io.c:123 src/lzma/util.c:127 +#, c-format +msgid "%s: Invalid filename" +msgstr "%s: Virheellinen tiedostonimi" + +#: src/lzma/io.c:130 src/lzma/io.c:233 +#, c-format +msgid "Cannot change directory: %s" +msgstr "Hakemiston vaihtaminen epäonnistui: %s" + +#: src/lzma/io.c:135 +#, c-format +msgid "%s: File seems to be moved, not removing" +msgstr "%s: Tiedosto näyttää olevan siirretty, ei poisteta" + +#: src/lzma/io.c:141 +#, c-format +msgid "%s: Cannot remove: %s" +msgstr "" + +#: src/lzma/io.c:164 +#, c-format +msgid "%s: Cannot set the file owner: %s" +msgstr "%s: Tiedoston omistajuuden asetus epäonnistui: %s" + +#: src/lzma/io.c:170 +#, c-format +msgid "%s: Cannot set the file group: %s" +msgstr "%s: Tiedoston
ryhmän asetus epäonnistui: %s" + +#: src/lzma/io.c:189 +#, c-format +msgid "%s: Cannot set the file permissions: %s" +msgstr "%s: Tiedoston oikeuksien asetus epäonnistui: %s" + +#: src/lzma/io.c:252 +#, c-format +msgid "%s: Cannot open the directory containing the file: %s" +msgstr "" + +#: src/lzma/io.c:313 +#, c-format +msgid "%s: Is a symbolic link, skipping" +msgstr "%s: Symbolinen linkki, ohitetaan" + +#: src/lzma/io.c:330 +#, c-format +msgid "%s: Is a directory, skipping" +msgstr "%s: Hakemisto, ohitetaan" + +#: src/lzma/io.c:337 src/lzma/list.c:445 +#, c-format +msgid "%s: Not a regular file, skipping" +msgstr "%s: Ei ole tavallinen tiedosto, ohitetaan" + +#: src/lzma/io.c:348 +#, c-format +msgid "%s: File has setuid or setgid bit set, skipping" +msgstr "%s: Tiedostolla on setuid- tai setgid-bitti asetettuna, ohitetaan" + +#: src/lzma/io.c:355 +#, c-format +msgid "%s: File has sticky bit set, skipping" +msgstr "%s: Tiedostolla on sticky-bitti asetettuna, ohitetaan" + +#: src/lzma/io.c:362 +#, c-format +msgid "%s: Input file has more than one hard link, skipping" +msgstr "%s: Tiedostoon viittaa useampi kuin yksi linkki, ohitetaan" + +#: src/lzma/io.c:390 src/lzma/io.c:467 +#, c-format +msgid "%s: Closing the file failed: %s" +msgstr "%s: Tiedoston sulkeminen epäonnistui: %s" + +#: src/lzma/io.c:610 +#, c-format +msgid "%s: Read error: %s" +msgstr "%s: Lukeminen epäonnistui: %s" + +#: src/lzma/io.c:653 +#, c-format +msgid "%s: Write error: %s" +msgstr "%s: Kirjoitus epäonnistui: %s" + +#: src/lzma/list.c:451 +#, c-format +msgid "%s: File is empty" +msgstr "" + +#: src/lzma/main.c:57 +msgid "Cannot establish signal handlers" +msgstr "Signaalikäsittelijöiden asetus epäonnistui" + +#: src/lzma/main.c:75 +msgid "Compressed data not read from a terminal." +msgstr "Pakattua dataa ei lueta päätteeltä." + +#: src/lzma/main.c:77 src/lzma/main.c:92 +msgid "Use `--force' to force decompression." +msgstr "Käytä valitsinta \"--force\" pakottaaksesi purun." 
+ +#: src/lzma/main.c:90 +msgid "Compressed data not written to a terminal." +msgstr "Pakattua dataa ei kirjoiteta päätteelle." + +#: src/lzma/main.c:117 +#, fuzzy, c-format +msgid "%s: Error reading filenames: %s" +msgstr "%s: Tiedoston sulkeminen epäonnistui: %s" + +#: src/lzma/main.c:122 +#, c-format +msgid "%s: Unexpected end of input when reading filenames" +msgstr "" + +#: src/lzma/main.c:212 +msgid "Cannot read data from standard input when reading filenames from standard input" +msgstr "Dataa ei voida lukea oletussyötteestä, kun tiedostonimiä luetaan oletussyötteestä" + +#: src/lzma/options.c:83 +#, c-format +msgid "%s: Options must be `name=value' pairs separated with commas" +msgstr "%s: Asetusten tulee olla \"nimi=arvo\"-pareja, jotka on eroteltu pilkuilla" + +#: src/lzma/options.c:111 +#, c-format +msgid "%s: Invalid option value" +msgstr "%s: Virheellinen asetuksen arvo" + +#: src/lzma/options.c:124 +#, c-format +msgid "%s: Invalid option name" +msgstr "%s: Virheellinen asetuksen nimi" + +#: src/lzma/process.c:142 +#, c-format +msgid "Cannot create a thread: %s" +msgstr "Säikeen luonti epäonnistui: %s" + +#: src/lzma/suffix.c:85 +#, c-format +msgid "%s: Filename has an unknown suffix, skipping" +msgstr "%s: Tiedostonimellä on tuntematon pääte, ohitetaan" + +#: src/lzma/suffix.c:114 +#, c-format +msgid "%s: File already has `%s' suffix, skipping" +msgstr "%s: Tiedostolla on jo \"%s\"-pääte, ohitetaan" + +#: src/lzma/util.c:44 +#, c-format +msgid "%s: Value is not a non-negative decimal integer" +msgstr "" + +#: src/lzma/util.c:84 +#, c-format +msgid "%s: Invalid multiplier suffix. 
Valid suffixes:" +msgstr "" + +#: src/lzma/util.c:105 +#, c-format +msgid "Value of the option `%s' must be in the range [%llu, %llu]" +msgstr "Asetuksen \"%s\" arvon tulee olla välillä [%llu, %llu]" + +#: src/lzma/util.c:175 +msgid "Empty filename, skipping" +msgstr "Tyhjä tiedostonimi, ohitetaan" + +#: lib/getopt.c:531 lib/getopt.c:547 +#, c-format +msgid "%s: option `%s' is ambiguous\n" +msgstr "" + +#: lib/getopt.c:580 lib/getopt.c:584 +#, c-format +msgid "%s: option `--%s' doesn't allow an argument\n" +msgstr "" + +#: lib/getopt.c:593 lib/getopt.c:598 +#, c-format +msgid "%s: option `%c%s' doesn't allow an argument\n" +msgstr "" + +#: lib/getopt.c:641 lib/getopt.c:660 lib/getopt.c:976 lib/getopt.c:995 +#, c-format +msgid "%s: option `%s' requires an argument\n" +msgstr "" + +#: lib/getopt.c:698 lib/getopt.c:701 +#, c-format +msgid "%s: unrecognized option `--%s'\n" +msgstr "" + +#: lib/getopt.c:709 lib/getopt.c:712 +#, c-format +msgid "%s: unrecognized option `%c%s'\n" +msgstr "" + +#: lib/getopt.c:764 lib/getopt.c:767 +#, c-format +msgid "%s: illegal option -- %c\n" +msgstr "" + +#: lib/getopt.c:773 lib/getopt.c:776 +#, c-format +msgid "%s: invalid option -- %c\n" +msgstr "%s: virheellinen valitsin -- %c\n" + +#: lib/getopt.c:828 lib/getopt.c:844 lib/getopt.c:1048 lib/getopt.c:1066 +#, c-format +msgid "%s: option requires an argument -- %c\n" +msgstr "" + +#: lib/getopt.c:897 lib/getopt.c:913 +#, c-format +msgid "%s: option `-W %s' is ambiguous\n" +msgstr "" + +#: lib/getopt.c:937 lib/getopt.c:955 +#, c-format +msgid "%s: option `-W %s' doesn't allow an argument\n" +msgstr "" diff --git a/src/Makefile.am b/src/Makefile.am new file mode 100644 index 00000000..531496c2 --- /dev/null +++ b/src/Makefile.am @@ -0,0 +1,16 @@ +## +## Copyright (C) 2007 Lasse Collin +## +## This library is free software; you can redistribute it and/or +## modify it under the terms of the GNU Lesser General Public +## License as published by the Free Software Foundation; either +## 
version 2.1 of the License, or (at your option) any later version. +## +## This library is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## Lesser General Public License for more details. +## + +SUBDIRS = liblzma lzma lzmadec scripts +EXTRA_DIST = common diff --git a/src/common/open_stdxxx.h b/src/common/open_stdxxx.h new file mode 100644 index 00000000..d0ac15af --- /dev/null +++ b/src/common/open_stdxxx.h @@ -0,0 +1,50 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file open_stdxxx.h +/// \brief Make sure that file descriptors 0, 1, and 2 are open +// +// This code has been put into the public domain. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef OPEN_STDXXX_H +#define OPEN_STDXXX_H + +#include <stdlib.h> +#include <fcntl.h> +#include <unistd.h> + + +static void +open_stdxxx(int status) +{ + for (int i = 0; i <= 2; ++i) { + // We use fcntl() to check if the file descriptor is open. + if (fcntl(i, F_GETFD) == -1 && errno == EBADF) { + // With stdin, we could use /dev/full so that + // writing to stdin would fail. However, /dev/full + // is Linux specific, and if the program tries to + // write to stdin, there's already a problem anyway. + const int fd = open("/dev/null", O_NOCTTY + | (i == 0 ? O_WRONLY : O_RDONLY)); + + if (fd != i) { + // Something went wrong. Exit with the + // exit status we were given. Don't try + // to print an error message, since stderr + // may very well be non-existent. This + // error should be extremely rare. 
+ (void)close(fd); + exit(status); + } + } + } + + return; +} + +#endif diff --git a/src/common/physmem.h b/src/common/physmem.h new file mode 100644 index 00000000..1d40b299 --- /dev/null +++ b/src/common/physmem.h @@ -0,0 +1,77 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file physmem.h +/// \brief Get the amount of physical memory +// +// This code has been put into the public domain. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef PHYSMEM_H +#define PHYSMEM_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <sys/types.h> +#include <inttypes.h> + +#ifdef HAVE_LIMITS_H +# include <limits.h> +#endif + +#if defined(HAVE_PHYSMEM_SYSCTL) || defined(HAVE_NCPU_SYSCTL) +# ifdef HAVE_SYS_PARAM_H +# include <sys/param.h> +# endif +# ifdef HAVE_SYS_SYSCTL_H +# include <sys/sysctl.h> +# endif +#endif + + +/// \brief Get the amount of physical memory in bytes +/// +/// \return Amount of physical memory in bytes. On error, zero is +/// returned. +static inline uint64_t +physmem(void) +{ + uint64_t ret = 0; + +#if defined(HAVE_PHYSMEM_SYSCONF) + const long pagesize = sysconf(_SC_PAGESIZE); + const long pages = sysconf(_SC_PHYS_PAGES); + if (pagesize != -1 || pages != -1) + // According to docs, pagesize * pages can overflow. + // Simple case is 32-bit box with 4 GiB or more RAM, + // which may report exactly 4 GiB of RAM, and "long" + // being 32-bit will overflow. Casting to uint64_t + // hopefully avoids overflows in the near future. 
+ ret = (uint64_t)(pagesize) * (uint64_t)(pages); + +#elif defined(HAVE_PHYSMEM_SYSCTL) + int name[2] = { CTL_HW, HW_PHYSMEM }; + unsigned long mem; + size_t mem_ptr_size = sizeof(mem); + if (!sysctl(name, 2, &mem, &mem_ptr_size, NULL, NULL)) { + // Some systems use unsigned int as the "return value". + // This makes a difference on 64-bit boxes. + if (mem_ptr_size != sizeof(mem)) { + if (mem_ptr_size == sizeof(unsigned int)) + ret = *(unsigned int *)(&mem); + } else { + ret = mem; + } + } +#endif + + return ret; +} + +#endif diff --git a/src/common/sysdefs.h b/src/common/sysdefs.h new file mode 100644 index 00000000..b4ba8a56 --- /dev/null +++ b/src/common/sysdefs.h @@ -0,0 +1,100 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file sysdefs.h +/// \brief Common includes, definitions, system-specific things etc. +/// +/// This file is used also by the lzma command line tool, that's why this +/// file is separate from common.h. +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_SYSDEFS_H +#define LZMA_SYSDEFS_H + +////////////// +// Includes // +////////////// + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include "lzma.h" + +#include <stdlib.h> + +#ifdef HAVE_STDBOOL_H +# include <stdbool.h> +#else +# if ! 
HAVE__BOOL +typedef unsigned char _Bool; +# endif +# define bool _Bool +# define false 0 +# define true 1 +# define __bool_true_false_are_defined 1 +#endif + +#ifdef HAVE_ASSERT_H +# include <assert.h> +#else +# ifdef NDEBUG + // Expand to a void expression so assert(x) stays usable wherever + // an expression is expected. +# define assert(x) ((void)0) +# else + // TODO: Pretty bad assert() macro. + // abort() returns void, so it cannot be an operand of &&; use a + // conditional expression whose both arms have type void instead. +# define assert(x) ((x) ? (void)0 : abort()) +# endif +#endif + +#ifdef HAVE_STRING_H +# include <string.h> +#endif + +#ifdef HAVE_STRINGS_H +# include <strings.h> +#endif + +#ifdef HAVE_MEMORY_H +# include <memory.h> +#endif + + +//////////// +// Macros // +//////////// + +#ifndef HAVE_MEMCPY +# define memcpy(dest, src, n) bcopy(src, dest, n) +#endif + +#ifndef HAVE_MEMMOVE +# define memmove(dest, src, n) bcopy(src, dest, n) +#endif + +#ifdef HAVE_MEMSET +# define memzero(s, n) memset(s, 0, n) +#else +# define memzero(s, n) bzero(s, n) +#endif + +#ifndef MIN +# define MIN(x, y) ((x) < (y) ? (x) : (y)) +#endif + +#ifndef MAX +# define MAX(x, y) ((x) > (y) ? (x) : (y)) +#endif + +#endif diff --git a/src/liblzma/Makefile.am b/src/liblzma/Makefile.am new file mode 100644 index 00000000..78a072f4 --- /dev/null +++ b/src/liblzma/Makefile.am @@ -0,0 +1,47 @@ +## +## Copyright (C) 2007 Lasse Collin +## +## This library is free software; you can redistribute it and/or +## modify it under the terms of the GNU Lesser General Public +## License as published by the Free Software Foundation; either +## version 2.1 of the License, or (at your option) any later version. +## +## This library is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## Lesser General Public License for more details.
+## + +SUBDIRS = api common check + +lib_LTLIBRARIES = liblzma.la +liblzma_la_SOURCES = +liblzma_la_LDFLAGS = -version-info 0:0:0 + +liblzma_la_LIBADD = \ + common/libcommon.la \ + check/libcheck.la + +if COND_FILTER_LZMA +SUBDIRS += lz lzma rangecoder +liblzma_la_LIBADD += \ + lz/liblz.la \ + lzma/liblzma4.la \ + rangecoder/librangecoder.la +endif + +if COND_FILTER_SUBBLOCK +SUBDIRS += subblock +liblzma_la_LIBADD += subblock/libsubblock.la +endif + +if COND_MAIN_SIMPLE +SUBDIRS += simple +liblzma_la_LIBADD += simple/libsimple.la +endif + + +## pkg-config +pkgconfigdir = $(libdir)/pkgconfig +pkgconfig_DATA = lzma.pc +EXTRA_DIST = lzma.pc.in diff --git a/src/liblzma/api/Makefile.am b/src/liblzma/api/Makefile.am new file mode 100644 index 00000000..7f5e6de4 --- /dev/null +++ b/src/liblzma/api/Makefile.am @@ -0,0 +1,39 @@ +## +## Copyright (C) 2007 Lasse Collin +## +## This library is free software; you can redistribute it and/or +## modify it under the terms of the GNU Lesser General Public +## License as published by the Free Software Foundation; either +## version 2.1 of the License, or (at your option) any later version. +## +## This library is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## Lesser General Public License for more details. 
+## + +nobase_include_HEADERS = \ + lzma.h \ + lzma/alignment.h \ + lzma/alone.h \ + lzma/auto.h \ + lzma/base.h \ + lzma/block.h \ + lzma/check.h \ + lzma/copy.h \ + lzma/delta.h \ + lzma/extra.h \ + lzma/filter.h \ + lzma/index.h \ + lzma/info.h \ + lzma/init.h \ + lzma/lzma.h \ + lzma/memlimit.h \ + lzma/metadata.h \ + lzma/raw.h \ + lzma/simple.h \ + lzma/stream.h \ + lzma/stream_flags.h \ + lzma/subblock.h \ + lzma/version.h \ + lzma/vli.h diff --git a/src/liblzma/api/lzma.h b/src/liblzma/api/lzma.h new file mode 100644 index 00000000..186ae12c --- /dev/null +++ b/src/liblzma/api/lzma.h @@ -0,0 +1,122 @@ +/** + * \file lzma.h + * \brief The public API of liblzma + * + * liblzma is a LZMA compression library with a zlib-like API. + * liblzma is based on LZMA SDK found from http://7-zip.org/sdk.html. + * + * \author Copyright (C) 1999-2006 Igor Pavlov + * \author Copyright (C) 2007 Lasse Collin + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + */ + +#ifndef LZMA_H +#define LZMA_H + +/******************** + * External headers * + ********************/ + +/* size_t */ +#include <sys/types.h> + +/* NULL */ +#include <stddef.h> + +/* uint8_t, uint32_t, uint64_t, UINT32_C, UINT64_C, UINT64_MAX. */ +#include <inttypes.h> + + +/****************** + * GCC extensions * + ******************/ + +/* + * GCC extensions are used conditionally in the public API. It doesn't + * break anything if these are sometimes enabled and sometimes not, only + * affects warnings and optimizations. 
+ */ +#if defined(__GNUC__) && __GNUC__ >= 3 +# ifndef lzma_attribute +# define lzma_attribute(attr) __attribute__(attr) +# endif +# ifndef lzma_restrict +# define lzma_restrict __restrict__ +# endif +#else +# ifndef lzma_attribute +# define lzma_attribute(attr) +# endif +# ifndef lzma_restrict +# define lzma_restrict +# endif +#endif + + +/************** + * Subheaders * + **************/ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Subheaders check that this is defined. It is to prevent including + * them directly from applications. + */ +#define LZMA_H_INTERNAL 1 + +/* Basic features */ +#include "lzma/init.h" +#include "lzma/base.h" +#include "lzma/vli.h" +#include "lzma/filter.h" +#include "lzma/check.h" + +/* Filters */ +#include "lzma/copy.h" +#include "lzma/subblock.h" +#include "lzma/simple.h" +#include "lzma/delta.h" +#include "lzma/lzma.h" + +/* Container formats and Metadata */ +#include "lzma/block.h" +#include "lzma/index.h" +#include "lzma/extra.h" +#include "lzma/metadata.h" +#include "lzma/stream.h" +#include "lzma/alone.h" +#include "lzma/raw.h" +#include "lzma/auto.h" + +/* Advanced features */ +#include "lzma/info.h" +#include "lzma/alignment.h" +#include "lzma/stream_flags.h" +#include "lzma/memlimit.h" + +/* Version number */ +#include "lzma/version.h" + +/* + * All subheaders included. Undefine LZMA_H_INTERNAL to prevent applications + * re-including the subheaders. 
+ */ +#undef LZMA_H_INTERNAL + +#ifdef __cplusplus +} +#endif + +#endif /* ifndef LZMA_H */ diff --git a/src/liblzma/api/lzma/alignment.h b/src/liblzma/api/lzma/alignment.h new file mode 100644 index 00000000..6672656c --- /dev/null +++ b/src/liblzma/api/lzma/alignment.h @@ -0,0 +1,60 @@ +/** + * \file lzma/alignment.h + * \brief Calculating input and output alignment of filter chains + * + * \author Copyright (C) 1999-2006 Igor Pavlov + * \author Copyright (C) 2007 Lasse Collin + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use <lzma.h> instead. +#endif + + +/** + * \brief Calculates the preferred alignment of the input data + * + * FIXME desc + */ +extern uint32_t lzma_alignment_input( + const lzma_options_filter *filters, uint32_t guess); + + +/** + * \brief Calculates the alignment of the encoded output + * + * Knowing the alignment of the output data is useful e.g. in the Block + * encoder which tries to align the Compressed Data field optimally. + * + * \param filters Pointer to lzma_options_filter array, whose last + * member must have .id = LZMA_VLI_VALUE_UNKNOWN. + * \param guess The value to return if the alignment of the output + * is the same as the alignment of the input data. + * If you want to always detect this special case, + * set this guess to zero; this function never returns + * zero unless guess is zero.
+ * + * \return In most cases, a small positive integer is returned; + * for optimal use, the encoded output of this filter + * chain should start at an offset that is a multiple of + * the returned integer value. + * + * If the alignment of the output is the same as the input + * data (which this function cannot know), \a guess is + * returned. + * + * If an error occurs (that is, unknown Filter IDs or filter + * options), UINT32_MAX is returned. + */ +extern uint32_t lzma_alignment_output( + const lzma_options_filter *filters, uint32_t guess); diff --git a/src/liblzma/api/lzma/alone.h b/src/liblzma/api/lzma/alone.h new file mode 100644 index 00000000..1a6b8e27 --- /dev/null +++ b/src/liblzma/api/lzma/alone.h @@ -0,0 +1,82 @@ +/** + * \file lzma/alone.h + * \brief Handling of the legacy LZMA_Alone format + * + * \author Copyright (C) 1999-2006 Igor Pavlov + * \author Copyright (C) 2007 Lasse Collin + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use <lzma.h> instead. +#endif + + +/** + * \brief Options for files in the LZMA_Alone format + */ +typedef struct { + /** + * \brief Uncompressed Size and usage of End of Payload Marker + * + * In contrast to .lzma Blocks, LZMA_Alone format cannot have both + * uncompressed size field in the header and end of payload marker. + * If you don't know the uncompressed size beforehand, set it to + * LZMA_VLI_VALUE_UNKNOWN and liblzma will embed end of payload + * marker.
+ */ + lzma_vli uncompressed_size; + + /** + * \brief LZMA options + * + * The LZMA_Alone format supports only one filter: the LZMA filter. + * + * \note There exists also an undocumented variant of the + * LZMA_Alone format, which uses the x86 filter in + * addition to LZMA. This format was never supported + * by LZMA Utils and is not supported by liblzma either. + */ + lzma_options_lzma lzma; + +} lzma_options_alone; + + +/** + * \brief Initializes LZMA_Alone encoder + * + * LZMA_Alone files have the suffix .lzma like the .lzma Stream files. + * LZMA_Alone format supports only one filter, the LZMA filter. There is + * no support for integrity checks like CRC32. + * + * Use this format if and only if you need to create files readable by + * legacy LZMA tools. + * + * LZMA_Alone encoder doesn't support LZMA_SYNC_FLUSH or LZMA_FULL_FLUSH. + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_PROG_ERROR + */ +extern lzma_ret lzma_alone_encoder( + lzma_stream *strm, const lzma_options_alone *options); + + +/** + * \brief Initializes decoder for LZMA_Alone file + * + * The LZMA_Alone decoder supports LZMA_SYNC_FLUSH. + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + */ +extern lzma_ret lzma_alone_decoder(lzma_stream *strm); diff --git a/src/liblzma/api/lzma/auto.h b/src/liblzma/api/lzma/auto.h new file mode 100644 index 00000000..327e726f --- /dev/null +++ b/src/liblzma/api/lzma/auto.h @@ -0,0 +1,41 @@ +/** + * \file lzma/auto.h + * \brief Decoder with automatic file format detection + * + * \author Copyright (C) 1999-2006 Igor Pavlov + * \author Copyright (C) 2007 Lasse Collin + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use <lzma.h> instead. +#endif + + +/** + * \brief Decode .lzma Streams and LZMA_Alone files with autodetection + * + * Autodetects between the .lzma Stream and LZMA_Alone formats, and + * calls lzma_stream_decoder_init() or lzma_alone_decoder_init() once + * the type of the file has been detected. + * + * \param strm Pointer to properly prepared lzma_stream + * \param header Pointer to hold a pointer to Extra Records read + * from the Header Metadata Block. Use NULL if + * you don't care about Extra Records. + * \param footer Same as header, but for Footer Metadata Block. + * + * \return - LZMA_OK: Initialization was successful. + * - LZMA_MEM_ERROR: Cannot allocate memory. + */ +extern lzma_ret lzma_auto_decoder(lzma_stream *strm, + lzma_extra **header, lzma_extra **footer); diff --git a/src/liblzma/api/lzma/base.h b/src/liblzma/api/lzma/base.h new file mode 100644 index 00000000..53cf89f5 --- /dev/null +++ b/src/liblzma/api/lzma/base.h @@ -0,0 +1,410 @@ +/** + * \file lzma/base.h + * \brief Data types and functions used in many places of the public API + * + * \author Copyright (C) 1999-2006 Igor Pavlov + * \author Copyright (C) 2007 Lasse Collin + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU + * Lesser General Public License for more details. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use <lzma.h> instead. +#endif + + +/** + * \brief Boolean + * + * This is here because C89 doesn't have stdbool.h. To set a value for + * variables having type lzma_bool, you can use + * - C99's `true' and `false' from stdbool.h; + * - C++'s internal `true' and `false'; or + * - integers one (true) and zero (false). + */ +typedef unsigned char lzma_bool; + + +/** + * \brief Return values used by several functions in liblzma + * + * Check the descriptions of specific functions to find out which return + * values they can return and the exact meanings of the values in every + * situation. The descriptions given here are only suggestive. + */ +typedef enum { + LZMA_OK = 0, + /**< + * \brief Operation completed successfully + */ + + LZMA_STREAM_END = 1, + /**< + * \brief End of stream was reached + * + * The application should pick the last remaining output + * bytes from strm->next_out. + */ + + LZMA_PROG_ERROR = -2, + /**< + * \brief Programming error + * + * This indicates that the arguments given to the function are + * invalid or the internal state of the decoder is corrupt. + * - Function arguments are invalid or the structures + * pointed by the argument pointers are invalid + * e.g. if strm->next_out has been set to NULL and + * strm->avail_out > 0 when calling lzma_code(). + * - lzma_* functions have been called in wrong order + * e.g. lzma_code() was called right after lzma_end(). + * - If errors occur randomly, the reason might be flaky + * hardware. + * + * If you think that your code is correct, this error code + * can be a sign of a bug in liblzma. See the documentation + * how to report bugs. + */ + + LZMA_DATA_ERROR = -3, + /**< + * \brief Data is corrupt + * + * - Encoder: The input size doesn't match the uncompressed + * size given to lzma_*_encoder_init(). + * - Decoder: The input is corrupt. 
This includes corrupted + * header, corrupted compressed data, and unmatching + * integrity Check. + * + * \todo What can be done if encoder returns this? + * Probably can continue by fixing the input + * amount, but make sure. + */ + + LZMA_MEM_ERROR = -4, + /**< + * \brief Cannot allocate memory + * + * Memory allocation failed. + */ + + LZMA_BUF_ERROR = -5, + /**< + * \brief No progress is possible + * + * This may happen when avail_in or avail_out is zero. + * + * \note This error is not fatal. Coding can continue + * normally once the reason for this error has + * been fixed. + */ + + LZMA_HEADER_ERROR = -6, + /**< + * \brief Invalid or unsupported header + * + * Invalid or unsupported options, for example + * - unsupported filter(s) or filter options; or + * - reserved bits set in headers (decoder only). + * + * Rebuilding liblzma with more features enabled, or + * upgrading to a newer version of liblzma may help. + */ + + LZMA_UNSUPPORTED_CHECK = -7, + /**< + * \brief Check type is unknown + * + * The type of Check is not supported, and thus the Check + * cannot be calculated. In the encoder, this is an error. + * In the decoder, this is only a warning and decoding can + * still proceed normally (but the Check is ignored). + */ +} lzma_ret; + + +/** + * \brief The `action' argument for lzma_code() + */ +typedef enum { + LZMA_RUN = 0, + /**< + * Encoder: Encode as much input as possible. Some internal + * buffering will probably be done (depends on the filter + * chain in use), which causes latency: the input used won't + * usually be decodeable from the output of the same + * lzma_code() call. + * + * Decoder: Decode as much input as possible and produce as + * much output as possible. This action provides best + * throughput, but may introduce latency, because the + * decoder may decode more data into its internal buffers + * than that fits into next_out. 
+ */ + + LZMA_SYNC_FLUSH = 1, + /**< + * Encoder: Makes all the data given to liblzma via next_in + * available in next_out without resetting the filters. Call + * lzma_code() with LZMA_SYNC_FLUSH until it returns + * LZMA_STREAM_END. Then continue encoding normally. + * + * \note Synchronous flushing is supported only by + * some filters. Some filters support it only + * partially. + * + * Decoder: Asks the decoder to decode only as much as is + * needed to fill next_out. This decreases latency with some + * filters, but is likely to decrease also throughput. It is + * a good idea to use this flag only when it is likely that + * you don't need more output soon. + * + * \note With decoder, this is not comparable to + * zlib's Z_SYNC_FLUSH. + */ + + LZMA_FULL_FLUSH = 2, + /**< + * Finishes encoding of the current Data Block. All the input + * data going to the current Data Block must have been given + * to the encoder (the last bytes can still be pending in + * next_in). Call lzma_code() with LZMA_FULL_FLUSH until + * it returns LZMA_STREAM_END. Then continue normally with + * LZMA_RUN or finish the Stream with LZMA_FINISH. + * + * This action is supported only by Multi-Block Stream + * encoder. If there is no unfinished Data Block, no empty + * Data Block is created. + */ + + LZMA_FINISH = 3 + /**< + * Finishes the encoding operation. All the input data must + * have been given to the encoder (the last bytes can still + * be pending in next_in). Call lzma_code() with LZMA_FINISH + * until it returns LZMA_STREAM_END. + * + * This action is not supported by decoders. + */ +} lzma_action; + + +/** + * \brief Custom functions for memory handling + * + * A pointer to lzma_allocator may be passed via lzma_stream structure + * to liblzma. The library will use these functions for memory handling + * instead of the default malloc() and free(). + * + * liblzma doesn't make an internal copy of lzma_allocator. 
Thus, it is + * OK to change these function pointers in the middle of the coding + * process, but obviously it must be done carefully to make sure that the + * replacement `free' can deallocate memory allocated by the earlier + * `alloc' function(s). + */ +typedef struct { + /** + * \brief Pointer to custom memory allocation function + * + * Set this to point to your custom memory allocation function. + * It can be useful for example if you want to limit how much + * memory liblzma is allowed to use: for this, you may use + * a pointer to lzma_memory_alloc(). + * + * If you don't want a custom allocator, but still want + * custom free(), set this to NULL and liblzma will use + * the standard malloc(). + * + * \param opaque lzma_allocator.opaque (see below) + * \param nmemb Number of elements like in calloc(). + * liblzma will always set nmemb to 1. + * This argument exists only for + * compatibility with zlib and libbzip2. + * \param size Size of an element in bytes. + * liblzma never sets this to zero. + * + * \return Pointer to the beginning of a memory block of + * size nmemb * size, or NULL if allocation fails + * for some reason. When allocation fails, functions + * of liblzma return LZMA_MEM_ERROR. + */ + void *(*alloc)(void *opaque, size_t nmemb, size_t size); + + /** + * \brief Pointer to custom memory freeing function + * + * Set this to point to your custom memory freeing function. + * If lzma_memory_alloc() is used as allocator, this should + * be set to lzma_memory_free(). + * + * If you don't want a custom freeing function, but still + * want a custom allocator, set this to NULL and liblzma + * will use the standard free(). + * + * \param opaque lzma_allocator.opaque (see below) + * \param ptr Pointer returned by + * lzma_allocator.alloc(), or when it + * is set to NULL, a pointer returned + * by the standard malloc(). 
+ */ + void (*free)(void *opaque, void *ptr); + + /** + * \brief Pointer passed to .alloc() and .free() + * + * opaque is passed as the first argument to lzma_allocator.alloc() + * and lzma_allocator.free(). This is intended to ease implementing + * custom memory allocation functions for use with liblzma. + * + * When using lzma_memory_alloc() and lzma_memory_free(), opaque + * must point to lzma_memory_limitter structure allocated and + * initialized with lzma_memory_limitter_create(). + * + * If you don't need this, you should set it to NULL. + */ + void *opaque; + +} lzma_allocator; + + +/** + * \brief Internal data structure + * + * The contents of this structure are not visible outside the library. + */ +typedef struct lzma_internal_s lzma_internal; + + +/** + * \brief Passing data to and from liblzma + * + * The lzma_stream structure is used for + * - passing pointers to input and output buffers to liblzma; + * - defining custom memory handler functions; and + * - holding a pointer to coder-specific internal data structures. + * + * Before calling any of the lzma_*_init() functions the first time, + * the application must reset lzma_stream to LZMA_STREAM_INIT. The + * lzma_*_init() function will verify the options, allocate internal + * data structures and store a pointer to them into `internal'. Finally + * total_in and total_out are reset to zero. In contrast to zlib, + * next_in and avail_in are ignored by the initialization functions. + * + * The actual coding is done with the lzma_code() function. Application + * must update next_in, avail_in, next_out, and avail_out between + * calls to lzma_code() just like with zlib. + * + * In contrast to zlib, even the decoder requires that there always + * is at least one byte space in next_out; if avail_out == 0, + * LZMA_BUF_ERROR is returned immediately. This shouldn't be a problem + * for most applications that already use zlib, but it's still worth + * checking your application.
+ * + * Application may modify values of total_in and total_out as it wants. + * They are updated by liblzma to match the amount of data read and + * written, but liblzma doesn't use the values internally. + * + * Application must not touch the `internal' pointer. + */ +typedef struct { + uint8_t *next_in; /**< Pointer to the next input byte. */ + size_t avail_in; /**< Number of available input bytes in next_in. */ + uint64_t total_in; /**< Total number of bytes read by liblzma. */ + + uint8_t *next_out; /**< Pointer to the next output position. */ + size_t avail_out; /**< Amount of free space in next_out. */ + uint64_t total_out; /**< Total number of bytes written by liblzma. */ + + /** + * Custom memory allocation functions. Set to NULL to use + * the standard malloc() and free(). + */ + lzma_allocator *allocator; + + /** Internal state is not visible to outsiders. */ + lzma_internal *internal; + +} lzma_stream; + + +/** + * \brief Initialization for lzma_stream + * + * When you declare an instance of lzma_stream, you can immediately + * initialize it so that initialization functions know that no memory + * has been allocated yet: + * + * lzma_stream strm = LZMA_STREAM_INIT; + */ +#define LZMA_STREAM_INIT { NULL, 0, 0, NULL, 0, 0, NULL, NULL } + + +/** + * \brief Initialization for lzma_stream + * + * This is like LZMA_STREAM_INIT, but this can be used when the lzma_stream + * has already been allocated: + * + * lzma_stream *strm = malloc(sizeof(lzma_stream)); + * if (strm == NULL) + * return LZMA_MEM_ERROR; + * *strm = LZMA_STREAM_INIT_VAR; + */ +extern const lzma_stream LZMA_STREAM_INIT_VAR; + + +/** + * \brief Encodes or decodes data + * + * Once the lzma_stream has been successfully initialized (e.g. with + * lzma_stream_encoder_single()), the actual encoding or decoding is + * done using this function.
+ * + * \return Some coders may have more exact meaning for different return + * values, which are mentioned separately in the description of + * the initialization functions. Here are the typical meanings: + * - LZMA_OK: So far all good. + * - LZMA_STREAM_END: + * - Encoder: LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, or + * LZMA_FINISH completed. + * - Decoder: End of uncompressed data was reached. + * - LZMA_BUF_ERROR: Unable to progress. Provide more input or + * output space, and call this function again. This cannot + * occur if both avail_in and avail_out were non-zero (or + * there's a bug in liblzma). + * - LZMA_MEM_ERROR: Unable to allocate memory. Due to lazy + * programming, the coding cannot continue even if the + * application could free more memory. The next call must + * be lzma_end() or some initialization function. + * - LZMA_DATA_ERROR: + * - Encoder: Filter(s) cannot process the given data. + * - Decoder: Compressed data is corrupt. + * - LZMA_HEADER_ERROR: Unsupported options. Rebuilding liblzma + * with more features enabled or upgrading to a newer version + * may help, although usually this is a sign of invalid options + * (encoder) or corrupted input data (decoder). + * - LZMA_PROG_ERROR: Invalid arguments or the internal state + * of the coder is corrupt. + */ +extern lzma_ret lzma_code(lzma_stream *strm, lzma_action action); + + +/** + * \brief Frees memory allocated for the coder data structures + * + * \param strm Pointer to lzma_stream that is at least initialized + * with LZMA_STREAM_INIT. + * + * \note zlib indicates an error if application end()s unfinished + * stream. liblzma doesn't do this, and assumes that + * application knows what it is doing. 
+ */ +extern void lzma_end(lzma_stream *strm); diff --git a/src/liblzma/api/lzma/block.h b/src/liblzma/api/lzma/block.h new file mode 100644 index 00000000..210c1d87 --- /dev/null +++ b/src/liblzma/api/lzma/block.h @@ -0,0 +1,409 @@ +/** + * \file lzma/block.h + * \brief .lzma Block handling + * + * \author Copyright (C) 1999-2006 Igor Pavlov + * \author Copyright (C) 2007 Lasse Collin + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use <lzma.h> instead. +#endif + + +/** + * \brief Options for the Block Header encoder and decoder + * + * Different things use different parts of this structure. Some read + * some members, other functions write, and some do both. Only the + * members listed for reading need to be initialized when the specified + * functions are called. The members marked for writing will be assigned + * new values at some point either by calling the given function or by + * later calls to lzma_code(). + */ +typedef struct { + /** + * \brief Type of integrity Check + * + * The type of the integrity Check is not stored into the Block + * Header, thus its value must be provided also when decoding. + * + * Read by: + * - lzma_block_encoder() + * - lzma_block_decoder() + */ + lzma_check_type check; + + /** + * \brief Presence of CRC32 of the Block Header + * + * Set this to true if CRC32 of the Block Header should be + * calculated and stored in the Block Header.
+ * + * There is no way to autodetect if CRC32 is present in the Block + * Header, thus this information must be provided also when decoding. + * + * Read by: + * - lzma_block_header_size() + * - lzma_block_header_encoder() + * - lzma_block_header_decoder() + */ + lzma_bool has_crc32; + + /** + * \brief Usage of End of Payload Marker + * + * If this is true, End of Payload Marker is used even if + * Uncompressed Size is known. + * + * Read by: + * - lzma_block_header_encoder() + * - lzma_block_encoder() + * - lzma_block_decoder() + * + * Written by: + * - lzma_block_header_decoder() + */ + lzma_bool has_eopm; + + /** + * \brief True if the Block is a Metadata Block + * + * If this is true, the Metadata bit will be set in the Block Header. + * It is up to the application to store correctly formatted data + * into Metadata Block. + * + * Read by: + * - lzma_block_header_encoder() + * + * Written by: + * - lzma_block_header_decoder() + */ + lzma_bool is_metadata; + + /** + * \brief True if Uncompressed Size is in Block Footer + * + * Read by: + * - lzma_block_encoder() + * - lzma_block_decoder() + */ + lzma_bool has_uncompressed_size_in_footer; + + /** + * \brief True if Backward Size is in Block Footer + * + * Read by: + * - lzma_block_encoder() + * - lzma_block_decoder() + */ + lzma_bool has_backward_size; + + /** + * \brief True if Block coder should take care of Padding + * + * In liblzma, Stream decoder sets this to true when decoding + * Header Metadata Block or Data Blocks from Multi-Block Stream, + * and to false when decoding Single-Block Stream or Footer + * Metadata Block from a Multi-Block Stream. + * + * Read by: + * - lzma_block_encoder() + * - lzma_block_decoder() + */ + lzma_bool handle_padding; + + /** + * \brief Size of the Compressed Data in bytes + * + * Usually you don't know this value when encoding in streamed mode. 
+ * In non-streamed mode you can reserve space for this field when + * encoding the Block Header the first time, and then re-encode the + * Block Header and copy it over the original one after the encoding + * of the Block has been finished. + * + * Read by: + * - lzma_block_header_size() + * - lzma_block_header_encoder() + * - lzma_block_encoder() + * - lzma_block_decoder() + * + * Written by: + * - lzma_block_header_decoder() + * - lzma_block_encoder() + * - lzma_block_decoder() + */ + lzma_vli compressed_size; + + /** + * \brief Uncompressed Size in bytes + * + * Encoder: If this value is not LZMA_VLI_VALUE_UNKNOWN, it is stored + * to the Uncompressed Size field in the Block Header. The real + * uncompressed size of the data being compressed must match + * the Uncompressed Size or LZMA_HEADER_ERROR is returned. + * + * If Uncompressed Size is unknown, End of Payload Marker must + * be used. If uncompressed_size == LZMA_VLI_VALUE_UNKNOWN and + * has_eopm == 0, LZMA_HEADER_ERROR will be returned. + * + * Decoder: If this value is not LZMA_VLI_VALUE_UNKNOWN, it is + * compared to the real Uncompressed Size. If they do not match, + * LZMA_HEADER_ERROR is returned. + * + * Read by: + * - lzma_block_header_size() + * - lzma_block_header_encoder() + * - lzma_block_encoder() + * - lzma_block_decoder() + * + * Written by: + * - lzma_block_header_decoder() + * - lzma_block_encoder() + * - lzma_block_decoder() + */ + lzma_vli uncompressed_size; + + /** + * \brief Number of bytes to reserve for Compressed Size + * + * This is useful if you want to be able to store the Compressed Size + * to the Block Header, but you don't know it when starting to encode. + * Setting this to non-zero value at maximum of LZMA_VLI_BYTES_MAX, + * the Block Header encoder will force the Compressed Size field to + * occupy specified number of bytes. 
You can later rewrite the Block + * Header to contain correct information by using otherwise identical + * lzma_options_block structure except the correct compressed_size. + * + * Read by: + * - lzma_block_header_size() + * - lzma_block_header_encoder() + * + * Written by: + * - lzma_block_header_decoder() + */ + uint32_t compressed_reserve; + + /** + * \brief Number of bytes to reserve for Uncompressed Size + * + * See the description of compressed_size above. + * + * Read by: + * - lzma_block_header_size() + * - lzma_block_header_encoder() + * + * Written by: + * - lzma_block_header_decoder() + */ + uint32_t uncompressed_reserve; + + /** + * \brief Total Size of the Block in bytes + * + * This is useful in the decoder, which can verify the Total Size + * if it is known from Index. + * + * Read by: + * - lzma_block_encoder() + * - lzma_block_decoder() + * + * Written by: + * - lzma_block_encoder() + * - lzma_block_decoder() + */ + lzma_vli total_size; + + /** + * \brief Upper limit of Total Size + * + * Read by: + * - lzma_block_encoder() + * - lzma_block_decoder() + */ + lzma_vli total_limit; + + /** + * \brief Upper limit of Uncompressed Size + * + * Read by: + * - lzma_block_encoder() + * - lzma_block_decoder() + */ + lzma_vli uncompressed_limit; + + /** + * \brief Array of filters + * + * There can be at maximum of seven filters. The end of the array + * is marked with .id = LZMA_VLI_VALUE_UNKNOWN. Minimum number of + * filters is zero; in that case, an implicit Copy filter is used. + * + * Read by: + * - lzma_block_header_size() + * - lzma_block_header_encoder() + * - lzma_block_encoder() + * - lzma_block_decoder() + * + * Written by: + * - lzma_block_header_decoder(): Note that this does NOT free() + * the old filter options structures. If decoding fails, the + * caller must take care of freeing the options structures + * that may have been allocated and decoded before the error + * occurred. 
+ */ + lzma_options_filter filters[8]; + + /** + * \brief Size of the Padding field + * + * The Padding field exists to allow aligning the Compressed Data field + * optimally in the Block. See lzma_options_stream.alignment in + * stream.h for more information. + * + * If you want the Block Header encoder to automatically calculate + * optimal size for the Padding field by looking at the information + * in filters[], set this to LZMA_BLOCK_HEADER_PADDING_AUTO. In that + * case, you must also set the alignment variable to tell the + * encoder the alignment of the beginning of the Block Header. + * + * The decoder never sets this to LZMA_BLOCK_HEADER_PADDING_AUTO. + * + * Read by: + * - lzma_block_header_size() + * - lzma_block_header_encoder(): Note that this doesn't + * accept LZMA_BLOCK_HEADER_PADDING_AUTO. + * + * Written by (these never set padding to + * LZMA_BLOCK_HEADER_PADDING_AUTO): + * - lzma_block_header_size() + * - lzma_block_header_decoder() + */ + int32_t padding; +# define LZMA_BLOCK_HEADER_PADDING_AUTO (-1) +# define LZMA_BLOCK_HEADER_PADDING_MIN 0 +# define LZMA_BLOCK_HEADER_PADDING_MAX 31 + + /** + * \brief Alignment of the beginning of the Block Header + * + * This variable is read only if padding has been set to + * LZMA_BLOCK_HEADER_PADDING_AUTO. + * + * Read by: + * - lzma_block_header_size() + * - lzma_block_header_encoder() + */ + uint32_t alignment; + + /** + * \brief Size of the Block Header + * + * Read by: + * - lzma_block_encoder() + * - lzma_block_decoder() + * + * Written by: + * - lzma_block_header_size() + * - lzma_block_header_decoder() + */ + uint32_t header_size; + +} lzma_options_block; + + +/** + * \brief Calculates the size of Header Padding and Block Header + * + * \return - LZMA_OK: Size calculated successfully and stored to + * options->header_size. + * - LZMA_HEADER_ERROR: Unsupported filters or filter options. + * - LZMA_PROG_ERROR: Invalid options + * + * \note This doesn't check that all the options are valid i.e.
this + * may return LZMA_OK even if lzma_block_header_encode() or + * lzma_block_encoder() would fail. + */ +extern lzma_ret lzma_block_header_size(lzma_options_block *options); + + +/** + * \brief Encodes Block Header + * + * Encoding of the Block options is done with a single call instead of + * first initializing and then doing the actual work with lzma_code(). + * + * \param out Beginning of the output buffer. This must be + * at least options->header_size bytes. + * \param options Block options to be encoded. + * + * \return - LZMA_OK: Encoding was successful. options->header_size + * bytes were written to output buffer. + * - LZMA_HEADER_ERROR: Invalid or unsupported options. + * - LZMA_PROG_ERROR + */ +extern lzma_ret lzma_block_header_encode( + uint8_t *out, const lzma_options_block *options); + + +/** + * \brief Initializes Block Header decoder + * + * Because the results of this decoder are placed into *options, + * strm->next_in, strm->avail_in, and strm->total_in are not used. + * + * The only valid `action' with lzma_code() is LZMA_RUN. + * + * \return - LZMA_OK: Encoding was successful. options->header_size + * bytes were written to output buffer. + * - LZMA_HEADER_ERROR: Invalid or unsupported options. + * - LZMA_PROG_ERROR + */ +extern lzma_ret lzma_block_header_decoder( + lzma_stream *strm, lzma_options_block *options); + + +/** + * \brief Initializes .lzma Block encoder + * + * This function is required for multi-thread encoding. It may also be + * useful when implementing custom file formats. + * + * \return - LZMA_OK: All good, continue with lzma_code(). + * - LZMA_MEM_ERROR + * - LZMA_HEADER_ERROR + * - LZMA_DATA_ERROR: Limits (total_limit and uncompressed_limit) + * have been reached already. + * - LZMA_UNSUPPORTED_CHECK: options->check specifies a Check + * that is not supported by this build of liblzma. Initializing + * the encoder failed.
+ * - LZMA_PROG_ERROR + * + * lzma_code() can return FIXME + */ +extern lzma_ret lzma_block_encoder( + lzma_stream *strm, lzma_options_block *options); + + +/** + * \brief Initializes decoder for .lzma Block + * + * \return - LZMA_OK: All good, continue with lzma_code(). + * - LZMA_UNSUPPORTED_CHECK: Initialization was successful, but + * the given Check type is not supported, thus Check will be + * ignored. + * - LZMA_PROG_ERROR + * - LZMA_MEM_ERROR + */ +extern lzma_ret lzma_block_decoder( + lzma_stream *strm, lzma_options_block *options); diff --git a/src/liblzma/api/lzma/check.h b/src/liblzma/api/lzma/check.h new file mode 100644 index 00000000..4a2a453b --- /dev/null +++ b/src/liblzma/api/lzma/check.h @@ -0,0 +1,128 @@ +/** + * \file lzma/check.h + * \brief Integrity checks + * + * \author Copyright (C) 1999-2006 Igor Pavlov + * \author Copyright (C) 2007 Lasse Collin + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use <lzma.h> instead. +#endif + + +/** + * \brief Type of the Check + * + * The .lzma format supports multiple types of Checks that are calculated + * from the uncompressed data (unless it is empty; then it's calculated + * from Block Header). + */ +typedef enum { + LZMA_CHECK_NONE = 0, + /**< + * No Check is calculated. 
+ * + * Size of the Check field: 0 bytes + */ + + LZMA_CHECK_CRC32 = 1, + /**< + * CRC32 using the polynomial from the IEEE 802.3 standard + * + * Size of the Check field: 4 bytes + */ + + LZMA_CHECK_CRC64 = 3, + /**< + * CRC64 using the polynomial from the ECMA-182 standard + * + * Size of the Check field: 8 bytes + */ + + LZMA_CHECK_SHA256 = 5 + /**< + * SHA-256 + * + * Size of the Check field: 32 bytes + */ +} lzma_check_type; + + +/** + * \brief Maximum valid Check ID + * + * The .lzma file format specification specifies eight Check IDs (0-7). Some + * of them are only reserved i.e. no actual Check algorithm has been assigned. + * Still liblzma accepts any of these eight IDs for future compatibility + * when decoding files. If a valid but unsupported Check ID is detected, + * liblzma indicates a warning with LZMA_UNSUPPORTED_CHECK. + * + * FIXME bad desc + */ +#define LZMA_CHECK_ID_MAX 7 + + +/** + * \brief Check IDs supported by this liblzma build + * + * If lzma_available_checks[n] is true, the Check ID n is supported by this + * liblzma build. You can assume that LZMA_CHECK_NONE and LZMA_CHECK_CRC32 + * are always available. + */ +extern const lzma_bool lzma_available_checks[LZMA_CHECK_ID_MAX + 1]; + + +/** + * \brief Size of the Check field with different Check IDs + * + * Although not all Check IDs have a check algorithm associated, the size of + * every Check is already frozen. This array contains the size (in bytes) of + * the Check field with specified Check ID. The values are taken from the + * section 2.2.2 of the .lzma file format specification: + * { 0, 4, 4, 8, 16, 32, 32, 64 } + */ +extern const uint32_t lzma_check_sizes[LZMA_CHECK_ID_MAX + 1]; + + +/** + * \brief Calculate CRC32 + * + * Calculates CRC32 using the polynomial from the IEEE 802.3 standard. + * + * \param buf Pointer to the input buffer + * \param size Size of the input buffer + * \param crc Previously returned CRC value. 
This is used to + * calculate the CRC of a big buffer in smaller chunks. + * Set to zero when there is no previous value. + * + * \return Updated CRC value, which can be passed to this function + * again to continue CRC calculation. + */ +extern uint32_t lzma_crc32(const uint8_t *buf, size_t size, uint32_t crc); + + +/** + * \brief Calculate CRC64 + * + * Calculates CRC64 using the polynomial from the ECMA-182 standard. + * + * This function is used similarly to lzma_crc32(). See its documentation. + */ +extern uint64_t lzma_crc64(const uint8_t *buf, size_t size, uint64_t crc); + + +/* + * SHA256 functions are currently not exported to public API. + * Contact the author if you think it should be. + */ diff --git a/src/liblzma/api/lzma/copy.h b/src/liblzma/api/lzma/copy.h new file mode 100644 index 00000000..f5617462 --- /dev/null +++ b/src/liblzma/api/lzma/copy.h @@ -0,0 +1,29 @@ +/** + * \file lzma/copy.h + * \brief Copy filter + * + * \author Copyright (C) 1999-2006 Igor Pavlov + * \author Copyright (C) 2007 Lasse Collin + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use <lzma.h> instead. +#endif + + +/** + * \brief Filter ID + * + * Filter ID of the Copy filter. This is used as lzma_options_filter.id. 
+ */ +#define LZMA_FILTER_COPY LZMA_VLI_C(0x00) diff --git a/src/liblzma/api/lzma/delta.h b/src/liblzma/api/lzma/delta.h new file mode 100644 index 00000000..58afec18 --- /dev/null +++ b/src/liblzma/api/lzma/delta.h @@ -0,0 +1,49 @@ +/** + * \file lzma/delta.h + * \brief Delta filter + * + * \author Copyright (C) 1999-2006 Igor Pavlov + * \author Copyright (C) 2007 Lasse Collin + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use <lzma.h> instead. +#endif + + +/** + * \brief Filter ID + * + * Filter ID of the Delta filter. This is used as lzma_options_filter.id. + */ +#define LZMA_FILTER_DELTA LZMA_VLI_C(0x20) + + +/** + * \brief Options for the Delta filter + * + * These options are needed by both encoder and decoder. 
+ */ +typedef struct { + /** + * \brief Delta distance as bytes + * + * Examples: + * - 16-bit stereo audio: distance = 4 bytes + * - 24-bit RGB image data: distance = 3 bytes + */ + uint32_t distance; +# define LZMA_DELTA_DISTANCE_MIN 1 +# define LZMA_DELTA_DISTANCE_MAX 256 + +} lzma_options_delta; diff --git a/src/liblzma/api/lzma/extra.h b/src/liblzma/api/lzma/extra.h new file mode 100644 index 00000000..29426a74 --- /dev/null +++ b/src/liblzma/api/lzma/extra.h @@ -0,0 +1,114 @@ +/** + * \file lzma/extra.h + * \brief Handling of Extra Records in Metadata + * + * \author Copyright (C) 1999-2006 Igor Pavlov + * \author Copyright (C) 2007 Lasse Collin + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use <lzma.h> instead. +#endif + + +/* + * Extra Record IDs + * + * See the .lzma file format specification for description what each + * Extra Record type exactly means. + * + * If you ever need to update .lzma files with Extra Records, note that + * the Record IDs are divided in two categories: + * - Safe-to-Copy Records may be preserved as is when the + * Stream is modified in ways that don't change the actual + * uncompressed data. Examples of such operatings include + * recompressing and adding, modifying, or deleting unrelated + * Extra Records. + * - Unsafe-to-Copy Records should be removed (and possibly + * recreated) when any kind of changes are made to the Stream. 
+ */ + +#define LZMA_EXTRA_PADDING 0x00 +#define LZMA_EXTRA_OPENPGP 0x01 +#define LZMA_EXTRA_FILTERS 0x02 +#define LZMA_EXTRA_COMMENT 0x03 +#define LZMA_EXTRA_CHECKS 0x04 +#define LZMA_EXTRA_FILENAME 0x05 +#define LZMA_EXTRA_MTIME 0x07 +#define LZMA_EXTRA_MTIME_HR 0x09 +#define LZMA_EXTRA_MIME_TYPE 0x0B +#define LZMA_EXTRA_HOMEPAGE 0x0D + + +/** + * \brief Extra Records + * + * The .lzma format provides a way to store custom information along + * the actual compressed content. Information about these Records + * are passed to and from liblzma via this linked list. + */ +typedef struct lzma_extra_s lzma_extra; +struct lzma_extra_s { + /** + * \brief Pointer to the next Extra Record + * + * This is NULL on the last Extra Record. + */ + lzma_extra *next; + + /** + * \brief Record ID + * + * Extra Record IDs are divided in three categories: + * - Zero is a special case used for padding. It doesn't have + * Size of Data fields. + * - Odd IDs (1, 3, 5, ...) are Safe-to-Copy IDs. + * These can be preserved as is if the Stream is + * modified in a way that doesn't alter the actual + * uncompressed content. + * - Even IDs (2, 4, 6, ...) are Unsafe-to-Copy IDs. + * If the .lzma Stream is modified in any way, + * the Extra Records having a sensitive ID should + * be removed or updated accordingly. + * + * Refer to the .lzma file format specification for + * the up to date list of Extra Record IDs. + */ + lzma_vli id; + + /** + * \brief Size of the Record data + * + * In case of strings, this should not include the + * trailing '\0'. + */ + size_t size; + + /** + * \brief Record data + * + * Record data is often a string in UTF-8 encoding, + * but it can be arbitrary binary data. In case of + * strings, the trailing '\0' is usually not stored + * in the .lzma file. + * + * To ease working with Extra Records containing strings, + * liblzma always adds '\0' to the end of data even when + * it wasn't present in the .lzma file. This '\0' is not + * counted in the size of the data. 
+ */ + uint8_t *data; +}; + + +extern void lzma_extra_free(lzma_extra *extra, lzma_allocator *allocator); diff --git a/src/liblzma/api/lzma/filter.h b/src/liblzma/api/lzma/filter.h new file mode 100644 index 00000000..a8bdd4bd --- /dev/null +++ b/src/liblzma/api/lzma/filter.h @@ -0,0 +1,166 @@ +/** + * \file lzma/filter.h + * \brief Common filter related types + * + * \author Copyright (C) 1999-2006 Igor Pavlov + * \author Copyright (C) 2007 Lasse Collin + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use <lzma.h> instead. +#endif + + +/** + * \brief Filter options + * + * This structure is used to pass Filter ID and a pointer to the filter's options + * to liblzma. + */ +typedef struct { + /** + * \brief Filter ID + * + * Use constants whose names begin with `LZMA_FILTER_' to specify + * different filters. In an array of lzma_options_filter structures, + * use LZMA_VLI_VALUE_UNKNOWN to indicate end of filters. + */ + lzma_vli id; + + /** + * \brief Pointer to filter-specific options structure + * + * If the filter doesn't need options, set this to NULL. If id is + * set to LZMA_VLI_VALUE_UNKNOWN, options is ignored, and thus + * doesn't need to be initialized. + * + * Some filters support changing the options in the middle of + * the encoding process. These filters store the pointer of the + * options structure and communicate with the application via + * modifications of the options structure.
+ */ + void *options; + +} lzma_options_filter; + + +/** + * \brief Filters available for encoding + * + * Pointer to an array containing the list of available Filter IDs that + * can be used for encoding. The last element is LZMA_VLI_VALUE_UNKNOWN. + * + * If lzma_available_filter_encoders[0] == LZMA_VLI_VALUE_UNKNOWN, the + * encoder components haven't been built at all. This means that the + * encoding-specific functions are probably missing from the library + * API/ABI completely. + */ +extern const lzma_vli *const lzma_available_filter_encoders; + + +/** + * \brief Filters available for decoding + * + * Pointer to an array containing the list of available Filter IDs that + * can be used for decoding. The last element is LZMA_VLI_VALUE_UNKNOWN. + * + * If lzma_available_filter_decoders[0] == LZMA_VLI_VALUE_UNKNOWN, the + * decoder components haven't been built at all. This means that the + * decoding-specific functions are probably missing from the library + * API/ABI completely. + */ +extern const lzma_vli *const lzma_available_filter_decoders; + + +/** + * \brief Calculate rough memory requirements for given filter chain + * + * \param filters Array of filters terminated with + * .id == LZMA_VLI_VALUE_UNKNOWN. + * \param is_encoder Set to true when calculating memory requirements + * of an encoder; false for decoder. + * + * \return Number of mebibytes (MiB i.e. 2^20) required for the given + * encoder or decoder filter chain. + * + * \note If calculating memory requirements of encoder, lzma_init() or + * lzma_init_encoder() must have been called earlier. Similarly, + * if calculating memory requirements of decoder, lzma_init() or + * lzma_init_decoder() must have been called earlier. 
+ */ +extern uint32_t lzma_memory_usage( + const lzma_options_filter *filters, lzma_bool is_encoder); + + +/** + * \brief Calculates encoded size of a Filter Flags field + * + * Knowing the size of Filter Flags is useful when allocating + * memory to hold the encoded Filter Flags. + * + * \param size Pointer to integer to hold the calculated size + * \param options Filter ID and associated options whose encoded + * size is to be calculated + * + * \return - LZMA_OK: *size set successfully. Note that this doesn't + * guarantee that options->options is valid, thus + * lzma_filter_flags_encode() may still fail. + * - LZMA_HEADER_ERROR: Unknown Filter ID or unsupported options. + * - LZMA_PROG_ERROR: Invalid options + * + * \note If you need to calculate size of List of Filter Flags, + * you need to loop over every lzma_options_filter entry. + */ +extern lzma_ret lzma_filter_flags_size( + uint32_t *size, const lzma_options_filter *options); + + +/** + * \brief Encodes Filter Flags into given buffer + * + * In contrast to some functions, this doesn't allocate the needed buffer. + * This is due to how this function is used internally by liblzma. + * + * \param out Beginning of the output buffer + * \param out_pos out[*out_pos] is the next write position. This + * is updated by the encoder. + * \param out_size out[out_size] is the first byte to not write. + * \param options Filter options to be encoded + * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_HEADER_ERROR: Invalid or unsupported options. + * - LZMA_PROG_ERROR: Invalid options or not enough output + * buffer space (you should have checked it with + * lzma_filter_flags_size()). + */ +extern lzma_ret lzma_filter_flags_encode(uint8_t *out, size_t *out_pos, + size_t out_size, const lzma_options_filter *options); + + +/** + * \brief Initializes Filter Flags decoder + * + * The decoded result is stored into *options. options->options is + * initialized but the old value is NOT free()d.
+ * + * Because the results of this decoder are placed into *options, + * strm->next_in, strm->avail_in, and strm->total_in are not used + * when calling lzma_code(). The only valid action for lzma_code() + * is LZMA_RUN + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_PROG_ERROR + */ +extern lzma_ret lzma_filter_flags_decoder( + lzma_stream *strm, lzma_options_filter *options); diff --git a/src/liblzma/api/lzma/index.h b/src/liblzma/api/lzma/index.h new file mode 100644 index 00000000..7e59c4b3 --- /dev/null +++ b/src/liblzma/api/lzma/index.h @@ -0,0 +1,84 @@ +/** + * \file lzma/index.h + * \brief Handling of Index lists + * + * \author Copyright (C) 1999-2006 Igor Pavlov + * \author Copyright (C) 2007 Lasse Collin + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use <lzma.h> instead. +#endif + + +/** + * \brief + * + * FIXME desc + */ +typedef struct lzma_index_s lzma_index; +struct lzma_index_s { + /** + * \brief Total Size of the Block + * + * This includes Block Header, Compressed Data, and Block Footer. + */ + lzma_vli total_size; + + /** + * \brief Uncompressed Size of the Block + */ + lzma_vli uncompressed_size; + + /** + * \brief Pointer to the next Index Record + * + * This is NULL on the last Index Record. + */ + lzma_index *next; +}; + + +/** + * \brief Duplicates an Index list + * + * \return A copy of the Index list, or NULL if memory allocation + * failed or the original Index was empty. 
+ */ +extern lzma_index *lzma_index_dup( + const lzma_index *index, lzma_allocator *allocator); + + +/** + * \brief Frees an Index list + * + * All Index Records in the list are freed. This function is convenient when + * getting rid of lzma_metadata structures containing an Index. + */ +extern void lzma_index_free(lzma_index *index, lzma_allocator *allocator); + + +/** + * \brief Calculates information about the Index + * + * \return LZMA_OK on success, LZMA_PROG_ERROR on error. FIXME + */ +extern lzma_ret lzma_index_count(const lzma_index *index, size_t *count, + lzma_vli *lzma_restrict total_size, + lzma_vli *lzma_restrict uncompressed_size); + + +/** + * \brief Compares if two Index lists are identical + */ +extern lzma_bool lzma_index_is_equal(const lzma_index *a, const lzma_index *b); diff --git a/src/liblzma/api/lzma/info.h b/src/liblzma/api/lzma/info.h new file mode 100644 index 00000000..3a91850f --- /dev/null +++ b/src/liblzma/api/lzma/info.h @@ -0,0 +1,315 @@ +/** + * \file lzma/info.h + * \brief Handling of Stream size information + * + * \author Copyright (C) 1999-2006 Igor Pavlov + * \author Copyright (C) 2007 Lasse Collin + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use <lzma.h> instead.
+#endif + + +/********** + * Basics * + **********/ + +/** + * \brief Opaque data type to hold the size information + */ +typedef struct lzma_info_s lzma_info; + + +typedef struct { + /** + * \brief Total Size of this Block + * + * This can be LZMA_VLI_VALUE_UNKNOWN. + */ + lzma_vli total_size; + + /** + * \brief Uncompressed Size of this Block + * + * This can be LZMA_VLI_VALUE_UNKNOWN. + */ + lzma_vli uncompressed_size; + + /** + * \brief Offset of the first byte of the Block + * + * In encoder, this is useful to find out the alignment of the Block. + * + * In decoder, this is useful when doing random-access reading + * with help from lzma_info_data_locate(). + */ + lzma_vli stream_offset; + + /** + * \brief Uncompressed offset of the Block + * + * Offset of the first uncompressed byte of the Block relative to + * all uncompressed data in the Block. + * FIXME desc + */ + lzma_vli uncompressed_offset; + + /** + * \brief Pointers to internal data structures + * + * Applications must not touch these. + */ + void *internal[4]; + +} lzma_info_iter; + + +typedef enum { + LZMA_INFO_STREAM_START, + LZMA_INFO_HEADER_METADATA, + LZMA_INFO_TOTAL, + LZMA_INFO_UNCOMPRESSED, + LZMA_INFO_FOOTER_METADATA +} lzma_info_size; + + +/** + * \brief Allocates and initializes a new lzma_info structure + * + * If info is NULL, a new lzma_info structure is allocated, initialized, and + * a pointer to it returned. If allocation fails, NULL is returned. + * + * If info is non-NULL, it is reinitialized and the same pointer returned. + * (In this case, return value cannot be NULL or a different pointer than + * the info given as argument.) + */ +extern lzma_info *lzma_info_init(lzma_info *info, lzma_allocator *allocator); + + +/** + * \brief Resets lzma_info + * + * This is like calling lzma_info_end() and lzma_info_create(), but + * re-uses the existing base structure. 
+ */ +extern void lzma_info_reset( + lzma_info *info, lzma_allocator *allocator); + + +/** + * \brief Frees memory allocated for a lzma_info structure + */ +extern void lzma_info_free(lzma_info *info, lzma_allocator *allocator); + + +/************************ + * Setting known values * + ************************/ + +/** + * \brief Set a known size value + * + * \param info Pointer returned by lzma_info_init() + * \param type Any value from lzma_info_size + * \param size Value to set or verify + * + * \return LZMA_OK on success, LZMA_DATA_ERROR if the size doesn't + * match the existing information, or LZMA_PROG_ERROR + * if type is invalid or size is not a valid VLI. + */ +extern lzma_ret lzma_info_size_set( + lzma_info *info, lzma_info_size type, lzma_vli size); + + +/** + * \brief Sets the Index + * + * The given lzma_index list is "absorbed" by this function. The application + * must not access it after this function call, even if this function returns + * an error. + * + * \note The given lzma_index will at some point get freed by the + * lzma_info_* functions. If you use a custom lzma_allocator, + * make sure that it can free the lzma_index. + */ +extern lzma_ret lzma_info_index_set( + lzma_info *info, lzma_allocator *allocator, + lzma_index *index, lzma_bool eat_index); + + +/** + * \brief Sets information from a known Metadata Block + * + * This is a shortcut for calling lzma_info_size_set() with different type + * arguments and lzma_info_index_set() with metadata->index.
+ */ +extern lzma_ret lzma_info_metadata_set(lzma_info *info, + lzma_allocator *allocator, lzma_metadata *metadata, + lzma_bool is_header_metadata, lzma_bool eat_index); + + +/*************** + * Incremental * + ***************/ + +/** + * \brief Prepares an iterator to be used with given lzma_info structure + * + * + */ +extern void lzma_info_iter_begin(lzma_info *info, lzma_info_iter *iter); + + +/** + * \brief Moves to the next Index Record + * + * + */ +extern lzma_ret lzma_info_iter_next( + lzma_info_iter *iter, lzma_allocator *allocator); + + +/** + * \brief Sets or verifies the sizes in the Index Record + * + * \param iter Pointer to iterator to be set or verified + * \param total_size + * Total Size in bytes or LZMA_VLI_VALUE_UNKNOWN + * \param uncompressed_size + * Uncompressed Size or LZMA_VLI_VALUE_UNKNOWN + * + * \return - LZMA_OK: All OK. + * - LZMA_DATA_ERROR: Given sizes don't match with the already + * known sizes. + * - LZMA_PROG_ERROR: Internal error, possibly integer + * overflow (e.g. the sum of all the known sizes is too big) + */ +extern lzma_ret lzma_info_iter_set(lzma_info_iter *iter, + lzma_vli total_size, lzma_vli uncompressed_size); + + +/** + * \brief Locates a Data Block + * + * \param iter Properly initialized iterator + * \param allocator Pointer to lzma_allocator or NULL + * \param uncompressed_offset + * Target offset to locate. The final offset + * will be equal or smaller than this. + * \param allow_alloc True if this function is allowed to call + * lzma_info_iter_next() to allocate a new Record + * if the requested offset reached end of Index + * Record list. Note that if Index has been marked + * final, lzma_info_iter_next() is never called. + * + * \return - LZMA_OK: All OK, *iter updated accordingly. + * - LZMA_DATA_ERROR: Trying to search past the end of the Index + * Record list, and allocating a new Record was not allowed + * either because allow_alloc was false or Index was final. 
+ * - LZMA_PROG_ERROR: Internal error (probably integer + * overflow causing some lzma_vli to get too big). + */ +extern lzma_ret lzma_info_iter_locate(lzma_info_iter *iter, + lzma_allocator *allocator, lzma_vli uncompressed_offset, + lzma_bool allow_alloc); + + +/** + * \brief Finishes incrementally constructed Index + * + * This sets the known Total Size and Uncompressed Size of the Data Blocks + * based on the information collected from the Index Records, and marks + * the Index as final. + */ +extern lzma_ret lzma_info_index_finish(lzma_info *info); + + +/*************************** + * Reading the information * + ***************************/ + +/** + * \brief Gets a known size + * + * + */ +extern lzma_vli lzma_info_size_get( + const lzma_info *info, lzma_info_size type); + +extern lzma_vli lzma_info_metadata_locate( + const lzma_info *info, lzma_bool is_header_metadata); + +/** + * \brief Gets a pointer to the beginning of the Index list + * + * If detach is true, the Index will be detached from the lzma_info + * structure, and thus not be modified or freed by lzma_info_free(). + * + * If detach is false, the application must not modify the Index in any way. + * Also, the Index list is guaranteed to be valid only till the next call + * to any lzma_info_* function. + */ +extern lzma_index *lzma_info_index_get(lzma_info *info, lzma_bool detach); + + +extern size_t lzma_info_index_count_get(const lzma_info *info); + + +extern uint32_t lzma_info_metadata_alignment_get( + const lzma_info *info, lzma_bool is_header_metadata); + + + +/** + * \brief Locate a Block containing the given uncompressed offset + * + * This function is useful when you need to do random-access reading in + * a Multi-Block Stream. + * + * \param info Pointer to lzma_info that has at least one + * Index Record. The Index doesn't need to be finished. + * \param uncompressed_target + * Uncompressed target offset which the caller would + * like to locate from the Stream.
+ * \param stream_offset + * Starting offset (relative to the beginning of the Stream) + * of the Block containing the requested location. + * \param uncompressed_offset + * The actual uncompressed offset of the beginning of + * the Block. uncompressed_offset <= uncompressed_target + * is always true; the application needs to uncompress + * uncompressed_target - uncompressed_offset bytes to + * reach the requested target offset. + * \param total_size + * Total Size of the Block. If the Index is incomplete, + * this may be LZMA_VLI_VALUE_UNKNOWN indicating unknown + * size. + * \param uncompressed_size + * Uncompressed Size of the Block. If the Index is + * incomplete, this may be LZMA_VLI_VALUE_UNKNOWN + * indicating unknown size. The application must pass + * this value to the Block decoder to verify FIXME + * + * \return + * + * \note This function is currently implemented as a linear search. + * If there are many Index Records, this can be really slow. + * This can be improved in newer liblzma versions if needed. + */ +extern lzma_bool lzma_info_data_locate(const lzma_info *info, + lzma_vli uncompressed_target, + lzma_vli *lzma_restrict stream_offset, + lzma_vli *lzma_restrict uncompressed_offset, + lzma_vli *lzma_restrict total_size, + lzma_vli *lzma_restrict uncompressed_size); diff --git a/src/liblzma/api/lzma/init.h b/src/liblzma/api/lzma/init.h new file mode 100644 index 00000000..f7b79246 --- /dev/null +++ b/src/liblzma/api/lzma/init.h @@ -0,0 +1,85 @@ +/** + * \file lzma/init.h + * \brief Initializations + * + * \author Copyright (C) 1999-2006 Igor Pavlov + * \author Copyright (C) 2007 Lasse Collin + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version.
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use <lzma.h> instead. +#endif + + +/** + * \brief Initialize all internal static variables + * + * Depending on the build options, liblzma may have some internal static + * variables that must be initialized before using any other part of + * the library (*). It is recommended to do these initializations in the very + * beginning of the application by calling the appropriate initialization function. + * + * (*) There are some exceptions to this rule. FIXME + * + * The initialization functions are not necessarily thread-safe, thus the + * required initializations must be done before creating any threads. (The + * rest of the functions of liblzma are thread-safe.) Calling the + * initialization functions multiple times does no harm, although it + * still shouldn't be done when there are multiple threads running. + * + * lzma_init() initializes all internal static variables by calling + * lzma_init_encoder() and lzma_init_decoder(). + * + * If you need only encoder, decoder, or neither-encoder-nor-decoder + * functions, you may use other initialization functions, which initialize + * only a subset of liblzma's internal static variables. Using those + * functions has the following advantages: + * - When linking statically against liblzma, fewer useless functions will + * get linked into the binary. E.g. if you need only the decoder functions, + * using lzma_init_decoder() avoids linking a bunch of encoder-related code. + * - There are fewer things to initialize, making the initialization + * process slightly faster.
+ */ +extern void lzma_init(void); + + +/** + * \brief Initialize internal static variables needed by encoders + * + * If you need only the encoder functions, you may use this function to + * initialize only the things required by encoders. + * + * This function also calls lzma_init_check(). + */ +extern void lzma_init_encoder(void); + + +/** + * \brief Initialize internal static variables needed by decoders + * + * If you need only the decoder functions, you may use this function to + * initialize only the things required by decoders. + * + * This function also calls lzma_init_check(). + */ +extern void lzma_init_decoder(void); + + +/** + * \brief Initialize internal static variables needed by integrity checks + * + * Currently this initializes CRC32 and CRC64 lookup tables if precalculated + * tables haven't been built into the library. This function can be useful + * if the only thing you need from liblzma is the integrity check functions. + */ +extern void lzma_init_check(void); diff --git a/src/liblzma/api/lzma/lzma.h b/src/liblzma/api/lzma/lzma.h new file mode 100644 index 00000000..0fe74854 --- /dev/null +++ b/src/liblzma/api/lzma/lzma.h @@ -0,0 +1,312 @@ +/** + * \file lzma/lzma.h + * \brief LZMA filter + * + * \author Copyright (C) 1999-2006 Igor Pavlov + * \author Copyright (C) 2007 Lasse Collin + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use <lzma.h> instead. 
+#endif + + +/** + * \brief Filter ID + * + * Filter ID of the LZMA filter. This is used as lzma_options_filter.id. + */ +#define LZMA_FILTER_LZMA LZMA_VLI_C(0x40) + + +/** + * \brief LZMA compression modes + * + * Currently there are only two modes. Earlier LZMA SDKs had also third + * mode between fast and best. + */ +typedef enum { + LZMA_MODE_INVALID = -1, + /**< + * \brief Invalid mode + * + * Used as array terminator in lzma_available_modes. + */ + + + LZMA_MODE_FAST = 0, + /**< + * \brief Fast compression + * + * Fast mode is usually at its best when combined with + * a hash chain match finder. + */ + + LZMA_MODE_BEST = 2 + /**< + * \brief Best compression ratio + * + * This is usually notably slower than fast mode. Use this + * together with binary tree match finders to expose the + * full potential of the LZMA encoder. + */ +} lzma_mode; + + +/** + * \brief Match finders + * + * Match finder has major effect on both speed and compression ratio. + * Usually hash chains are faster than binary trees. + */ +typedef enum { + LZMA_MF_INVALID = -1, + /**< + * \brief Invalid match finder ID + * + * Used as array terminator in lzma_available_match_finders. + */ + + LZMA_MF_HC3 = 0x03, + /**< + * \brief Hash Chain with 3 bytes hashing + * + * \todo Memory requirements + * + * \note It's possible that this match finder gets + * removed in future. The definition will stay + * in this header, but liblzma may return + * LZMA_HEADER_ERROR if it is specified (just + * like it would if the match finder had been + * disabled at compile time). + */ + + LZMA_MF_HC4 = 0x04, + /**< + * \brief Hash Chain with 4 bytes hashing + * + * Memory requirements: 7.5 * dictionary_size + 4 MiB + * + * \note It's possible that this match finder gets + * removed in future. The definition will stay + * in this header, but liblzma may return + * LZMA_HEADER_ERROR if it is specified (just + * like it would if the match finder had been + * disabled at compile time). 
+ */ + + LZMA_MF_BT2 = 0x12, + /**< + * \brief Binary Tree with 2 bytes hashing + * + * Memory requirements: 9.5 * dictionary_size + 4 MiB + */ + + LZMA_MF_BT3 = 0x13, + /**< + * \brief Binary Tree with 3 bytes hashing + * + * Memory requirements: 11.5 * dictionary_size + 4 MiB + */ + + LZMA_MF_BT4 = 0x14 + /**< + * \brief Binary Tree with 4 bytes hashing + * + * Memory requirements: 11.5 * dictionary_size + 4 MiB + */ +} lzma_match_finder; + + +/** + * \brief Options specific to the LZMA method handler + */ +typedef struct { + /********************************** + * LZMA encoding/decoding options * + **********************************/ + + /* These options are required in encoder and also with raw decoding. */ + + /** + * \brief Dictionary size in bytes + * + * Dictionary size indicates how many bytes of the recently processed + * uncompressed data is kept in memory. One method to reduce size of + * the uncompressed data is to store distance-length pairs, which + * indicate what data to repeat from the dictionary buffer. Thus, + * the bigger the dictionary, the better compression ratio usually is. + * + * Raw decoding: Too big dictionary does no other harm than + * wasting memory. This value is ignored by lzma_raw_decode_buffer(), + * because it uses the target buffer as the dictionary. + */ + uint32_t dictionary_size; +# define LZMA_DICTIONARY_SIZE_MIN 1 +# define LZMA_DICTIONARY_SIZE_MAX (UINT32_C(1) << 30) +# define LZMA_DICTIONARY_SIZE_DEFAULT (UINT32_C(1) << 23) + + /** + * \brief Number of literal context bits + * + * How many of the highest bits of the previous uncompressed + * eight-bit byte (also known as `literal') are taken into + * account when predicting the bits of the next literal. 
+ * + * \todo Example + */ + uint32_t literal_context_bits; +# define LZMA_LITERAL_CONTEXT_BITS_MIN 0 +# define LZMA_LITERAL_CONTEXT_BITS_MAX 8 +# define LZMA_LITERAL_CONTEXT_BITS_DEFAULT 3 + + /** + * \brief Number of literal position bits + * + * How many of the lowest bits of the current position (number + * of bytes from the beginning of the uncompressed data) in the + * uncompressed data is taken into account when predicting the + * bits of the next literal (a single eight-bit byte). + * + * \todo Example + */ + uint32_t literal_pos_bits; +# define LZMA_LITERAL_POS_BITS_MIN 0 +# define LZMA_LITERAL_POS_BITS_MAX 4 +# define LZMA_LITERAL_POS_BITS_DEFAULT 0 + + /** + * \brief Number of position bits + * + * How many of the lowest bits of the current position in the + * uncompressed data is taken into account when estimating + * probabilities of matches. A match is a sequence of bytes for + * which a matching sequence is found from the dictionary and + * thus can be stored as distance-length pair. + * + * Example: If most of the matches occur at byte positions + * of 8 * n + 3, that is, 3, 11, 19, ... set pos_bits to 3, + * because 2**3 == 8. + */ + uint32_t pos_bits; +# define LZMA_POS_BITS_MIN 0 +# define LZMA_POS_BITS_MAX 4 +# define LZMA_POS_BITS_DEFAULT 2 + + /** + * \brief Pointer to an initial dictionary + * + * It is possible to initialize the LZ77 history window using + * a preset dictionary. Here is a good quote from zlib's + * documentation; this applies to LZMA as is: + * + * "The dictionary should consist of strings (byte sequences) that + * are likely to be encountered later in the data to be compressed, + * with the most commonly used strings preferably put towards the + * end of the dictionary. Using a dictionary is most useful when + * the data to be compressed is short and can be predicted with + * good accuracy; the data can then be compressed better than + * with the default empty dictionary." 
+ * (From deflateSetDictionary() in zlib.h of zlib version 1.2.3) + * + * This feature should be used only in special situations. + * It works correctly only with raw encoding and decoding. + * Currently none of the container formats supported by + * liblzma allow preset dictionary when decoding, thus if + * you create a .lzma file with preset dictionary, it cannot + * be decoded with the regular .lzma decoder functions. + * + * \todo This feature is not implemented yet. + */ + const uint8_t *preset_dictionary; + + /** + * \brief Size of the preset dictionary + * + * Specifies the size of the preset dictionary. If the size is + * bigger than dictionary_size, only the last dictionary_size + * bytes are processed. + * + * This variable is read only when preset_dictionary is not NULL. + */ + uint32_t preset_dictionary_size; + + /****************************************** + * LZMA options needed only when encoding * + ******************************************/ + + /** LZMA compression mode */ + lzma_mode mode; + + /** + * \brief Number of fast bytes + * + * Number of fast bytes determines how many bytes the encoder + * compares from the match candidates when looking for the best + * match. Bigger fast bytes value usually increase both compression + * ratio and time. + */ + uint32_t fast_bytes; +# define LZMA_FAST_BYTES_MIN 5 +# define LZMA_FAST_BYTES_MAX 273 +# define LZMA_FAST_BYTES_DEFAULT 128 + + /** Match finder ID */ + lzma_match_finder match_finder; + + /** + * \brief Match finder cycles + * + * Higher values give slightly better compression ratio but + * decrease speed. Use special value 0 to let liblzma use + * match-finder-dependent default value. + * + * \todo Write much better description. + */ + uint32_t match_finder_cycles; + +} lzma_options_lzma; + + +/** + * \brief Available LZMA encoding modes + * + * Pointer to an array containing the list of available encoding modes. + * + * This variable is available only if LZMA encoder has been enabled. 
+ */ +extern const lzma_mode *const lzma_available_modes; + + +/** + * \brief Available match finders + * + * Pointer to an array containing the list of available match finders. + * The last element is LZMA_MF_INVALID. + * + * This variable is available only if LZMA encoder has been enabled. + */ +extern const lzma_match_finder *const lzma_available_match_finders; + + +/** + * \brief Table of presets for the LZMA filter + * + * lzma_preset_lzma[0] is the fastest and lzma_preset_lzma[8] is the slowest. + * These presets match the switches -1 .. -9 of the lzma command line tool. + * + * The preset values are subject to changes between liblzma versions. + * + * This variable is available only if LZMA encoder has been enabled. + */ +extern const lzma_options_lzma lzma_preset_lzma[9]; diff --git a/src/liblzma/api/lzma/memlimit.h b/src/liblzma/api/lzma/memlimit.h new file mode 100644 index 00000000..26ec50fe --- /dev/null +++ b/src/liblzma/api/lzma/memlimit.h @@ -0,0 +1,157 @@ +/** + * \file lzma/memlimit.h + * \brief Memory usage limiter + * + * \author Copyright (C) 1999-2006 Igor Pavlov + * \author Copyright (C) 2007 Lasse Collin + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use <lzma.h> instead.
+#endif + + +/** + * \brief Opaque data type used with the memory usage limiting functions + */ +typedef struct lzma_memlimit_s lzma_memlimit; + + +/** + * \brief Allocates and initializes a new lzma_memlimit structure + * + * It is easy to make liblzma use huge amounts of memory. This can + * be a problem especially with the decoder, since a file requiring + * huge amounts of memory to uncompress could allow even a denial of + * service attack if the memory usage wasn't limited. + * + * liblzma provides a set of functions to control memory usage. Pointers + * to these functions can be used in lzma_allocator structure, which makes + * it easy to limit memory usage with liblzma. + * + * The memory limiter functions are not tied to limiting memory usage + * with liblzma itself. You can use them with anything you like. + * + * In multi-threaded applications, only one thread at once may use the same + * lzma_memlimit structure. If there is a need, this limitation may + * be removed in future versions without breaking the library API/ABI. + * + * \param limit Initial memory usage limit in bytes + * + * \return Pointer to allocated and initialized lzma_memlimit + * structure. On error, NULL is returned. The reason behind + * an error is either that malloc() failed or that the given + * limit was so small that it didn't allow allocating even + * the lzma_memlimit structure itself. + * + * \note Excluding lzma_memlimit_usage(), the functions whose names begin with + * lzma_memlimit_ can be used even if lzma_init() hasn't been + * called. + */ +extern lzma_memlimit *lzma_memlimit_create(size_t limit); + + +/** + * \brief Sets a new memory usage limit + * + * \param mem Pointer to a lzma_memlimit structure returned + * earlier by lzma_memlimit_create().
+ * \param limit New memory usage limit + * + * The new usage limit may be smaller than the amount of memory currently + * allocated via *mem: New allocations will fail until enough memory has + * been freed or a new limit is set, but the existing allocations will + * stay untouched. + */ +extern void lzma_memlimit_set(lzma_memlimit *mem, size_t limit); + + +/** + * \brief Gets the current memory usage limit + */ +extern size_t lzma_memlimit_get(const lzma_memlimit *mem); + + +/** + * \brief Gets the amount of currently allocated memory + * + * \note This value includes the sizes of some helper structures, + * thus it will always be larger than the total number of + * bytes allocated via lzma_memlimit_alloc(). + */ +extern size_t lzma_memlimit_used(const lzma_memlimit *mem); + + +/** + * \brief Allocates memory with malloc() if memory limit allows + * + * \param mem Pointer to a lzma_memlimit structure returned + * earlier by lzma_memlimit_create(). + * \param nmemb Number of elements to allocate. While liblzma always + * sets this to one, this function still takes the + * value of nmemb into account to keep the function + * usable with zlib and libbzip2. + * \param size Size of an element. + * + * \return Pointer to memory allocated with malloc(nmemb * size), + * except if nmemb * size == 0 which returns malloc(1). + * On error, NULL is returned. + * + * \note This function assumes that nmemb * size is at most + * SIZE_MAX. If it isn't, an overflow will occur resulting + * in an invalid amount of memory being allocated. + */ +extern void *lzma_memlimit_alloc( + lzma_memlimit *mem, size_t nmemb, size_t size); + + +/** + * \brief Removes the pointer from memory limiting list + * + * \param mem Pointer to a lzma_memlimit structure returned + * earlier by lzma_memlimit_create(). + * \param ptr Pointer returned earlier by lzma_memlimit_alloc(). + * + * This function removes ptr from the internal list and decreases the + * counter of used memory accordingly.
The ptr itself isn't freed. This is + * useful when Extra Records allocated by liblzma using lzma_memlimit + * are needed by the application and must not be freed when the + * lzma_memlimit structure is destroyed. + * + * It is OK to call this function with a ptr that hasn't been allocated with + * lzma_memlimit_alloc(). In that case, this has no effect other than wasting + * a few CPU cycles. + */ +extern void lzma_memlimit_detach(lzma_memlimit *mem, void *ptr); + + +/** + * \brief Frees memory and updates the memory limit list + * + * This is like lzma_memlimit_detach() but also frees the given pointer. + */ +extern void lzma_memlimit_free(lzma_memlimit *mem, void *ptr); + + +/** + * \brief Frees the memory allocated for and by the memory usage limiter + * + * \param mem Pointer to memory limiter + * \param free_allocated If this is non-zero, all the memory allocated + * by lzma_memlimit_alloc() using *mem is also + * freed if it hasn't already been freed with + * lzma_memlimit_free(). Usually this should be + * set to true. + */ +extern void lzma_memlimit_end( + lzma_memlimit *mem, lzma_bool free_allocated); diff --git a/src/liblzma/api/lzma/metadata.h b/src/liblzma/api/lzma/metadata.h new file mode 100644 index 00000000..69592a3a --- /dev/null +++ b/src/liblzma/api/lzma/metadata.h @@ -0,0 +1,100 @@ +/** + * \file lzma/metadata.h + * \brief Metadata handling + * + * \author Copyright (C) 1999-2006 Igor Pavlov + * \author Copyright (C) 2007 Lasse Collin + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU + * Lesser General Public License for more details. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use <lzma.h> instead. +#endif + + +/** + * \brief Information stored into a Metadata Block + * + * This structure holds all the information that can be stored to + * a Metadata Block. + */ +typedef struct { + /** + * \brief Size of Header Metadata Block + */ + lzma_vli header_metadata_size; + + /** + * \brief Total Size of the Stream + */ + lzma_vli total_size; + + /** + * \brief Uncompressed Size of the Stream + */ + lzma_vli uncompressed_size; + + /** + * \brief Index of the Blocks stored in the Stream + */ + lzma_index *index; + + /** + * \brief Extra information + */ + lzma_extra *extra; + +} lzma_metadata; + + +/** + * \brief Calculate the encoded size of Metadata + * + * \return Uncompressed size of the Metadata in encoded form. This value + * may be passed to Block encoder as Uncompressed Size when using + * Metadata filter. On error, zero is returned. + */ +extern lzma_vli lzma_metadata_size(const lzma_metadata *metadata); + + +/** + * \brief Initializes Metadata encoder + * + * \param coder Pointer to a pointer to hold Metadata encoder's + * internal state. Original value is ignored, thus + * you don't need to initialize the pointer. + * \param allocator Custom memory allocator; usually NULL. + * \param metadata Pointer to Metadata to be encoded + * + * \return - LZMA_OK: Initialization succeeded. + * - LZMA_MEM_ERROR: Cannot allocate memory for *coder. + * + * The initialization function makes an internal copy of the *metadata structure. + * However, the linked lists metadata->index and metadata->extra are NOT + * copied. Thus, the application may destroy *metadata after initialization + * if it likes, but not Index or Extra.
+ */ +extern lzma_ret lzma_metadata_encoder(lzma_stream *strm, + lzma_options_block *options, const lzma_metadata *metadata); + + +/** + * \brief Initializes Metadata decoder + * + * \param want_extra If this is true, Extra Records will be stored + * to metadata->extra. If this is false, Extra + * Records will be parsed but not stored anywhere, + * metadata->extra will be set to NULL. + */ +extern lzma_ret lzma_metadata_decoder( + lzma_stream *strm, lzma_options_block *options, + lzma_metadata *metadata, lzma_bool want_extra); diff --git a/src/liblzma/api/lzma/raw.h b/src/liblzma/api/lzma/raw.h new file mode 100644 index 00000000..c1ee41d8 --- /dev/null +++ b/src/liblzma/api/lzma/raw.h @@ -0,0 +1,72 @@ +/** + * \file lzma/raw.h + * \brief Raw encoder and decoder + * + * \author Copyright (C) 1999-2006 Igor Pavlov + * \author Copyright (C) 2007 Lasse Collin + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use <lzma.h> instead. +#endif + + +/** + * \brief Initializes raw encoder + * + * This function may be useful when implementing custom file formats. + * + * \param strm Pointer to properly prepared lzma_stream + * \param options Array of lzma_options_filter structures. + * The end of the array must be marked with + * .id = LZMA_VLI_VALUE_UNKNOWN. The minimum + * number of filters is zero; the maximum is + * determined by available memory. + * \param uncompressed_size + * Size of the uncompressed data. 
If it is unknown, + * use LZMA_VLI_VALUE_UNKNOWN. You need to give the + * same value to the raw decoder to decode the data. + * \param allow_implicit + * If true, an implicit Copy or Subblock filter should be + * automatically added when needed. If this is false and + * an implicit filter would be needed, LZMA_PROG_ERROR is + * returned. + * + * The `action' with lzma_code() can be LZMA_RUN, LZMA_SYNC_FLUSH (if the + * filter chain support it), or LZMA_FINISH. + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_HEADER_ERROR + * - LZMA_PROG_ERROR + */ +extern lzma_ret lzma_raw_encoder( + lzma_stream *strm, const lzma_options_filter *options, + lzma_vli uncompressed_size, lzma_bool allow_implicit); + + +/** + * \brief Initializes raw decoder + * + * The initialization of raw decoder goes similarly to raw encoder. + * + * The `action' with lzma_code() can be LZMA_RUN or LZMA_SYNC_FLUSH. + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_HEADER_ERROR + * - LZMA_PROG_ERROR + */ +extern lzma_ret lzma_raw_decoder( + lzma_stream *strm, const lzma_options_filter *options, + lzma_vli uncompressed_size, lzma_bool allow_implicit); diff --git a/src/liblzma/api/lzma/simple.h b/src/liblzma/api/lzma/simple.h new file mode 100644 index 00000000..fb78d01f --- /dev/null +++ b/src/liblzma/api/lzma/simple.h @@ -0,0 +1,85 @@ +/** + * \file lzma/simple.h + * \brief So called "simple" filters + * + * \author Copyright (C) 1999-2006 Igor Pavlov + * \author Copyright (C) 2007 Lasse Collin + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use <lzma.h> instead. +#endif + + +/* Filter IDs for lzma_options_filter.id */ + +#define LZMA_FILTER_X86 LZMA_VLI_C(0x04) + /**< + * BCJ (Branch, Call, Jump) filter for x86 binaries + */ + +#define LZMA_FILTER_POWERPC LZMA_VLI_C(0x05) + /**< + * Filter for Big endian PowerPC binaries + */ + +#define LZMA_FILTER_IA64 LZMA_VLI_C(0x06) + /**< + * Filter for IA64 (Itanium) binaries. + */ + +#define LZMA_FILTER_ARM LZMA_VLI_C(0x07) + /**< + * Filter for ARM binaries. + */ + +#define LZMA_FILTER_ARMTHUMB LZMA_VLI_C(0x08) + /**< + * Filter for ARMThumb binaries. + */ + +#define LZMA_FILTER_SPARC LZMA_VLI_C(0x09) + /**< + * Filter for SPARC binaries. + */ + + +/** + * \brief Options for so called "simple" filters + * + * The simple filters never change the size of the data. Specifying options + * for them is optional: if pointer to options is NULL, default values are + * used. You probably never need to specify these options, so just set the + * options pointer to NULL and be happy. + * + * If options with non-default values have been specified when encoding, + * the same options must also be specified when decoding. + */ +typedef struct { + /** + * \brief Start offset for branch conversions + * + * This setting is useful only when the same filter is used + * _separately_ for multiple sections of the same executable file, + * and the sections contain cross-section branch/call/jump + * instructions. In that case it is beneficial to set the start + * offset of the non-first sections so that the relative addresses + * of the cross-section branch/call/jump instructions will use the + * same absolute addresses as in the first section. + * + * When the pointer to options is NULL, the default value is used. + * The default value is zero.
+ */ + uint32_t start_offset; + +} lzma_options_simple; diff --git a/src/liblzma/api/lzma/stream.h b/src/liblzma/api/lzma/stream.h new file mode 100644 index 00000000..be86075f --- /dev/null +++ b/src/liblzma/api/lzma/stream.h @@ -0,0 +1,178 @@ +/** + * \file lzma/stream.h + * \brief .lzma Stream handling + * + * \author Copyright (C) 1999-2006 Igor Pavlov + * \author Copyright (C) 2007 Lasse Collin + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use <lzma.h> instead. +#endif + + +/** + * \brief Options for .lzma Stream encoder + */ +typedef struct { + /** + * \brief Type of integrity Check + * + * The type of the integrity Check is stored into Stream Header + * and Stream Footer. The same Check is used for all Blocks in + * the Stream. + */ + lzma_check_type check; + + /** + * \brief Presence of CRC32 of the Block Header + * + * Set this to true if CRC32 of every Block Header should be + * calculated and stored in the Block Header. This is recommended. + * + * This setting is stored into Stream Header and Stream Footer. + */ + lzma_bool has_crc32; + + /** + * \brief Uncompressed Size in bytes + * + * This is a somewhat advanced feature. Most users want to set this to + * LZMA_VLI_VALUE_UNKNOWN to indicate unknown Uncompressed Size. + * + * If the Uncompressed Size of the Stream being encoded is known, + * it can be stored to the beginning of the Stream.
The details + * differ for Single-Block and Multi-Block Streams: + * - With Single-Block Streams, the Uncompressed Size is stored to + * the Block Header and End of Payload Marker is omitted. + * - With Multi-Block Streams, the Uncompressed Size is stored to + * the Header Metadata Block. The Uncompressed Size of the Blocks + * will be unknown, because liblzma cannot predict how the + * application is going to split the data in Blocks. + */ + lzma_vli uncompressed_size; + + /** + * \brief Alignment of the beginning of the Stream + * + * Certain filters handle data in bigger chunks than single bytes. + * This affects two things: + * - Performance: aligned memory access is usually faster. + * - Further compression ratio in custom file formats: if you + * encode multiple Blocks with some non-compression filter + * such as LZMA_FILTER_POWERPC, it is a good idea to keep + * the inter-Block alignment correct to maximize compression + * ratio when all these Blocks are finally compressed as a + * single step. + * + * Usually the Stream is stored into its own file, thus + * the alignment is usually zero. + */ + uint32_t alignment; + + /** + * \brief Array of filters used to encode Data Blocks + * + * There can be at maximum of seven filters. The end of the array is + * marked with .id = LZMA_VLI_VALUE_UNKNOWN. (That's why the array + * has eight members.) Minimum number of filters is zero; in that + * case, an implicit Copy filter is used. + */ + lzma_options_filter filters[8]; + + /** + * \brief Array of filters used to encode Metadata Blocks + * + * This is like filters[] but for Metadata Blocks. If Metadata + * Blocks are compressed, they usually are compressed with + * settings that require only little memory to uncompress e.g. + * LZMA with 64 KiB dictionary. + * + * \todo Recommend a preset. + * + * When liblzma sees that the Metadata Block would be very small + * even in uncompressed form, it is not compressed no matter + * what filter have been set here. 
This is to avoid possibly + * increasing the size of the Metadata Block with bad compression, + * and to avoid useless overhead of filters in uncompression phase. + */ + lzma_options_filter metadata_filters[8]; + + /** + * \brief Extra information in the Header Metadata Block + */ + lzma_extra *header; + + /** + * \brief Extra information in the Footer Metadata Block + * + * It is enough to set this pointer any time before calling + * lzma_code() with LZMA_FINISH as the second argument. + */ + lzma_extra *footer; + +} lzma_options_stream; + + +/** + * \brief Initializes Single-Block .lzma Stream encoder + * + * This is the function that most developers are looking for. :-) It + * compresses using the specified options without storing any extra + * information. + * + * \todo Describe that is_metadata is ignored, maybe something else. + */ +extern lzma_ret lzma_stream_encoder_single( + lzma_stream *strm, const lzma_options_stream *options); + + +/** + * \brief Initializes Multi-Block .lzma Stream encoder + * + */ +extern lzma_ret lzma_stream_encoder_multi( + lzma_stream *strm, const lzma_options_stream *options); + + +/** + * \brief Initializes decoder for .lzma Stream + * + * \param strm Pointer to properly prepared lzma_stream + * \param header Pointer to hold a pointer to Extra Records read + * from the Header Metadata Block. Use NULL if + * you don't care about Extra Records. + * \param footer Same as header, but for Footer Metadata Block. + * + * \return - LZMA_OK: Initialization was successful. + * - LZMA_MEM_ERROR: Cannot allocate memory. + * + * If header and/or footer are not NULL, *header and/or *footer will be + * initially set to NULL. + * + * The application can detect that Header Metadata Block has been completely + * parsed when the decoder produces some output the first time. If *header + * is still NULL, there was no Extra field in the Header Metadata Block (or + * the whole Header Metadata Block wasn't present at all).
+ * + * The application can detect that Footer Metadata Block has been parsed + * completely when lzma_code() returns LZMA_STREAM_END. If *footer is still + * NULL, there was no Extra field in the Footer Metadata Block. + * + * \note If you use lzma_memory_limitter, the Extra Records will be + * allocated with it, and thus remain in the lzma_memory_limitter + * even after they get exported to the application via *header + * and *footer pointers. + */ +extern lzma_ret lzma_stream_decoder(lzma_stream *strm, + lzma_extra **header, lzma_extra **footer); diff --git a/src/liblzma/api/lzma/stream_flags.h b/src/liblzma/api/lzma/stream_flags.h new file mode 100644 index 00000000..070c91c9 --- /dev/null +++ b/src/liblzma/api/lzma/stream_flags.h @@ -0,0 +1,142 @@ +/** + * \file lzma/stream_flags.h + * \brief .lzma Stream Header and Stream tail encoder and decoder + * + * \author Copyright (C) 1999-2006 Igor Pavlov + * \author Copyright (C) 2007 Lasse Collin + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use <lzma.h> instead. 
+#endif + + +/** + * \brief Size of Stream Header + * + * Magic Bytes (6) + Stream Flags (1) + CRC32 (4) + */ +#define LZMA_STREAM_HEADER_SIZE (6 + 1 + 4) + + +/** + * \brief Size of Stream tail + * + * Because Stream Footer already has a defined meaning in the file format + * specification, we use Stream tail to denote these two fields: + * Stream Flags (1) + Magic Bytes (2) + */ +#define LZMA_STREAM_TAIL_SIZE (1 + 2) + + +/** + * Options for encoding and decoding Stream Header and Stream tail + */ +typedef struct { + /** + * Type of the Check calculated from uncompressed data + */ + lzma_check_type check; + + /** + * True if Block Headers have the CRC32 field. Note that the CRC32 + * field is always present in the Stream Header. + */ + lzma_bool has_crc32; + + /** + * True if the Stream is a Multi-Block Stream. + */ + lzma_bool is_multi; + +} lzma_stream_flags; + + +#define lzma_stream_flags_is_equal(a, b) \ + ((a).check == (b).check \ + && (a).has_crc32 == (b).has_crc32 \ + && (a).is_multi == (b).is_multi) + + +/** + * \brief Encodes Stream Header + * + * Encoding of the Stream Header is done with a single call instead of + * first initializing and then doing the actual work with lzma_code(). + * + * \param out Beginning of the output buffer + * \param out_pos out[*out_pos] is the next write position. This + * is updated by the encoder. + * \param out_size out[out_size] is the first byte to not write. + * \param options Stream Header options to be encoded. + * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_PROG_ERROR: Invalid options. + * - LZMA_BUF_ERROR: Not enough output buffer space. + */ +extern lzma_ret lzma_stream_header_encode( + uint8_t *out, const lzma_stream_flags *options); + + +/** + * \brief Encodes Stream tail + * + * \param footer Pointer to a pointer that will hold the + * allocated buffer. Caller must free it once + * it isn't needed anymore. 
+ * \param footer_size Pointer to a variable that will the final size + * of the footer buffer. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc(). + * \param options Stream Header options to be encoded. + * + * \return - LZMA_OK: Success; *header and *header_size set. + * - LZMA_PROG_ERROR: *options is invalid. + * - LZMA_MEM_ERROR: Cannot allocate memory. + */ +extern lzma_ret lzma_stream_tail_encode( + uint8_t *out, const lzma_stream_flags *options); + + +/** + * \brief Initializes Stream Header decoder + * + * \param strm Pointer to lzma_stream used to pass input data + * \param options Target structure for parsed results + * + * \return - LZMA_OK: Successfully initialized + * - LZMA_MEM_ERROR: Cannot allocate memory + * + * The actual decoding is done with lzma_code() and freed with lzma_end(). + */ +extern lzma_ret lzma_stream_header_decoder( + lzma_stream *strm, lzma_stream_flags *options); + + +/** + * \brief Initializes Stream tail decoder + * + * \param strm Pointer to lzma_stream used to pass input data + * \param options Target structure for parsed results. + * \param decode_uncompressed_size + * Set to true if the first field to decode is + * Uncompressed Size. Set to false if the first + * field to decode is Backward Size. + * + * \return - LZMA_OK: Successfully initialized + * - LZMA_MEM_ERROR: Cannot allocate memory + * + * The actual decoding is done with lzma_code() and freed with lzma_end(). 
+ */ +extern lzma_ret lzma_stream_tail_decoder( + lzma_stream *strm, lzma_stream_flags *options); diff --git a/src/liblzma/api/lzma/subblock.h b/src/liblzma/api/lzma/subblock.h new file mode 100644 index 00000000..0474b6af --- /dev/null +++ b/src/liblzma/api/lzma/subblock.h @@ -0,0 +1,197 @@ +/** + * \file lzma/subblock.h + * \brief Subblock filter + * + * \author Copyright (C) 1999-2006 Igor Pavlov + * \author Copyright (C) 2007 Lasse Collin + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use <lzma.h> instead. +#endif + + +/** + * \brief Filter ID + * + * Filter ID of the Subblock filter. This is used as lzma_options_filter.id. + */ +#define LZMA_FILTER_SUBBLOCK LZMA_VLI_C(0x01) + + +/** + * \brief Subfilter mode + * + * See lzma_options_subblock.subfilter_mode for details. + */ +typedef enum { + LZMA_SUBFILTER_NONE, + /**< + * No Subfilter is in use. + */ + + LZMA_SUBFILTER_SET, + /**< + * New Subfilter has been requested to be initialized. + */ + + LZMA_SUBFILTER_RUN, + /**< + * Subfilter is active. + */ + + LZMA_SUBFILTER_FINISH + /**< + * Subfilter has been requested to be finished. + */ +} lzma_subfilter_mode; + + +/** + * \brief Options for the Subblock filter + * + * Specifying options for the Subblock filter is optional: if the pointer + * options is NULL, no subfilters are allowed and the default value is used + * for subblock_data_size. 
+ */ +typedef struct { + /* Options for encoder and decoder */ + + /** + * \brief Allowing subfilters + * + * If this is true, subfilters are allowed. + * + * In the encoder, if this is set to false, subfilter_mode and + * subfilter_options are completely ignored. + */ + lzma_bool allow_subfilters; + + /* Options for encoder only */ + + /** + * \brief Alignment + * + * The Subblock filter encapsulates the input data into Subblocks. + * Each Subblock has a header which takes a few bytes of space. + * When the output of the Subblock encoder is fed to another filter + * that takes advantage of the alignment of the input data (e.g. LZMA), + * the Subblock filter can add padding to keep the actual data parts + * in the Subblocks aligned correctly. + * + * The alignment should be a positive integer. Subblock filter will + * add enough padding between Subblocks so that this is true for + * every payload byte: + * input_offset % alignment == output_offset % alignment + * + * The Subblock filter assumes that the first output byte will be + * written to a position in the output stream that is properly aligned. + * + * FIXME desc + */ + uint32_t alignment; +# define LZMA_SUBBLOCK_ALIGNMENT_MIN 1 +# define LZMA_SUBBLOCK_ALIGNMENT_MAX 32 +# define LZMA_SUBBLOCK_ALIGNMENT_DEFAULT 4 + + /** + * \brief Size of the Subblock Data part of each Subblock + * + * This value is re-read every time a new Subblock is started. + * + * Bigger values + * - save a few bytes of space; + * - increase latency in the encoder (but no effect for decoding); + * - decrease memory locality (increased cache pollution) in the + * encoder (no effect in decoding). + */ + uint32_t subblock_data_size; +# define LZMA_SUBBLOCK_DATA_SIZE_MIN 1 +# define LZMA_SUBBLOCK_DATA_SIZE_MAX (UINT32_C(1) << 28) +# define LZMA_SUBBLOCK_DATA_SIZE_DEFAULT 4096 + + /** + * \brief Run-length encoder remote control + * + * The Subblock filter has an internal run-length encoder (RLE).
It + * can be useful when the data includes byte sequences that repeat + * very many times. The RLE can be used also when a Subfilter is + * in use; the RLE will be applied to the output of the Subfilter. + * + * Note that in contrast to traditional RLE, this RLE is intended to + * be used only when there's a lot of data to be repeated. If the + * input data has e.g. 500 bytes of NULs now and then, this RLE + * is probably useless, because plain LZMA should provide better + * results. + * + * Due to the above reasons, it was decided to keep the implementation + * of the RLE very simple. When the rle variable is non-zero, + * subblock_data_size must be a multiple of rle. Once the Subblock + * encoder has got subblock_data_size bytes of input, it will check + * if the whole buffer of the last subblock_data_size can be + * represented with repeats of chunks having size of rle bytes. + * + * If there are consecutive identical buffers of subblock_data_size + * bytes, they will be encoded using a single repeat entry if + * possible. + * + * If need arises, more advanced RLE can be implemented later + * without breaking API or ABI. + */ + uint32_t rle; +# define LZMA_SUBBLOCK_RLE_OFF 0 +# define LZMA_SUBBLOCK_RLE_MIN 1 +# define LZMA_SUBBLOCK_RLE_MAX 256 + + /** + * \brief Subfilter remote control + * + * When the Subblock filter is initialized, this variable must be + * LZMA_SUBFILTER_NONE or LZMA_SUBFILTER_SET. + * + * When subfilter_mode is LZMA_SUBFILTER_NONE, the application may + * put Subfilter options to subfilter_options structure, and then + * set subfilter_mode to LZMA_SUBFILTER_SET. This implies setting + * flush to true. No new input data will be read until the Subfilter + * has been enabled. Once the Subfilter has been enabled, liblzma + * will set subfilter_mode to LZMA_SUBFILTER_RUN. + * + * When subfilter_mode is LZMA_SUBFILTER_RUN, the application may + * set subfilter_mode to LZMA_SUBFILTER_FINISH.
No new input data + * will be read until the Subfilter has been finished. Once the + * Subfilter has been finished, liblzma will set subfilter_mode + * to LZMA_SUBFILTER_NONE. + * + * If the intent is to have Subfilter enabled to the very end of + * the data, it is not needed to separately disable Subfilter with + * LZMA_SUBFILTER_FINISH. Using LZMA_FINISH as the second argument + * of lzma_code() will make the Subblock encoder disable the + * Subfilter once all the data has been run through the Subfilter. + * + * \note This variable is ignored if allow_subfilters is false. + */ + lzma_subfilter_mode subfilter_mode; + + /** + * \brief Subfilter and its options + * + * When no Subfilter is used, the data is copied as is into Subblocks. + * Setting a Subfilter allows encoding some parts of the data with + * an additional filter. It is possible to use many different Subfilters + * in the same Block, although only one can be used at once. + * + * \note This variable is ignored if allow_subfilters is false. + */ + lzma_options_filter subfilter_options; + +} lzma_options_subblock; diff --git a/src/liblzma/api/lzma/version.h b/src/liblzma/api/lzma/version.h new file mode 100644 index 00000000..ffbf8a81 --- /dev/null +++ b/src/liblzma/api/lzma/version.h @@ -0,0 +1,59 @@ +/** + * \file lzma/version.h + * \brief Version number + * + * \author Copyright (C) 1999-2006 Igor Pavlov + * \author Copyright (C) 2007 Lasse Collin + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details.
+ */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use <lzma.h> instead. +#endif + + +/** + * \brief Compile-time version number + * + * The version number is of format xyyyuuus where + * - x is the major LZMA SDK version + * - yyy is the minor LZMA SDK version + * - uuu is LZMA Utils version (reset to zero every time SDK version + * is incremented) + * - s indicates stability: 0 = alpha, 1 = beta, 2 = stable + */ +#define LZMA_VERSION UINT32_C(40420010) + + +/** + * \brief liblzma version number as an integer + * + * This is the value of LZMA_VERSION macro at the compile time of liblzma. + * This allows the application to compare if it was built against the same, + * older, or newer version of liblzma that is currently running. + */ +extern const uint32_t lzma_version_number; + + +/** + * \brief liblzma version number as a string + * + * This variable may be useful if you want to display which version of + * liblzma your application is currently using. + * + * It points to a statically allocated string constant, + * which contains the version number of liblzma. The format of + * the version string is usually (but not necessarily) x.y.z + * e.g. "4.42.1". Alpha and beta versions contain a suffix + * ("4.42.0alpha"). + */ +extern const char *const lzma_version_string; diff --git a/src/liblzma/api/lzma/vli.h b/src/liblzma/api/lzma/vli.h new file mode 100644 index 00000000..322014e1 --- /dev/null +++ b/src/liblzma/api/lzma/vli.h @@ -0,0 +1,244 @@ +/** + * \file lzma/vli.h + * \brief Variable-length integer handling + * + * \author Copyright (C) 1999-2006 Igor Pavlov + * \author Copyright (C) 2007 Lasse Collin + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version.
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use <lzma.h> instead. +#endif + + +/** + * \brief Maximum supported value of variable-length integer + */ +#define LZMA_VLI_VALUE_MAX (UINT64_MAX / 2) + +/** + * \brief VLI value to denote that the value is unknown + */ +#define LZMA_VLI_VALUE_UNKNOWN UINT64_MAX + +/** + * \brief Maximum supported length of variable length integers + */ +#define LZMA_VLI_BYTES_MAX 9 + + +/** + * \brief VLI constant suffix + */ +#define LZMA_VLI_C(n) UINT64_C(n) + + +/** + * \brief Variable-length integer type + * + * This will always be an unsigned integer. Valid VLI values are in the range + * [0, LZMA_VLI_VALUE_MAX]. Unknown value is indicated with + * LZMA_VLI_VALUE_UNKNOWN, which is the maximum value of the underlying + * integer type (this feature is useful in several situations). + * + * In future, even if lzma_vli is typedef'd to something else than uint64_t, + * it is guaranteed that 2 * LZMA_VLI_VALUE_MAX will not overflow lzma_vli. + * This simplifies integer overflow detection. + */ +typedef uint64_t lzma_vli; + + +/** + * \brief Simple macro to validate variable-length integer + * + * This is useful to test that application has given acceptable values + * for example in the uncompressed_size and compressed_size variables. + * + * \return True if the integer is representable as VLI or if it + * indicates unknown value. + */ +#define lzma_vli_is_valid(vli) \ + ((vli) <= LZMA_VLI_VALUE_MAX || (vli) == LZMA_VLI_VALUE_UNKNOWN) + + +/** + * \brief Sets VLI to given value with error checking + * + * \param dest Target variable which must have type of lzma_vli. + * \param src New value to be stored to dest.
+ * \param limit Maximum allowed value for src. + * + * \return False on success, true on error. If an error occurred, + * dest is left in undefined state (i.e. it's possible that + * it will be different in newer liblzma versions). + */ +#define lzma_vli_set_lim(dest, src, limit) \ + ((src) > (limit) || ((dest) = (src)) > (limit)) + +/** + * \brief + */ +#define lzma_vli_add_lim(dest, src, limit) \ + ((src) > (limit) || ((dest) += (src)) > (limit)) + +#define lzma_vli_add2_lim(dest, src1, src2, limit) \ + (lzma_vli_add_lim(dest, src1, limit) \ + || lzma_vli_add_lim(dest, src2, limit)) + +#define lzma_vli_add3_lim(dest, src1, src2, src3, limit) \ + (lzma_vli_add_lim(dest, src1, limit) \ + || lzma_vli_add_lim(dest, src2, limit) \ + || lzma_vli_add_lim(dest, src3, limit)) + +#define lzma_vli_add4_lim(dest, src1, src2, src3, src4, limit) \ + (lzma_vli_add_lim(dest, src1, limit) \ + || lzma_vli_add_lim(dest, src2, limit) \ + || lzma_vli_add_lim(dest, src3, limit) \ + || lzma_vli_add_lim(dest, src4, limit)) + +#define lzma_vli_sum_lim(dest, src1, src2, limit) \ + (lzma_vli_set_lim(dest, src1, limit) \ + || lzma_vli_add_lim(dest, src2, limit)) + +#define lzma_vli_sum3_lim(dest, src1, src2, src3, limit) \ + (lzma_vli_set_lim(dest, src1, limit) \ + || lzma_vli_add_lim(dest, src2, limit) \ + || lzma_vli_add_lim(dest, src3, limit)) + +#define lzma_vli_sum4_lim(dest, src1, src2, src3, src4, limit) \ + (lzma_vli_set_lim(dest, src1, limit) \ + || lzma_vli_add_lim(dest, src2, limit) \ + || lzma_vli_add_lim(dest, src3, limit) \ + || lzma_vli_add_lim(dest, src4, limit)) + +#define lzma_vli_set(dest, src) lzma_vli_set_lim(dest, src, LZMA_VLI_VALUE_MAX) + +#define lzma_vli_add(dest, src) lzma_vli_add_lim(dest, src, LZMA_VLI_VALUE_MAX) + +#define lzma_vli_add2(dest, src1, src2) \ + lzma_vli_add2_lim(dest, src1, src2, LZMA_VLI_VALUE_MAX) + +#define lzma_vli_add3(dest, src1, src2, src3) \ + lzma_vli_add3_lim(dest, src1, src2, src3, LZMA_VLI_VALUE_MAX) + +#define lzma_vli_add4(dest, 
src1, src2, src3, src4) \ + lzma_vli_add4_lim(dest, src1, src2, src3, src4, LZMA_VLI_VALUE_MAX) + +#define lzma_vli_sum(dest, src1, src2) \ + lzma_vli_sum_lim(dest, src1, src2, LZMA_VLI_VALUE_MAX) + +#define lzma_vli_sum3(dest, src1, src2, src3) \ + lzma_vli_sum3_lim(dest, src1, src2, src3, LZMA_VLI_VALUE_MAX) + +#define lzma_vli_sum4(dest, src1, src2, src3, src4) \ + lzma_vli_sum4_lim(dest, src1, src2, src3, src4, LZMA_VLI_VALUE_MAX) + + +/** + * \brief Encodes variable-length integer + * + * In the new .lzma format, most integers are encoded in variable-length + * representation. This saves space when smaller values are more likely + * than bigger values. + * + * The encoding scheme encodes seven bits to every byte, using minimum + * number of bytes required to represent the given value. In other words, + * it puts 7-63 bits into 1-9 bytes. This implementation limits the number + * of bits used to 63, thus vli must be at most UINT64_MAX / 2. You + * may use LZMA_VLI_VALUE_MAX for clarity. + * + * \param vli Integer to be encoded + * \param vli_pos How many bytes have already been written out. This + * must be less than 9 before calling this function. + * \param vli_size Minimum size that the variable-length representation + * must take. This is useful if you want to use + * variable-length integers as padding. Usually you want + * to set this to zero. The maximum allowed value is 9. + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return - LZMA_OK: So far all OK, but the integer is not + * completely written out yet. + * - LZMA_STREAM_END: Integer successfully encoded. + * - LZMA_BUF_ERROR: No output space (*out_pos == out_size) + * - LZMA_PROG_ERROR: Arguments are not sane.
+ */ +extern lzma_ret lzma_vli_encode( + lzma_vli vli, size_t *lzma_restrict vli_pos, size_t vli_size, + uint8_t *lzma_restrict out, size_t *lzma_restrict out_pos, + size_t out_size); + + +/** + * \brief Decodes variable-length integer + * + * \param vli Pointer to decoded integer. The decoder will + * initialize it to zero when *vli_pos == 0, so + * application isn't required to initialize *vli. + * \param vli_pos How many bytes have already been decoded. When + * starting to decode a new integer, *vli_pos must + * be initialized to zero. + * \param in Beginning of the input buffer + * \param in_pos The next byte will be read from in[*in_pos]. + * \param in_size Size of the input buffer; the first byte that + * won't be read is in[in_size]. + * + * \return - LZMA_OK: So far all OK, but the integer is not + * completely decoded yet. + * - LZMA_STREAM_END: Integer successfully decoded. + * - LZMA_BUF_ERROR: No input data (*in_pos == in_size) + * - LZMA_DATA_ERROR: Integer is longer than nine bytes. + * - LZMA_PROG_ERROR: Arguments are not sane. + */ +extern lzma_ret lzma_vli_decode(lzma_vli *lzma_restrict vli, + size_t *lzma_restrict vli_pos, const uint8_t *lzma_restrict in, + size_t *lzma_restrict in_pos, size_t in_size); + + +/** + * \brief Decodes variable-length integer reading buffer backwards + * + * The variable-length integer encoding is designed so that it can be read + * either from the beginning to the end, or from the end to the beginning. + * This feature is needed to make the Stream parseable backwards; + * specifically, to read the Backward Size field in Stream Footer. + * + * \param vli Pointer to variable to hold the decoded integer. + * \param in Beginning of the input buffer + * \param in_size Number of bytes available in the in[] buffer. + * On successful decoding, this is updated to match + * the number of bytes used. (in[*in_size - 1] is the + * first byte to process. 
After successful decoding, + * in[*in_size] will point to the first byte of the + * variable-length integer.) + * + * \return - LZMA_OK: Decoding successful + * - LZMA_DATA_ERROR: No valid variable-length integer was found. + * - LZMA_BUF_ERROR: Not enough input. Note that in practice, + * this tends to be a sign of broken input, because the + * applications usually do give as much input to this function + * as the applications have available. + */ +extern lzma_ret lzma_vli_reverse_decode( + lzma_vli *vli, const uint8_t *in, size_t *in_size); + + +/** + * \brief Gets the minimum number of bytes required to encode vli + * + * \return Number of bytes on success (1-9). If vli isn't valid, + * zero is returned. + */ +extern size_t lzma_vli_size(lzma_vli vli); diff --git a/src/liblzma/check/Makefile.am b/src/liblzma/check/Makefile.am new file mode 100644 index 00000000..f00d71a0 --- /dev/null +++ b/src/liblzma/check/Makefile.am @@ -0,0 +1,64 @@ +## +## This code has been put into the public domain. +## +## This library is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## Lesser General Public License for more details. 
+## + +EXTRA_DIST = crc32_tablegen.c crc64_tablegen.c + +noinst_LTLIBRARIES = libcheck.la +libcheck_la_SOURCES = \ + check.c \ + check.h \ + check_init.c \ + check_byteswap.h \ + crc_macros.h +libcheck_la_CPPFLAGS = \ + -I@top_srcdir@/src/liblzma/api \ + -I@top_srcdir@/src/liblzma/common + +if COND_CHECK_CRC32 + +if COND_ASM_X86 +libcheck_la_SOURCES += crc32_x86.s +else +libcheck_la_SOURCES += crc32.c +endif + +if COND_SMALL +libcheck_la_SOURCES += crc32_init.c +else +libcheck_la_SOURCES += crc32_table.c crc32_table_le.h crc32_table_be.h +endif + +endif + + +if COND_CHECK_CRC64 + +if COND_ASM_X86 +libcheck_la_SOURCES += crc64_x86.s +else +libcheck_la_SOURCES += crc64.c +endif + +if COND_SMALL +libcheck_la_SOURCES += crc64_init.c +else +libcheck_la_SOURCES += crc64_table.c crc64_table_le.h crc64_table_be.h +endif + +endif + + +if COND_CHECK_SHA256 +libcheck_la_SOURCES += sha256.c +# Hide bogus warning to allow usage of -Werror. If more issues like this +# pop up, we'll drop -Werror. +if COND_WNO_UNINITIALIZED +CFLAGS += -Wno-uninitialized +endif +endif diff --git a/src/liblzma/check/check.c b/src/liblzma/check/check.c new file mode 100644 index 00000000..ba59af2e --- /dev/null +++ b/src/liblzma/check/check.c @@ -0,0 +1,160 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file check.c +/// \brief Check sizes +// +// This code has been put into the public domain. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "check.h" + +// See the .lzma header format specification section 2.2.2. 
+LZMA_API const uint32_t lzma_check_sizes[8] = { 0, 4, 4, 8, 16, 32, 32, 64 }; + + +LZMA_API const lzma_bool lzma_available_checks[LZMA_CHECK_ID_MAX + 1] = { + true, // LZMA_CHECK_NONE + +#ifdef HAVE_CHECK_CRC32 + true, +#else + false, +#endif + + false, // Reserved + +#ifdef HAVE_CHECK_CRC64 + true, +#else + false, +#endif + + false, // Reserved + +#ifdef HAVE_CHECK_SHA256 + true, +#else + false, +#endif + + false, // Reserved + false, // Reserved +}; + + +extern lzma_ret +lzma_check_init(lzma_check *check, lzma_check_type type) +{ + lzma_ret ret = LZMA_OK; + + switch (type) { + case LZMA_CHECK_NONE: + break; + +#ifdef HAVE_CHECK_CRC32 + case LZMA_CHECK_CRC32: + check->crc32 = 0; + break; +#endif + +#ifdef HAVE_CHECK_CRC64 + case LZMA_CHECK_CRC64: + check->crc64 = 0; + break; +#endif + +#ifdef HAVE_CHECK_SHA256 + case LZMA_CHECK_SHA256: + lzma_sha256_init(&check->sha256); + break; +#endif + + default: + if (type <= LZMA_CHECK_ID_MAX) + ret = LZMA_UNSUPPORTED_CHECK; + else + ret = LZMA_PROG_ERROR; + break; + } + + return ret; +} + + +extern void +lzma_check_update(lzma_check *check, lzma_check_type type, + const uint8_t *buf, size_t size) +{ + switch (type) { +#ifdef HAVE_CHECK_CRC32 + case LZMA_CHECK_CRC32: + check->crc32 = lzma_crc32(buf, size, check->crc32); + break; +#endif + +#ifdef HAVE_CHECK_CRC64 + case LZMA_CHECK_CRC64: + check->crc64 = lzma_crc64(buf, size, check->crc64); + break; +#endif + +#ifdef HAVE_CHECK_SHA256 + case LZMA_CHECK_SHA256: + lzma_sha256_update(buf, size, &check->sha256); + break; +#endif + + default: + break; + } + + return; +} + + +extern void +lzma_check_finish(lzma_check *check, lzma_check_type type) +{ +#ifdef HAVE_CHECK_SHA256 + if (type == LZMA_CHECK_SHA256) + lzma_sha256_finish(&check->sha256); +#endif + + return; +} + + +/* +extern bool +lzma_check_compare( + lzma_check *check1, lzma_check *check2, lzma_check_type type) +{ + bool ret; + + switch (type) { + case LZMA_CHECK_NONE: + break; + + case LZMA_CHECK_CRC32: + ret = 
check1->crc32 != check2->crc32; + break; + + case LZMA_CHECK_CRC64: + ret = check1->crc64 != check2->crc64; + break; + + default: + // Unsupported check + assert(type <= 7); + ret = false; + break; + } + + return ret; +} +*/ diff --git a/src/liblzma/check/check.h b/src/liblzma/check/check.h new file mode 100644 index 00000000..74279ceb --- /dev/null +++ b/src/liblzma/check/check.h @@ -0,0 +1,102 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file check.h +/// \brief Prototypes for different check functions +// +// This code has been put into the public domain. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_CHECK_H +#define LZMA_CHECK_H + +#include "common.h" + + +typedef struct { + /// Internal state + uint32_t state[8]; + + /// Temporary 8-byte aligned buffer to hold incomplete chunk. + /// After lzma_check_finish(), the first 32 bytes will contain + /// the final digest in big endian byte order. + uint8_t buffer[64]; + + /// Size of the message excluding padding + uint64_t size; + +} lzma_sha256; + + +/// \note This is not in the public API because this structure will +/// change in future. +typedef union { + uint32_t crc32; + uint64_t crc64; + lzma_sha256 sha256; +} lzma_check; + + +#ifdef HAVE_SMALL +extern uint32_t lzma_crc32_table[8][256]; +extern uint64_t lzma_crc64_table[4][256]; +#else +extern const uint32_t lzma_crc32_table[8][256]; +extern const uint64_t lzma_crc64_table[4][256]; +#endif + +// Generic + +/// \brief Initializes *check depending on type +/// +/// \return LZMA_OK on success. LZMA_UNSUPPORTED_CHECK if the type is not +/// supported by the current version or build of liblzma. +/// LZMA_PROG_ERROR if type > LZMA_CHECK_ID_MAX. 
+/// +extern lzma_ret lzma_check_init(lzma_check *check, lzma_check_type type); + +/// \brief Updates *check +/// +extern void lzma_check_update(lzma_check *check, lzma_check_type type, + const uint8_t *buf, size_t size); + +/// \brief Finishes *check +/// +extern void lzma_check_finish(lzma_check *check, lzma_check_type type); + + +/* +/// \brief Compare two checks +/// +/// \return false if the checks are identical; true if they differ. +/// +extern bool lzma_check_compare( + lzma_check *check1, lzma_check *check2, lzma_check_type type); +*/ + + +// CRC32 + +extern void lzma_crc32_init(void); + + +// CRC64 + +extern void lzma_crc64_init(void); + + +// SHA256 + +extern void lzma_sha256_init(lzma_sha256 *sha256); + +extern void lzma_sha256_update( + const uint8_t *buf, size_t size, lzma_sha256 *sha256); + +extern void lzma_sha256_finish(lzma_sha256 *sha256); + + +#endif diff --git a/src/liblzma/check/check_byteswap.h b/src/liblzma/check/check_byteswap.h new file mode 100644 index 00000000..264def26 --- /dev/null +++ b/src/liblzma/check/check_byteswap.h @@ -0,0 +1,43 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file check_byteswap.h +/// \brief Byteswapping needed by the checks +// +// This code has been put into the public domain. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_CHECK_BYTESWAP_H +#define LZMA_CHECK_BYTESWAP_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +// byteswap.h is a GNU extension. It contains inline assembly versions +// for byteswapping. When byteswap.h is not available, we use generic code. 
+#ifdef HAVE_BYTESWAP_H +# include <byteswap.h> +#else +# define bswap_32(num) \ + ( (((num) << 24) ) \ + | (((num) << 8) & UINT32_C(0x00FF0000)) \ + | (((num) >> 8) & UINT32_C(0x0000FF00)) \ + | (((num) >> 24) ) ) + +# define bswap_64(num) \ + ( (((num) << 56) ) \ + | (((num) << 40) & UINT64_C(0x00FF000000000000)) \ + | (((num) << 24) & UINT64_C(0x0000FF0000000000)) \ + | (((num) << 8) & UINT64_C(0x000000FF00000000)) \ + | (((num) >> 8) & UINT64_C(0x00000000FF000000)) \ + | (((num) >> 24) & UINT64_C(0x0000000000FF0000)) \ + | (((num) >> 40) & UINT64_C(0x000000000000FF00)) \ + | (((num) >> 56) ) ) +#endif + +#endif diff --git a/src/liblzma/check/check_init.c b/src/liblzma/check/check_init.c new file mode 100644 index 00000000..1b2cfe02 --- /dev/null +++ b/src/liblzma/check/check_init.c @@ -0,0 +1,37 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file check_init.c +/// \brief Static initializations for integrity checks +// +// This code has been put into the public domain. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "check.h" + + +extern LZMA_API void +lzma_init_check(void) +{ +#ifdef HAVE_SMALL + static bool already_initialized = false; + if (already_initialized) + return; + +# ifdef HAVE_CHECK_CRC32 + lzma_crc32_init(); +# endif + +# ifdef HAVE_CHECK_CRC64 + lzma_crc64_init(); +# endif + + already_initialized = true; +#endif + + return; +} diff --git a/src/liblzma/check/crc32.c b/src/liblzma/check/crc32.c new file mode 100644 index 00000000..091e1422 --- /dev/null +++ b/src/liblzma/check/crc32.c @@ -0,0 +1,88 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file crc32.c +/// \brief CRC32 calculation +// +// This code has been put into the public domain. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "check.h" +#include "crc_macros.h" + + +// If you make any changes, do some benchmarking! Seemingly unrelated +// changes can very easily ruin the performance (and the effect is very +// probably compiler dependent). +extern uint32_t +lzma_crc32(const uint8_t *buf, size_t size, uint32_t crc) +{ + crc = ~crc; + +#ifdef WORDS_BIGENDIAN + crc = bswap_32(crc); +#endif + + if (size > 8) { + // Fix the alignment, if needed. The if statement above + // ensures that this won't read past the end of buf[]. + while ((uintptr_t)(buf) & 7) { + crc = lzma_crc32_table[0][*buf++ ^ A(crc)] ^ S8(crc); + --size; + } + + // Calculate the position where to stop. + const uint8_t *const limit = buf + (size & ~(size_t)(7)); + + // Calculate how many bytes must be calculated separately + // before returning the result. + size &= (size_t)(7); + + // Calculate the CRC32 using the slice-by-eight algorithm. 
+ // It is explained in this document: + // http://www.intel.com/technology/comms/perfnet/download/CRC_generators.pdf + // + // The code below is different than the code in Intel's + // paper, but the principle is identical. This should be + // faster with GCC than Intel's code. This is tested only + // with GCC 3.4.6 and 4.1.2 on x86, so your results may vary. + // + // Using -Os and -fomit-frame-pointer seems to give the best + // results at least with GCC 4.1.2 on x86. It's still far + // from the speed of hand-optimized assembler. + while (buf < limit) { + crc ^= *(uint32_t *)(buf); + buf += 4; + + crc = lzma_crc32_table[7][A(crc)] + ^ lzma_crc32_table[6][B(crc)] + ^ lzma_crc32_table[5][C(crc)] + ^ lzma_crc32_table[4][D(crc)]; + + const uint32_t tmp = *(uint32_t *)(buf); + buf += 4; + + // It is critical for performance that + // the crc variable is XORed between the + // two table-lookup pairs. + crc = lzma_crc32_table[3][A(tmp)] + ^ lzma_crc32_table[2][B(tmp)] + ^ crc + ^ lzma_crc32_table[1][C(tmp)] + ^ lzma_crc32_table[0][D(tmp)]; + } + } + + while (size-- != 0) + crc = lzma_crc32_table[0][*buf++ ^ A(crc)] ^ S8(crc); + +#ifdef WORDS_BIGENDIAN + crc = bswap_32(crc); +#endif + + return ~crc; +} diff --git a/src/liblzma/check/crc32_init.c b/src/liblzma/check/crc32_init.c new file mode 100644 index 00000000..eee90400 --- /dev/null +++ b/src/liblzma/check/crc32_init.c @@ -0,0 +1,58 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file crc32_init.c +/// \brief CRC32 table initialization +// +// This code is based on various public domain sources. +// This code has been put into the public domain. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <sys/types.h> +#include <inttypes.h> + +#ifdef WORDS_BIGENDIAN +# include "check_byteswap.h" +#endif + + +uint32_t lzma_crc32_table[8][256]; + + +extern void +lzma_crc32_init(void) +{ + static const uint32_t poly32 = UINT32_C(0xEDB88320); + + for (size_t s = 0; s < 8; ++s) { + for (size_t b = 0; b < 256; ++b) { + uint32_t r = s == 0 ? b : lzma_crc32_table[s - 1][b]; + + for (size_t i = 0; i < 8; ++i) { + if (r & 1) + r = (r >> 1) ^ poly32; + else + r >>= 1; + } + + lzma_crc32_table[s][b] = r; + } + } + +#ifdef WORDS_BIGENDIAN + for (size_t s = 0; s < 8; ++s) + for (size_t b = 0; b < 256; ++b) + lzma_crc32_table[s][b] + = bswap_32(lzma_crc32_table[s][b]); +#endif + + return; +} diff --git a/src/liblzma/check/crc32_table.c b/src/liblzma/check/crc32_table.c new file mode 100644 index 00000000..b59642d4 --- /dev/null +++ b/src/liblzma/check/crc32_table.c @@ -0,0 +1,22 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file crc32_table.c +/// \brief Precalculated CRC32 table with correct endianness +// +// This code has been put into the public domain. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#ifdef WORDS_BIGENDIAN +# include "crc32_table_be.h" +#else +# include "crc32_table_le.h" +#endif diff --git a/src/liblzma/check/crc32_table_be.h b/src/liblzma/check/crc32_table_be.h new file mode 100644 index 00000000..bc5a5fbd --- /dev/null +++ b/src/liblzma/check/crc32_table_be.h @@ -0,0 +1,527 @@ +/* This file has been automatically generated by crc32_tablegen.c. 
*/ + +#include <inttypes.h> + +const uint32_t lzma_crc32_table[8][256] = { + { + 0x00000000, 0x96300777, 0x2C610EEE, 0xBA510999, + 0x19C46D07, 0x8FF46A70, 0x35A563E9, 0xA395649E, + 0x3288DB0E, 0xA4B8DC79, 0x1EE9D5E0, 0x88D9D297, + 0x2B4CB609, 0xBD7CB17E, 0x072DB8E7, 0x911DBF90, + 0x6410B71D, 0xF220B06A, 0x4871B9F3, 0xDE41BE84, + 0x7DD4DA1A, 0xEBE4DD6D, 0x51B5D4F4, 0xC785D383, + 0x56986C13, 0xC0A86B64, 0x7AF962FD, 0xECC9658A, + 0x4F5C0114, 0xD96C0663, 0x633D0FFA, 0xF50D088D, + 0xC8206E3B, 0x5E10694C, 0xE44160D5, 0x727167A2, + 0xD1E4033C, 0x47D4044B, 0xFD850DD2, 0x6BB50AA5, + 0xFAA8B535, 0x6C98B242, 0xD6C9BBDB, 0x40F9BCAC, + 0xE36CD832, 0x755CDF45, 0xCF0DD6DC, 0x593DD1AB, + 0xAC30D926, 0x3A00DE51, 0x8051D7C8, 0x1661D0BF, + 0xB5F4B421, 0x23C4B356, 0x9995BACF, 0x0FA5BDB8, + 0x9EB80228, 0x0888055F, 0xB2D90CC6, 0x24E90BB1, + 0x877C6F2F, 0x114C6858, 0xAB1D61C1, 0x3D2D66B6, + 0x9041DC76, 0x0671DB01, 0xBC20D298, 0x2A10D5EF, + 0x8985B171, 0x1FB5B606, 0xA5E4BF9F, 0x33D4B8E8, + 0xA2C90778, 0x34F9000F, 0x8EA80996, 0x18980EE1, + 0xBB0D6A7F, 0x2D3D6D08, 0x976C6491, 0x015C63E6, + 0xF4516B6B, 0x62616C1C, 0xD8306585, 0x4E0062F2, + 0xED95066C, 0x7BA5011B, 0xC1F40882, 0x57C40FF5, + 0xC6D9B065, 0x50E9B712, 0xEAB8BE8B, 0x7C88B9FC, + 0xDF1DDD62, 0x492DDA15, 0xF37CD38C, 0x654CD4FB, + 0x5861B24D, 0xCE51B53A, 0x7400BCA3, 0xE230BBD4, + 0x41A5DF4A, 0xD795D83D, 0x6DC4D1A4, 0xFBF4D6D3, + 0x6AE96943, 0xFCD96E34, 0x468867AD, 0xD0B860DA, + 0x732D0444, 0xE51D0333, 0x5F4C0AAA, 0xC97C0DDD, + 0x3C710550, 0xAA410227, 0x10100BBE, 0x86200CC9, + 0x25B56857, 0xB3856F20, 0x09D466B9, 0x9FE461CE, + 0x0EF9DE5E, 0x98C9D929, 0x2298D0B0, 0xB4A8D7C7, + 0x173DB359, 0x810DB42E, 0x3B5CBDB7, 0xAD6CBAC0, + 0x2083B8ED, 0xB6B3BF9A, 0x0CE2B603, 0x9AD2B174, + 0x3947D5EA, 0xAF77D29D, 0x1526DB04, 0x8316DC73, + 0x120B63E3, 0x843B6494, 0x3E6A6D0D, 0xA85A6A7A, + 0x0BCF0EE4, 0x9DFF0993, 0x27AE000A, 0xB19E077D, + 0x44930FF0, 0xD2A30887, 0x68F2011E, 0xFEC20669, + 0x5D5762F7, 0xCB676580, 0x71366C19, 0xE7066B6E, + 0x761BD4FE, 
0xE02BD389, 0x5A7ADA10, 0xCC4ADD67, + 0x6FDFB9F9, 0xF9EFBE8E, 0x43BEB717, 0xD58EB060, + 0xE8A3D6D6, 0x7E93D1A1, 0xC4C2D838, 0x52F2DF4F, + 0xF167BBD1, 0x6757BCA6, 0xDD06B53F, 0x4B36B248, + 0xDA2B0DD8, 0x4C1B0AAF, 0xF64A0336, 0x607A0441, + 0xC3EF60DF, 0x55DF67A8, 0xEF8E6E31, 0x79BE6946, + 0x8CB361CB, 0x1A8366BC, 0xA0D26F25, 0x36E26852, + 0x95770CCC, 0x03470BBB, 0xB9160222, 0x2F260555, + 0xBE3BBAC5, 0x280BBDB2, 0x925AB42B, 0x046AB35C, + 0xA7FFD7C2, 0x31CFD0B5, 0x8B9ED92C, 0x1DAEDE5B, + 0xB0C2649B, 0x26F263EC, 0x9CA36A75, 0x0A936D02, + 0xA906099C, 0x3F360EEB, 0x85670772, 0x13570005, + 0x824ABF95, 0x147AB8E2, 0xAE2BB17B, 0x381BB60C, + 0x9B8ED292, 0x0DBED5E5, 0xB7EFDC7C, 0x21DFDB0B, + 0xD4D2D386, 0x42E2D4F1, 0xF8B3DD68, 0x6E83DA1F, + 0xCD16BE81, 0x5B26B9F6, 0xE177B06F, 0x7747B718, + 0xE65A0888, 0x706A0FFF, 0xCA3B0666, 0x5C0B0111, + 0xFF9E658F, 0x69AE62F8, 0xD3FF6B61, 0x45CF6C16, + 0x78E20AA0, 0xEED20DD7, 0x5483044E, 0xC2B30339, + 0x612667A7, 0xF71660D0, 0x4D476949, 0xDB776E3E, + 0x4A6AD1AE, 0xDC5AD6D9, 0x660BDF40, 0xF03BD837, + 0x53AEBCA9, 0xC59EBBDE, 0x7FCFB247, 0xE9FFB530, + 0x1CF2BDBD, 0x8AC2BACA, 0x3093B353, 0xA6A3B424, + 0x0536D0BA, 0x9306D7CD, 0x2957DE54, 0xBF67D923, + 0x2E7A66B3, 0xB84A61C4, 0x021B685D, 0x942B6F2A, + 0x37BE0BB4, 0xA18E0CC3, 0x1BDF055A, 0x8DEF022D + }, { + 0x00000000, 0x41311B19, 0x82623632, 0xC3532D2B, + 0x04C56C64, 0x45F4777D, 0x86A75A56, 0xC796414F, + 0x088AD9C8, 0x49BBC2D1, 0x8AE8EFFA, 0xCBD9F4E3, + 0x0C4FB5AC, 0x4D7EAEB5, 0x8E2D839E, 0xCF1C9887, + 0x5112C24A, 0x1023D953, 0xD370F478, 0x9241EF61, + 0x55D7AE2E, 0x14E6B537, 0xD7B5981C, 0x96848305, + 0x59981B82, 0x18A9009B, 0xDBFA2DB0, 0x9ACB36A9, + 0x5D5D77E6, 0x1C6C6CFF, 0xDF3F41D4, 0x9E0E5ACD, + 0xA2248495, 0xE3159F8C, 0x2046B2A7, 0x6177A9BE, + 0xA6E1E8F1, 0xE7D0F3E8, 0x2483DEC3, 0x65B2C5DA, + 0xAAAE5D5D, 0xEB9F4644, 0x28CC6B6F, 0x69FD7076, + 0xAE6B3139, 0xEF5A2A20, 0x2C09070B, 0x6D381C12, + 0xF33646DF, 0xB2075DC6, 0x715470ED, 0x30656BF4, + 0xF7F32ABB, 0xB6C231A2, 0x75911C89, 0x34A00790, + 
0xFBBC9F17, 0xBA8D840E, 0x79DEA925, 0x38EFB23C, + 0xFF79F373, 0xBE48E86A, 0x7D1BC541, 0x3C2ADE58, + 0x054F79F0, 0x447E62E9, 0x872D4FC2, 0xC61C54DB, + 0x018A1594, 0x40BB0E8D, 0x83E823A6, 0xC2D938BF, + 0x0DC5A038, 0x4CF4BB21, 0x8FA7960A, 0xCE968D13, + 0x0900CC5C, 0x4831D745, 0x8B62FA6E, 0xCA53E177, + 0x545DBBBA, 0x156CA0A3, 0xD63F8D88, 0x970E9691, + 0x5098D7DE, 0x11A9CCC7, 0xD2FAE1EC, 0x93CBFAF5, + 0x5CD76272, 0x1DE6796B, 0xDEB55440, 0x9F844F59, + 0x58120E16, 0x1923150F, 0xDA703824, 0x9B41233D, + 0xA76BFD65, 0xE65AE67C, 0x2509CB57, 0x6438D04E, + 0xA3AE9101, 0xE29F8A18, 0x21CCA733, 0x60FDBC2A, + 0xAFE124AD, 0xEED03FB4, 0x2D83129F, 0x6CB20986, + 0xAB2448C9, 0xEA1553D0, 0x29467EFB, 0x687765E2, + 0xF6793F2F, 0xB7482436, 0x741B091D, 0x352A1204, + 0xF2BC534B, 0xB38D4852, 0x70DE6579, 0x31EF7E60, + 0xFEF3E6E7, 0xBFC2FDFE, 0x7C91D0D5, 0x3DA0CBCC, + 0xFA368A83, 0xBB07919A, 0x7854BCB1, 0x3965A7A8, + 0x4B98833B, 0x0AA99822, 0xC9FAB509, 0x88CBAE10, + 0x4F5DEF5F, 0x0E6CF446, 0xCD3FD96D, 0x8C0EC274, + 0x43125AF3, 0x022341EA, 0xC1706CC1, 0x804177D8, + 0x47D73697, 0x06E62D8E, 0xC5B500A5, 0x84841BBC, + 0x1A8A4171, 0x5BBB5A68, 0x98E87743, 0xD9D96C5A, + 0x1E4F2D15, 0x5F7E360C, 0x9C2D1B27, 0xDD1C003E, + 0x120098B9, 0x533183A0, 0x9062AE8B, 0xD153B592, + 0x16C5F4DD, 0x57F4EFC4, 0x94A7C2EF, 0xD596D9F6, + 0xE9BC07AE, 0xA88D1CB7, 0x6BDE319C, 0x2AEF2A85, + 0xED796BCA, 0xAC4870D3, 0x6F1B5DF8, 0x2E2A46E1, + 0xE136DE66, 0xA007C57F, 0x6354E854, 0x2265F34D, + 0xE5F3B202, 0xA4C2A91B, 0x67918430, 0x26A09F29, + 0xB8AEC5E4, 0xF99FDEFD, 0x3ACCF3D6, 0x7BFDE8CF, + 0xBC6BA980, 0xFD5AB299, 0x3E099FB2, 0x7F3884AB, + 0xB0241C2C, 0xF1150735, 0x32462A1E, 0x73773107, + 0xB4E17048, 0xF5D06B51, 0x3683467A, 0x77B25D63, + 0x4ED7FACB, 0x0FE6E1D2, 0xCCB5CCF9, 0x8D84D7E0, + 0x4A1296AF, 0x0B238DB6, 0xC870A09D, 0x8941BB84, + 0x465D2303, 0x076C381A, 0xC43F1531, 0x850E0E28, + 0x42984F67, 0x03A9547E, 0xC0FA7955, 0x81CB624C, + 0x1FC53881, 0x5EF42398, 0x9DA70EB3, 0xDC9615AA, + 0x1B0054E5, 0x5A314FFC, 0x996262D7, 0xD85379CE, + 
0x174FE149, 0x567EFA50, 0x952DD77B, 0xD41CCC62, + 0x138A8D2D, 0x52BB9634, 0x91E8BB1F, 0xD0D9A006, + 0xECF37E5E, 0xADC26547, 0x6E91486C, 0x2FA05375, + 0xE836123A, 0xA9070923, 0x6A542408, 0x2B653F11, + 0xE479A796, 0xA548BC8F, 0x661B91A4, 0x272A8ABD, + 0xE0BCCBF2, 0xA18DD0EB, 0x62DEFDC0, 0x23EFE6D9, + 0xBDE1BC14, 0xFCD0A70D, 0x3F838A26, 0x7EB2913F, + 0xB924D070, 0xF815CB69, 0x3B46E642, 0x7A77FD5B, + 0xB56B65DC, 0xF45A7EC5, 0x370953EE, 0x763848F7, + 0xB1AE09B8, 0xF09F12A1, 0x33CC3F8A, 0x72FD2493 + }, { + 0x00000000, 0x376AC201, 0x6ED48403, 0x59BE4602, + 0xDCA80907, 0xEBC2CB06, 0xB27C8D04, 0x85164F05, + 0xB851130E, 0x8F3BD10F, 0xD685970D, 0xE1EF550C, + 0x64F91A09, 0x5393D808, 0x0A2D9E0A, 0x3D475C0B, + 0x70A3261C, 0x47C9E41D, 0x1E77A21F, 0x291D601E, + 0xAC0B2F1B, 0x9B61ED1A, 0xC2DFAB18, 0xF5B56919, + 0xC8F23512, 0xFF98F713, 0xA626B111, 0x914C7310, + 0x145A3C15, 0x2330FE14, 0x7A8EB816, 0x4DE47A17, + 0xE0464D38, 0xD72C8F39, 0x8E92C93B, 0xB9F80B3A, + 0x3CEE443F, 0x0B84863E, 0x523AC03C, 0x6550023D, + 0x58175E36, 0x6F7D9C37, 0x36C3DA35, 0x01A91834, + 0x84BF5731, 0xB3D59530, 0xEA6BD332, 0xDD011133, + 0x90E56B24, 0xA78FA925, 0xFE31EF27, 0xC95B2D26, + 0x4C4D6223, 0x7B27A022, 0x2299E620, 0x15F32421, + 0x28B4782A, 0x1FDEBA2B, 0x4660FC29, 0x710A3E28, + 0xF41C712D, 0xC376B32C, 0x9AC8F52E, 0xADA2372F, + 0xC08D9A70, 0xF7E75871, 0xAE591E73, 0x9933DC72, + 0x1C259377, 0x2B4F5176, 0x72F11774, 0x459BD575, + 0x78DC897E, 0x4FB64B7F, 0x16080D7D, 0x2162CF7C, + 0xA4748079, 0x931E4278, 0xCAA0047A, 0xFDCAC67B, + 0xB02EBC6C, 0x87447E6D, 0xDEFA386F, 0xE990FA6E, + 0x6C86B56B, 0x5BEC776A, 0x02523168, 0x3538F369, + 0x087FAF62, 0x3F156D63, 0x66AB2B61, 0x51C1E960, + 0xD4D7A665, 0xE3BD6464, 0xBA032266, 0x8D69E067, + 0x20CBD748, 0x17A11549, 0x4E1F534B, 0x7975914A, + 0xFC63DE4F, 0xCB091C4E, 0x92B75A4C, 0xA5DD984D, + 0x989AC446, 0xAFF00647, 0xF64E4045, 0xC1248244, + 0x4432CD41, 0x73580F40, 0x2AE64942, 0x1D8C8B43, + 0x5068F154, 0x67023355, 0x3EBC7557, 0x09D6B756, + 0x8CC0F853, 0xBBAA3A52, 0xE2147C50, 
0xD57EBE51, + 0xE839E25A, 0xDF53205B, 0x86ED6659, 0xB187A458, + 0x3491EB5D, 0x03FB295C, 0x5A456F5E, 0x6D2FAD5F, + 0x801B35E1, 0xB771F7E0, 0xEECFB1E2, 0xD9A573E3, + 0x5CB33CE6, 0x6BD9FEE7, 0x3267B8E5, 0x050D7AE4, + 0x384A26EF, 0x0F20E4EE, 0x569EA2EC, 0x61F460ED, + 0xE4E22FE8, 0xD388EDE9, 0x8A36ABEB, 0xBD5C69EA, + 0xF0B813FD, 0xC7D2D1FC, 0x9E6C97FE, 0xA90655FF, + 0x2C101AFA, 0x1B7AD8FB, 0x42C49EF9, 0x75AE5CF8, + 0x48E900F3, 0x7F83C2F2, 0x263D84F0, 0x115746F1, + 0x944109F4, 0xA32BCBF5, 0xFA958DF7, 0xCDFF4FF6, + 0x605D78D9, 0x5737BAD8, 0x0E89FCDA, 0x39E33EDB, + 0xBCF571DE, 0x8B9FB3DF, 0xD221F5DD, 0xE54B37DC, + 0xD80C6BD7, 0xEF66A9D6, 0xB6D8EFD4, 0x81B22DD5, + 0x04A462D0, 0x33CEA0D1, 0x6A70E6D3, 0x5D1A24D2, + 0x10FE5EC5, 0x27949CC4, 0x7E2ADAC6, 0x494018C7, + 0xCC5657C2, 0xFB3C95C3, 0xA282D3C1, 0x95E811C0, + 0xA8AF4DCB, 0x9FC58FCA, 0xC67BC9C8, 0xF1110BC9, + 0x740744CC, 0x436D86CD, 0x1AD3C0CF, 0x2DB902CE, + 0x4096AF91, 0x77FC6D90, 0x2E422B92, 0x1928E993, + 0x9C3EA696, 0xAB546497, 0xF2EA2295, 0xC580E094, + 0xF8C7BC9F, 0xCFAD7E9E, 0x9613389C, 0xA179FA9D, + 0x246FB598, 0x13057799, 0x4ABB319B, 0x7DD1F39A, + 0x3035898D, 0x075F4B8C, 0x5EE10D8E, 0x698BCF8F, + 0xEC9D808A, 0xDBF7428B, 0x82490489, 0xB523C688, + 0x88649A83, 0xBF0E5882, 0xE6B01E80, 0xD1DADC81, + 0x54CC9384, 0x63A65185, 0x3A181787, 0x0D72D586, + 0xA0D0E2A9, 0x97BA20A8, 0xCE0466AA, 0xF96EA4AB, + 0x7C78EBAE, 0x4B1229AF, 0x12AC6FAD, 0x25C6ADAC, + 0x1881F1A7, 0x2FEB33A6, 0x765575A4, 0x413FB7A5, + 0xC429F8A0, 0xF3433AA1, 0xAAFD7CA3, 0x9D97BEA2, + 0xD073C4B5, 0xE71906B4, 0xBEA740B6, 0x89CD82B7, + 0x0CDBCDB2, 0x3BB10FB3, 0x620F49B1, 0x55658BB0, + 0x6822D7BB, 0x5F4815BA, 0x06F653B8, 0x319C91B9, + 0xB48ADEBC, 0x83E01CBD, 0xDA5E5ABF, 0xED3498BE + }, { + 0x00000000, 0x6567BCB8, 0x8BC809AA, 0xEEAFB512, + 0x5797628F, 0x32F0DE37, 0xDC5F6B25, 0xB938D79D, + 0xEF28B4C5, 0x8A4F087D, 0x64E0BD6F, 0x018701D7, + 0xB8BFD64A, 0xDDD86AF2, 0x3377DFE0, 0x56106358, + 0x9F571950, 0xFA30A5E8, 0x149F10FA, 0x71F8AC42, + 0xC8C07BDF, 0xADA7C767, 
0x43087275, 0x266FCECD, + 0x707FAD95, 0x1518112D, 0xFBB7A43F, 0x9ED01887, + 0x27E8CF1A, 0x428F73A2, 0xAC20C6B0, 0xC9477A08, + 0x3EAF32A0, 0x5BC88E18, 0xB5673B0A, 0xD00087B2, + 0x6938502F, 0x0C5FEC97, 0xE2F05985, 0x8797E53D, + 0xD1878665, 0xB4E03ADD, 0x5A4F8FCF, 0x3F283377, + 0x8610E4EA, 0xE3775852, 0x0DD8ED40, 0x68BF51F8, + 0xA1F82BF0, 0xC49F9748, 0x2A30225A, 0x4F579EE2, + 0xF66F497F, 0x9308F5C7, 0x7DA740D5, 0x18C0FC6D, + 0x4ED09F35, 0x2BB7238D, 0xC518969F, 0xA07F2A27, + 0x1947FDBA, 0x7C204102, 0x928FF410, 0xF7E848A8, + 0x3D58149B, 0x583FA823, 0xB6901D31, 0xD3F7A189, + 0x6ACF7614, 0x0FA8CAAC, 0xE1077FBE, 0x8460C306, + 0xD270A05E, 0xB7171CE6, 0x59B8A9F4, 0x3CDF154C, + 0x85E7C2D1, 0xE0807E69, 0x0E2FCB7B, 0x6B4877C3, + 0xA20F0DCB, 0xC768B173, 0x29C70461, 0x4CA0B8D9, + 0xF5986F44, 0x90FFD3FC, 0x7E5066EE, 0x1B37DA56, + 0x4D27B90E, 0x284005B6, 0xC6EFB0A4, 0xA3880C1C, + 0x1AB0DB81, 0x7FD76739, 0x9178D22B, 0xF41F6E93, + 0x03F7263B, 0x66909A83, 0x883F2F91, 0xED589329, + 0x546044B4, 0x3107F80C, 0xDFA84D1E, 0xBACFF1A6, + 0xECDF92FE, 0x89B82E46, 0x67179B54, 0x027027EC, + 0xBB48F071, 0xDE2F4CC9, 0x3080F9DB, 0x55E74563, + 0x9CA03F6B, 0xF9C783D3, 0x176836C1, 0x720F8A79, + 0xCB375DE4, 0xAE50E15C, 0x40FF544E, 0x2598E8F6, + 0x73888BAE, 0x16EF3716, 0xF8408204, 0x9D273EBC, + 0x241FE921, 0x41785599, 0xAFD7E08B, 0xCAB05C33, + 0x3BB659ED, 0x5ED1E555, 0xB07E5047, 0xD519ECFF, + 0x6C213B62, 0x094687DA, 0xE7E932C8, 0x828E8E70, + 0xD49EED28, 0xB1F95190, 0x5F56E482, 0x3A31583A, + 0x83098FA7, 0xE66E331F, 0x08C1860D, 0x6DA63AB5, + 0xA4E140BD, 0xC186FC05, 0x2F294917, 0x4A4EF5AF, + 0xF3762232, 0x96119E8A, 0x78BE2B98, 0x1DD99720, + 0x4BC9F478, 0x2EAE48C0, 0xC001FDD2, 0xA566416A, + 0x1C5E96F7, 0x79392A4F, 0x97969F5D, 0xF2F123E5, + 0x05196B4D, 0x607ED7F5, 0x8ED162E7, 0xEBB6DE5F, + 0x528E09C2, 0x37E9B57A, 0xD9460068, 0xBC21BCD0, + 0xEA31DF88, 0x8F566330, 0x61F9D622, 0x049E6A9A, + 0xBDA6BD07, 0xD8C101BF, 0x366EB4AD, 0x53090815, + 0x9A4E721D, 0xFF29CEA5, 0x11867BB7, 0x74E1C70F, + 0xCDD91092, 0xA8BEAC2A, 
0x46111938, 0x2376A580, + 0x7566C6D8, 0x10017A60, 0xFEAECF72, 0x9BC973CA, + 0x22F1A457, 0x479618EF, 0xA939ADFD, 0xCC5E1145, + 0x06EE4D76, 0x6389F1CE, 0x8D2644DC, 0xE841F864, + 0x51792FF9, 0x341E9341, 0xDAB12653, 0xBFD69AEB, + 0xE9C6F9B3, 0x8CA1450B, 0x620EF019, 0x07694CA1, + 0xBE519B3C, 0xDB362784, 0x35999296, 0x50FE2E2E, + 0x99B95426, 0xFCDEE89E, 0x12715D8C, 0x7716E134, + 0xCE2E36A9, 0xAB498A11, 0x45E63F03, 0x208183BB, + 0x7691E0E3, 0x13F65C5B, 0xFD59E949, 0x983E55F1, + 0x2106826C, 0x44613ED4, 0xAACE8BC6, 0xCFA9377E, + 0x38417FD6, 0x5D26C36E, 0xB389767C, 0xD6EECAC4, + 0x6FD61D59, 0x0AB1A1E1, 0xE41E14F3, 0x8179A84B, + 0xD769CB13, 0xB20E77AB, 0x5CA1C2B9, 0x39C67E01, + 0x80FEA99C, 0xE5991524, 0x0B36A036, 0x6E511C8E, + 0xA7166686, 0xC271DA3E, 0x2CDE6F2C, 0x49B9D394, + 0xF0810409, 0x95E6B8B1, 0x7B490DA3, 0x1E2EB11B, + 0x483ED243, 0x2D596EFB, 0xC3F6DBE9, 0xA6916751, + 0x1FA9B0CC, 0x7ACE0C74, 0x9461B966, 0xF10605DE + }, { + 0x00000000, 0xB029603D, 0x6053C07A, 0xD07AA047, + 0xC0A680F5, 0x708FE0C8, 0xA0F5408F, 0x10DC20B2, + 0xC14B7030, 0x7162100D, 0xA118B04A, 0x1131D077, + 0x01EDF0C5, 0xB1C490F8, 0x61BE30BF, 0xD1975082, + 0x8297E060, 0x32BE805D, 0xE2C4201A, 0x52ED4027, + 0x42316095, 0xF21800A8, 0x2262A0EF, 0x924BC0D2, + 0x43DC9050, 0xF3F5F06D, 0x238F502A, 0x93A63017, + 0x837A10A5, 0x33537098, 0xE329D0DF, 0x5300B0E2, + 0x042FC1C1, 0xB406A1FC, 0x647C01BB, 0xD4556186, + 0xC4894134, 0x74A02109, 0xA4DA814E, 0x14F3E173, + 0xC564B1F1, 0x754DD1CC, 0xA537718B, 0x151E11B6, + 0x05C23104, 0xB5EB5139, 0x6591F17E, 0xD5B89143, + 0x86B821A1, 0x3691419C, 0xE6EBE1DB, 0x56C281E6, + 0x461EA154, 0xF637C169, 0x264D612E, 0x96640113, + 0x47F35191, 0xF7DA31AC, 0x27A091EB, 0x9789F1D6, + 0x8755D164, 0x377CB159, 0xE706111E, 0x572F7123, + 0x4958F358, 0xF9719365, 0x290B3322, 0x9922531F, + 0x89FE73AD, 0x39D71390, 0xE9ADB3D7, 0x5984D3EA, + 0x88138368, 0x383AE355, 0xE8404312, 0x5869232F, + 0x48B5039D, 0xF89C63A0, 0x28E6C3E7, 0x98CFA3DA, + 0xCBCF1338, 0x7BE67305, 0xAB9CD342, 0x1BB5B37F, + 0x0B6993CD, 
0xBB40F3F0, 0x6B3A53B7, 0xDB13338A, + 0x0A846308, 0xBAAD0335, 0x6AD7A372, 0xDAFEC34F, + 0xCA22E3FD, 0x7A0B83C0, 0xAA712387, 0x1A5843BA, + 0x4D773299, 0xFD5E52A4, 0x2D24F2E3, 0x9D0D92DE, + 0x8DD1B26C, 0x3DF8D251, 0xED827216, 0x5DAB122B, + 0x8C3C42A9, 0x3C152294, 0xEC6F82D3, 0x5C46E2EE, + 0x4C9AC25C, 0xFCB3A261, 0x2CC90226, 0x9CE0621B, + 0xCFE0D2F9, 0x7FC9B2C4, 0xAFB31283, 0x1F9A72BE, + 0x0F46520C, 0xBF6F3231, 0x6F159276, 0xDF3CF24B, + 0x0EABA2C9, 0xBE82C2F4, 0x6EF862B3, 0xDED1028E, + 0xCE0D223C, 0x7E244201, 0xAE5EE246, 0x1E77827B, + 0x92B0E6B1, 0x2299868C, 0xF2E326CB, 0x42CA46F6, + 0x52166644, 0xE23F0679, 0x3245A63E, 0x826CC603, + 0x53FB9681, 0xE3D2F6BC, 0x33A856FB, 0x838136C6, + 0x935D1674, 0x23747649, 0xF30ED60E, 0x4327B633, + 0x102706D1, 0xA00E66EC, 0x7074C6AB, 0xC05DA696, + 0xD0818624, 0x60A8E619, 0xB0D2465E, 0x00FB2663, + 0xD16C76E1, 0x614516DC, 0xB13FB69B, 0x0116D6A6, + 0x11CAF614, 0xA1E39629, 0x7199366E, 0xC1B05653, + 0x969F2770, 0x26B6474D, 0xF6CCE70A, 0x46E58737, + 0x5639A785, 0xE610C7B8, 0x366A67FF, 0x864307C2, + 0x57D45740, 0xE7FD377D, 0x3787973A, 0x87AEF707, + 0x9772D7B5, 0x275BB788, 0xF72117CF, 0x470877F2, + 0x1408C710, 0xA421A72D, 0x745B076A, 0xC4726757, + 0xD4AE47E5, 0x648727D8, 0xB4FD879F, 0x04D4E7A2, + 0xD543B720, 0x656AD71D, 0xB510775A, 0x05391767, + 0x15E537D5, 0xA5CC57E8, 0x75B6F7AF, 0xC59F9792, + 0xDBE815E9, 0x6BC175D4, 0xBBBBD593, 0x0B92B5AE, + 0x1B4E951C, 0xAB67F521, 0x7B1D5566, 0xCB34355B, + 0x1AA365D9, 0xAA8A05E4, 0x7AF0A5A3, 0xCAD9C59E, + 0xDA05E52C, 0x6A2C8511, 0xBA562556, 0x0A7F456B, + 0x597FF589, 0xE95695B4, 0x392C35F3, 0x890555CE, + 0x99D9757C, 0x29F01541, 0xF98AB506, 0x49A3D53B, + 0x983485B9, 0x281DE584, 0xF86745C3, 0x484E25FE, + 0x5892054C, 0xE8BB6571, 0x38C1C536, 0x88E8A50B, + 0xDFC7D428, 0x6FEEB415, 0xBF941452, 0x0FBD746F, + 0x1F6154DD, 0xAF4834E0, 0x7F3294A7, 0xCF1BF49A, + 0x1E8CA418, 0xAEA5C425, 0x7EDF6462, 0xCEF6045F, + 0xDE2A24ED, 0x6E0344D0, 0xBE79E497, 0x0E5084AA, + 0x5D503448, 0xED795475, 0x3D03F432, 0x8D2A940F, + 0x9DF6B4BD, 
0x2DDFD480, 0xFDA574C7, 0x4D8C14FA, + 0x9C1B4478, 0x2C322445, 0xFC488402, 0x4C61E43F, + 0x5CBDC48D, 0xEC94A4B0, 0x3CEE04F7, 0x8CC764CA + }, { + 0x00000000, 0xA5D35CCB, 0x0BA1C84D, 0xAE729486, + 0x1642919B, 0xB391CD50, 0x1DE359D6, 0xB830051D, + 0x6D8253EC, 0xC8510F27, 0x66239BA1, 0xC3F0C76A, + 0x7BC0C277, 0xDE139EBC, 0x70610A3A, 0xD5B256F1, + 0x9B02D603, 0x3ED18AC8, 0x90A31E4E, 0x35704285, + 0x8D404798, 0x28931B53, 0x86E18FD5, 0x2332D31E, + 0xF68085EF, 0x5353D924, 0xFD214DA2, 0x58F21169, + 0xE0C21474, 0x451148BF, 0xEB63DC39, 0x4EB080F2, + 0x3605AC07, 0x93D6F0CC, 0x3DA4644A, 0x98773881, + 0x20473D9C, 0x85946157, 0x2BE6F5D1, 0x8E35A91A, + 0x5B87FFEB, 0xFE54A320, 0x502637A6, 0xF5F56B6D, + 0x4DC56E70, 0xE81632BB, 0x4664A63D, 0xE3B7FAF6, + 0xAD077A04, 0x08D426CF, 0xA6A6B249, 0x0375EE82, + 0xBB45EB9F, 0x1E96B754, 0xB0E423D2, 0x15377F19, + 0xC08529E8, 0x65567523, 0xCB24E1A5, 0x6EF7BD6E, + 0xD6C7B873, 0x7314E4B8, 0xDD66703E, 0x78B52CF5, + 0x6C0A580F, 0xC9D904C4, 0x67AB9042, 0xC278CC89, + 0x7A48C994, 0xDF9B955F, 0x71E901D9, 0xD43A5D12, + 0x01880BE3, 0xA45B5728, 0x0A29C3AE, 0xAFFA9F65, + 0x17CA9A78, 0xB219C6B3, 0x1C6B5235, 0xB9B80EFE, + 0xF7088E0C, 0x52DBD2C7, 0xFCA94641, 0x597A1A8A, + 0xE14A1F97, 0x4499435C, 0xEAEBD7DA, 0x4F388B11, + 0x9A8ADDE0, 0x3F59812B, 0x912B15AD, 0x34F84966, + 0x8CC84C7B, 0x291B10B0, 0x87698436, 0x22BAD8FD, + 0x5A0FF408, 0xFFDCA8C3, 0x51AE3C45, 0xF47D608E, + 0x4C4D6593, 0xE99E3958, 0x47ECADDE, 0xE23FF115, + 0x378DA7E4, 0x925EFB2F, 0x3C2C6FA9, 0x99FF3362, + 0x21CF367F, 0x841C6AB4, 0x2A6EFE32, 0x8FBDA2F9, + 0xC10D220B, 0x64DE7EC0, 0xCAACEA46, 0x6F7FB68D, + 0xD74FB390, 0x729CEF5B, 0xDCEE7BDD, 0x793D2716, + 0xAC8F71E7, 0x095C2D2C, 0xA72EB9AA, 0x02FDE561, + 0xBACDE07C, 0x1F1EBCB7, 0xB16C2831, 0x14BF74FA, + 0xD814B01E, 0x7DC7ECD5, 0xD3B57853, 0x76662498, + 0xCE562185, 0x6B857D4E, 0xC5F7E9C8, 0x6024B503, + 0xB596E3F2, 0x1045BF39, 0xBE372BBF, 0x1BE47774, + 0xA3D47269, 0x06072EA2, 0xA875BA24, 0x0DA6E6EF, + 0x4316661D, 0xE6C53AD6, 0x48B7AE50, 0xED64F29B, + 
0x5554F786, 0xF087AB4D, 0x5EF53FCB, 0xFB266300, + 0x2E9435F1, 0x8B47693A, 0x2535FDBC, 0x80E6A177, + 0x38D6A46A, 0x9D05F8A1, 0x33776C27, 0x96A430EC, + 0xEE111C19, 0x4BC240D2, 0xE5B0D454, 0x4063889F, + 0xF8538D82, 0x5D80D149, 0xF3F245CF, 0x56211904, + 0x83934FF5, 0x2640133E, 0x883287B8, 0x2DE1DB73, + 0x95D1DE6E, 0x300282A5, 0x9E701623, 0x3BA34AE8, + 0x7513CA1A, 0xD0C096D1, 0x7EB20257, 0xDB615E9C, + 0x63515B81, 0xC682074A, 0x68F093CC, 0xCD23CF07, + 0x189199F6, 0xBD42C53D, 0x133051BB, 0xB6E30D70, + 0x0ED3086D, 0xAB0054A6, 0x0572C020, 0xA0A19CEB, + 0xB41EE811, 0x11CDB4DA, 0xBFBF205C, 0x1A6C7C97, + 0xA25C798A, 0x078F2541, 0xA9FDB1C7, 0x0C2EED0C, + 0xD99CBBFD, 0x7C4FE736, 0xD23D73B0, 0x77EE2F7B, + 0xCFDE2A66, 0x6A0D76AD, 0xC47FE22B, 0x61ACBEE0, + 0x2F1C3E12, 0x8ACF62D9, 0x24BDF65F, 0x816EAA94, + 0x395EAF89, 0x9C8DF342, 0x32FF67C4, 0x972C3B0F, + 0x429E6DFE, 0xE74D3135, 0x493FA5B3, 0xECECF978, + 0x54DCFC65, 0xF10FA0AE, 0x5F7D3428, 0xFAAE68E3, + 0x821B4416, 0x27C818DD, 0x89BA8C5B, 0x2C69D090, + 0x9459D58D, 0x318A8946, 0x9FF81DC0, 0x3A2B410B, + 0xEF9917FA, 0x4A4A4B31, 0xE438DFB7, 0x41EB837C, + 0xF9DB8661, 0x5C08DAAA, 0xF27A4E2C, 0x57A912E7, + 0x19199215, 0xBCCACEDE, 0x12B85A58, 0xB76B0693, + 0x0F5B038E, 0xAA885F45, 0x04FACBC3, 0xA1299708, + 0x749BC1F9, 0xD1489D32, 0x7F3A09B4, 0xDAE9557F, + 0x62D95062, 0xC70A0CA9, 0x6978982F, 0xCCABC4E4 + }, { + 0x00000000, 0xB40B77A6, 0x29119F97, 0x9D1AE831, + 0x13244FF4, 0xA72F3852, 0x3A35D063, 0x8E3EA7C5, + 0x674EEF33, 0xD3459895, 0x4E5F70A4, 0xFA540702, + 0x746AA0C7, 0xC061D761, 0x5D7B3F50, 0xE97048F6, + 0xCE9CDE67, 0x7A97A9C1, 0xE78D41F0, 0x53863656, + 0xDDB89193, 0x69B3E635, 0xF4A90E04, 0x40A279A2, + 0xA9D23154, 0x1DD946F2, 0x80C3AEC3, 0x34C8D965, + 0xBAF67EA0, 0x0EFD0906, 0x93E7E137, 0x27EC9691, + 0x9C39BDCF, 0x2832CA69, 0xB5282258, 0x012355FE, + 0x8F1DF23B, 0x3B16859D, 0xA60C6DAC, 0x12071A0A, + 0xFB7752FC, 0x4F7C255A, 0xD266CD6B, 0x666DBACD, + 0xE8531D08, 0x5C586AAE, 0xC142829F, 0x7549F539, + 0x52A563A8, 0xE6AE140E, 0x7BB4FC3F, 
0xCFBF8B99, + 0x41812C5C, 0xF58A5BFA, 0x6890B3CB, 0xDC9BC46D, + 0x35EB8C9B, 0x81E0FB3D, 0x1CFA130C, 0xA8F164AA, + 0x26CFC36F, 0x92C4B4C9, 0x0FDE5CF8, 0xBBD52B5E, + 0x79750B44, 0xCD7E7CE2, 0x506494D3, 0xE46FE375, + 0x6A5144B0, 0xDE5A3316, 0x4340DB27, 0xF74BAC81, + 0x1E3BE477, 0xAA3093D1, 0x372A7BE0, 0x83210C46, + 0x0D1FAB83, 0xB914DC25, 0x240E3414, 0x900543B2, + 0xB7E9D523, 0x03E2A285, 0x9EF84AB4, 0x2AF33D12, + 0xA4CD9AD7, 0x10C6ED71, 0x8DDC0540, 0x39D772E6, + 0xD0A73A10, 0x64AC4DB6, 0xF9B6A587, 0x4DBDD221, + 0xC38375E4, 0x77880242, 0xEA92EA73, 0x5E999DD5, + 0xE54CB68B, 0x5147C12D, 0xCC5D291C, 0x78565EBA, + 0xF668F97F, 0x42638ED9, 0xDF7966E8, 0x6B72114E, + 0x820259B8, 0x36092E1E, 0xAB13C62F, 0x1F18B189, + 0x9126164C, 0x252D61EA, 0xB83789DB, 0x0C3CFE7D, + 0x2BD068EC, 0x9FDB1F4A, 0x02C1F77B, 0xB6CA80DD, + 0x38F42718, 0x8CFF50BE, 0x11E5B88F, 0xA5EECF29, + 0x4C9E87DF, 0xF895F079, 0x658F1848, 0xD1846FEE, + 0x5FBAC82B, 0xEBB1BF8D, 0x76AB57BC, 0xC2A0201A, + 0xF2EA1688, 0x46E1612E, 0xDBFB891F, 0x6FF0FEB9, + 0xE1CE597C, 0x55C52EDA, 0xC8DFC6EB, 0x7CD4B14D, + 0x95A4F9BB, 0x21AF8E1D, 0xBCB5662C, 0x08BE118A, + 0x8680B64F, 0x328BC1E9, 0xAF9129D8, 0x1B9A5E7E, + 0x3C76C8EF, 0x887DBF49, 0x15675778, 0xA16C20DE, + 0x2F52871B, 0x9B59F0BD, 0x0643188C, 0xB2486F2A, + 0x5B3827DC, 0xEF33507A, 0x7229B84B, 0xC622CFED, + 0x481C6828, 0xFC171F8E, 0x610DF7BF, 0xD5068019, + 0x6ED3AB47, 0xDAD8DCE1, 0x47C234D0, 0xF3C94376, + 0x7DF7E4B3, 0xC9FC9315, 0x54E67B24, 0xE0ED0C82, + 0x099D4474, 0xBD9633D2, 0x208CDBE3, 0x9487AC45, + 0x1AB90B80, 0xAEB27C26, 0x33A89417, 0x87A3E3B1, + 0xA04F7520, 0x14440286, 0x895EEAB7, 0x3D559D11, + 0xB36B3AD4, 0x07604D72, 0x9A7AA543, 0x2E71D2E5, + 0xC7019A13, 0x730AEDB5, 0xEE100584, 0x5A1B7222, + 0xD425D5E7, 0x602EA241, 0xFD344A70, 0x493F3DD6, + 0x8B9F1DCC, 0x3F946A6A, 0xA28E825B, 0x1685F5FD, + 0x98BB5238, 0x2CB0259E, 0xB1AACDAF, 0x05A1BA09, + 0xECD1F2FF, 0x58DA8559, 0xC5C06D68, 0x71CB1ACE, + 0xFFF5BD0B, 0x4BFECAAD, 0xD6E4229C, 0x62EF553A, + 0x4503C3AB, 0xF108B40D, 0x6C125C3C, 
0xD8192B9A, + 0x56278C5F, 0xE22CFBF9, 0x7F3613C8, 0xCB3D646E, + 0x224D2C98, 0x96465B3E, 0x0B5CB30F, 0xBF57C4A9, + 0x3169636C, 0x856214CA, 0x1878FCFB, 0xAC738B5D, + 0x17A6A003, 0xA3ADD7A5, 0x3EB73F94, 0x8ABC4832, + 0x0482EFF7, 0xB0899851, 0x2D937060, 0x999807C6, + 0x70E84F30, 0xC4E33896, 0x59F9D0A7, 0xEDF2A701, + 0x63CC00C4, 0xD7C77762, 0x4ADD9F53, 0xFED6E8F5, + 0xD93A7E64, 0x6D3109C2, 0xF02BE1F3, 0x44209655, + 0xCA1E3190, 0x7E154636, 0xE30FAE07, 0x5704D9A1, + 0xBE749157, 0x0A7FE6F1, 0x97650EC0, 0x236E7966, + 0xAD50DEA3, 0x195BA905, 0x84414134, 0x304A3692 + }, { + 0x00000000, 0x9E00AACC, 0x7D072542, 0xE3078F8E, + 0xFA0E4A84, 0x640EE048, 0x87096FC6, 0x1909C50A, + 0xB51BE5D3, 0x2B1B4F1F, 0xC81CC091, 0x561C6A5D, + 0x4F15AF57, 0xD115059B, 0x32128A15, 0xAC1220D9, + 0x2B31BB7C, 0xB53111B0, 0x56369E3E, 0xC83634F2, + 0xD13FF1F8, 0x4F3F5B34, 0xAC38D4BA, 0x32387E76, + 0x9E2A5EAF, 0x002AF463, 0xE32D7BED, 0x7D2DD121, + 0x6424142B, 0xFA24BEE7, 0x19233169, 0x87239BA5, + 0x566276F9, 0xC862DC35, 0x2B6553BB, 0xB565F977, + 0xAC6C3C7D, 0x326C96B1, 0xD16B193F, 0x4F6BB3F3, + 0xE379932A, 0x7D7939E6, 0x9E7EB668, 0x007E1CA4, + 0x1977D9AE, 0x87777362, 0x6470FCEC, 0xFA705620, + 0x7D53CD85, 0xE3536749, 0x0054E8C7, 0x9E54420B, + 0x875D8701, 0x195D2DCD, 0xFA5AA243, 0x645A088F, + 0xC8482856, 0x5648829A, 0xB54F0D14, 0x2B4FA7D8, + 0x324662D2, 0xAC46C81E, 0x4F414790, 0xD141ED5C, + 0xEDC29D29, 0x73C237E5, 0x90C5B86B, 0x0EC512A7, + 0x17CCD7AD, 0x89CC7D61, 0x6ACBF2EF, 0xF4CB5823, + 0x58D978FA, 0xC6D9D236, 0x25DE5DB8, 0xBBDEF774, + 0xA2D7327E, 0x3CD798B2, 0xDFD0173C, 0x41D0BDF0, + 0xC6F32655, 0x58F38C99, 0xBBF40317, 0x25F4A9DB, + 0x3CFD6CD1, 0xA2FDC61D, 0x41FA4993, 0xDFFAE35F, + 0x73E8C386, 0xEDE8694A, 0x0EEFE6C4, 0x90EF4C08, + 0x89E68902, 0x17E623CE, 0xF4E1AC40, 0x6AE1068C, + 0xBBA0EBD0, 0x25A0411C, 0xC6A7CE92, 0x58A7645E, + 0x41AEA154, 0xDFAE0B98, 0x3CA98416, 0xA2A92EDA, + 0x0EBB0E03, 0x90BBA4CF, 0x73BC2B41, 0xEDBC818D, + 0xF4B54487, 0x6AB5EE4B, 0x89B261C5, 0x17B2CB09, + 0x909150AC, 0x0E91FA60, 
0xED9675EE, 0x7396DF22, + 0x6A9F1A28, 0xF49FB0E4, 0x17983F6A, 0x899895A6, + 0x258AB57F, 0xBB8A1FB3, 0x588D903D, 0xC68D3AF1, + 0xDF84FFFB, 0x41845537, 0xA283DAB9, 0x3C837075, + 0xDA853B53, 0x4485919F, 0xA7821E11, 0x3982B4DD, + 0x208B71D7, 0xBE8BDB1B, 0x5D8C5495, 0xC38CFE59, + 0x6F9EDE80, 0xF19E744C, 0x1299FBC2, 0x8C99510E, + 0x95909404, 0x0B903EC8, 0xE897B146, 0x76971B8A, + 0xF1B4802F, 0x6FB42AE3, 0x8CB3A56D, 0x12B30FA1, + 0x0BBACAAB, 0x95BA6067, 0x76BDEFE9, 0xE8BD4525, + 0x44AF65FC, 0xDAAFCF30, 0x39A840BE, 0xA7A8EA72, + 0xBEA12F78, 0x20A185B4, 0xC3A60A3A, 0x5DA6A0F6, + 0x8CE74DAA, 0x12E7E766, 0xF1E068E8, 0x6FE0C224, + 0x76E9072E, 0xE8E9ADE2, 0x0BEE226C, 0x95EE88A0, + 0x39FCA879, 0xA7FC02B5, 0x44FB8D3B, 0xDAFB27F7, + 0xC3F2E2FD, 0x5DF24831, 0xBEF5C7BF, 0x20F56D73, + 0xA7D6F6D6, 0x39D65C1A, 0xDAD1D394, 0x44D17958, + 0x5DD8BC52, 0xC3D8169E, 0x20DF9910, 0xBEDF33DC, + 0x12CD1305, 0x8CCDB9C9, 0x6FCA3647, 0xF1CA9C8B, + 0xE8C35981, 0x76C3F34D, 0x95C47CC3, 0x0BC4D60F, + 0x3747A67A, 0xA9470CB6, 0x4A408338, 0xD44029F4, + 0xCD49ECFE, 0x53494632, 0xB04EC9BC, 0x2E4E6370, + 0x825C43A9, 0x1C5CE965, 0xFF5B66EB, 0x615BCC27, + 0x7852092D, 0xE652A3E1, 0x05552C6F, 0x9B5586A3, + 0x1C761D06, 0x8276B7CA, 0x61713844, 0xFF719288, + 0xE6785782, 0x7878FD4E, 0x9B7F72C0, 0x057FD80C, + 0xA96DF8D5, 0x376D5219, 0xD46ADD97, 0x4A6A775B, + 0x5363B251, 0xCD63189D, 0x2E649713, 0xB0643DDF, + 0x6125D083, 0xFF257A4F, 0x1C22F5C1, 0x82225F0D, + 0x9B2B9A07, 0x052B30CB, 0xE62CBF45, 0x782C1589, + 0xD43E3550, 0x4A3E9F9C, 0xA9391012, 0x3739BADE, + 0x2E307FD4, 0xB030D518, 0x53375A96, 0xCD37F05A, + 0x4A146BFF, 0xD414C133, 0x37134EBD, 0xA913E471, + 0xB01A217B, 0x2E1A8BB7, 0xCD1D0439, 0x531DAEF5, + 0xFF0F8E2C, 0x610F24E0, 0x8208AB6E, 0x1C0801A2, + 0x0501C4A8, 0x9B016E64, 0x7806E1EA, 0xE6064B26 + } +}; diff --git a/src/liblzma/check/crc32_table_le.h b/src/liblzma/check/crc32_table_le.h new file mode 100644 index 00000000..a96d772a --- /dev/null +++ b/src/liblzma/check/crc32_table_le.h @@ -0,0 +1,527 @@ +/* This file 
has been automatically generated by crc32_tablegen.c. */ + +#include <inttypes.h> + +const uint32_t lzma_crc32_table[8][256] = { + { + 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, + 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, + 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, + 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, + 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, + 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, + 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, + 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, + 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, + 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, + 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, + 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, + 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, + 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, + 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, + 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, + 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, + 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, + 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, + 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, + 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, + 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, + 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, + 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, + 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, + 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, + 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, + 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, + 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, + 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, + 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, + 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, + 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, + 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, + 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, + 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, + 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, + 0xF762575D, 
0x806567CB, 0x196C3671, 0x6E6B06E7, + 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, + 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, + 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, + 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, + 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, + 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, + 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, + 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, + 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, + 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, + 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, + 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, + 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, + 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, + 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, + 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, + 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, + 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, + 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, + 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, + 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, + 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, + 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, + 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, + 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, + 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D + }, { + 0x00000000, 0x191B3141, 0x32366282, 0x2B2D53C3, + 0x646CC504, 0x7D77F445, 0x565AA786, 0x4F4196C7, + 0xC8D98A08, 0xD1C2BB49, 0xFAEFE88A, 0xE3F4D9CB, + 0xACB54F0C, 0xB5AE7E4D, 0x9E832D8E, 0x87981CCF, + 0x4AC21251, 0x53D92310, 0x78F470D3, 0x61EF4192, + 0x2EAED755, 0x37B5E614, 0x1C98B5D7, 0x05838496, + 0x821B9859, 0x9B00A918, 0xB02DFADB, 0xA936CB9A, + 0xE6775D5D, 0xFF6C6C1C, 0xD4413FDF, 0xCD5A0E9E, + 0x958424A2, 0x8C9F15E3, 0xA7B24620, 0xBEA97761, + 0xF1E8E1A6, 0xE8F3D0E7, 0xC3DE8324, 0xDAC5B265, + 0x5D5DAEAA, 0x44469FEB, 0x6F6BCC28, 0x7670FD69, + 0x39316BAE, 0x202A5AEF, 0x0B07092C, 0x121C386D, + 0xDF4636F3, 0xC65D07B2, 0xED705471, 0xF46B6530, + 
0xBB2AF3F7, 0xA231C2B6, 0x891C9175, 0x9007A034, + 0x179FBCFB, 0x0E848DBA, 0x25A9DE79, 0x3CB2EF38, + 0x73F379FF, 0x6AE848BE, 0x41C51B7D, 0x58DE2A3C, + 0xF0794F05, 0xE9627E44, 0xC24F2D87, 0xDB541CC6, + 0x94158A01, 0x8D0EBB40, 0xA623E883, 0xBF38D9C2, + 0x38A0C50D, 0x21BBF44C, 0x0A96A78F, 0x138D96CE, + 0x5CCC0009, 0x45D73148, 0x6EFA628B, 0x77E153CA, + 0xBABB5D54, 0xA3A06C15, 0x888D3FD6, 0x91960E97, + 0xDED79850, 0xC7CCA911, 0xECE1FAD2, 0xF5FACB93, + 0x7262D75C, 0x6B79E61D, 0x4054B5DE, 0x594F849F, + 0x160E1258, 0x0F152319, 0x243870DA, 0x3D23419B, + 0x65FD6BA7, 0x7CE65AE6, 0x57CB0925, 0x4ED03864, + 0x0191AEA3, 0x188A9FE2, 0x33A7CC21, 0x2ABCFD60, + 0xAD24E1AF, 0xB43FD0EE, 0x9F12832D, 0x8609B26C, + 0xC94824AB, 0xD05315EA, 0xFB7E4629, 0xE2657768, + 0x2F3F79F6, 0x362448B7, 0x1D091B74, 0x04122A35, + 0x4B53BCF2, 0x52488DB3, 0x7965DE70, 0x607EEF31, + 0xE7E6F3FE, 0xFEFDC2BF, 0xD5D0917C, 0xCCCBA03D, + 0x838A36FA, 0x9A9107BB, 0xB1BC5478, 0xA8A76539, + 0x3B83984B, 0x2298A90A, 0x09B5FAC9, 0x10AECB88, + 0x5FEF5D4F, 0x46F46C0E, 0x6DD93FCD, 0x74C20E8C, + 0xF35A1243, 0xEA412302, 0xC16C70C1, 0xD8774180, + 0x9736D747, 0x8E2DE606, 0xA500B5C5, 0xBC1B8484, + 0x71418A1A, 0x685ABB5B, 0x4377E898, 0x5A6CD9D9, + 0x152D4F1E, 0x0C367E5F, 0x271B2D9C, 0x3E001CDD, + 0xB9980012, 0xA0833153, 0x8BAE6290, 0x92B553D1, + 0xDDF4C516, 0xC4EFF457, 0xEFC2A794, 0xF6D996D5, + 0xAE07BCE9, 0xB71C8DA8, 0x9C31DE6B, 0x852AEF2A, + 0xCA6B79ED, 0xD37048AC, 0xF85D1B6F, 0xE1462A2E, + 0x66DE36E1, 0x7FC507A0, 0x54E85463, 0x4DF36522, + 0x02B2F3E5, 0x1BA9C2A4, 0x30849167, 0x299FA026, + 0xE4C5AEB8, 0xFDDE9FF9, 0xD6F3CC3A, 0xCFE8FD7B, + 0x80A96BBC, 0x99B25AFD, 0xB29F093E, 0xAB84387F, + 0x2C1C24B0, 0x350715F1, 0x1E2A4632, 0x07317773, + 0x4870E1B4, 0x516BD0F5, 0x7A468336, 0x635DB277, + 0xCBFAD74E, 0xD2E1E60F, 0xF9CCB5CC, 0xE0D7848D, + 0xAF96124A, 0xB68D230B, 0x9DA070C8, 0x84BB4189, + 0x03235D46, 0x1A386C07, 0x31153FC4, 0x280E0E85, + 0x674F9842, 0x7E54A903, 0x5579FAC0, 0x4C62CB81, + 0x8138C51F, 0x9823F45E, 0xB30EA79D, 0xAA1596DC, + 
0xE554001B, 0xFC4F315A, 0xD7626299, 0xCE7953D8, + 0x49E14F17, 0x50FA7E56, 0x7BD72D95, 0x62CC1CD4, + 0x2D8D8A13, 0x3496BB52, 0x1FBBE891, 0x06A0D9D0, + 0x5E7EF3EC, 0x4765C2AD, 0x6C48916E, 0x7553A02F, + 0x3A1236E8, 0x230907A9, 0x0824546A, 0x113F652B, + 0x96A779E4, 0x8FBC48A5, 0xA4911B66, 0xBD8A2A27, + 0xF2CBBCE0, 0xEBD08DA1, 0xC0FDDE62, 0xD9E6EF23, + 0x14BCE1BD, 0x0DA7D0FC, 0x268A833F, 0x3F91B27E, + 0x70D024B9, 0x69CB15F8, 0x42E6463B, 0x5BFD777A, + 0xDC656BB5, 0xC57E5AF4, 0xEE530937, 0xF7483876, + 0xB809AEB1, 0xA1129FF0, 0x8A3FCC33, 0x9324FD72 + }, { + 0x00000000, 0x01C26A37, 0x0384D46E, 0x0246BE59, + 0x0709A8DC, 0x06CBC2EB, 0x048D7CB2, 0x054F1685, + 0x0E1351B8, 0x0FD13B8F, 0x0D9785D6, 0x0C55EFE1, + 0x091AF964, 0x08D89353, 0x0A9E2D0A, 0x0B5C473D, + 0x1C26A370, 0x1DE4C947, 0x1FA2771E, 0x1E601D29, + 0x1B2F0BAC, 0x1AED619B, 0x18ABDFC2, 0x1969B5F5, + 0x1235F2C8, 0x13F798FF, 0x11B126A6, 0x10734C91, + 0x153C5A14, 0x14FE3023, 0x16B88E7A, 0x177AE44D, + 0x384D46E0, 0x398F2CD7, 0x3BC9928E, 0x3A0BF8B9, + 0x3F44EE3C, 0x3E86840B, 0x3CC03A52, 0x3D025065, + 0x365E1758, 0x379C7D6F, 0x35DAC336, 0x3418A901, + 0x3157BF84, 0x3095D5B3, 0x32D36BEA, 0x331101DD, + 0x246BE590, 0x25A98FA7, 0x27EF31FE, 0x262D5BC9, + 0x23624D4C, 0x22A0277B, 0x20E69922, 0x2124F315, + 0x2A78B428, 0x2BBADE1F, 0x29FC6046, 0x283E0A71, + 0x2D711CF4, 0x2CB376C3, 0x2EF5C89A, 0x2F37A2AD, + 0x709A8DC0, 0x7158E7F7, 0x731E59AE, 0x72DC3399, + 0x7793251C, 0x76514F2B, 0x7417F172, 0x75D59B45, + 0x7E89DC78, 0x7F4BB64F, 0x7D0D0816, 0x7CCF6221, + 0x798074A4, 0x78421E93, 0x7A04A0CA, 0x7BC6CAFD, + 0x6CBC2EB0, 0x6D7E4487, 0x6F38FADE, 0x6EFA90E9, + 0x6BB5866C, 0x6A77EC5B, 0x68315202, 0x69F33835, + 0x62AF7F08, 0x636D153F, 0x612BAB66, 0x60E9C151, + 0x65A6D7D4, 0x6464BDE3, 0x662203BA, 0x67E0698D, + 0x48D7CB20, 0x4915A117, 0x4B531F4E, 0x4A917579, + 0x4FDE63FC, 0x4E1C09CB, 0x4C5AB792, 0x4D98DDA5, + 0x46C49A98, 0x4706F0AF, 0x45404EF6, 0x448224C1, + 0x41CD3244, 0x400F5873, 0x4249E62A, 0x438B8C1D, + 0x54F16850, 0x55330267, 0x5775BC3E, 
0x56B7D609, + 0x53F8C08C, 0x523AAABB, 0x507C14E2, 0x51BE7ED5, + 0x5AE239E8, 0x5B2053DF, 0x5966ED86, 0x58A487B1, + 0x5DEB9134, 0x5C29FB03, 0x5E6F455A, 0x5FAD2F6D, + 0xE1351B80, 0xE0F771B7, 0xE2B1CFEE, 0xE373A5D9, + 0xE63CB35C, 0xE7FED96B, 0xE5B86732, 0xE47A0D05, + 0xEF264A38, 0xEEE4200F, 0xECA29E56, 0xED60F461, + 0xE82FE2E4, 0xE9ED88D3, 0xEBAB368A, 0xEA695CBD, + 0xFD13B8F0, 0xFCD1D2C7, 0xFE976C9E, 0xFF5506A9, + 0xFA1A102C, 0xFBD87A1B, 0xF99EC442, 0xF85CAE75, + 0xF300E948, 0xF2C2837F, 0xF0843D26, 0xF1465711, + 0xF4094194, 0xF5CB2BA3, 0xF78D95FA, 0xF64FFFCD, + 0xD9785D60, 0xD8BA3757, 0xDAFC890E, 0xDB3EE339, + 0xDE71F5BC, 0xDFB39F8B, 0xDDF521D2, 0xDC374BE5, + 0xD76B0CD8, 0xD6A966EF, 0xD4EFD8B6, 0xD52DB281, + 0xD062A404, 0xD1A0CE33, 0xD3E6706A, 0xD2241A5D, + 0xC55EFE10, 0xC49C9427, 0xC6DA2A7E, 0xC7184049, + 0xC25756CC, 0xC3953CFB, 0xC1D382A2, 0xC011E895, + 0xCB4DAFA8, 0xCA8FC59F, 0xC8C97BC6, 0xC90B11F1, + 0xCC440774, 0xCD866D43, 0xCFC0D31A, 0xCE02B92D, + 0x91AF9640, 0x906DFC77, 0x922B422E, 0x93E92819, + 0x96A63E9C, 0x976454AB, 0x9522EAF2, 0x94E080C5, + 0x9FBCC7F8, 0x9E7EADCF, 0x9C381396, 0x9DFA79A1, + 0x98B56F24, 0x99770513, 0x9B31BB4A, 0x9AF3D17D, + 0x8D893530, 0x8C4B5F07, 0x8E0DE15E, 0x8FCF8B69, + 0x8A809DEC, 0x8B42F7DB, 0x89044982, 0x88C623B5, + 0x839A6488, 0x82580EBF, 0x801EB0E6, 0x81DCDAD1, + 0x8493CC54, 0x8551A663, 0x8717183A, 0x86D5720D, + 0xA9E2D0A0, 0xA820BA97, 0xAA6604CE, 0xABA46EF9, + 0xAEEB787C, 0xAF29124B, 0xAD6FAC12, 0xACADC625, + 0xA7F18118, 0xA633EB2F, 0xA4755576, 0xA5B73F41, + 0xA0F829C4, 0xA13A43F3, 0xA37CFDAA, 0xA2BE979D, + 0xB5C473D0, 0xB40619E7, 0xB640A7BE, 0xB782CD89, + 0xB2CDDB0C, 0xB30FB13B, 0xB1490F62, 0xB08B6555, + 0xBBD72268, 0xBA15485F, 0xB853F606, 0xB9919C31, + 0xBCDE8AB4, 0xBD1CE083, 0xBF5A5EDA, 0xBE9834ED + }, { + 0x00000000, 0xB8BC6765, 0xAA09C88B, 0x12B5AFEE, + 0x8F629757, 0x37DEF032, 0x256B5FDC, 0x9DD738B9, + 0xC5B428EF, 0x7D084F8A, 0x6FBDE064, 0xD7018701, + 0x4AD6BFB8, 0xF26AD8DD, 0xE0DF7733, 0x58631056, + 0x5019579F, 0xE8A530FA, 
0xFA109F14, 0x42ACF871, + 0xDF7BC0C8, 0x67C7A7AD, 0x75720843, 0xCDCE6F26, + 0x95AD7F70, 0x2D111815, 0x3FA4B7FB, 0x8718D09E, + 0x1ACFE827, 0xA2738F42, 0xB0C620AC, 0x087A47C9, + 0xA032AF3E, 0x188EC85B, 0x0A3B67B5, 0xB28700D0, + 0x2F503869, 0x97EC5F0C, 0x8559F0E2, 0x3DE59787, + 0x658687D1, 0xDD3AE0B4, 0xCF8F4F5A, 0x7733283F, + 0xEAE41086, 0x525877E3, 0x40EDD80D, 0xF851BF68, + 0xF02BF8A1, 0x48979FC4, 0x5A22302A, 0xE29E574F, + 0x7F496FF6, 0xC7F50893, 0xD540A77D, 0x6DFCC018, + 0x359FD04E, 0x8D23B72B, 0x9F9618C5, 0x272A7FA0, + 0xBAFD4719, 0x0241207C, 0x10F48F92, 0xA848E8F7, + 0x9B14583D, 0x23A83F58, 0x311D90B6, 0x89A1F7D3, + 0x1476CF6A, 0xACCAA80F, 0xBE7F07E1, 0x06C36084, + 0x5EA070D2, 0xE61C17B7, 0xF4A9B859, 0x4C15DF3C, + 0xD1C2E785, 0x697E80E0, 0x7BCB2F0E, 0xC377486B, + 0xCB0D0FA2, 0x73B168C7, 0x6104C729, 0xD9B8A04C, + 0x446F98F5, 0xFCD3FF90, 0xEE66507E, 0x56DA371B, + 0x0EB9274D, 0xB6054028, 0xA4B0EFC6, 0x1C0C88A3, + 0x81DBB01A, 0x3967D77F, 0x2BD27891, 0x936E1FF4, + 0x3B26F703, 0x839A9066, 0x912F3F88, 0x299358ED, + 0xB4446054, 0x0CF80731, 0x1E4DA8DF, 0xA6F1CFBA, + 0xFE92DFEC, 0x462EB889, 0x549B1767, 0xEC277002, + 0x71F048BB, 0xC94C2FDE, 0xDBF98030, 0x6345E755, + 0x6B3FA09C, 0xD383C7F9, 0xC1366817, 0x798A0F72, + 0xE45D37CB, 0x5CE150AE, 0x4E54FF40, 0xF6E89825, + 0xAE8B8873, 0x1637EF16, 0x048240F8, 0xBC3E279D, + 0x21E91F24, 0x99557841, 0x8BE0D7AF, 0x335CB0CA, + 0xED59B63B, 0x55E5D15E, 0x47507EB0, 0xFFEC19D5, + 0x623B216C, 0xDA874609, 0xC832E9E7, 0x708E8E82, + 0x28ED9ED4, 0x9051F9B1, 0x82E4565F, 0x3A58313A, + 0xA78F0983, 0x1F336EE6, 0x0D86C108, 0xB53AA66D, + 0xBD40E1A4, 0x05FC86C1, 0x1749292F, 0xAFF54E4A, + 0x322276F3, 0x8A9E1196, 0x982BBE78, 0x2097D91D, + 0x78F4C94B, 0xC048AE2E, 0xD2FD01C0, 0x6A4166A5, + 0xF7965E1C, 0x4F2A3979, 0x5D9F9697, 0xE523F1F2, + 0x4D6B1905, 0xF5D77E60, 0xE762D18E, 0x5FDEB6EB, + 0xC2098E52, 0x7AB5E937, 0x680046D9, 0xD0BC21BC, + 0x88DF31EA, 0x3063568F, 0x22D6F961, 0x9A6A9E04, + 0x07BDA6BD, 0xBF01C1D8, 0xADB46E36, 0x15080953, + 0x1D724E9A, 0xA5CE29FF, 
0xB77B8611, 0x0FC7E174, + 0x9210D9CD, 0x2AACBEA8, 0x38191146, 0x80A57623, + 0xD8C66675, 0x607A0110, 0x72CFAEFE, 0xCA73C99B, + 0x57A4F122, 0xEF189647, 0xFDAD39A9, 0x45115ECC, + 0x764DEE06, 0xCEF18963, 0xDC44268D, 0x64F841E8, + 0xF92F7951, 0x41931E34, 0x5326B1DA, 0xEB9AD6BF, + 0xB3F9C6E9, 0x0B45A18C, 0x19F00E62, 0xA14C6907, + 0x3C9B51BE, 0x842736DB, 0x96929935, 0x2E2EFE50, + 0x2654B999, 0x9EE8DEFC, 0x8C5D7112, 0x34E11677, + 0xA9362ECE, 0x118A49AB, 0x033FE645, 0xBB838120, + 0xE3E09176, 0x5B5CF613, 0x49E959FD, 0xF1553E98, + 0x6C820621, 0xD43E6144, 0xC68BCEAA, 0x7E37A9CF, + 0xD67F4138, 0x6EC3265D, 0x7C7689B3, 0xC4CAEED6, + 0x591DD66F, 0xE1A1B10A, 0xF3141EE4, 0x4BA87981, + 0x13CB69D7, 0xAB770EB2, 0xB9C2A15C, 0x017EC639, + 0x9CA9FE80, 0x241599E5, 0x36A0360B, 0x8E1C516E, + 0x866616A7, 0x3EDA71C2, 0x2C6FDE2C, 0x94D3B949, + 0x090481F0, 0xB1B8E695, 0xA30D497B, 0x1BB12E1E, + 0x43D23E48, 0xFB6E592D, 0xE9DBF6C3, 0x516791A6, + 0xCCB0A91F, 0x740CCE7A, 0x66B96194, 0xDE0506F1 + }, { + 0x00000000, 0x3D6029B0, 0x7AC05360, 0x47A07AD0, + 0xF580A6C0, 0xC8E08F70, 0x8F40F5A0, 0xB220DC10, + 0x30704BC1, 0x0D106271, 0x4AB018A1, 0x77D03111, + 0xC5F0ED01, 0xF890C4B1, 0xBF30BE61, 0x825097D1, + 0x60E09782, 0x5D80BE32, 0x1A20C4E2, 0x2740ED52, + 0x95603142, 0xA80018F2, 0xEFA06222, 0xD2C04B92, + 0x5090DC43, 0x6DF0F5F3, 0x2A508F23, 0x1730A693, + 0xA5107A83, 0x98705333, 0xDFD029E3, 0xE2B00053, + 0xC1C12F04, 0xFCA106B4, 0xBB017C64, 0x866155D4, + 0x344189C4, 0x0921A074, 0x4E81DAA4, 0x73E1F314, + 0xF1B164C5, 0xCCD14D75, 0x8B7137A5, 0xB6111E15, + 0x0431C205, 0x3951EBB5, 0x7EF19165, 0x4391B8D5, + 0xA121B886, 0x9C419136, 0xDBE1EBE6, 0xE681C256, + 0x54A11E46, 0x69C137F6, 0x2E614D26, 0x13016496, + 0x9151F347, 0xAC31DAF7, 0xEB91A027, 0xD6F18997, + 0x64D15587, 0x59B17C37, 0x1E1106E7, 0x23712F57, + 0x58F35849, 0x659371F9, 0x22330B29, 0x1F532299, + 0xAD73FE89, 0x9013D739, 0xD7B3ADE9, 0xEAD38459, + 0x68831388, 0x55E33A38, 0x124340E8, 0x2F236958, + 0x9D03B548, 0xA0639CF8, 0xE7C3E628, 0xDAA3CF98, + 0x3813CFCB, 
0x0573E67B, 0x42D39CAB, 0x7FB3B51B, + 0xCD93690B, 0xF0F340BB, 0xB7533A6B, 0x8A3313DB, + 0x0863840A, 0x3503ADBA, 0x72A3D76A, 0x4FC3FEDA, + 0xFDE322CA, 0xC0830B7A, 0x872371AA, 0xBA43581A, + 0x9932774D, 0xA4525EFD, 0xE3F2242D, 0xDE920D9D, + 0x6CB2D18D, 0x51D2F83D, 0x167282ED, 0x2B12AB5D, + 0xA9423C8C, 0x9422153C, 0xD3826FEC, 0xEEE2465C, + 0x5CC29A4C, 0x61A2B3FC, 0x2602C92C, 0x1B62E09C, + 0xF9D2E0CF, 0xC4B2C97F, 0x8312B3AF, 0xBE729A1F, + 0x0C52460F, 0x31326FBF, 0x7692156F, 0x4BF23CDF, + 0xC9A2AB0E, 0xF4C282BE, 0xB362F86E, 0x8E02D1DE, + 0x3C220DCE, 0x0142247E, 0x46E25EAE, 0x7B82771E, + 0xB1E6B092, 0x8C869922, 0xCB26E3F2, 0xF646CA42, + 0x44661652, 0x79063FE2, 0x3EA64532, 0x03C66C82, + 0x8196FB53, 0xBCF6D2E3, 0xFB56A833, 0xC6368183, + 0x74165D93, 0x49767423, 0x0ED60EF3, 0x33B62743, + 0xD1062710, 0xEC660EA0, 0xABC67470, 0x96A65DC0, + 0x248681D0, 0x19E6A860, 0x5E46D2B0, 0x6326FB00, + 0xE1766CD1, 0xDC164561, 0x9BB63FB1, 0xA6D61601, + 0x14F6CA11, 0x2996E3A1, 0x6E369971, 0x5356B0C1, + 0x70279F96, 0x4D47B626, 0x0AE7CCF6, 0x3787E546, + 0x85A73956, 0xB8C710E6, 0xFF676A36, 0xC2074386, + 0x4057D457, 0x7D37FDE7, 0x3A978737, 0x07F7AE87, + 0xB5D77297, 0x88B75B27, 0xCF1721F7, 0xF2770847, + 0x10C70814, 0x2DA721A4, 0x6A075B74, 0x576772C4, + 0xE547AED4, 0xD8278764, 0x9F87FDB4, 0xA2E7D404, + 0x20B743D5, 0x1DD76A65, 0x5A7710B5, 0x67173905, + 0xD537E515, 0xE857CCA5, 0xAFF7B675, 0x92979FC5, + 0xE915E8DB, 0xD475C16B, 0x93D5BBBB, 0xAEB5920B, + 0x1C954E1B, 0x21F567AB, 0x66551D7B, 0x5B3534CB, + 0xD965A31A, 0xE4058AAA, 0xA3A5F07A, 0x9EC5D9CA, + 0x2CE505DA, 0x11852C6A, 0x562556BA, 0x6B457F0A, + 0x89F57F59, 0xB49556E9, 0xF3352C39, 0xCE550589, + 0x7C75D999, 0x4115F029, 0x06B58AF9, 0x3BD5A349, + 0xB9853498, 0x84E51D28, 0xC34567F8, 0xFE254E48, + 0x4C059258, 0x7165BBE8, 0x36C5C138, 0x0BA5E888, + 0x28D4C7DF, 0x15B4EE6F, 0x521494BF, 0x6F74BD0F, + 0xDD54611F, 0xE03448AF, 0xA794327F, 0x9AF41BCF, + 0x18A48C1E, 0x25C4A5AE, 0x6264DF7E, 0x5F04F6CE, + 0xED242ADE, 0xD044036E, 0x97E479BE, 0xAA84500E, + 0x4834505D, 
0x755479ED, 0x32F4033D, 0x0F942A8D, + 0xBDB4F69D, 0x80D4DF2D, 0xC774A5FD, 0xFA148C4D, + 0x78441B9C, 0x4524322C, 0x028448FC, 0x3FE4614C, + 0x8DC4BD5C, 0xB0A494EC, 0xF704EE3C, 0xCA64C78C + }, { + 0x00000000, 0xCB5CD3A5, 0x4DC8A10B, 0x869472AE, + 0x9B914216, 0x50CD91B3, 0xD659E31D, 0x1D0530B8, + 0xEC53826D, 0x270F51C8, 0xA19B2366, 0x6AC7F0C3, + 0x77C2C07B, 0xBC9E13DE, 0x3A0A6170, 0xF156B2D5, + 0x03D6029B, 0xC88AD13E, 0x4E1EA390, 0x85427035, + 0x9847408D, 0x531B9328, 0xD58FE186, 0x1ED33223, + 0xEF8580F6, 0x24D95353, 0xA24D21FD, 0x6911F258, + 0x7414C2E0, 0xBF481145, 0x39DC63EB, 0xF280B04E, + 0x07AC0536, 0xCCF0D693, 0x4A64A43D, 0x81387798, + 0x9C3D4720, 0x57619485, 0xD1F5E62B, 0x1AA9358E, + 0xEBFF875B, 0x20A354FE, 0xA6372650, 0x6D6BF5F5, + 0x706EC54D, 0xBB3216E8, 0x3DA66446, 0xF6FAB7E3, + 0x047A07AD, 0xCF26D408, 0x49B2A6A6, 0x82EE7503, + 0x9FEB45BB, 0x54B7961E, 0xD223E4B0, 0x197F3715, + 0xE82985C0, 0x23755665, 0xA5E124CB, 0x6EBDF76E, + 0x73B8C7D6, 0xB8E41473, 0x3E7066DD, 0xF52CB578, + 0x0F580A6C, 0xC404D9C9, 0x4290AB67, 0x89CC78C2, + 0x94C9487A, 0x5F959BDF, 0xD901E971, 0x125D3AD4, + 0xE30B8801, 0x28575BA4, 0xAEC3290A, 0x659FFAAF, + 0x789ACA17, 0xB3C619B2, 0x35526B1C, 0xFE0EB8B9, + 0x0C8E08F7, 0xC7D2DB52, 0x4146A9FC, 0x8A1A7A59, + 0x971F4AE1, 0x5C439944, 0xDAD7EBEA, 0x118B384F, + 0xE0DD8A9A, 0x2B81593F, 0xAD152B91, 0x6649F834, + 0x7B4CC88C, 0xB0101B29, 0x36846987, 0xFDD8BA22, + 0x08F40F5A, 0xC3A8DCFF, 0x453CAE51, 0x8E607DF4, + 0x93654D4C, 0x58399EE9, 0xDEADEC47, 0x15F13FE2, + 0xE4A78D37, 0x2FFB5E92, 0xA96F2C3C, 0x6233FF99, + 0x7F36CF21, 0xB46A1C84, 0x32FE6E2A, 0xF9A2BD8F, + 0x0B220DC1, 0xC07EDE64, 0x46EAACCA, 0x8DB67F6F, + 0x90B34FD7, 0x5BEF9C72, 0xDD7BEEDC, 0x16273D79, + 0xE7718FAC, 0x2C2D5C09, 0xAAB92EA7, 0x61E5FD02, + 0x7CE0CDBA, 0xB7BC1E1F, 0x31286CB1, 0xFA74BF14, + 0x1EB014D8, 0xD5ECC77D, 0x5378B5D3, 0x98246676, + 0x852156CE, 0x4E7D856B, 0xC8E9F7C5, 0x03B52460, + 0xF2E396B5, 0x39BF4510, 0xBF2B37BE, 0x7477E41B, + 0x6972D4A3, 0xA22E0706, 0x24BA75A8, 0xEFE6A60D, + 
0x1D661643, 0xD63AC5E6, 0x50AEB748, 0x9BF264ED, + 0x86F75455, 0x4DAB87F0, 0xCB3FF55E, 0x006326FB, + 0xF135942E, 0x3A69478B, 0xBCFD3525, 0x77A1E680, + 0x6AA4D638, 0xA1F8059D, 0x276C7733, 0xEC30A496, + 0x191C11EE, 0xD240C24B, 0x54D4B0E5, 0x9F886340, + 0x828D53F8, 0x49D1805D, 0xCF45F2F3, 0x04192156, + 0xF54F9383, 0x3E134026, 0xB8873288, 0x73DBE12D, + 0x6EDED195, 0xA5820230, 0x2316709E, 0xE84AA33B, + 0x1ACA1375, 0xD196C0D0, 0x5702B27E, 0x9C5E61DB, + 0x815B5163, 0x4A0782C6, 0xCC93F068, 0x07CF23CD, + 0xF6999118, 0x3DC542BD, 0xBB513013, 0x700DE3B6, + 0x6D08D30E, 0xA65400AB, 0x20C07205, 0xEB9CA1A0, + 0x11E81EB4, 0xDAB4CD11, 0x5C20BFBF, 0x977C6C1A, + 0x8A795CA2, 0x41258F07, 0xC7B1FDA9, 0x0CED2E0C, + 0xFDBB9CD9, 0x36E74F7C, 0xB0733DD2, 0x7B2FEE77, + 0x662ADECF, 0xAD760D6A, 0x2BE27FC4, 0xE0BEAC61, + 0x123E1C2F, 0xD962CF8A, 0x5FF6BD24, 0x94AA6E81, + 0x89AF5E39, 0x42F38D9C, 0xC467FF32, 0x0F3B2C97, + 0xFE6D9E42, 0x35314DE7, 0xB3A53F49, 0x78F9ECEC, + 0x65FCDC54, 0xAEA00FF1, 0x28347D5F, 0xE368AEFA, + 0x16441B82, 0xDD18C827, 0x5B8CBA89, 0x90D0692C, + 0x8DD55994, 0x46898A31, 0xC01DF89F, 0x0B412B3A, + 0xFA1799EF, 0x314B4A4A, 0xB7DF38E4, 0x7C83EB41, + 0x6186DBF9, 0xAADA085C, 0x2C4E7AF2, 0xE712A957, + 0x15921919, 0xDECECABC, 0x585AB812, 0x93066BB7, + 0x8E035B0F, 0x455F88AA, 0xC3CBFA04, 0x089729A1, + 0xF9C19B74, 0x329D48D1, 0xB4093A7F, 0x7F55E9DA, + 0x6250D962, 0xA90C0AC7, 0x2F987869, 0xE4C4ABCC + }, { + 0x00000000, 0xA6770BB4, 0x979F1129, 0x31E81A9D, + 0xF44F2413, 0x52382FA7, 0x63D0353A, 0xC5A73E8E, + 0x33EF4E67, 0x959845D3, 0xA4705F4E, 0x020754FA, + 0xC7A06A74, 0x61D761C0, 0x503F7B5D, 0xF64870E9, + 0x67DE9CCE, 0xC1A9977A, 0xF0418DE7, 0x56368653, + 0x9391B8DD, 0x35E6B369, 0x040EA9F4, 0xA279A240, + 0x5431D2A9, 0xF246D91D, 0xC3AEC380, 0x65D9C834, + 0xA07EF6BA, 0x0609FD0E, 0x37E1E793, 0x9196EC27, + 0xCFBD399C, 0x69CA3228, 0x582228B5, 0xFE552301, + 0x3BF21D8F, 0x9D85163B, 0xAC6D0CA6, 0x0A1A0712, + 0xFC5277FB, 0x5A257C4F, 0x6BCD66D2, 0xCDBA6D66, + 0x081D53E8, 0xAE6A585C, 0x9F8242C1, 
0x39F54975, + 0xA863A552, 0x0E14AEE6, 0x3FFCB47B, 0x998BBFCF, + 0x5C2C8141, 0xFA5B8AF5, 0xCBB39068, 0x6DC49BDC, + 0x9B8CEB35, 0x3DFBE081, 0x0C13FA1C, 0xAA64F1A8, + 0x6FC3CF26, 0xC9B4C492, 0xF85CDE0F, 0x5E2BD5BB, + 0x440B7579, 0xE27C7ECD, 0xD3946450, 0x75E36FE4, + 0xB044516A, 0x16335ADE, 0x27DB4043, 0x81AC4BF7, + 0x77E43B1E, 0xD19330AA, 0xE07B2A37, 0x460C2183, + 0x83AB1F0D, 0x25DC14B9, 0x14340E24, 0xB2430590, + 0x23D5E9B7, 0x85A2E203, 0xB44AF89E, 0x123DF32A, + 0xD79ACDA4, 0x71EDC610, 0x4005DC8D, 0xE672D739, + 0x103AA7D0, 0xB64DAC64, 0x87A5B6F9, 0x21D2BD4D, + 0xE47583C3, 0x42028877, 0x73EA92EA, 0xD59D995E, + 0x8BB64CE5, 0x2DC14751, 0x1C295DCC, 0xBA5E5678, + 0x7FF968F6, 0xD98E6342, 0xE86679DF, 0x4E11726B, + 0xB8590282, 0x1E2E0936, 0x2FC613AB, 0x89B1181F, + 0x4C162691, 0xEA612D25, 0xDB8937B8, 0x7DFE3C0C, + 0xEC68D02B, 0x4A1FDB9F, 0x7BF7C102, 0xDD80CAB6, + 0x1827F438, 0xBE50FF8C, 0x8FB8E511, 0x29CFEEA5, + 0xDF879E4C, 0x79F095F8, 0x48188F65, 0xEE6F84D1, + 0x2BC8BA5F, 0x8DBFB1EB, 0xBC57AB76, 0x1A20A0C2, + 0x8816EAF2, 0x2E61E146, 0x1F89FBDB, 0xB9FEF06F, + 0x7C59CEE1, 0xDA2EC555, 0xEBC6DFC8, 0x4DB1D47C, + 0xBBF9A495, 0x1D8EAF21, 0x2C66B5BC, 0x8A11BE08, + 0x4FB68086, 0xE9C18B32, 0xD82991AF, 0x7E5E9A1B, + 0xEFC8763C, 0x49BF7D88, 0x78576715, 0xDE206CA1, + 0x1B87522F, 0xBDF0599B, 0x8C184306, 0x2A6F48B2, + 0xDC27385B, 0x7A5033EF, 0x4BB82972, 0xEDCF22C6, + 0x28681C48, 0x8E1F17FC, 0xBFF70D61, 0x198006D5, + 0x47ABD36E, 0xE1DCD8DA, 0xD034C247, 0x7643C9F3, + 0xB3E4F77D, 0x1593FCC9, 0x247BE654, 0x820CEDE0, + 0x74449D09, 0xD23396BD, 0xE3DB8C20, 0x45AC8794, + 0x800BB91A, 0x267CB2AE, 0x1794A833, 0xB1E3A387, + 0x20754FA0, 0x86024414, 0xB7EA5E89, 0x119D553D, + 0xD43A6BB3, 0x724D6007, 0x43A57A9A, 0xE5D2712E, + 0x139A01C7, 0xB5ED0A73, 0x840510EE, 0x22721B5A, + 0xE7D525D4, 0x41A22E60, 0x704A34FD, 0xD63D3F49, + 0xCC1D9F8B, 0x6A6A943F, 0x5B828EA2, 0xFDF58516, + 0x3852BB98, 0x9E25B02C, 0xAFCDAAB1, 0x09BAA105, + 0xFFF2D1EC, 0x5985DA58, 0x686DC0C5, 0xCE1ACB71, + 0x0BBDF5FF, 0xADCAFE4B, 0x9C22E4D6, 
0x3A55EF62, + 0xABC30345, 0x0DB408F1, 0x3C5C126C, 0x9A2B19D8, + 0x5F8C2756, 0xF9FB2CE2, 0xC813367F, 0x6E643DCB, + 0x982C4D22, 0x3E5B4696, 0x0FB35C0B, 0xA9C457BF, + 0x6C636931, 0xCA146285, 0xFBFC7818, 0x5D8B73AC, + 0x03A0A617, 0xA5D7ADA3, 0x943FB73E, 0x3248BC8A, + 0xF7EF8204, 0x519889B0, 0x6070932D, 0xC6079899, + 0x304FE870, 0x9638E3C4, 0xA7D0F959, 0x01A7F2ED, + 0xC400CC63, 0x6277C7D7, 0x539FDD4A, 0xF5E8D6FE, + 0x647E3AD9, 0xC209316D, 0xF3E12BF0, 0x55962044, + 0x90311ECA, 0x3646157E, 0x07AE0FE3, 0xA1D90457, + 0x579174BE, 0xF1E67F0A, 0xC00E6597, 0x66796E23, + 0xA3DE50AD, 0x05A95B19, 0x34414184, 0x92364A30 + }, { + 0x00000000, 0xCCAA009E, 0x4225077D, 0x8E8F07E3, + 0x844A0EFA, 0x48E00E64, 0xC66F0987, 0x0AC50919, + 0xD3E51BB5, 0x1F4F1B2B, 0x91C01CC8, 0x5D6A1C56, + 0x57AF154F, 0x9B0515D1, 0x158A1232, 0xD92012AC, + 0x7CBB312B, 0xB01131B5, 0x3E9E3656, 0xF23436C8, + 0xF8F13FD1, 0x345B3F4F, 0xBAD438AC, 0x767E3832, + 0xAF5E2A9E, 0x63F42A00, 0xED7B2DE3, 0x21D12D7D, + 0x2B142464, 0xE7BE24FA, 0x69312319, 0xA59B2387, + 0xF9766256, 0x35DC62C8, 0xBB53652B, 0x77F965B5, + 0x7D3C6CAC, 0xB1966C32, 0x3F196BD1, 0xF3B36B4F, + 0x2A9379E3, 0xE639797D, 0x68B67E9E, 0xA41C7E00, + 0xAED97719, 0x62737787, 0xECFC7064, 0x205670FA, + 0x85CD537D, 0x496753E3, 0xC7E85400, 0x0B42549E, + 0x01875D87, 0xCD2D5D19, 0x43A25AFA, 0x8F085A64, + 0x562848C8, 0x9A824856, 0x140D4FB5, 0xD8A74F2B, + 0xD2624632, 0x1EC846AC, 0x9047414F, 0x5CED41D1, + 0x299DC2ED, 0xE537C273, 0x6BB8C590, 0xA712C50E, + 0xADD7CC17, 0x617DCC89, 0xEFF2CB6A, 0x2358CBF4, + 0xFA78D958, 0x36D2D9C6, 0xB85DDE25, 0x74F7DEBB, + 0x7E32D7A2, 0xB298D73C, 0x3C17D0DF, 0xF0BDD041, + 0x5526F3C6, 0x998CF358, 0x1703F4BB, 0xDBA9F425, + 0xD16CFD3C, 0x1DC6FDA2, 0x9349FA41, 0x5FE3FADF, + 0x86C3E873, 0x4A69E8ED, 0xC4E6EF0E, 0x084CEF90, + 0x0289E689, 0xCE23E617, 0x40ACE1F4, 0x8C06E16A, + 0xD0EBA0BB, 0x1C41A025, 0x92CEA7C6, 0x5E64A758, + 0x54A1AE41, 0x980BAEDF, 0x1684A93C, 0xDA2EA9A2, + 0x030EBB0E, 0xCFA4BB90, 0x412BBC73, 0x8D81BCED, + 0x8744B5F4, 0x4BEEB56A, 
0xC561B289, 0x09CBB217, + 0xAC509190, 0x60FA910E, 0xEE7596ED, 0x22DF9673, + 0x281A9F6A, 0xE4B09FF4, 0x6A3F9817, 0xA6959889, + 0x7FB58A25, 0xB31F8ABB, 0x3D908D58, 0xF13A8DC6, + 0xFBFF84DF, 0x37558441, 0xB9DA83A2, 0x7570833C, + 0x533B85DA, 0x9F918544, 0x111E82A7, 0xDDB48239, + 0xD7718B20, 0x1BDB8BBE, 0x95548C5D, 0x59FE8CC3, + 0x80DE9E6F, 0x4C749EF1, 0xC2FB9912, 0x0E51998C, + 0x04949095, 0xC83E900B, 0x46B197E8, 0x8A1B9776, + 0x2F80B4F1, 0xE32AB46F, 0x6DA5B38C, 0xA10FB312, + 0xABCABA0B, 0x6760BA95, 0xE9EFBD76, 0x2545BDE8, + 0xFC65AF44, 0x30CFAFDA, 0xBE40A839, 0x72EAA8A7, + 0x782FA1BE, 0xB485A120, 0x3A0AA6C3, 0xF6A0A65D, + 0xAA4DE78C, 0x66E7E712, 0xE868E0F1, 0x24C2E06F, + 0x2E07E976, 0xE2ADE9E8, 0x6C22EE0B, 0xA088EE95, + 0x79A8FC39, 0xB502FCA7, 0x3B8DFB44, 0xF727FBDA, + 0xFDE2F2C3, 0x3148F25D, 0xBFC7F5BE, 0x736DF520, + 0xD6F6D6A7, 0x1A5CD639, 0x94D3D1DA, 0x5879D144, + 0x52BCD85D, 0x9E16D8C3, 0x1099DF20, 0xDC33DFBE, + 0x0513CD12, 0xC9B9CD8C, 0x4736CA6F, 0x8B9CCAF1, + 0x8159C3E8, 0x4DF3C376, 0xC37CC495, 0x0FD6C40B, + 0x7AA64737, 0xB60C47A9, 0x3883404A, 0xF42940D4, + 0xFEEC49CD, 0x32464953, 0xBCC94EB0, 0x70634E2E, + 0xA9435C82, 0x65E95C1C, 0xEB665BFF, 0x27CC5B61, + 0x2D095278, 0xE1A352E6, 0x6F2C5505, 0xA386559B, + 0x061D761C, 0xCAB77682, 0x44387161, 0x889271FF, + 0x825778E6, 0x4EFD7878, 0xC0727F9B, 0x0CD87F05, + 0xD5F86DA9, 0x19526D37, 0x97DD6AD4, 0x5B776A4A, + 0x51B26353, 0x9D1863CD, 0x1397642E, 0xDF3D64B0, + 0x83D02561, 0x4F7A25FF, 0xC1F5221C, 0x0D5F2282, + 0x079A2B9B, 0xCB302B05, 0x45BF2CE6, 0x89152C78, + 0x50353ED4, 0x9C9F3E4A, 0x121039A9, 0xDEBA3937, + 0xD47F302E, 0x18D530B0, 0x965A3753, 0x5AF037CD, + 0xFF6B144A, 0x33C114D4, 0xBD4E1337, 0x71E413A9, + 0x7B211AB0, 0xB78B1A2E, 0x39041DCD, 0xF5AE1D53, + 0x2C8E0FFF, 0xE0240F61, 0x6EAB0882, 0xA201081C, + 0xA8C40105, 0x646E019B, 0xEAE10678, 0x264B06E6 + } +}; diff --git a/src/liblzma/check/crc32_tablegen.c b/src/liblzma/check/crc32_tablegen.c new file mode 100644 index 00000000..280d3b12 --- /dev/null +++ 
b/src/liblzma/check/crc32_tablegen.c
@@ -0,0 +1,57 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file crc32_tablegen.c
+/// \brief Generates CRC32 crc32_table.c
+///
+/// Compiling: gcc -std=c99 -o crc32_tablegen crc32_tablegen.c crc32_init.c
+/// Add -DWORDS_BIGENDIAN to generate big endian table.
+//
+// This code has been put into the public domain.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include <sys/types.h>
+#include <inttypes.h>
+#include <stdio.h>
+
+
+extern void lzma_crc32_init(void);
+
+extern uint32_t lzma_crc32_table[8][256];
+
+
+// Calls lzma_crc32_init() and then writes lzma_crc32_table to standard
+// output as C source code, four table entries per output line.
+int
+main(void)
+{
+	lzma_crc32_init();
+
+	printf("/* This file has been automatically generated by "
+			"crc32_tablegen.c. */\n\n"
+			"#include <inttypes.h>\n\n"
+			"const uint32_t lzma_crc32_table[8][256] = {\n\t{");
+
+	for (size_t s = 0; s < 8; ++s) {
+		for (size_t b = 0; b < 256; ++b) {
+			if ((b % 4) == 0)
+				printf("\n\t\t");
+
+			printf("0x%08" PRIX32, lzma_crc32_table[s][b]);
+
+			if (b != 255)
+				printf(", ");
+		}
+
+		if (s == 7)
+			printf("\n\t}\n};\n");
+		else
+			printf("\n\t}, {");
+	}
+
+	return 0;
+}
diff --git a/src/liblzma/check/crc32_x86.s b/src/liblzma/check/crc32_x86.s
new file mode 100644
index 00000000..ad4ef097
--- /dev/null
+++ b/src/liblzma/check/crc32_x86.s
@@ -0,0 +1,217 @@
+/*
+ * Speed-optimized CRC32 using slicing-by-eight algorithm
+ * Instruction set: i386
+ * Optimized for: i686
+ *
+ * This code has been put into the public domain by its authors:
+ * Original code by Igor Pavlov <http://7-zip.org/>
+ * Position-independent version by Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This code needs lzma_crc32_table, which can be created using the
+ * following C code:
+
+uint32_t lzma_crc32_table[8][256];
+
+void
+init_table(void)
+{
+	// IEEE-802.3 (CRC32)
+	static const uint32_t poly32 = UINT32_C(0xEDB88320);
+
+	// Castagnoli (CRC32C)
+	// static const uint32_t poly32 = UINT32_C(0x82F63B78);
+
+	// Koopman
+	// static const uint32_t poly32 = UINT32_C(0xEB31D82E);
+
+	for (size_t s = 0; s < 8; ++s) {
+		for (size_t b = 0; b < 256; ++b) {
+			uint32_t r = s == 0 ? b : lzma_crc32_table[s - 1][b];
+
+			for (size_t i = 0; i < 8; ++i) {
+				if (r & 1)
+					r = (r >> 1) ^ poly32;
+				else
+					r >>= 1;
+			}
+
+			lzma_crc32_table[s][b] = r;
+		}
+	}
+}
+
+ * The prototype of the CRC32 function:
+ * extern uint32_t lzma_crc32(const uint8_t *buf, size_t size, uint32_t crc);
+ */
+
+	.text
+	.global lzma_crc32
+	.type lzma_crc32, @function
+
+	.align 16
+lzma_crc32:
+	/*
+	 * Register usage:
+	 * %eax crc
+	 * %esi buf
+	 * %edi size or buf + size
+	 * %ebx lzma_crc32_table
+	 * %ebp Table index
+	 * %ecx Temporary
+	 * %edx Temporary
+	 */
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	pushl	%ebp
+	movl	0x14(%esp), %esi /* buf */
+	movl	0x18(%esp), %edi /* size */
+	movl	0x1C(%esp), %eax /* crc */
+
+	/*
+	 * Store the address of lzma_crc32_table to %ebx. This is needed to
+	 * get position-independent code (PIC).
+	 */
+	call	.L_PIC
+.L_PIC:
+	popl	%ebx
+	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L_PIC], %ebx
+	movl	lzma_crc32_table@GOT(%ebx), %ebx
+
+	/* Complement the initial value. */
+	notl	%eax
+
+	.align 16
+.L_align:
+	/*
+	 * Check if there is enough input to use slicing-by-eight.
+	 * We need 16 bytes, because the loop pre-reads eight bytes.
+	 */
+	cmpl	$16, %edi
+	jl	.L_rest
+
+	/* Check if we have reached alignment of eight bytes. */
+	testl	$7, %esi
+	jz	.L_slice
+
+	/* Calculate CRC of the next input byte. */
+	movzbl	(%esi), %ebp
+	incl	%esi
+	movzbl	%al, %ecx
+	xorl	%ecx, %ebp
+	shrl	$8, %eax
+	xorl	(%ebx, %ebp, 4), %eax
+	decl	%edi
+	jmp	.L_align
+
+	.align 4
+.L_slice:
+	/*
+	 * If we get here, there are at least 16 bytes of aligned input
+	 * available. Make %edi a multiple of eight bytes. Store the possible
+	 * remainder over the "size" variable in the argument stack.
+	 */
+	movl	%edi, 0x18(%esp)
+	andl	$-8, %edi
+	subl	%edi, 0x18(%esp)
+
+	/*
+	 * Let %edi be buf + size - 8 while running the main loop. This way
+	 * we can compare for equality to determine when to exit the loop.
+	 */
+	addl	%esi, %edi
+	subl	$8, %edi
+
+	/* Read in the first eight aligned bytes. */
+	xorl	(%esi), %eax
+	movl	4(%esi), %ecx
+	movzbl	%cl, %ebp
+
+.L_loop:
+	movl	0x0C00(%ebx, %ebp, 4), %edx
+	movzbl	%ch, %ebp
+	xorl	0x0800(%ebx, %ebp, 4), %edx
+	shrl	$16, %ecx
+	xorl	8(%esi), %edx
+	movzbl	%cl, %ebp
+	xorl	0x0400(%ebx, %ebp, 4), %edx
+	movzbl	%ch, %ebp
+	xorl	(%ebx, %ebp, 4), %edx
+	movzbl	%al, %ebp
+
+	/*
+	 * Read the next four bytes, for which the CRC is calculated
+	 * on the next iteration of the loop.
+	 */
+	movl	12(%esi), %ecx
+
+	xorl	0x1C00(%ebx, %ebp, 4), %edx
+	movzbl	%ah, %ebp
+	shrl	$16, %eax
+	xorl	0x1800(%ebx, %ebp, 4), %edx
+	movzbl	%ah, %ebp
+	movzbl	%al, %eax
+	movl	0x1400(%ebx, %eax, 4), %eax
+	addl	$8, %esi
+	xorl	%edx, %eax
+	xorl	0x1000(%ebx, %ebp, 4), %eax
+
+	/* Check for end of aligned input. */
+	cmpl	%edi, %esi
+	movzbl	%cl, %ebp
+	jne	.L_loop
+
+	/*
+	 * Process the remaining eight bytes, which the loop has already
+	 * read into %ecx and XORed (via %edx) into %eax.
+	 */
+	movl	0x0C00(%ebx, %ebp, 4), %edx
+	movzbl	%ch, %ebp
+	xorl	0x0800(%ebx, %ebp, 4), %edx
+	shrl	$16, %ecx
+	movzbl	%cl, %ebp
+	xorl	0x0400(%ebx, %ebp, 4), %edx
+	movzbl	%ch, %ebp
+	xorl	(%ebx, %ebp, 4), %edx
+	movzbl	%al, %ebp
+
+	xorl	0x1C00(%ebx, %ebp, 4), %edx
+	movzbl	%ah, %ebp
+	shrl	$16, %eax
+	xorl	0x1800(%ebx, %ebp, 4), %edx
+	movzbl	%ah, %ebp
+	movzbl	%al, %eax
+	movl	0x1400(%ebx, %eax, 4), %eax
+	addl	$8, %esi
+	xorl	%edx, %eax
+	xorl	0x1000(%ebx, %ebp, 4), %eax
+
+	/* Copy the number of remaining bytes to %edi. */
+	movl	0x18(%esp), %edi
+
+.L_rest:
+	/* Check for end of input. */
+	testl	%edi, %edi
+	jz	.L_return
+
+	/* Calculate CRC of the next input byte.
*/
+	movzbl	(%esi), %ebp
+	incl	%esi
+	movzbl	%al, %ecx
+	xorl	%ecx, %ebp
+	shrl	$8, %eax
+	xorl	(%ebx, %ebp, 4), %eax
+	decl	%edi
+	jmp	.L_rest
+
+.L_return:
+	/* Complement the final value. */
+	notl	%eax
+
+	popl	%ebp
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	ret
+
+	.size lzma_crc32, .-lzma_crc32
diff --git a/src/liblzma/check/crc64.c b/src/liblzma/check/crc64.c
new file mode 100644
index 00000000..e31bc7ff
--- /dev/null
+++ b/src/liblzma/check/crc64.c
@@ -0,0 +1,78 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file crc64.c
+/// \brief CRC64 calculation
+//
+// This code has been put into the public domain.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "check.h"
+#include "crc_macros.h"
+
+
+#ifdef WORDS_BIGENDIAN
+# define A1(x) ((x) >> 56)
+#else
+# define A1 A
+#endif
+
+
+// Updates the CRC64 value crc with size bytes starting at buf and returns
+// the new value. crc is complemented on entry and again before returning.
+//
+// See comments in crc32.c.
+extern uint64_t
+lzma_crc64(const uint8_t *buf, size_t size, uint64_t crc)
+{
+	crc = ~crc;
+
+#ifdef WORDS_BIGENDIAN
+	crc = bswap_64(crc);
+#endif
+
+	if (size > 4) {
+		while ((uintptr_t)(buf) & 3) {
+			crc = lzma_crc64_table[0][*buf++ ^ A1(crc)] ^ S8(crc);
+			--size;
+		}
+
+		const uint8_t *const limit = buf + (size & ~(size_t)(3));
+		size &= (size_t)(3);
+
+		// Calculate the CRC64 using the slice-by-four algorithm.
+		//
+		// In contrast to CRC32 code, this one seems to be fastest
+		// with -O3 -fomit-frame-pointer.
+		while (buf < limit) {
+#ifdef WORDS_BIGENDIAN
+			const uint32_t tmp = (crc >> 32) ^ *(const uint32_t *)(buf);
+#else
+			const uint32_t tmp = crc ^ *(const uint32_t *)(buf);
+#endif
+			buf += 4;
+
+			// It is critical for performance, that
+			// the crc variable is XORed between the
+			// two table-lookup pairs.
+			crc = lzma_crc64_table[3][A(tmp)]
+					^ lzma_crc64_table[2][B(tmp)]
+					^ S32(crc)
+					^ lzma_crc64_table[1][C(tmp)]
+					^ lzma_crc64_table[0][D(tmp)];
+		}
+	}
+
+	while (size-- != 0)
+		crc = lzma_crc64_table[0][*buf++ ^ A1(crc)] ^ S8(crc);
+
+#ifdef WORDS_BIGENDIAN
+	crc = bswap_64(crc);
+#endif
+
+	return ~crc;
+}
diff --git a/src/liblzma/check/crc64_init.c b/src/liblzma/check/crc64_init.c
new file mode 100644
index 00000000..49c59e93
--- /dev/null
+++ b/src/liblzma/check/crc64_init.c
@@ -0,0 +1,62 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file crc64_init.c
+/// \brief CRC64 table initialization
+//
+// This code is based on various public domain sources.
+// This code has been put into the public domain.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <sys/types.h>
+#include <inttypes.h>
+
+#ifdef WORDS_BIGENDIAN
+# include "check_byteswap.h"
+#endif
+
+
+uint64_t lzma_crc64_table[4][256];
+
+
+// Fills lzma_crc64_table. Table 0 holds, for each byte value, the result
+// of eight shift/XOR steps with poly64; every later table applies eight
+// more steps to the matching entry of the table before it. When
+// WORDS_BIGENDIAN is defined, each entry is finally passed to bswap_64().
+extern void
+lzma_crc64_init(void)
+{
+	static const uint64_t poly64 = UINT64_C(0xC96C5795D7870F42);
+
+	for (size_t s = 0; s < 4; ++s) {
+		for (size_t b = 0; b < 256; ++b) {
+			uint64_t r = s == 0 ? b : lzma_crc64_table[s - 1][b];
+
+			for (size_t i = 0; i < 8; ++i) {
+				if (r & 1)
+					r = (r >> 1) ^ poly64;
+				else
+					r >>= 1;
+			}
+
+			lzma_crc64_table[s][b] = r;
+		}
+	}
+
+#ifdef WORDS_BIGENDIAN
+	for (size_t s = 0; s < 4; ++s)
+		for (size_t b = 0; b < 256; ++b)
+			lzma_crc64_table[s][b]
+					= bswap_64(lzma_crc64_table[s][b]);
+#endif
+
+	return;
+}
diff --git a/src/liblzma/check/crc64_table.c b/src/liblzma/check/crc64_table.c
new file mode 100644
index 00000000..0f2d1fb1
--- /dev/null
+++ b/src/liblzma/check/crc64_table.c
@@ -0,0 +1,22 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file crc64_table.c
+/// \brief Precalculated CRC64 table with correct endianness
+//
+// This code has been put into the public domain.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#ifdef WORDS_BIGENDIAN
+# include "crc64_table_be.h"
+#else
+# include "crc64_table_le.h"
+#endif
diff --git a/src/liblzma/check/crc64_table_be.h b/src/liblzma/check/crc64_table_be.h
new file mode 100644
index 00000000..99518400
--- /dev/null
+++ b/src/liblzma/check/crc64_table_be.h
@@ -0,0 +1,523 @@
+/* This file has been automatically generated by crc64_tablegen.c.
*/ + +#include <inttypes.h> + +const uint64_t lzma_crc64_table[4][256] = { + { + UINT64_C(0x0000000000000000), UINT64_C(0x6F5FA703BE4C2EB3), + UINT64_C(0x5BA040A8573684F4), UINT64_C(0x34FFE7ABE97AAA47), + UINT64_C(0x335E8FFF84C3D07B), UINT64_C(0x5C0128FC3A8FFEC8), + UINT64_C(0x68FECF57D3F5548F), UINT64_C(0x07A168546DB97A3C), + UINT64_C(0x66BC1EFF0987A1F7), UINT64_C(0x09E3B9FCB7CB8F44), + UINT64_C(0x3D1C5E575EB12503), UINT64_C(0x5243F954E0FD0BB0), + UINT64_C(0x55E291008D44718C), UINT64_C(0x3ABD360333085F3F), + UINT64_C(0x0E42D1A8DA72F578), UINT64_C(0x611D76AB643EDBCB), + UINT64_C(0x4966335138A19B7D), UINT64_C(0x2639945286EDB5CE), + UINT64_C(0x12C673F96F971F89), UINT64_C(0x7D99D4FAD1DB313A), + UINT64_C(0x7A38BCAEBC624B06), UINT64_C(0x15671BAD022E65B5), + UINT64_C(0x2198FC06EB54CFF2), UINT64_C(0x4EC75B055518E141), + UINT64_C(0x2FDA2DAE31263A8A), UINT64_C(0x40858AAD8F6A1439), + UINT64_C(0x747A6D066610BE7E), UINT64_C(0x1B25CA05D85C90CD), + UINT64_C(0x1C84A251B5E5EAF1), UINT64_C(0x73DB05520BA9C442), + UINT64_C(0x4724E2F9E2D36E05), UINT64_C(0x287B45FA5C9F40B6), + UINT64_C(0x92CC66A2704237FB), UINT64_C(0xFD93C1A1CE0E1948), + UINT64_C(0xC96C260A2774B30F), UINT64_C(0xA633810999389DBC), + UINT64_C(0xA192E95DF481E780), UINT64_C(0xCECD4E5E4ACDC933), + UINT64_C(0xFA32A9F5A3B76374), UINT64_C(0x956D0EF61DFB4DC7), + UINT64_C(0xF470785D79C5960C), UINT64_C(0x9B2FDF5EC789B8BF), + UINT64_C(0xAFD038F52EF312F8), UINT64_C(0xC08F9FF690BF3C4B), + UINT64_C(0xC72EF7A2FD064677), UINT64_C(0xA87150A1434A68C4), + UINT64_C(0x9C8EB70AAA30C283), UINT64_C(0xF3D11009147CEC30), + UINT64_C(0xDBAA55F348E3AC86), UINT64_C(0xB4F5F2F0F6AF8235), + UINT64_C(0x800A155B1FD52872), UINT64_C(0xEF55B258A19906C1), + UINT64_C(0xE8F4DA0CCC207CFD), UINT64_C(0x87AB7D0F726C524E), + UINT64_C(0xB3549AA49B16F809), UINT64_C(0xDC0B3DA7255AD6BA), + UINT64_C(0xBD164B0C41640D71), UINT64_C(0xD249EC0FFF2823C2), + UINT64_C(0xE6B60BA416528985), UINT64_C(0x89E9ACA7A81EA736), + UINT64_C(0x8E48C4F3C5A7DD0A), 
UINT64_C(0xE11763F07BEBF3B9), + UINT64_C(0xD5E8845B929159FE), UINT64_C(0xBAB723582CDD774D), + UINT64_C(0xA187C3EBCA2BB664), UINT64_C(0xCED864E8746798D7), + UINT64_C(0xFA2783439D1D3290), UINT64_C(0x9578244023511C23), + UINT64_C(0x92D94C144EE8661F), UINT64_C(0xFD86EB17F0A448AC), + UINT64_C(0xC9790CBC19DEE2EB), UINT64_C(0xA626ABBFA792CC58), + UINT64_C(0xC73BDD14C3AC1793), UINT64_C(0xA8647A177DE03920), + UINT64_C(0x9C9B9DBC949A9367), UINT64_C(0xF3C43ABF2AD6BDD4), + UINT64_C(0xF46552EB476FC7E8), UINT64_C(0x9B3AF5E8F923E95B), + UINT64_C(0xAFC512431059431C), UINT64_C(0xC09AB540AE156DAF), + UINT64_C(0xE8E1F0BAF28A2D19), UINT64_C(0x87BE57B94CC603AA), + UINT64_C(0xB341B012A5BCA9ED), UINT64_C(0xDC1E17111BF0875E), + UINT64_C(0xDBBF7F457649FD62), UINT64_C(0xB4E0D846C805D3D1), + UINT64_C(0x801F3FED217F7996), UINT64_C(0xEF4098EE9F335725), + UINT64_C(0x8E5DEE45FB0D8CEE), UINT64_C(0xE10249464541A25D), + UINT64_C(0xD5FDAEEDAC3B081A), UINT64_C(0xBAA209EE127726A9), + UINT64_C(0xBD0361BA7FCE5C95), UINT64_C(0xD25CC6B9C1827226), + UINT64_C(0xE6A3211228F8D861), UINT64_C(0x89FC861196B4F6D2), + UINT64_C(0x334BA549BA69819F), UINT64_C(0x5C14024A0425AF2C), + UINT64_C(0x68EBE5E1ED5F056B), UINT64_C(0x07B442E253132BD8), + UINT64_C(0x00152AB63EAA51E4), UINT64_C(0x6F4A8DB580E67F57), + UINT64_C(0x5BB56A1E699CD510), UINT64_C(0x34EACD1DD7D0FBA3), + UINT64_C(0x55F7BBB6B3EE2068), UINT64_C(0x3AA81CB50DA20EDB), + UINT64_C(0x0E57FB1EE4D8A49C), UINT64_C(0x61085C1D5A948A2F), + UINT64_C(0x66A93449372DF013), UINT64_C(0x09F6934A8961DEA0), + UINT64_C(0x3D0974E1601B74E7), UINT64_C(0x5256D3E2DE575A54), + UINT64_C(0x7A2D961882C81AE2), UINT64_C(0x1572311B3C843451), + UINT64_C(0x218DD6B0D5FE9E16), UINT64_C(0x4ED271B36BB2B0A5), + UINT64_C(0x497319E7060BCA99), UINT64_C(0x262CBEE4B847E42A), + UINT64_C(0x12D3594F513D4E6D), UINT64_C(0x7D8CFE4CEF7160DE), + UINT64_C(0x1C9188E78B4FBB15), UINT64_C(0x73CE2FE4350395A6), + UINT64_C(0x4731C84FDC793FE1), UINT64_C(0x286E6F4C62351152), + UINT64_C(0x2FCF07180F8C6B6E), 
UINT64_C(0x4090A01BB1C045DD), + UINT64_C(0x746F47B058BAEF9A), UINT64_C(0x1B30E0B3E6F6C129), + UINT64_C(0x420F87D795576CC9), UINT64_C(0x2D5020D42B1B427A), + UINT64_C(0x19AFC77FC261E83D), UINT64_C(0x76F0607C7C2DC68E), + UINT64_C(0x715108281194BCB2), UINT64_C(0x1E0EAF2BAFD89201), + UINT64_C(0x2AF1488046A23846), UINT64_C(0x45AEEF83F8EE16F5), + UINT64_C(0x24B399289CD0CD3E), UINT64_C(0x4BEC3E2B229CE38D), + UINT64_C(0x7F13D980CBE649CA), UINT64_C(0x104C7E8375AA6779), + UINT64_C(0x17ED16D718131D45), UINT64_C(0x78B2B1D4A65F33F6), + UINT64_C(0x4C4D567F4F2599B1), UINT64_C(0x2312F17CF169B702), + UINT64_C(0x0B69B486ADF6F7B4), UINT64_C(0x6436138513BAD907), + UINT64_C(0x50C9F42EFAC07340), UINT64_C(0x3F96532D448C5DF3), + UINT64_C(0x38373B79293527CF), UINT64_C(0x57689C7A9779097C), + UINT64_C(0x63977BD17E03A33B), UINT64_C(0x0CC8DCD2C04F8D88), + UINT64_C(0x6DD5AA79A4715643), UINT64_C(0x028A0D7A1A3D78F0), + UINT64_C(0x3675EAD1F347D2B7), UINT64_C(0x592A4DD24D0BFC04), + UINT64_C(0x5E8B258620B28638), UINT64_C(0x31D482859EFEA88B), + UINT64_C(0x052B652E778402CC), UINT64_C(0x6A74C22DC9C82C7F), + UINT64_C(0xD0C3E175E5155B32), UINT64_C(0xBF9C46765B597581), + UINT64_C(0x8B63A1DDB223DFC6), UINT64_C(0xE43C06DE0C6FF175), + UINT64_C(0xE39D6E8A61D68B49), UINT64_C(0x8CC2C989DF9AA5FA), + UINT64_C(0xB83D2E2236E00FBD), UINT64_C(0xD762892188AC210E), + UINT64_C(0xB67FFF8AEC92FAC5), UINT64_C(0xD920588952DED476), + UINT64_C(0xEDDFBF22BBA47E31), UINT64_C(0x8280182105E85082), + UINT64_C(0x8521707568512ABE), UINT64_C(0xEA7ED776D61D040D), + UINT64_C(0xDE8130DD3F67AE4A), UINT64_C(0xB1DE97DE812B80F9), + UINT64_C(0x99A5D224DDB4C04F), UINT64_C(0xF6FA752763F8EEFC), + UINT64_C(0xC205928C8A8244BB), UINT64_C(0xAD5A358F34CE6A08), + UINT64_C(0xAAFB5DDB59771034), UINT64_C(0xC5A4FAD8E73B3E87), + UINT64_C(0xF15B1D730E4194C0), UINT64_C(0x9E04BA70B00DBA73), + UINT64_C(0xFF19CCDBD43361B8), UINT64_C(0x90466BD86A7F4F0B), + UINT64_C(0xA4B98C738305E54C), UINT64_C(0xCBE62B703D49CBFF), + UINT64_C(0xCC47432450F0B1C3), 
UINT64_C(0xA318E427EEBC9F70), + UINT64_C(0x97E7038C07C63537), UINT64_C(0xF8B8A48FB98A1B84), + UINT64_C(0xE388443C5F7CDAAD), UINT64_C(0x8CD7E33FE130F41E), + UINT64_C(0xB8280494084A5E59), UINT64_C(0xD777A397B60670EA), + UINT64_C(0xD0D6CBC3DBBF0AD6), UINT64_C(0xBF896CC065F32465), + UINT64_C(0x8B768B6B8C898E22), UINT64_C(0xE4292C6832C5A091), + UINT64_C(0x85345AC356FB7B5A), UINT64_C(0xEA6BFDC0E8B755E9), + UINT64_C(0xDE941A6B01CDFFAE), UINT64_C(0xB1CBBD68BF81D11D), + UINT64_C(0xB66AD53CD238AB21), UINT64_C(0xD935723F6C748592), + UINT64_C(0xEDCA9594850E2FD5), UINT64_C(0x829532973B420166), + UINT64_C(0xAAEE776D67DD41D0), UINT64_C(0xC5B1D06ED9916F63), + UINT64_C(0xF14E37C530EBC524), UINT64_C(0x9E1190C68EA7EB97), + UINT64_C(0x99B0F892E31E91AB), UINT64_C(0xF6EF5F915D52BF18), + UINT64_C(0xC210B83AB428155F), UINT64_C(0xAD4F1F390A643BEC), + UINT64_C(0xCC5269926E5AE027), UINT64_C(0xA30DCE91D016CE94), + UINT64_C(0x97F2293A396C64D3), UINT64_C(0xF8AD8E3987204A60), + UINT64_C(0xFF0CE66DEA99305C), UINT64_C(0x9053416E54D51EEF), + UINT64_C(0xA4ACA6C5BDAFB4A8), UINT64_C(0xCBF301C603E39A1B), + UINT64_C(0x7144229E2F3EED56), UINT64_C(0x1E1B859D9172C3E5), + UINT64_C(0x2AE46236780869A2), UINT64_C(0x45BBC535C6444711), + UINT64_C(0x421AAD61ABFD3D2D), UINT64_C(0x2D450A6215B1139E), + UINT64_C(0x19BAEDC9FCCBB9D9), UINT64_C(0x76E54ACA4287976A), + UINT64_C(0x17F83C6126B94CA1), UINT64_C(0x78A79B6298F56212), + UINT64_C(0x4C587CC9718FC855), UINT64_C(0x2307DBCACFC3E6E6), + UINT64_C(0x24A6B39EA27A9CDA), UINT64_C(0x4BF9149D1C36B269), + UINT64_C(0x7F06F336F54C182E), UINT64_C(0x105954354B00369D), + UINT64_C(0x382211CF179F762B), UINT64_C(0x577DB6CCA9D35898), + UINT64_C(0x6382516740A9F2DF), UINT64_C(0x0CDDF664FEE5DC6C), + UINT64_C(0x0B7C9E30935CA650), UINT64_C(0x642339332D1088E3), + UINT64_C(0x50DCDE98C46A22A4), UINT64_C(0x3F83799B7A260C17), + UINT64_C(0x5E9E0F301E18D7DC), UINT64_C(0x31C1A833A054F96F), + UINT64_C(0x053E4F98492E5328), UINT64_C(0x6A61E89BF7627D9B), + UINT64_C(0x6DC080CF9ADB07A7), 
UINT64_C(0x029F27CC24972914), + UINT64_C(0x3660C067CDED8353), UINT64_C(0x593F676473A1ADE0) + }, { + UINT64_C(0x0000000000000000), UINT64_C(0x0DF1D05C9279E954), + UINT64_C(0x1AE2A1B924F3D2A9), UINT64_C(0x171371E5B68A3BFD), + UINT64_C(0xB1DA4DDC62497DC1), UINT64_C(0xBC2B9D80F0309495), + UINT64_C(0xAB38EC6546BAAF68), UINT64_C(0xA6C93C39D4C3463C), + UINT64_C(0xE7AB9517EE3D2210), UINT64_C(0xEA5A454B7C44CB44), + UINT64_C(0xFD4934AECACEF0B9), UINT64_C(0xF0B8E4F258B719ED), + UINT64_C(0x5671D8CB8C745FD1), UINT64_C(0x5B8008971E0DB685), + UINT64_C(0x4C937972A8878D78), UINT64_C(0x4162A92E3AFE642C), + UINT64_C(0xCE572B2FDC7B4420), UINT64_C(0xC3A6FB734E02AD74), + UINT64_C(0xD4B58A96F8889689), UINT64_C(0xD9445ACA6AF17FDD), + UINT64_C(0x7F8D66F3BE3239E1), UINT64_C(0x727CB6AF2C4BD0B5), + UINT64_C(0x656FC74A9AC1EB48), UINT64_C(0x689E171608B8021C), + UINT64_C(0x29FCBE3832466630), UINT64_C(0x240D6E64A03F8F64), + UINT64_C(0x331E1F8116B5B499), UINT64_C(0x3EEFCFDD84CC5DCD), + UINT64_C(0x9826F3E4500F1BF1), UINT64_C(0x95D723B8C276F2A5), + UINT64_C(0x82C4525D74FCC958), UINT64_C(0x8F358201E685200C), + UINT64_C(0x9CAF565EB8F78840), UINT64_C(0x915E86022A8E6114), + UINT64_C(0x864DF7E79C045AE9), UINT64_C(0x8BBC27BB0E7DB3BD), + UINT64_C(0x2D751B82DABEF581), UINT64_C(0x2084CBDE48C71CD5), + UINT64_C(0x3797BA3BFE4D2728), UINT64_C(0x3A666A676C34CE7C), + UINT64_C(0x7B04C34956CAAA50), UINT64_C(0x76F51315C4B34304), + UINT64_C(0x61E662F0723978F9), UINT64_C(0x6C17B2ACE04091AD), + UINT64_C(0xCADE8E953483D791), UINT64_C(0xC72F5EC9A6FA3EC5), + UINT64_C(0xD03C2F2C10700538), UINT64_C(0xDDCDFF708209EC6C), + UINT64_C(0x52F87D71648CCC60), UINT64_C(0x5F09AD2DF6F52534), + UINT64_C(0x481ADCC8407F1EC9), UINT64_C(0x45EB0C94D206F79D), + UINT64_C(0xE32230AD06C5B1A1), UINT64_C(0xEED3E0F194BC58F5), + UINT64_C(0xF9C0911422366308), UINT64_C(0xF4314148B04F8A5C), + UINT64_C(0xB553E8668AB1EE70), UINT64_C(0xB8A2383A18C80724), + UINT64_C(0xAFB149DFAE423CD9), UINT64_C(0xA24099833C3BD58D), + UINT64_C(0x0489A5BAE8F893B1), 
UINT64_C(0x097875E67A817AE5), + UINT64_C(0x1E6B0403CC0B4118), UINT64_C(0x139AD45F5E72A84C), + UINT64_C(0x385FADBC70EF1181), UINT64_C(0x35AE7DE0E296F8D5), + UINT64_C(0x22BD0C05541CC328), UINT64_C(0x2F4CDC59C6652A7C), + UINT64_C(0x8985E06012A66C40), UINT64_C(0x8474303C80DF8514), + UINT64_C(0x936741D93655BEE9), UINT64_C(0x9E969185A42C57BD), + UINT64_C(0xDFF438AB9ED23391), UINT64_C(0xD205E8F70CABDAC5), + UINT64_C(0xC5169912BA21E138), UINT64_C(0xC8E7494E2858086C), + UINT64_C(0x6E2E7577FC9B4E50), UINT64_C(0x63DFA52B6EE2A704), + UINT64_C(0x74CCD4CED8689CF9), UINT64_C(0x793D04924A1175AD), + UINT64_C(0xF6088693AC9455A1), UINT64_C(0xFBF956CF3EEDBCF5), + UINT64_C(0xECEA272A88678708), UINT64_C(0xE11BF7761A1E6E5C), + UINT64_C(0x47D2CB4FCEDD2860), UINT64_C(0x4A231B135CA4C134), + UINT64_C(0x5D306AF6EA2EFAC9), UINT64_C(0x50C1BAAA7857139D), + UINT64_C(0x11A3138442A977B1), UINT64_C(0x1C52C3D8D0D09EE5), + UINT64_C(0x0B41B23D665AA518), UINT64_C(0x06B06261F4234C4C), + UINT64_C(0xA0795E5820E00A70), UINT64_C(0xAD888E04B299E324), + UINT64_C(0xBA9BFFE10413D8D9), UINT64_C(0xB76A2FBD966A318D), + UINT64_C(0xA4F0FBE2C81899C1), UINT64_C(0xA9012BBE5A617095), + UINT64_C(0xBE125A5BECEB4B68), UINT64_C(0xB3E38A077E92A23C), + UINT64_C(0x152AB63EAA51E400), UINT64_C(0x18DB666238280D54), + UINT64_C(0x0FC817878EA236A9), UINT64_C(0x0239C7DB1CDBDFFD), + UINT64_C(0x435B6EF52625BBD1), UINT64_C(0x4EAABEA9B45C5285), + UINT64_C(0x59B9CF4C02D66978), UINT64_C(0x54481F1090AF802C), + UINT64_C(0xF2812329446CC610), UINT64_C(0xFF70F375D6152F44), + UINT64_C(0xE8638290609F14B9), UINT64_C(0xE59252CCF2E6FDED), + UINT64_C(0x6AA7D0CD1463DDE1), UINT64_C(0x67560091861A34B5), + UINT64_C(0x7045717430900F48), UINT64_C(0x7DB4A128A2E9E61C), + UINT64_C(0xDB7D9D11762AA020), UINT64_C(0xD68C4D4DE4534974), + UINT64_C(0xC19F3CA852D97289), UINT64_C(0xCC6EECF4C0A09BDD), + UINT64_C(0x8D0C45DAFA5EFFF1), UINT64_C(0x80FD9586682716A5), + UINT64_C(0x97EEE463DEAD2D58), UINT64_C(0x9A1F343F4CD4C40C), + UINT64_C(0x3CD6080698178230), 
UINT64_C(0x3127D85A0A6E6B64), + UINT64_C(0x2634A9BFBCE45099), UINT64_C(0x2BC579E32E9DB9CD), + UINT64_C(0xF5A054D6CA71FB90), UINT64_C(0xF851848A580812C4), + UINT64_C(0xEF42F56FEE822939), UINT64_C(0xE2B325337CFBC06D), + UINT64_C(0x447A190AA8388651), UINT64_C(0x498BC9563A416F05), + UINT64_C(0x5E98B8B38CCB54F8), UINT64_C(0x536968EF1EB2BDAC), + UINT64_C(0x120BC1C1244CD980), UINT64_C(0x1FFA119DB63530D4), + UINT64_C(0x08E9607800BF0B29), UINT64_C(0x0518B02492C6E27D), + UINT64_C(0xA3D18C1D4605A441), UINT64_C(0xAE205C41D47C4D15), + UINT64_C(0xB9332DA462F676E8), UINT64_C(0xB4C2FDF8F08F9FBC), + UINT64_C(0x3BF77FF9160ABFB0), UINT64_C(0x3606AFA5847356E4), + UINT64_C(0x2115DE4032F96D19), UINT64_C(0x2CE40E1CA080844D), + UINT64_C(0x8A2D32257443C271), UINT64_C(0x87DCE279E63A2B25), + UINT64_C(0x90CF939C50B010D8), UINT64_C(0x9D3E43C0C2C9F98C), + UINT64_C(0xDC5CEAEEF8379DA0), UINT64_C(0xD1AD3AB26A4E74F4), + UINT64_C(0xC6BE4B57DCC44F09), UINT64_C(0xCB4F9B0B4EBDA65D), + UINT64_C(0x6D86A7329A7EE061), UINT64_C(0x6077776E08070935), + UINT64_C(0x7764068BBE8D32C8), UINT64_C(0x7A95D6D72CF4DB9C), + UINT64_C(0x690F0288728673D0), UINT64_C(0x64FED2D4E0FF9A84), + UINT64_C(0x73EDA3315675A179), UINT64_C(0x7E1C736DC40C482D), + UINT64_C(0xD8D54F5410CF0E11), UINT64_C(0xD5249F0882B6E745), + UINT64_C(0xC237EEED343CDCB8), UINT64_C(0xCFC63EB1A64535EC), + UINT64_C(0x8EA4979F9CBB51C0), UINT64_C(0x835547C30EC2B894), + UINT64_C(0x94463626B8488369), UINT64_C(0x99B7E67A2A316A3D), + UINT64_C(0x3F7EDA43FEF22C01), UINT64_C(0x328F0A1F6C8BC555), + UINT64_C(0x259C7BFADA01FEA8), UINT64_C(0x286DABA6487817FC), + UINT64_C(0xA75829A7AEFD37F0), UINT64_C(0xAAA9F9FB3C84DEA4), + UINT64_C(0xBDBA881E8A0EE559), UINT64_C(0xB04B584218770C0D), + UINT64_C(0x1682647BCCB44A31), UINT64_C(0x1B73B4275ECDA365), + UINT64_C(0x0C60C5C2E8479898), UINT64_C(0x0191159E7A3E71CC), + UINT64_C(0x40F3BCB040C015E0), UINT64_C(0x4D026CECD2B9FCB4), + UINT64_C(0x5A111D096433C749), UINT64_C(0x57E0CD55F64A2E1D), + UINT64_C(0xF129F16C22896821), 
UINT64_C(0xFCD82130B0F08175), + UINT64_C(0xEBCB50D5067ABA88), UINT64_C(0xE63A8089940353DC), + UINT64_C(0xCDFFF96ABA9EEA11), UINT64_C(0xC00E293628E70345), + UINT64_C(0xD71D58D39E6D38B8), UINT64_C(0xDAEC888F0C14D1EC), + UINT64_C(0x7C25B4B6D8D797D0), UINT64_C(0x71D464EA4AAE7E84), + UINT64_C(0x66C7150FFC244579), UINT64_C(0x6B36C5536E5DAC2D), + UINT64_C(0x2A546C7D54A3C801), UINT64_C(0x27A5BC21C6DA2155), + UINT64_C(0x30B6CDC470501AA8), UINT64_C(0x3D471D98E229F3FC), + UINT64_C(0x9B8E21A136EAB5C0), UINT64_C(0x967FF1FDA4935C94), + UINT64_C(0x816C801812196769), UINT64_C(0x8C9D504480608E3D), + UINT64_C(0x03A8D24566E5AE31), UINT64_C(0x0E590219F49C4765), + UINT64_C(0x194A73FC42167C98), UINT64_C(0x14BBA3A0D06F95CC), + UINT64_C(0xB2729F9904ACD3F0), UINT64_C(0xBF834FC596D53AA4), + UINT64_C(0xA8903E20205F0159), UINT64_C(0xA561EE7CB226E80D), + UINT64_C(0xE403475288D88C21), UINT64_C(0xE9F2970E1AA16575), + UINT64_C(0xFEE1E6EBAC2B5E88), UINT64_C(0xF31036B73E52B7DC), + UINT64_C(0x55D90A8EEA91F1E0), UINT64_C(0x5828DAD278E818B4), + UINT64_C(0x4F3BAB37CE622349), UINT64_C(0x42CA7B6B5C1BCA1D), + UINT64_C(0x5150AF3402696251), UINT64_C(0x5CA17F6890108B05), + UINT64_C(0x4BB20E8D269AB0F8), UINT64_C(0x4643DED1B4E359AC), + UINT64_C(0xE08AE2E860201F90), UINT64_C(0xED7B32B4F259F6C4), + UINT64_C(0xFA68435144D3CD39), UINT64_C(0xF799930DD6AA246D), + UINT64_C(0xB6FB3A23EC544041), UINT64_C(0xBB0AEA7F7E2DA915), + UINT64_C(0xAC199B9AC8A792E8), UINT64_C(0xA1E84BC65ADE7BBC), + UINT64_C(0x072177FF8E1D3D80), UINT64_C(0x0AD0A7A31C64D4D4), + UINT64_C(0x1DC3D646AAEEEF29), UINT64_C(0x1032061A3897067D), + UINT64_C(0x9F07841BDE122671), UINT64_C(0x92F654474C6BCF25), + UINT64_C(0x85E525A2FAE1F4D8), UINT64_C(0x8814F5FE68981D8C), + UINT64_C(0x2EDDC9C7BC5B5BB0), UINT64_C(0x232C199B2E22B2E4), + UINT64_C(0x343F687E98A88919), UINT64_C(0x39CEB8220AD1604D), + UINT64_C(0x78AC110C302F0461), UINT64_C(0x755DC150A256ED35), + UINT64_C(0x624EB0B514DCD6C8), UINT64_C(0x6FBF60E986A53F9C), + UINT64_C(0xC9765CD0526679A0), 
UINT64_C(0xC4878C8CC01F90F4), + UINT64_C(0xD394FD697695AB09), UINT64_C(0xDE652D35E4EC425D) + }, { + UINT64_C(0x0000000000000000), UINT64_C(0xCB6D6A914AE10B3F), + UINT64_C(0x96DBD42295C2177E), UINT64_C(0x5DB6BEB3DF231C41), + UINT64_C(0x2CB7A9452A852FFC), UINT64_C(0xE7DAC3D4606424C3), + UINT64_C(0xBA6C7D67BF473882), UINT64_C(0x710117F6F5A633BD), + UINT64_C(0xDD705D247FA5876A), UINT64_C(0x161D37B535448C55), + UINT64_C(0x4BAB8906EA679014), UINT64_C(0x80C6E397A0869B2B), + UINT64_C(0xF1C7F4615520A896), UINT64_C(0x3AAA9EF01FC1A3A9), + UINT64_C(0x671C2043C0E2BFE8), UINT64_C(0xAC714AD28A03B4D7), + UINT64_C(0xBAE1BA48FE4A0FD5), UINT64_C(0x718CD0D9B4AB04EA), + UINT64_C(0x2C3A6E6A6B8818AB), UINT64_C(0xE75704FB21691394), + UINT64_C(0x9656130DD4CF2029), UINT64_C(0x5D3B799C9E2E2B16), + UINT64_C(0x008DC72F410D3757), UINT64_C(0xCBE0ADBE0BEC3C68), + UINT64_C(0x6791E76C81EF88BF), UINT64_C(0xACFC8DFDCB0E8380), + UINT64_C(0xF14A334E142D9FC1), UINT64_C(0x3A2759DF5ECC94FE), + UINT64_C(0x4B264E29AB6AA743), UINT64_C(0x804B24B8E18BAC7C), + UINT64_C(0xDDFD9A0B3EA8B03D), UINT64_C(0x1690F09A7449BB02), + UINT64_C(0xF1DD7B3ED73AC638), UINT64_C(0x3AB011AF9DDBCD07), + UINT64_C(0x6706AF1C42F8D146), UINT64_C(0xAC6BC58D0819DA79), + UINT64_C(0xDD6AD27BFDBFE9C4), UINT64_C(0x1607B8EAB75EE2FB), + UINT64_C(0x4BB10659687DFEBA), UINT64_C(0x80DC6CC8229CF585), + UINT64_C(0x2CAD261AA89F4152), UINT64_C(0xE7C04C8BE27E4A6D), + UINT64_C(0xBA76F2383D5D562C), UINT64_C(0x711B98A977BC5D13), + UINT64_C(0x001A8F5F821A6EAE), UINT64_C(0xCB77E5CEC8FB6591), + UINT64_C(0x96C15B7D17D879D0), UINT64_C(0x5DAC31EC5D3972EF), + UINT64_C(0x4B3CC1762970C9ED), UINT64_C(0x8051ABE76391C2D2), + UINT64_C(0xDDE71554BCB2DE93), UINT64_C(0x168A7FC5F653D5AC), + UINT64_C(0x678B683303F5E611), UINT64_C(0xACE602A24914ED2E), + UINT64_C(0xF150BC119637F16F), UINT64_C(0x3A3DD680DCD6FA50), + UINT64_C(0x964C9C5256D54E87), UINT64_C(0x5D21F6C31C3445B8), + UINT64_C(0x00974870C31759F9), UINT64_C(0xCBFA22E189F652C6), + UINT64_C(0xBAFB35177C50617B), 
UINT64_C(0x71965F8636B16A44), + UINT64_C(0x2C20E135E9927605), UINT64_C(0xE74D8BA4A3737D3A), + UINT64_C(0xE2BBF77CAE758C71), UINT64_C(0x29D69DEDE494874E), + UINT64_C(0x7460235E3BB79B0F), UINT64_C(0xBF0D49CF71569030), + UINT64_C(0xCE0C5E3984F0A38D), UINT64_C(0x056134A8CE11A8B2), + UINT64_C(0x58D78A1B1132B4F3), UINT64_C(0x93BAE08A5BD3BFCC), + UINT64_C(0x3FCBAA58D1D00B1B), UINT64_C(0xF4A6C0C99B310024), + UINT64_C(0xA9107E7A44121C65), UINT64_C(0x627D14EB0EF3175A), + UINT64_C(0x137C031DFB5524E7), UINT64_C(0xD811698CB1B42FD8), + UINT64_C(0x85A7D73F6E973399), UINT64_C(0x4ECABDAE247638A6), + UINT64_C(0x585A4D34503F83A4), UINT64_C(0x933727A51ADE889B), + UINT64_C(0xCE819916C5FD94DA), UINT64_C(0x05ECF3878F1C9FE5), + UINT64_C(0x74EDE4717ABAAC58), UINT64_C(0xBF808EE0305BA767), + UINT64_C(0xE2363053EF78BB26), UINT64_C(0x295B5AC2A599B019), + UINT64_C(0x852A10102F9A04CE), UINT64_C(0x4E477A81657B0FF1), + UINT64_C(0x13F1C432BA5813B0), UINT64_C(0xD89CAEA3F0B9188F), + UINT64_C(0xA99DB955051F2B32), UINT64_C(0x62F0D3C44FFE200D), + UINT64_C(0x3F466D7790DD3C4C), UINT64_C(0xF42B07E6DA3C3773), + UINT64_C(0x13668C42794F4A49), UINT64_C(0xD80BE6D333AE4176), + UINT64_C(0x85BD5860EC8D5D37), UINT64_C(0x4ED032F1A66C5608), + UINT64_C(0x3FD1250753CA65B5), UINT64_C(0xF4BC4F96192B6E8A), + UINT64_C(0xA90AF125C60872CB), UINT64_C(0x62679BB48CE979F4), + UINT64_C(0xCE16D16606EACD23), UINT64_C(0x057BBBF74C0BC61C), + UINT64_C(0x58CD05449328DA5D), UINT64_C(0x93A06FD5D9C9D162), + UINT64_C(0xE2A178232C6FE2DF), UINT64_C(0x29CC12B2668EE9E0), + UINT64_C(0x747AAC01B9ADF5A1), UINT64_C(0xBF17C690F34CFE9E), + UINT64_C(0xA987360A8705459C), UINT64_C(0x62EA5C9BCDE44EA3), + UINT64_C(0x3F5CE22812C752E2), UINT64_C(0xF43188B9582659DD), + UINT64_C(0x85309F4FAD806A60), UINT64_C(0x4E5DF5DEE761615F), + UINT64_C(0x13EB4B6D38427D1E), UINT64_C(0xD88621FC72A37621), + UINT64_C(0x74F76B2EF8A0C2F6), UINT64_C(0xBF9A01BFB241C9C9), + UINT64_C(0xE22CBF0C6D62D588), UINT64_C(0x2941D59D2783DEB7), + UINT64_C(0x5840C26BD225ED0A), 
UINT64_C(0x932DA8FA98C4E635), + UINT64_C(0xCE9B164947E7FA74), UINT64_C(0x05F67CD80D06F14B), + UINT64_C(0xC477EFF95CEB18E3), UINT64_C(0x0F1A8568160A13DC), + UINT64_C(0x52AC3BDBC9290F9D), UINT64_C(0x99C1514A83C804A2), + UINT64_C(0xE8C046BC766E371F), UINT64_C(0x23AD2C2D3C8F3C20), + UINT64_C(0x7E1B929EE3AC2061), UINT64_C(0xB576F80FA94D2B5E), + UINT64_C(0x1907B2DD234E9F89), UINT64_C(0xD26AD84C69AF94B6), + UINT64_C(0x8FDC66FFB68C88F7), UINT64_C(0x44B10C6EFC6D83C8), + UINT64_C(0x35B01B9809CBB075), UINT64_C(0xFEDD7109432ABB4A), + UINT64_C(0xA36BCFBA9C09A70B), UINT64_C(0x6806A52BD6E8AC34), + UINT64_C(0x7E9655B1A2A11736), UINT64_C(0xB5FB3F20E8401C09), + UINT64_C(0xE84D819337630048), UINT64_C(0x2320EB027D820B77), + UINT64_C(0x5221FCF4882438CA), UINT64_C(0x994C9665C2C533F5), + UINT64_C(0xC4FA28D61DE62FB4), UINT64_C(0x0F9742475707248B), + UINT64_C(0xA3E60895DD04905C), UINT64_C(0x688B620497E59B63), + UINT64_C(0x353DDCB748C68722), UINT64_C(0xFE50B62602278C1D), + UINT64_C(0x8F51A1D0F781BFA0), UINT64_C(0x443CCB41BD60B49F), + UINT64_C(0x198A75F26243A8DE), UINT64_C(0xD2E71F6328A2A3E1), + UINT64_C(0x35AA94C78BD1DEDB), UINT64_C(0xFEC7FE56C130D5E4), + UINT64_C(0xA37140E51E13C9A5), UINT64_C(0x681C2A7454F2C29A), + UINT64_C(0x191D3D82A154F127), UINT64_C(0xD2705713EBB5FA18), + UINT64_C(0x8FC6E9A03496E659), UINT64_C(0x44AB83317E77ED66), + UINT64_C(0xE8DAC9E3F47459B1), UINT64_C(0x23B7A372BE95528E), + UINT64_C(0x7E011DC161B64ECF), UINT64_C(0xB56C77502B5745F0), + UINT64_C(0xC46D60A6DEF1764D), UINT64_C(0x0F000A3794107D72), + UINT64_C(0x52B6B4844B336133), UINT64_C(0x99DBDE1501D26A0C), + UINT64_C(0x8F4B2E8F759BD10E), UINT64_C(0x4426441E3F7ADA31), + UINT64_C(0x1990FAADE059C670), UINT64_C(0xD2FD903CAAB8CD4F), + UINT64_C(0xA3FC87CA5F1EFEF2), UINT64_C(0x6891ED5B15FFF5CD), + UINT64_C(0x352753E8CADCE98C), UINT64_C(0xFE4A3979803DE2B3), + UINT64_C(0x523B73AB0A3E5664), UINT64_C(0x9956193A40DF5D5B), + UINT64_C(0xC4E0A7899FFC411A), UINT64_C(0x0F8DCD18D51D4A25), + UINT64_C(0x7E8CDAEE20BB7998), 
UINT64_C(0xB5E1B07F6A5A72A7), + UINT64_C(0xE8570ECCB5796EE6), UINT64_C(0x233A645DFF9865D9), + UINT64_C(0x26CC1885F29E9492), UINT64_C(0xEDA17214B87F9FAD), + UINT64_C(0xB017CCA7675C83EC), UINT64_C(0x7B7AA6362DBD88D3), + UINT64_C(0x0A7BB1C0D81BBB6E), UINT64_C(0xC116DB5192FAB051), + UINT64_C(0x9CA065E24DD9AC10), UINT64_C(0x57CD0F730738A72F), + UINT64_C(0xFBBC45A18D3B13F8), UINT64_C(0x30D12F30C7DA18C7), + UINT64_C(0x6D67918318F90486), UINT64_C(0xA60AFB1252180FB9), + UINT64_C(0xD70BECE4A7BE3C04), UINT64_C(0x1C668675ED5F373B), + UINT64_C(0x41D038C6327C2B7A), UINT64_C(0x8ABD5257789D2045), + UINT64_C(0x9C2DA2CD0CD49B47), UINT64_C(0x5740C85C46359078), + UINT64_C(0x0AF676EF99168C39), UINT64_C(0xC19B1C7ED3F78706), + UINT64_C(0xB09A0B882651B4BB), UINT64_C(0x7BF761196CB0BF84), + UINT64_C(0x2641DFAAB393A3C5), UINT64_C(0xED2CB53BF972A8FA), + UINT64_C(0x415DFFE973711C2D), UINT64_C(0x8A30957839901712), + UINT64_C(0xD7862BCBE6B30B53), UINT64_C(0x1CEB415AAC52006C), + UINT64_C(0x6DEA56AC59F433D1), UINT64_C(0xA6873C3D131538EE), + UINT64_C(0xFB31828ECC3624AF), UINT64_C(0x305CE81F86D72F90), + UINT64_C(0xD71163BB25A452AA), UINT64_C(0x1C7C092A6F455995), + UINT64_C(0x41CAB799B06645D4), UINT64_C(0x8AA7DD08FA874EEB), + UINT64_C(0xFBA6CAFE0F217D56), UINT64_C(0x30CBA06F45C07669), + UINT64_C(0x6D7D1EDC9AE36A28), UINT64_C(0xA610744DD0026117), + UINT64_C(0x0A613E9F5A01D5C0), UINT64_C(0xC10C540E10E0DEFF), + UINT64_C(0x9CBAEABDCFC3C2BE), UINT64_C(0x57D7802C8522C981), + UINT64_C(0x26D697DA7084FA3C), UINT64_C(0xEDBBFD4B3A65F103), + UINT64_C(0xB00D43F8E546ED42), UINT64_C(0x7B602969AFA7E67D), + UINT64_C(0x6DF0D9F3DBEE5D7F), UINT64_C(0xA69DB362910F5640), + UINT64_C(0xFB2B0DD14E2C4A01), UINT64_C(0x3046674004CD413E), + UINT64_C(0x414770B6F16B7283), UINT64_C(0x8A2A1A27BB8A79BC), + UINT64_C(0xD79CA49464A965FD), UINT64_C(0x1CF1CE052E486EC2), + UINT64_C(0xB08084D7A44BDA15), UINT64_C(0x7BEDEE46EEAAD12A), + UINT64_C(0x265B50F53189CD6B), UINT64_C(0xED363A647B68C654), + UINT64_C(0x9C372D928ECEF5E9), 
UINT64_C(0x575A4703C42FFED6), + UINT64_C(0x0AECF9B01B0CE297), UINT64_C(0xC181932151EDE9A8) + }, { + UINT64_C(0x0000000000000000), UINT64_C(0xDCA12C225E8AEE1D), + UINT64_C(0xB8435944BC14DD3B), UINT64_C(0x64E27566E29E3326), + UINT64_C(0x7087B2887829BA77), UINT64_C(0xAC269EAA26A3546A), + UINT64_C(0xC8C4EBCCC43D674C), UINT64_C(0x1465C7EE9AB78951), + UINT64_C(0xE00E6511F15274EF), UINT64_C(0x3CAF4933AFD89AF2), + UINT64_C(0x584D3C554D46A9D4), UINT64_C(0x84EC107713CC47C9), + UINT64_C(0x9089D799897BCE98), UINT64_C(0x4C28FBBBD7F12085), + UINT64_C(0x28CA8EDD356F13A3), UINT64_C(0xF46BA2FF6BE5FDBE), + UINT64_C(0x4503C48DC90A304C), UINT64_C(0x99A2E8AF9780DE51), + UINT64_C(0xFD409DC9751EED77), UINT64_C(0x21E1B1EB2B94036A), + UINT64_C(0x35847605B1238A3B), UINT64_C(0xE9255A27EFA96426), + UINT64_C(0x8DC72F410D375700), UINT64_C(0x5166036353BDB91D), + UINT64_C(0xA50DA19C385844A3), UINT64_C(0x79AC8DBE66D2AABE), + UINT64_C(0x1D4EF8D8844C9998), UINT64_C(0xC1EFD4FADAC67785), + UINT64_C(0xD58A13144071FED4), UINT64_C(0x092B3F361EFB10C9), + UINT64_C(0x6DC94A50FC6523EF), UINT64_C(0xB1686672A2EFCDF2), + UINT64_C(0x8A06881B93156098), UINT64_C(0x56A7A439CD9F8E85), + UINT64_C(0x3245D15F2F01BDA3), UINT64_C(0xEEE4FD7D718B53BE), + UINT64_C(0xFA813A93EB3CDAEF), UINT64_C(0x262016B1B5B634F2), + UINT64_C(0x42C263D7572807D4), UINT64_C(0x9E634FF509A2E9C9), + UINT64_C(0x6A08ED0A62471477), UINT64_C(0xB6A9C1283CCDFA6A), + UINT64_C(0xD24BB44EDE53C94C), UINT64_C(0x0EEA986C80D92751), + UINT64_C(0x1A8F5F821A6EAE00), UINT64_C(0xC62E73A044E4401D), + UINT64_C(0xA2CC06C6A67A733B), UINT64_C(0x7E6D2AE4F8F09D26), + UINT64_C(0xCF054C965A1F50D4), UINT64_C(0x13A460B40495BEC9), + UINT64_C(0x774615D2E60B8DEF), UINT64_C(0xABE739F0B88163F2), + UINT64_C(0xBF82FE1E2236EAA3), UINT64_C(0x6323D23C7CBC04BE), + UINT64_C(0x07C1A75A9E223798), UINT64_C(0xDB608B78C0A8D985), + UINT64_C(0x2F0B2987AB4D243B), UINT64_C(0xF3AA05A5F5C7CA26), + UINT64_C(0x974870C31759F900), UINT64_C(0x4BE95CE149D3171D), + UINT64_C(0x5F8C9B0FD3649E4C), 
UINT64_C(0x832DB72D8DEE7051), + UINT64_C(0xE7CFC24B6F704377), UINT64_C(0x3B6EEE6931FAAD6A), + UINT64_C(0x91131E980D8418A2), UINT64_C(0x4DB232BA530EF6BF), + UINT64_C(0x295047DCB190C599), UINT64_C(0xF5F16BFEEF1A2B84), + UINT64_C(0xE194AC1075ADA2D5), UINT64_C(0x3D3580322B274CC8), + UINT64_C(0x59D7F554C9B97FEE), UINT64_C(0x8576D976973391F3), + UINT64_C(0x711D7B89FCD66C4D), UINT64_C(0xADBC57ABA25C8250), + UINT64_C(0xC95E22CD40C2B176), UINT64_C(0x15FF0EEF1E485F6B), + UINT64_C(0x019AC90184FFD63A), UINT64_C(0xDD3BE523DA753827), + UINT64_C(0xB9D9904538EB0B01), UINT64_C(0x6578BC676661E51C), + UINT64_C(0xD410DA15C48E28EE), UINT64_C(0x08B1F6379A04C6F3), + UINT64_C(0x6C538351789AF5D5), UINT64_C(0xB0F2AF7326101BC8), + UINT64_C(0xA497689DBCA79299), UINT64_C(0x783644BFE22D7C84), + UINT64_C(0x1CD431D900B34FA2), UINT64_C(0xC0751DFB5E39A1BF), + UINT64_C(0x341EBF0435DC5C01), UINT64_C(0xE8BF93266B56B21C), + UINT64_C(0x8C5DE64089C8813A), UINT64_C(0x50FCCA62D7426F27), + UINT64_C(0x44990D8C4DF5E676), UINT64_C(0x983821AE137F086B), + UINT64_C(0xFCDA54C8F1E13B4D), UINT64_C(0x207B78EAAF6BD550), + UINT64_C(0x1B1596839E91783A), UINT64_C(0xC7B4BAA1C01B9627), + UINT64_C(0xA356CFC72285A501), UINT64_C(0x7FF7E3E57C0F4B1C), + UINT64_C(0x6B92240BE6B8C24D), UINT64_C(0xB7330829B8322C50), + UINT64_C(0xD3D17D4F5AAC1F76), UINT64_C(0x0F70516D0426F16B), + UINT64_C(0xFB1BF3926FC30CD5), UINT64_C(0x27BADFB03149E2C8), + UINT64_C(0x4358AAD6D3D7D1EE), UINT64_C(0x9FF986F48D5D3FF3), + UINT64_C(0x8B9C411A17EAB6A2), UINT64_C(0x573D6D38496058BF), + UINT64_C(0x33DF185EABFE6B99), UINT64_C(0xEF7E347CF5748584), + UINT64_C(0x5E16520E579B4876), UINT64_C(0x82B77E2C0911A66B), + UINT64_C(0xE6550B4AEB8F954D), UINT64_C(0x3AF42768B5057B50), + UINT64_C(0x2E91E0862FB2F201), UINT64_C(0xF230CCA471381C1C), + UINT64_C(0x96D2B9C293A62F3A), UINT64_C(0x4A7395E0CD2CC127), + UINT64_C(0xBE18371FA6C93C99), UINT64_C(0x62B91B3DF843D284), + UINT64_C(0x065B6E5B1ADDE1A2), UINT64_C(0xDAFA427944570FBF), + UINT64_C(0xCE9F8597DEE086EE), 
UINT64_C(0x123EA9B5806A68F3), + UINT64_C(0x76DCDCD362F45BD5), UINT64_C(0xAA7DF0F13C7EB5C8), + UINT64_C(0xA739329F30A7E9D6), UINT64_C(0x7B981EBD6E2D07CB), + UINT64_C(0x1F7A6BDB8CB334ED), UINT64_C(0xC3DB47F9D239DAF0), + UINT64_C(0xD7BE8017488E53A1), UINT64_C(0x0B1FAC351604BDBC), + UINT64_C(0x6FFDD953F49A8E9A), UINT64_C(0xB35CF571AA106087), + UINT64_C(0x4737578EC1F59D39), UINT64_C(0x9B967BAC9F7F7324), + UINT64_C(0xFF740ECA7DE14002), UINT64_C(0x23D522E8236BAE1F), + UINT64_C(0x37B0E506B9DC274E), UINT64_C(0xEB11C924E756C953), + UINT64_C(0x8FF3BC4205C8FA75), UINT64_C(0x535290605B421468), + UINT64_C(0xE23AF612F9ADD99A), UINT64_C(0x3E9BDA30A7273787), + UINT64_C(0x5A79AF5645B904A1), UINT64_C(0x86D883741B33EABC), + UINT64_C(0x92BD449A818463ED), UINT64_C(0x4E1C68B8DF0E8DF0), + UINT64_C(0x2AFE1DDE3D90BED6), UINT64_C(0xF65F31FC631A50CB), + UINT64_C(0x0234930308FFAD75), UINT64_C(0xDE95BF2156754368), + UINT64_C(0xBA77CA47B4EB704E), UINT64_C(0x66D6E665EA619E53), + UINT64_C(0x72B3218B70D61702), UINT64_C(0xAE120DA92E5CF91F), + UINT64_C(0xCAF078CFCCC2CA39), UINT64_C(0x165154ED92482424), + UINT64_C(0x2D3FBA84A3B2894E), UINT64_C(0xF19E96A6FD386753), + UINT64_C(0x957CE3C01FA65475), UINT64_C(0x49DDCFE2412CBA68), + UINT64_C(0x5DB8080CDB9B3339), UINT64_C(0x8119242E8511DD24), + UINT64_C(0xE5FB5148678FEE02), UINT64_C(0x395A7D6A3905001F), + UINT64_C(0xCD31DF9552E0FDA1), UINT64_C(0x1190F3B70C6A13BC), + UINT64_C(0x757286D1EEF4209A), UINT64_C(0xA9D3AAF3B07ECE87), + UINT64_C(0xBDB66D1D2AC947D6), UINT64_C(0x6117413F7443A9CB), + UINT64_C(0x05F5345996DD9AED), UINT64_C(0xD954187BC85774F0), + UINT64_C(0x683C7E096AB8B902), UINT64_C(0xB49D522B3432571F), + UINT64_C(0xD07F274DD6AC6439), UINT64_C(0x0CDE0B6F88268A24), + UINT64_C(0x18BBCC8112910375), UINT64_C(0xC41AE0A34C1BED68), + UINT64_C(0xA0F895C5AE85DE4E), UINT64_C(0x7C59B9E7F00F3053), + UINT64_C(0x88321B189BEACDED), UINT64_C(0x5493373AC56023F0), + UINT64_C(0x3071425C27FE10D6), UINT64_C(0xECD06E7E7974FECB), + UINT64_C(0xF8B5A990E3C3779A), 
UINT64_C(0x241485B2BD499987), + UINT64_C(0x40F6F0D45FD7AAA1), UINT64_C(0x9C57DCF6015D44BC), + UINT64_C(0x362A2C073D23F174), UINT64_C(0xEA8B002563A91F69), + UINT64_C(0x8E69754381372C4F), UINT64_C(0x52C85961DFBDC252), + UINT64_C(0x46AD9E8F450A4B03), UINT64_C(0x9A0CB2AD1B80A51E), + UINT64_C(0xFEEEC7CBF91E9638), UINT64_C(0x224FEBE9A7947825), + UINT64_C(0xD6244916CC71859B), UINT64_C(0x0A85653492FB6B86), + UINT64_C(0x6E671052706558A0), UINT64_C(0xB2C63C702EEFB6BD), + UINT64_C(0xA6A3FB9EB4583FEC), UINT64_C(0x7A02D7BCEAD2D1F1), + UINT64_C(0x1EE0A2DA084CE2D7), UINT64_C(0xC2418EF856C60CCA), + UINT64_C(0x7329E88AF429C138), UINT64_C(0xAF88C4A8AAA32F25), + UINT64_C(0xCB6AB1CE483D1C03), UINT64_C(0x17CB9DEC16B7F21E), + UINT64_C(0x03AE5A028C007B4F), UINT64_C(0xDF0F7620D28A9552), + UINT64_C(0xBBED03463014A674), UINT64_C(0x674C2F646E9E4869), + UINT64_C(0x93278D9B057BB5D7), UINT64_C(0x4F86A1B95BF15BCA), + UINT64_C(0x2B64D4DFB96F68EC), UINT64_C(0xF7C5F8FDE7E586F1), + UINT64_C(0xE3A03F137D520FA0), UINT64_C(0x3F01133123D8E1BD), + UINT64_C(0x5BE36657C146D29B), UINT64_C(0x87424A759FCC3C86), + UINT64_C(0xBC2CA41CAE3691EC), UINT64_C(0x608D883EF0BC7FF1), + UINT64_C(0x046FFD5812224CD7), UINT64_C(0xD8CED17A4CA8A2CA), + UINT64_C(0xCCAB1694D61F2B9B), UINT64_C(0x100A3AB68895C586), + UINT64_C(0x74E84FD06A0BF6A0), UINT64_C(0xA84963F2348118BD), + UINT64_C(0x5C22C10D5F64E503), UINT64_C(0x8083ED2F01EE0B1E), + UINT64_C(0xE4619849E3703838), UINT64_C(0x38C0B46BBDFAD625), + UINT64_C(0x2CA57385274D5F74), UINT64_C(0xF0045FA779C7B169), + UINT64_C(0x94E62AC19B59824F), UINT64_C(0x484706E3C5D36C52), + UINT64_C(0xF92F6091673CA1A0), UINT64_C(0x258E4CB339B64FBD), + UINT64_C(0x416C39D5DB287C9B), UINT64_C(0x9DCD15F785A29286), + UINT64_C(0x89A8D2191F151BD7), UINT64_C(0x5509FE3B419FF5CA), + UINT64_C(0x31EB8B5DA301C6EC), UINT64_C(0xED4AA77FFD8B28F1), + UINT64_C(0x19210580966ED54F), UINT64_C(0xC58029A2C8E43B52), + UINT64_C(0xA1625CC42A7A0874), UINT64_C(0x7DC370E674F0E669), + UINT64_C(0x69A6B708EE476F38), 
UINT64_C(0xB5079B2AB0CD8125), + UINT64_C(0xD1E5EE4C5253B203), UINT64_C(0x0D44C26E0CD95C1E) + } +}; diff --git a/src/liblzma/check/crc64_table_le.h b/src/liblzma/check/crc64_table_le.h new file mode 100644 index 00000000..3047ea16 --- /dev/null +++ b/src/liblzma/check/crc64_table_le.h @@ -0,0 +1,523 @@ +/* This file has been automatically generated by crc64_tablegen.c. */ + +#include <inttypes.h> + +const uint64_t lzma_crc64_table[4][256] = { + { + UINT64_C(0x0000000000000000), UINT64_C(0xB32E4CBE03A75F6F), + UINT64_C(0xF4843657A840A05B), UINT64_C(0x47AA7AE9ABE7FF34), + UINT64_C(0x7BD0C384FF8F5E33), UINT64_C(0xC8FE8F3AFC28015C), + UINT64_C(0x8F54F5D357CFFE68), UINT64_C(0x3C7AB96D5468A107), + UINT64_C(0xF7A18709FF1EBC66), UINT64_C(0x448FCBB7FCB9E309), + UINT64_C(0x0325B15E575E1C3D), UINT64_C(0xB00BFDE054F94352), + UINT64_C(0x8C71448D0091E255), UINT64_C(0x3F5F08330336BD3A), + UINT64_C(0x78F572DAA8D1420E), UINT64_C(0xCBDB3E64AB761D61), + UINT64_C(0x7D9BA13851336649), UINT64_C(0xCEB5ED8652943926), + UINT64_C(0x891F976FF973C612), UINT64_C(0x3A31DBD1FAD4997D), + UINT64_C(0x064B62BCAEBC387A), UINT64_C(0xB5652E02AD1B6715), + UINT64_C(0xF2CF54EB06FC9821), UINT64_C(0x41E11855055BC74E), + UINT64_C(0x8A3A2631AE2DDA2F), UINT64_C(0x39146A8FAD8A8540), + UINT64_C(0x7EBE1066066D7A74), UINT64_C(0xCD905CD805CA251B), + UINT64_C(0xF1EAE5B551A2841C), UINT64_C(0x42C4A90B5205DB73), + UINT64_C(0x056ED3E2F9E22447), UINT64_C(0xB6409F5CFA457B28), + UINT64_C(0xFB374270A266CC92), UINT64_C(0x48190ECEA1C193FD), + UINT64_C(0x0FB374270A266CC9), UINT64_C(0xBC9D3899098133A6), + UINT64_C(0x80E781F45DE992A1), UINT64_C(0x33C9CD4A5E4ECDCE), + UINT64_C(0x7463B7A3F5A932FA), UINT64_C(0xC74DFB1DF60E6D95), + UINT64_C(0x0C96C5795D7870F4), UINT64_C(0xBFB889C75EDF2F9B), + UINT64_C(0xF812F32EF538D0AF), UINT64_C(0x4B3CBF90F69F8FC0), + UINT64_C(0x774606FDA2F72EC7), UINT64_C(0xC4684A43A15071A8), + UINT64_C(0x83C230AA0AB78E9C), UINT64_C(0x30EC7C140910D1F3), + UINT64_C(0x86ACE348F355AADB), UINT64_C(0x3582AFF6F0F2F5B4), 
+ UINT64_C(0x7228D51F5B150A80), UINT64_C(0xC10699A158B255EF), + UINT64_C(0xFD7C20CC0CDAF4E8), UINT64_C(0x4E526C720F7DAB87), + UINT64_C(0x09F8169BA49A54B3), UINT64_C(0xBAD65A25A73D0BDC), + UINT64_C(0x710D64410C4B16BD), UINT64_C(0xC22328FF0FEC49D2), + UINT64_C(0x85895216A40BB6E6), UINT64_C(0x36A71EA8A7ACE989), + UINT64_C(0x0ADDA7C5F3C4488E), UINT64_C(0xB9F3EB7BF06317E1), + UINT64_C(0xFE5991925B84E8D5), UINT64_C(0x4D77DD2C5823B7BA), + UINT64_C(0x64B62BCAEBC387A1), UINT64_C(0xD7986774E864D8CE), + UINT64_C(0x90321D9D438327FA), UINT64_C(0x231C512340247895), + UINT64_C(0x1F66E84E144CD992), UINT64_C(0xAC48A4F017EB86FD), + UINT64_C(0xEBE2DE19BC0C79C9), UINT64_C(0x58CC92A7BFAB26A6), + UINT64_C(0x9317ACC314DD3BC7), UINT64_C(0x2039E07D177A64A8), + UINT64_C(0x67939A94BC9D9B9C), UINT64_C(0xD4BDD62ABF3AC4F3), + UINT64_C(0xE8C76F47EB5265F4), UINT64_C(0x5BE923F9E8F53A9B), + UINT64_C(0x1C4359104312C5AF), UINT64_C(0xAF6D15AE40B59AC0), + UINT64_C(0x192D8AF2BAF0E1E8), UINT64_C(0xAA03C64CB957BE87), + UINT64_C(0xEDA9BCA512B041B3), UINT64_C(0x5E87F01B11171EDC), + UINT64_C(0x62FD4976457FBFDB), UINT64_C(0xD1D305C846D8E0B4), + UINT64_C(0x96797F21ED3F1F80), UINT64_C(0x2557339FEE9840EF), + UINT64_C(0xEE8C0DFB45EE5D8E), UINT64_C(0x5DA24145464902E1), + UINT64_C(0x1A083BACEDAEFDD5), UINT64_C(0xA9267712EE09A2BA), + UINT64_C(0x955CCE7FBA6103BD), UINT64_C(0x267282C1B9C65CD2), + UINT64_C(0x61D8F8281221A3E6), UINT64_C(0xD2F6B4961186FC89), + UINT64_C(0x9F8169BA49A54B33), UINT64_C(0x2CAF25044A02145C), + UINT64_C(0x6B055FEDE1E5EB68), UINT64_C(0xD82B1353E242B407), + UINT64_C(0xE451AA3EB62A1500), UINT64_C(0x577FE680B58D4A6F), + UINT64_C(0x10D59C691E6AB55B), UINT64_C(0xA3FBD0D71DCDEA34), + UINT64_C(0x6820EEB3B6BBF755), UINT64_C(0xDB0EA20DB51CA83A), + UINT64_C(0x9CA4D8E41EFB570E), UINT64_C(0x2F8A945A1D5C0861), + UINT64_C(0x13F02D374934A966), UINT64_C(0xA0DE61894A93F609), + UINT64_C(0xE7741B60E174093D), UINT64_C(0x545A57DEE2D35652), + UINT64_C(0xE21AC88218962D7A), UINT64_C(0x5134843C1B317215), + 
UINT64_C(0x169EFED5B0D68D21), UINT64_C(0xA5B0B26BB371D24E), + UINT64_C(0x99CA0B06E7197349), UINT64_C(0x2AE447B8E4BE2C26), + UINT64_C(0x6D4E3D514F59D312), UINT64_C(0xDE6071EF4CFE8C7D), + UINT64_C(0x15BB4F8BE788911C), UINT64_C(0xA6950335E42FCE73), + UINT64_C(0xE13F79DC4FC83147), UINT64_C(0x521135624C6F6E28), + UINT64_C(0x6E6B8C0F1807CF2F), UINT64_C(0xDD45C0B11BA09040), + UINT64_C(0x9AEFBA58B0476F74), UINT64_C(0x29C1F6E6B3E0301B), + UINT64_C(0xC96C5795D7870F42), UINT64_C(0x7A421B2BD420502D), + UINT64_C(0x3DE861C27FC7AF19), UINT64_C(0x8EC62D7C7C60F076), + UINT64_C(0xB2BC941128085171), UINT64_C(0x0192D8AF2BAF0E1E), + UINT64_C(0x4638A2468048F12A), UINT64_C(0xF516EEF883EFAE45), + UINT64_C(0x3ECDD09C2899B324), UINT64_C(0x8DE39C222B3EEC4B), + UINT64_C(0xCA49E6CB80D9137F), UINT64_C(0x7967AA75837E4C10), + UINT64_C(0x451D1318D716ED17), UINT64_C(0xF6335FA6D4B1B278), + UINT64_C(0xB199254F7F564D4C), UINT64_C(0x02B769F17CF11223), + UINT64_C(0xB4F7F6AD86B4690B), UINT64_C(0x07D9BA1385133664), + UINT64_C(0x4073C0FA2EF4C950), UINT64_C(0xF35D8C442D53963F), + UINT64_C(0xCF273529793B3738), UINT64_C(0x7C0979977A9C6857), + UINT64_C(0x3BA3037ED17B9763), UINT64_C(0x888D4FC0D2DCC80C), + UINT64_C(0x435671A479AAD56D), UINT64_C(0xF0783D1A7A0D8A02), + UINT64_C(0xB7D247F3D1EA7536), UINT64_C(0x04FC0B4DD24D2A59), + UINT64_C(0x3886B22086258B5E), UINT64_C(0x8BA8FE9E8582D431), + UINT64_C(0xCC0284772E652B05), UINT64_C(0x7F2CC8C92DC2746A), + UINT64_C(0x325B15E575E1C3D0), UINT64_C(0x8175595B76469CBF), + UINT64_C(0xC6DF23B2DDA1638B), UINT64_C(0x75F16F0CDE063CE4), + UINT64_C(0x498BD6618A6E9DE3), UINT64_C(0xFAA59ADF89C9C28C), + UINT64_C(0xBD0FE036222E3DB8), UINT64_C(0x0E21AC88218962D7), + UINT64_C(0xC5FA92EC8AFF7FB6), UINT64_C(0x76D4DE52895820D9), + UINT64_C(0x317EA4BB22BFDFED), UINT64_C(0x8250E80521188082), + UINT64_C(0xBE2A516875702185), UINT64_C(0x0D041DD676D77EEA), + UINT64_C(0x4AAE673FDD3081DE), UINT64_C(0xF9802B81DE97DEB1), + UINT64_C(0x4FC0B4DD24D2A599), UINT64_C(0xFCEEF8632775FAF6), + 
UINT64_C(0xBB44828A8C9205C2), UINT64_C(0x086ACE348F355AAD), + UINT64_C(0x34107759DB5DFBAA), UINT64_C(0x873E3BE7D8FAA4C5), + UINT64_C(0xC094410E731D5BF1), UINT64_C(0x73BA0DB070BA049E), + UINT64_C(0xB86133D4DBCC19FF), UINT64_C(0x0B4F7F6AD86B4690), + UINT64_C(0x4CE50583738CB9A4), UINT64_C(0xFFCB493D702BE6CB), + UINT64_C(0xC3B1F050244347CC), UINT64_C(0x709FBCEE27E418A3), + UINT64_C(0x3735C6078C03E797), UINT64_C(0x841B8AB98FA4B8F8), + UINT64_C(0xADDA7C5F3C4488E3), UINT64_C(0x1EF430E13FE3D78C), + UINT64_C(0x595E4A08940428B8), UINT64_C(0xEA7006B697A377D7), + UINT64_C(0xD60ABFDBC3CBD6D0), UINT64_C(0x6524F365C06C89BF), + UINT64_C(0x228E898C6B8B768B), UINT64_C(0x91A0C532682C29E4), + UINT64_C(0x5A7BFB56C35A3485), UINT64_C(0xE955B7E8C0FD6BEA), + UINT64_C(0xAEFFCD016B1A94DE), UINT64_C(0x1DD181BF68BDCBB1), + UINT64_C(0x21AB38D23CD56AB6), UINT64_C(0x9285746C3F7235D9), + UINT64_C(0xD52F0E859495CAED), UINT64_C(0x6601423B97329582), + UINT64_C(0xD041DD676D77EEAA), UINT64_C(0x636F91D96ED0B1C5), + UINT64_C(0x24C5EB30C5374EF1), UINT64_C(0x97EBA78EC690119E), + UINT64_C(0xAB911EE392F8B099), UINT64_C(0x18BF525D915FEFF6), + UINT64_C(0x5F1528B43AB810C2), UINT64_C(0xEC3B640A391F4FAD), + UINT64_C(0x27E05A6E926952CC), UINT64_C(0x94CE16D091CE0DA3), + UINT64_C(0xD3646C393A29F297), UINT64_C(0x604A2087398EADF8), + UINT64_C(0x5C3099EA6DE60CFF), UINT64_C(0xEF1ED5546E415390), + UINT64_C(0xA8B4AFBDC5A6ACA4), UINT64_C(0x1B9AE303C601F3CB), + UINT64_C(0x56ED3E2F9E224471), UINT64_C(0xE5C372919D851B1E), + UINT64_C(0xA26908783662E42A), UINT64_C(0x114744C635C5BB45), + UINT64_C(0x2D3DFDAB61AD1A42), UINT64_C(0x9E13B115620A452D), + UINT64_C(0xD9B9CBFCC9EDBA19), UINT64_C(0x6A978742CA4AE576), + UINT64_C(0xA14CB926613CF817), UINT64_C(0x1262F598629BA778), + UINT64_C(0x55C88F71C97C584C), UINT64_C(0xE6E6C3CFCADB0723), + UINT64_C(0xDA9C7AA29EB3A624), UINT64_C(0x69B2361C9D14F94B), + UINT64_C(0x2E184CF536F3067F), UINT64_C(0x9D36004B35545910), + UINT64_C(0x2B769F17CF112238), UINT64_C(0x9858D3A9CCB67D57), + 
UINT64_C(0xDFF2A94067518263), UINT64_C(0x6CDCE5FE64F6DD0C), + UINT64_C(0x50A65C93309E7C0B), UINT64_C(0xE388102D33392364), + UINT64_C(0xA4226AC498DEDC50), UINT64_C(0x170C267A9B79833F), + UINT64_C(0xDCD7181E300F9E5E), UINT64_C(0x6FF954A033A8C131), + UINT64_C(0x28532E49984F3E05), UINT64_C(0x9B7D62F79BE8616A), + UINT64_C(0xA707DB9ACF80C06D), UINT64_C(0x14299724CC279F02), + UINT64_C(0x5383EDCD67C06036), UINT64_C(0xE0ADA17364673F59) + }, { + UINT64_C(0x0000000000000000), UINT64_C(0x54E979925CD0F10D), + UINT64_C(0xA9D2F324B9A1E21A), UINT64_C(0xFD3B8AB6E5711317), + UINT64_C(0xC17D4962DC4DDAB1), UINT64_C(0x959430F0809D2BBC), + UINT64_C(0x68AFBA4665EC38AB), UINT64_C(0x3C46C3D4393CC9A6), + UINT64_C(0x10223DEE1795ABE7), UINT64_C(0x44CB447C4B455AEA), + UINT64_C(0xB9F0CECAAE3449FD), UINT64_C(0xED19B758F2E4B8F0), + UINT64_C(0xD15F748CCBD87156), UINT64_C(0x85B60D1E9708805B), + UINT64_C(0x788D87A87279934C), UINT64_C(0x2C64FE3A2EA96241), + UINT64_C(0x20447BDC2F2B57CE), UINT64_C(0x74AD024E73FBA6C3), + UINT64_C(0x899688F8968AB5D4), UINT64_C(0xDD7FF16ACA5A44D9), + UINT64_C(0xE13932BEF3668D7F), UINT64_C(0xB5D04B2CAFB67C72), + UINT64_C(0x48EBC19A4AC76F65), UINT64_C(0x1C02B80816179E68), + UINT64_C(0x3066463238BEFC29), UINT64_C(0x648F3FA0646E0D24), + UINT64_C(0x99B4B516811F1E33), UINT64_C(0xCD5DCC84DDCFEF3E), + UINT64_C(0xF11B0F50E4F32698), UINT64_C(0xA5F276C2B823D795), + UINT64_C(0x58C9FC745D52C482), UINT64_C(0x0C2085E60182358F), + UINT64_C(0x4088F7B85E56AF9C), UINT64_C(0x14618E2A02865E91), + UINT64_C(0xE95A049CE7F74D86), UINT64_C(0xBDB37D0EBB27BC8B), + UINT64_C(0x81F5BEDA821B752D), UINT64_C(0xD51CC748DECB8420), + UINT64_C(0x28274DFE3BBA9737), UINT64_C(0x7CCE346C676A663A), + UINT64_C(0x50AACA5649C3047B), UINT64_C(0x0443B3C41513F576), + UINT64_C(0xF9783972F062E661), UINT64_C(0xAD9140E0ACB2176C), + UINT64_C(0x91D78334958EDECA), UINT64_C(0xC53EFAA6C95E2FC7), + UINT64_C(0x380570102C2F3CD0), UINT64_C(0x6CEC098270FFCDDD), + UINT64_C(0x60CC8C64717DF852), UINT64_C(0x3425F5F62DAD095F), + 
UINT64_C(0xC91E7F40C8DC1A48), UINT64_C(0x9DF706D2940CEB45), + UINT64_C(0xA1B1C506AD3022E3), UINT64_C(0xF558BC94F1E0D3EE), + UINT64_C(0x086336221491C0F9), UINT64_C(0x5C8A4FB0484131F4), + UINT64_C(0x70EEB18A66E853B5), UINT64_C(0x2407C8183A38A2B8), + UINT64_C(0xD93C42AEDF49B1AF), UINT64_C(0x8DD53B3C839940A2), + UINT64_C(0xB193F8E8BAA58904), UINT64_C(0xE57A817AE6757809), + UINT64_C(0x18410BCC03046B1E), UINT64_C(0x4CA8725E5FD49A13), + UINT64_C(0x8111EF70BCAD5F38), UINT64_C(0xD5F896E2E07DAE35), + UINT64_C(0x28C31C54050CBD22), UINT64_C(0x7C2A65C659DC4C2F), + UINT64_C(0x406CA61260E08589), UINT64_C(0x1485DF803C307484), + UINT64_C(0xE9BE5536D9416793), UINT64_C(0xBD572CA48591969E), + UINT64_C(0x9133D29EAB38F4DF), UINT64_C(0xC5DAAB0CF7E805D2), + UINT64_C(0x38E121BA129916C5), UINT64_C(0x6C0858284E49E7C8), + UINT64_C(0x504E9BFC77752E6E), UINT64_C(0x04A7E26E2BA5DF63), + UINT64_C(0xF99C68D8CED4CC74), UINT64_C(0xAD75114A92043D79), + UINT64_C(0xA15594AC938608F6), UINT64_C(0xF5BCED3ECF56F9FB), + UINT64_C(0x088767882A27EAEC), UINT64_C(0x5C6E1E1A76F71BE1), + UINT64_C(0x6028DDCE4FCBD247), UINT64_C(0x34C1A45C131B234A), + UINT64_C(0xC9FA2EEAF66A305D), UINT64_C(0x9D135778AABAC150), + UINT64_C(0xB177A9428413A311), UINT64_C(0xE59ED0D0D8C3521C), + UINT64_C(0x18A55A663DB2410B), UINT64_C(0x4C4C23F46162B006), + UINT64_C(0x700AE020585E79A0), UINT64_C(0x24E399B2048E88AD), + UINT64_C(0xD9D81304E1FF9BBA), UINT64_C(0x8D316A96BD2F6AB7), + UINT64_C(0xC19918C8E2FBF0A4), UINT64_C(0x9570615ABE2B01A9), + UINT64_C(0x684BEBEC5B5A12BE), UINT64_C(0x3CA2927E078AE3B3), + UINT64_C(0x00E451AA3EB62A15), UINT64_C(0x540D28386266DB18), + UINT64_C(0xA936A28E8717C80F), UINT64_C(0xFDDFDB1CDBC73902), + UINT64_C(0xD1BB2526F56E5B43), UINT64_C(0x85525CB4A9BEAA4E), + UINT64_C(0x7869D6024CCFB959), UINT64_C(0x2C80AF90101F4854), + UINT64_C(0x10C66C44292381F2), UINT64_C(0x442F15D675F370FF), + UINT64_C(0xB9149F60908263E8), UINT64_C(0xEDFDE6F2CC5292E5), + UINT64_C(0xE1DD6314CDD0A76A), UINT64_C(0xB5341A8691005667), + 
UINT64_C(0x480F903074714570), UINT64_C(0x1CE6E9A228A1B47D), + UINT64_C(0x20A02A76119D7DDB), UINT64_C(0x744953E44D4D8CD6), + UINT64_C(0x8972D952A83C9FC1), UINT64_C(0xDD9BA0C0F4EC6ECC), + UINT64_C(0xF1FF5EFADA450C8D), UINT64_C(0xA51627688695FD80), + UINT64_C(0x582DADDE63E4EE97), UINT64_C(0x0CC4D44C3F341F9A), + UINT64_C(0x308217980608D63C), UINT64_C(0x646B6E0A5AD82731), + UINT64_C(0x9950E4BCBFA93426), UINT64_C(0xCDB99D2EE379C52B), + UINT64_C(0x90FB71CAD654A0F5), UINT64_C(0xC41208588A8451F8), + UINT64_C(0x392982EE6FF542EF), UINT64_C(0x6DC0FB7C3325B3E2), + UINT64_C(0x518638A80A197A44), UINT64_C(0x056F413A56C98B49), + UINT64_C(0xF854CB8CB3B8985E), UINT64_C(0xACBDB21EEF686953), + UINT64_C(0x80D94C24C1C10B12), UINT64_C(0xD43035B69D11FA1F), + UINT64_C(0x290BBF007860E908), UINT64_C(0x7DE2C69224B01805), + UINT64_C(0x41A405461D8CD1A3), UINT64_C(0x154D7CD4415C20AE), + UINT64_C(0xE876F662A42D33B9), UINT64_C(0xBC9F8FF0F8FDC2B4), + UINT64_C(0xB0BF0A16F97FF73B), UINT64_C(0xE4567384A5AF0636), + UINT64_C(0x196DF93240DE1521), UINT64_C(0x4D8480A01C0EE42C), + UINT64_C(0x71C2437425322D8A), UINT64_C(0x252B3AE679E2DC87), + UINT64_C(0xD810B0509C93CF90), UINT64_C(0x8CF9C9C2C0433E9D), + UINT64_C(0xA09D37F8EEEA5CDC), UINT64_C(0xF4744E6AB23AADD1), + UINT64_C(0x094FC4DC574BBEC6), UINT64_C(0x5DA6BD4E0B9B4FCB), + UINT64_C(0x61E07E9A32A7866D), UINT64_C(0x350907086E777760), + UINT64_C(0xC8328DBE8B066477), UINT64_C(0x9CDBF42CD7D6957A), + UINT64_C(0xD073867288020F69), UINT64_C(0x849AFFE0D4D2FE64), + UINT64_C(0x79A1755631A3ED73), UINT64_C(0x2D480CC46D731C7E), + UINT64_C(0x110ECF10544FD5D8), UINT64_C(0x45E7B682089F24D5), + UINT64_C(0xB8DC3C34EDEE37C2), UINT64_C(0xEC3545A6B13EC6CF), + UINT64_C(0xC051BB9C9F97A48E), UINT64_C(0x94B8C20EC3475583), + UINT64_C(0x698348B826364694), UINT64_C(0x3D6A312A7AE6B799), + UINT64_C(0x012CF2FE43DA7E3F), UINT64_C(0x55C58B6C1F0A8F32), + UINT64_C(0xA8FE01DAFA7B9C25), UINT64_C(0xFC177848A6AB6D28), + UINT64_C(0xF037FDAEA72958A7), UINT64_C(0xA4DE843CFBF9A9AA), + 
UINT64_C(0x59E50E8A1E88BABD), UINT64_C(0x0D0C771842584BB0), + UINT64_C(0x314AB4CC7B648216), UINT64_C(0x65A3CD5E27B4731B), + UINT64_C(0x989847E8C2C5600C), UINT64_C(0xCC713E7A9E159101), + UINT64_C(0xE015C040B0BCF340), UINT64_C(0xB4FCB9D2EC6C024D), + UINT64_C(0x49C73364091D115A), UINT64_C(0x1D2E4AF655CDE057), + UINT64_C(0x216889226CF129F1), UINT64_C(0x7581F0B03021D8FC), + UINT64_C(0x88BA7A06D550CBEB), UINT64_C(0xDC53039489803AE6), + UINT64_C(0x11EA9EBA6AF9FFCD), UINT64_C(0x4503E72836290EC0), + UINT64_C(0xB8386D9ED3581DD7), UINT64_C(0xECD1140C8F88ECDA), + UINT64_C(0xD097D7D8B6B4257C), UINT64_C(0x847EAE4AEA64D471), + UINT64_C(0x794524FC0F15C766), UINT64_C(0x2DAC5D6E53C5366B), + UINT64_C(0x01C8A3547D6C542A), UINT64_C(0x5521DAC621BCA527), + UINT64_C(0xA81A5070C4CDB630), UINT64_C(0xFCF329E2981D473D), + UINT64_C(0xC0B5EA36A1218E9B), UINT64_C(0x945C93A4FDF17F96), + UINT64_C(0x6967191218806C81), UINT64_C(0x3D8E608044509D8C), + UINT64_C(0x31AEE56645D2A803), UINT64_C(0x65479CF41902590E), + UINT64_C(0x987C1642FC734A19), UINT64_C(0xCC956FD0A0A3BB14), + UINT64_C(0xF0D3AC04999F72B2), UINT64_C(0xA43AD596C54F83BF), + UINT64_C(0x59015F20203E90A8), UINT64_C(0x0DE826B27CEE61A5), + UINT64_C(0x218CD888524703E4), UINT64_C(0x7565A11A0E97F2E9), + UINT64_C(0x885E2BACEBE6E1FE), UINT64_C(0xDCB7523EB73610F3), + UINT64_C(0xE0F191EA8E0AD955), UINT64_C(0xB418E878D2DA2858), + UINT64_C(0x492362CE37AB3B4F), UINT64_C(0x1DCA1B5C6B7BCA42), + UINT64_C(0x5162690234AF5051), UINT64_C(0x058B1090687FA15C), + UINT64_C(0xF8B09A268D0EB24B), UINT64_C(0xAC59E3B4D1DE4346), + UINT64_C(0x901F2060E8E28AE0), UINT64_C(0xC4F659F2B4327BED), + UINT64_C(0x39CDD344514368FA), UINT64_C(0x6D24AAD60D9399F7), + UINT64_C(0x414054EC233AFBB6), UINT64_C(0x15A92D7E7FEA0ABB), + UINT64_C(0xE892A7C89A9B19AC), UINT64_C(0xBC7BDE5AC64BE8A1), + UINT64_C(0x803D1D8EFF772107), UINT64_C(0xD4D4641CA3A7D00A), + UINT64_C(0x29EFEEAA46D6C31D), UINT64_C(0x7D0697381A063210), + UINT64_C(0x712612DE1B84079F), UINT64_C(0x25CF6B4C4754F692), + 
UINT64_C(0xD8F4E1FAA225E585), UINT64_C(0x8C1D9868FEF51488), + UINT64_C(0xB05B5BBCC7C9DD2E), UINT64_C(0xE4B2222E9B192C23), + UINT64_C(0x1989A8987E683F34), UINT64_C(0x4D60D10A22B8CE39), + UINT64_C(0x61042F300C11AC78), UINT64_C(0x35ED56A250C15D75), + UINT64_C(0xC8D6DC14B5B04E62), UINT64_C(0x9C3FA586E960BF6F), + UINT64_C(0xA0796652D05C76C9), UINT64_C(0xF4901FC08C8C87C4), + UINT64_C(0x09AB957669FD94D3), UINT64_C(0x5D42ECE4352D65DE) + }, { + UINT64_C(0x0000000000000000), UINT64_C(0x3F0BE14A916A6DCB), + UINT64_C(0x7E17C29522D4DB96), UINT64_C(0x411C23DFB3BEB65D), + UINT64_C(0xFC2F852A45A9B72C), UINT64_C(0xC3246460D4C3DAE7), + UINT64_C(0x823847BF677D6CBA), UINT64_C(0xBD33A6F5F6170171), + UINT64_C(0x6A87A57F245D70DD), UINT64_C(0x558C4435B5371D16), + UINT64_C(0x149067EA0689AB4B), UINT64_C(0x2B9B86A097E3C680), + UINT64_C(0x96A8205561F4C7F1), UINT64_C(0xA9A3C11FF09EAA3A), + UINT64_C(0xE8BFE2C043201C67), UINT64_C(0xD7B4038AD24A71AC), + UINT64_C(0xD50F4AFE48BAE1BA), UINT64_C(0xEA04ABB4D9D08C71), + UINT64_C(0xAB18886B6A6E3A2C), UINT64_C(0x94136921FB0457E7), + UINT64_C(0x2920CFD40D135696), UINT64_C(0x162B2E9E9C793B5D), + UINT64_C(0x57370D412FC78D00), UINT64_C(0x683CEC0BBEADE0CB), + UINT64_C(0xBF88EF816CE79167), UINT64_C(0x80830ECBFD8DFCAC), + UINT64_C(0xC19F2D144E334AF1), UINT64_C(0xFE94CC5EDF59273A), + UINT64_C(0x43A76AAB294E264B), UINT64_C(0x7CAC8BE1B8244B80), + UINT64_C(0x3DB0A83E0B9AFDDD), UINT64_C(0x02BB49749AF09016), + UINT64_C(0x38C63AD73E7BDDF1), UINT64_C(0x07CDDB9DAF11B03A), + UINT64_C(0x46D1F8421CAF0667), UINT64_C(0x79DA19088DC56BAC), + UINT64_C(0xC4E9BFFD7BD26ADD), UINT64_C(0xFBE25EB7EAB80716), + UINT64_C(0xBAFE7D685906B14B), UINT64_C(0x85F59C22C86CDC80), + UINT64_C(0x52419FA81A26AD2C), UINT64_C(0x6D4A7EE28B4CC0E7), + UINT64_C(0x2C565D3D38F276BA), UINT64_C(0x135DBC77A9981B71), + UINT64_C(0xAE6E1A825F8F1A00), UINT64_C(0x9165FBC8CEE577CB), + UINT64_C(0xD079D8177D5BC196), UINT64_C(0xEF72395DEC31AC5D), + UINT64_C(0xEDC9702976C13C4B), UINT64_C(0xD2C29163E7AB5180), + 
UINT64_C(0x93DEB2BC5415E7DD), UINT64_C(0xACD553F6C57F8A16), + UINT64_C(0x11E6F50333688B67), UINT64_C(0x2EED1449A202E6AC), + UINT64_C(0x6FF1379611BC50F1), UINT64_C(0x50FAD6DC80D63D3A), + UINT64_C(0x874ED556529C4C96), UINT64_C(0xB845341CC3F6215D), + UINT64_C(0xF95917C370489700), UINT64_C(0xC652F689E122FACB), + UINT64_C(0x7B61507C1735FBBA), UINT64_C(0x446AB136865F9671), + UINT64_C(0x057692E935E1202C), UINT64_C(0x3A7D73A3A48B4DE7), + UINT64_C(0x718C75AE7CF7BBE2), UINT64_C(0x4E8794E4ED9DD629), + UINT64_C(0x0F9BB73B5E236074), UINT64_C(0x30905671CF490DBF), + UINT64_C(0x8DA3F084395E0CCE), UINT64_C(0xB2A811CEA8346105), + UINT64_C(0xF3B432111B8AD758), UINT64_C(0xCCBFD35B8AE0BA93), + UINT64_C(0x1B0BD0D158AACB3F), UINT64_C(0x2400319BC9C0A6F4), + UINT64_C(0x651C12447A7E10A9), UINT64_C(0x5A17F30EEB147D62), + UINT64_C(0xE72455FB1D037C13), UINT64_C(0xD82FB4B18C6911D8), + UINT64_C(0x9933976E3FD7A785), UINT64_C(0xA6387624AEBDCA4E), + UINT64_C(0xA4833F50344D5A58), UINT64_C(0x9B88DE1AA5273793), + UINT64_C(0xDA94FDC5169981CE), UINT64_C(0xE59F1C8F87F3EC05), + UINT64_C(0x58ACBA7A71E4ED74), UINT64_C(0x67A75B30E08E80BF), + UINT64_C(0x26BB78EF533036E2), UINT64_C(0x19B099A5C25A5B29), + UINT64_C(0xCE049A2F10102A85), UINT64_C(0xF10F7B65817A474E), + UINT64_C(0xB01358BA32C4F113), UINT64_C(0x8F18B9F0A3AE9CD8), + UINT64_C(0x322B1F0555B99DA9), UINT64_C(0x0D20FE4FC4D3F062), + UINT64_C(0x4C3CDD90776D463F), UINT64_C(0x73373CDAE6072BF4), + UINT64_C(0x494A4F79428C6613), UINT64_C(0x7641AE33D3E60BD8), + UINT64_C(0x375D8DEC6058BD85), UINT64_C(0x08566CA6F132D04E), + UINT64_C(0xB565CA530725D13F), UINT64_C(0x8A6E2B19964FBCF4), + UINT64_C(0xCB7208C625F10AA9), UINT64_C(0xF479E98CB49B6762), + UINT64_C(0x23CDEA0666D116CE), UINT64_C(0x1CC60B4CF7BB7B05), + UINT64_C(0x5DDA28934405CD58), UINT64_C(0x62D1C9D9D56FA093), + UINT64_C(0xDFE26F2C2378A1E2), UINT64_C(0xE0E98E66B212CC29), + UINT64_C(0xA1F5ADB901AC7A74), UINT64_C(0x9EFE4CF390C617BF), + UINT64_C(0x9C4505870A3687A9), UINT64_C(0xA34EE4CD9B5CEA62), + 
UINT64_C(0xE252C71228E25C3F), UINT64_C(0xDD592658B98831F4), + UINT64_C(0x606A80AD4F9F3085), UINT64_C(0x5F6161E7DEF55D4E), + UINT64_C(0x1E7D42386D4BEB13), UINT64_C(0x2176A372FC2186D8), + UINT64_C(0xF6C2A0F82E6BF774), UINT64_C(0xC9C941B2BF019ABF), + UINT64_C(0x88D5626D0CBF2CE2), UINT64_C(0xB7DE83279DD54129), + UINT64_C(0x0AED25D26BC24058), UINT64_C(0x35E6C498FAA82D93), + UINT64_C(0x74FAE74749169BCE), UINT64_C(0x4BF1060DD87CF605), + UINT64_C(0xE318EB5CF9EF77C4), UINT64_C(0xDC130A1668851A0F), + UINT64_C(0x9D0F29C9DB3BAC52), UINT64_C(0xA204C8834A51C199), + UINT64_C(0x1F376E76BC46C0E8), UINT64_C(0x203C8F3C2D2CAD23), + UINT64_C(0x6120ACE39E921B7E), UINT64_C(0x5E2B4DA90FF876B5), + UINT64_C(0x899F4E23DDB20719), UINT64_C(0xB694AF694CD86AD2), + UINT64_C(0xF7888CB6FF66DC8F), UINT64_C(0xC8836DFC6E0CB144), + UINT64_C(0x75B0CB09981BB035), UINT64_C(0x4ABB2A430971DDFE), + UINT64_C(0x0BA7099CBACF6BA3), UINT64_C(0x34ACE8D62BA50668), + UINT64_C(0x3617A1A2B155967E), UINT64_C(0x091C40E8203FFBB5), + UINT64_C(0x4800633793814DE8), UINT64_C(0x770B827D02EB2023), + UINT64_C(0xCA382488F4FC2152), UINT64_C(0xF533C5C265964C99), + UINT64_C(0xB42FE61DD628FAC4), UINT64_C(0x8B2407574742970F), + UINT64_C(0x5C9004DD9508E6A3), UINT64_C(0x639BE59704628B68), + UINT64_C(0x2287C648B7DC3D35), UINT64_C(0x1D8C270226B650FE), + UINT64_C(0xA0BF81F7D0A1518F), UINT64_C(0x9FB460BD41CB3C44), + UINT64_C(0xDEA84362F2758A19), UINT64_C(0xE1A3A228631FE7D2), + UINT64_C(0xDBDED18BC794AA35), UINT64_C(0xE4D530C156FEC7FE), + UINT64_C(0xA5C9131EE54071A3), UINT64_C(0x9AC2F254742A1C68), + UINT64_C(0x27F154A1823D1D19), UINT64_C(0x18FAB5EB135770D2), + UINT64_C(0x59E69634A0E9C68F), UINT64_C(0x66ED777E3183AB44), + UINT64_C(0xB15974F4E3C9DAE8), UINT64_C(0x8E5295BE72A3B723), + UINT64_C(0xCF4EB661C11D017E), UINT64_C(0xF045572B50776CB5), + UINT64_C(0x4D76F1DEA6606DC4), UINT64_C(0x727D1094370A000F), + UINT64_C(0x3361334B84B4B652), UINT64_C(0x0C6AD20115DEDB99), + UINT64_C(0x0ED19B758F2E4B8F), UINT64_C(0x31DA7A3F1E442644), + 
UINT64_C(0x70C659E0ADFA9019), UINT64_C(0x4FCDB8AA3C90FDD2), + UINT64_C(0xF2FE1E5FCA87FCA3), UINT64_C(0xCDF5FF155BED9168), + UINT64_C(0x8CE9DCCAE8532735), UINT64_C(0xB3E23D8079394AFE), + UINT64_C(0x64563E0AAB733B52), UINT64_C(0x5B5DDF403A195699), + UINT64_C(0x1A41FC9F89A7E0C4), UINT64_C(0x254A1DD518CD8D0F), + UINT64_C(0x9879BB20EEDA8C7E), UINT64_C(0xA7725A6A7FB0E1B5), + UINT64_C(0xE66E79B5CC0E57E8), UINT64_C(0xD96598FF5D643A23), + UINT64_C(0x92949EF28518CC26), UINT64_C(0xAD9F7FB81472A1ED), + UINT64_C(0xEC835C67A7CC17B0), UINT64_C(0xD388BD2D36A67A7B), + UINT64_C(0x6EBB1BD8C0B17B0A), UINT64_C(0x51B0FA9251DB16C1), + UINT64_C(0x10ACD94DE265A09C), UINT64_C(0x2FA73807730FCD57), + UINT64_C(0xF8133B8DA145BCFB), UINT64_C(0xC718DAC7302FD130), + UINT64_C(0x8604F9188391676D), UINT64_C(0xB90F185212FB0AA6), + UINT64_C(0x043CBEA7E4EC0BD7), UINT64_C(0x3B375FED7586661C), + UINT64_C(0x7A2B7C32C638D041), UINT64_C(0x45209D785752BD8A), + UINT64_C(0x479BD40CCDA22D9C), UINT64_C(0x789035465CC84057), + UINT64_C(0x398C1699EF76F60A), UINT64_C(0x0687F7D37E1C9BC1), + UINT64_C(0xBBB45126880B9AB0), UINT64_C(0x84BFB06C1961F77B), + UINT64_C(0xC5A393B3AADF4126), UINT64_C(0xFAA872F93BB52CED), + UINT64_C(0x2D1C7173E9FF5D41), UINT64_C(0x121790397895308A), + UINT64_C(0x530BB3E6CB2B86D7), UINT64_C(0x6C0052AC5A41EB1C), + UINT64_C(0xD133F459AC56EA6D), UINT64_C(0xEE3815133D3C87A6), + UINT64_C(0xAF2436CC8E8231FB), UINT64_C(0x902FD7861FE85C30), + UINT64_C(0xAA52A425BB6311D7), UINT64_C(0x9559456F2A097C1C), + UINT64_C(0xD44566B099B7CA41), UINT64_C(0xEB4E87FA08DDA78A), + UINT64_C(0x567D210FFECAA6FB), UINT64_C(0x6976C0456FA0CB30), + UINT64_C(0x286AE39ADC1E7D6D), UINT64_C(0x176102D04D7410A6), + UINT64_C(0xC0D5015A9F3E610A), UINT64_C(0xFFDEE0100E540CC1), + UINT64_C(0xBEC2C3CFBDEABA9C), UINT64_C(0x81C922852C80D757), + UINT64_C(0x3CFA8470DA97D626), UINT64_C(0x03F1653A4BFDBBED), + UINT64_C(0x42ED46E5F8430DB0), UINT64_C(0x7DE6A7AF6929607B), + UINT64_C(0x7F5DEEDBF3D9F06D), UINT64_C(0x40560F9162B39DA6), + 
UINT64_C(0x014A2C4ED10D2BFB), UINT64_C(0x3E41CD0440674630), + UINT64_C(0x83726BF1B6704741), UINT64_C(0xBC798ABB271A2A8A), + UINT64_C(0xFD65A96494A49CD7), UINT64_C(0xC26E482E05CEF11C), + UINT64_C(0x15DA4BA4D78480B0), UINT64_C(0x2AD1AAEE46EEED7B), + UINT64_C(0x6BCD8931F5505B26), UINT64_C(0x54C6687B643A36ED), + UINT64_C(0xE9F5CE8E922D379C), UINT64_C(0xD6FE2FC403475A57), + UINT64_C(0x97E20C1BB0F9EC0A), UINT64_C(0xA8E9ED51219381C1) + }, { + UINT64_C(0x0000000000000000), UINT64_C(0x1DEE8A5E222CA1DC), + UINT64_C(0x3BDD14BC445943B8), UINT64_C(0x26339EE26675E264), + UINT64_C(0x77BA297888B28770), UINT64_C(0x6A54A326AA9E26AC), + UINT64_C(0x4C673DC4CCEBC4C8), UINT64_C(0x5189B79AEEC76514), + UINT64_C(0xEF7452F111650EE0), UINT64_C(0xF29AD8AF3349AF3C), + UINT64_C(0xD4A9464D553C4D58), UINT64_C(0xC947CC137710EC84), + UINT64_C(0x98CE7B8999D78990), UINT64_C(0x8520F1D7BBFB284C), + UINT64_C(0xA3136F35DD8ECA28), UINT64_C(0xBEFDE56BFFA26BF4), + UINT64_C(0x4C300AC98DC40345), UINT64_C(0x51DE8097AFE8A299), + UINT64_C(0x77ED1E75C99D40FD), UINT64_C(0x6A03942BEBB1E121), + UINT64_C(0x3B8A23B105768435), UINT64_C(0x2664A9EF275A25E9), + UINT64_C(0x0057370D412FC78D), UINT64_C(0x1DB9BD5363036651), + UINT64_C(0xA34458389CA10DA5), UINT64_C(0xBEAAD266BE8DAC79), + UINT64_C(0x98994C84D8F84E1D), UINT64_C(0x8577C6DAFAD4EFC1), + UINT64_C(0xD4FE714014138AD5), UINT64_C(0xC910FB1E363F2B09), + UINT64_C(0xEF2365FC504AC96D), UINT64_C(0xF2CDEFA2726668B1), + UINT64_C(0x986015931B88068A), UINT64_C(0x858E9FCD39A4A756), + UINT64_C(0xA3BD012F5FD14532), UINT64_C(0xBE538B717DFDE4EE), + UINT64_C(0xEFDA3CEB933A81FA), UINT64_C(0xF234B6B5B1162026), + UINT64_C(0xD4072857D763C242), UINT64_C(0xC9E9A209F54F639E), + UINT64_C(0x771447620AED086A), UINT64_C(0x6AFACD3C28C1A9B6), + UINT64_C(0x4CC953DE4EB44BD2), UINT64_C(0x5127D9806C98EA0E), + UINT64_C(0x00AE6E1A825F8F1A), UINT64_C(0x1D40E444A0732EC6), + UINT64_C(0x3B737AA6C606CCA2), UINT64_C(0x269DF0F8E42A6D7E), + UINT64_C(0xD4501F5A964C05CF), UINT64_C(0xC9BE9504B460A413), + 
UINT64_C(0xEF8D0BE6D2154677), UINT64_C(0xF26381B8F039E7AB), + UINT64_C(0xA3EA36221EFE82BF), UINT64_C(0xBE04BC7C3CD22363), + UINT64_C(0x9837229E5AA7C107), UINT64_C(0x85D9A8C0788B60DB), + UINT64_C(0x3B244DAB87290B2F), UINT64_C(0x26CAC7F5A505AAF3), + UINT64_C(0x00F95917C3704897), UINT64_C(0x1D17D349E15CE94B), + UINT64_C(0x4C9E64D30F9B8C5F), UINT64_C(0x5170EE8D2DB72D83), + UINT64_C(0x7743706F4BC2CFE7), UINT64_C(0x6AADFA3169EE6E3B), + UINT64_C(0xA218840D981E1391), UINT64_C(0xBFF60E53BA32B24D), + UINT64_C(0x99C590B1DC475029), UINT64_C(0x842B1AEFFE6BF1F5), + UINT64_C(0xD5A2AD7510AC94E1), UINT64_C(0xC84C272B3280353D), + UINT64_C(0xEE7FB9C954F5D759), UINT64_C(0xF391339776D97685), + UINT64_C(0x4D6CD6FC897B1D71), UINT64_C(0x50825CA2AB57BCAD), + UINT64_C(0x76B1C240CD225EC9), UINT64_C(0x6B5F481EEF0EFF15), + UINT64_C(0x3AD6FF8401C99A01), UINT64_C(0x273875DA23E53BDD), + UINT64_C(0x010BEB384590D9B9), UINT64_C(0x1CE5616667BC7865), + UINT64_C(0xEE288EC415DA10D4), UINT64_C(0xF3C6049A37F6B108), + UINT64_C(0xD5F59A785183536C), UINT64_C(0xC81B102673AFF2B0), + UINT64_C(0x9992A7BC9D6897A4), UINT64_C(0x847C2DE2BF443678), + UINT64_C(0xA24FB300D931D41C), UINT64_C(0xBFA1395EFB1D75C0), + UINT64_C(0x015CDC3504BF1E34), UINT64_C(0x1CB2566B2693BFE8), + UINT64_C(0x3A81C88940E65D8C), UINT64_C(0x276F42D762CAFC50), + UINT64_C(0x76E6F54D8C0D9944), UINT64_C(0x6B087F13AE213898), + UINT64_C(0x4D3BE1F1C854DAFC), UINT64_C(0x50D56BAFEA787B20), + UINT64_C(0x3A78919E8396151B), UINT64_C(0x27961BC0A1BAB4C7), + UINT64_C(0x01A58522C7CF56A3), UINT64_C(0x1C4B0F7CE5E3F77F), + UINT64_C(0x4DC2B8E60B24926B), UINT64_C(0x502C32B8290833B7), + UINT64_C(0x761FAC5A4F7DD1D3), UINT64_C(0x6BF126046D51700F), + UINT64_C(0xD50CC36F92F31BFB), UINT64_C(0xC8E24931B0DFBA27), + UINT64_C(0xEED1D7D3D6AA5843), UINT64_C(0xF33F5D8DF486F99F), + UINT64_C(0xA2B6EA171A419C8B), UINT64_C(0xBF586049386D3D57), + UINT64_C(0x996BFEAB5E18DF33), UINT64_C(0x848574F57C347EEF), + UINT64_C(0x76489B570E52165E), UINT64_C(0x6BA611092C7EB782), + 
UINT64_C(0x4D958FEB4A0B55E6), UINT64_C(0x507B05B56827F43A), + UINT64_C(0x01F2B22F86E0912E), UINT64_C(0x1C1C3871A4CC30F2), + UINT64_C(0x3A2FA693C2B9D296), UINT64_C(0x27C12CCDE095734A), + UINT64_C(0x993CC9A61F3718BE), UINT64_C(0x84D243F83D1BB962), + UINT64_C(0xA2E1DD1A5B6E5B06), UINT64_C(0xBF0F57447942FADA), + UINT64_C(0xEE86E0DE97859FCE), UINT64_C(0xF3686A80B5A93E12), + UINT64_C(0xD55BF462D3DCDC76), UINT64_C(0xC8B57E3CF1F07DAA), + UINT64_C(0xD6E9A7309F3239A7), UINT64_C(0xCB072D6EBD1E987B), + UINT64_C(0xED34B38CDB6B7A1F), UINT64_C(0xF0DA39D2F947DBC3), + UINT64_C(0xA1538E481780BED7), UINT64_C(0xBCBD041635AC1F0B), + UINT64_C(0x9A8E9AF453D9FD6F), UINT64_C(0x876010AA71F55CB3), + UINT64_C(0x399DF5C18E573747), UINT64_C(0x24737F9FAC7B969B), + UINT64_C(0x0240E17DCA0E74FF), UINT64_C(0x1FAE6B23E822D523), + UINT64_C(0x4E27DCB906E5B037), UINT64_C(0x53C956E724C911EB), + UINT64_C(0x75FAC80542BCF38F), UINT64_C(0x6814425B60905253), + UINT64_C(0x9AD9ADF912F63AE2), UINT64_C(0x873727A730DA9B3E), + UINT64_C(0xA104B94556AF795A), UINT64_C(0xBCEA331B7483D886), + UINT64_C(0xED6384819A44BD92), UINT64_C(0xF08D0EDFB8681C4E), + UINT64_C(0xD6BE903DDE1DFE2A), UINT64_C(0xCB501A63FC315FF6), + UINT64_C(0x75ADFF0803933402), UINT64_C(0x6843755621BF95DE), + UINT64_C(0x4E70EBB447CA77BA), UINT64_C(0x539E61EA65E6D666), + UINT64_C(0x0217D6708B21B372), UINT64_C(0x1FF95C2EA90D12AE), + UINT64_C(0x39CAC2CCCF78F0CA), UINT64_C(0x24244892ED545116), + UINT64_C(0x4E89B2A384BA3F2D), UINT64_C(0x536738FDA6969EF1), + UINT64_C(0x7554A61FC0E37C95), UINT64_C(0x68BA2C41E2CFDD49), + UINT64_C(0x39339BDB0C08B85D), UINT64_C(0x24DD11852E241981), + UINT64_C(0x02EE8F674851FBE5), UINT64_C(0x1F0005396A7D5A39), + UINT64_C(0xA1FDE05295DF31CD), UINT64_C(0xBC136A0CB7F39011), + UINT64_C(0x9A20F4EED1867275), UINT64_C(0x87CE7EB0F3AAD3A9), + UINT64_C(0xD647C92A1D6DB6BD), UINT64_C(0xCBA943743F411761), + UINT64_C(0xED9ADD965934F505), UINT64_C(0xF07457C87B1854D9), + UINT64_C(0x02B9B86A097E3C68), UINT64_C(0x1F5732342B529DB4), + 
UINT64_C(0x3964ACD64D277FD0), UINT64_C(0x248A26886F0BDE0C), + UINT64_C(0x7503911281CCBB18), UINT64_C(0x68ED1B4CA3E01AC4), + UINT64_C(0x4EDE85AEC595F8A0), UINT64_C(0x53300FF0E7B9597C), + UINT64_C(0xEDCDEA9B181B3288), UINT64_C(0xF02360C53A379354), + UINT64_C(0xD610FE275C427130), UINT64_C(0xCBFE74797E6ED0EC), + UINT64_C(0x9A77C3E390A9B5F8), UINT64_C(0x879949BDB2851424), + UINT64_C(0xA1AAD75FD4F0F640), UINT64_C(0xBC445D01F6DC579C), + UINT64_C(0x74F1233D072C2A36), UINT64_C(0x691FA96325008BEA), + UINT64_C(0x4F2C37814375698E), UINT64_C(0x52C2BDDF6159C852), + UINT64_C(0x034B0A458F9EAD46), UINT64_C(0x1EA5801BADB20C9A), + UINT64_C(0x38961EF9CBC7EEFE), UINT64_C(0x257894A7E9EB4F22), + UINT64_C(0x9B8571CC164924D6), UINT64_C(0x866BFB923465850A), + UINT64_C(0xA05865705210676E), UINT64_C(0xBDB6EF2E703CC6B2), + UINT64_C(0xEC3F58B49EFBA3A6), UINT64_C(0xF1D1D2EABCD7027A), + UINT64_C(0xD7E24C08DAA2E01E), UINT64_C(0xCA0CC656F88E41C2), + UINT64_C(0x38C129F48AE82973), UINT64_C(0x252FA3AAA8C488AF), + UINT64_C(0x031C3D48CEB16ACB), UINT64_C(0x1EF2B716EC9DCB17), + UINT64_C(0x4F7B008C025AAE03), UINT64_C(0x52958AD220760FDF), + UINT64_C(0x74A614304603EDBB), UINT64_C(0x69489E6E642F4C67), + UINT64_C(0xD7B57B059B8D2793), UINT64_C(0xCA5BF15BB9A1864F), + UINT64_C(0xEC686FB9DFD4642B), UINT64_C(0xF186E5E7FDF8C5F7), + UINT64_C(0xA00F527D133FA0E3), UINT64_C(0xBDE1D8233113013F), + UINT64_C(0x9BD246C15766E35B), UINT64_C(0x863CCC9F754A4287), + UINT64_C(0xEC9136AE1CA42CBC), UINT64_C(0xF17FBCF03E888D60), + UINT64_C(0xD74C221258FD6F04), UINT64_C(0xCAA2A84C7AD1CED8), + UINT64_C(0x9B2B1FD69416ABCC), UINT64_C(0x86C59588B63A0A10), + UINT64_C(0xA0F60B6AD04FE874), UINT64_C(0xBD188134F26349A8), + UINT64_C(0x03E5645F0DC1225C), UINT64_C(0x1E0BEE012FED8380), + UINT64_C(0x383870E3499861E4), UINT64_C(0x25D6FABD6BB4C038), + UINT64_C(0x745F4D278573A52C), UINT64_C(0x69B1C779A75F04F0), + UINT64_C(0x4F82599BC12AE694), UINT64_C(0x526CD3C5E3064748), + UINT64_C(0xA0A13C6791602FF9), UINT64_C(0xBD4FB639B34C8E25), + 
UINT64_C(0x9B7C28DBD5396C41), UINT64_C(0x8692A285F715CD9D), + UINT64_C(0xD71B151F19D2A889), UINT64_C(0xCAF59F413BFE0955), + UINT64_C(0xECC601A35D8BEB31), UINT64_C(0xF1288BFD7FA74AED), + UINT64_C(0x4FD56E9680052119), UINT64_C(0x523BE4C8A22980C5), + UINT64_C(0x74087A2AC45C62A1), UINT64_C(0x69E6F074E670C37D), + UINT64_C(0x386F47EE08B7A669), UINT64_C(0x2581CDB02A9B07B5), + UINT64_C(0x03B253524CEEE5D1), UINT64_C(0x1E5CD90C6EC2440D) + } +}; diff --git a/src/liblzma/check/crc64_tablegen.c b/src/liblzma/check/crc64_tablegen.c new file mode 100644 index 00000000..0f1005f4 --- /dev/null +++ b/src/liblzma/check/crc64_tablegen.c @@ -0,0 +1,56 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file crc64_tablegen.c +/// \brief Generates CRC64 crc64_table.c +/// +/// Compiling: gcc -std=c99 -o crc64_tablegen crc64_tablegen.c crc64_init.c +/// Add -DWORDS_BIGENDIAN to generate big endian table. +// +// This code has been put into the public domain. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// +/////////////////////////////////////////////////////////////////////////////// + +#include <sys/types.h> +#include <inttypes.h> +#include <stdio.h> + + +extern void lzma_crc64_init(void); + +extern uint64_t lzma_crc64_table[4][256]; + + +int +main() +{ + lzma_crc64_init(); + + printf("/* This file has been automatically generated by " + "crc64_tablegen.c. 
/*
 * Speed-optimized CRC64 using slicing-by-four algorithm
 * Instruction set: i386
 * Optimized for:   i686
 *
 * This code has been put into the public domain by its authors:
 * Igor Pavlov <http://7-zip.org/>
 * Lasse Collin <lasse.collin@tukaani.org>
 *
 * This code needs lzma_crc64_table, which can be created using the
 * following C code:

uint64_t lzma_crc64_table[4][256];

void
init_table(void)
{
	static const uint64_t poly64 = UINT64_C(0xC96C5795D7870F42);

	for (size_t s = 0; s < 4; ++s) {
		for (size_t b = 0; b < 256; ++b) {
			uint64_t r = s == 0 ? b : lzma_crc64_table[s - 1][b];

			for (size_t i = 0; i < 8; ++i) {
				if (r & 1)
					r = (r >> 1) ^ poly64;
				else
					r >>= 1;
			}

			lzma_crc64_table[s][b] = r;
		}
	}
}

 * The prototype of the CRC64 function:
 * extern uint64_t lzma_crc64(const uint8_t *buf, size_t size, uint64_t crc);
 *
 * Each table entry is eight bytes, so table s starts at byte offset
 * s * 256 * 8 = s * 0x800 from lzma_crc64_table. The low half of an
 * entry is at offset 0 and the high half at offset 4.
 */

	.text
	.global	lzma_crc64
	.type	lzma_crc64, @function

	.align	16
lzma_crc64:
	/*
	 * Register usage:
	 * %eax crc LSB
	 * %edx crc MSB
	 * %esi buf
	 * %edi size or buf + size
	 * %ebx lzma_crc64_table
	 * %ebp Table index
	 * %ecx Temporary
	 */
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	pushl	%ebp

	/* Four registers were pushed, so the first argument is at 0x14. */
	movl	0x14(%esp), %esi /* buf */
	movl	0x18(%esp), %edi /* size */
	movl	0x1C(%esp), %eax /* crc LSB */
	movl	0x20(%esp), %edx /* crc MSB */

	/*
	 * Store the address of lzma_crc64_table to %ebx. This is needed to
	 * get position-independent code (PIC).
	 */
	call	.L_PIC
.L_PIC:
	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L_PIC], %ebx
	movl	lzma_crc64_table@GOT(%ebx), %ebx

	/* Complement the initial value. */
	notl	%eax
	notl	%edx

.L_align:
	/*
	 * Check if there is enough input to use slicing-by-four.
	 * We need eight bytes, because the loop pre-reads four bytes.
	 *
	 * The size is a size_t, so the comparison must be unsigned (jb);
	 * a signed branch would treat sizes of 2 GiB and above as negative.
	 */
	cmpl	$8, %edi
	jb	.L_rest

	/* Check if we have reached alignment of four bytes. */
	testl	$3, %esi
	jz	.L_slice

	/* Calculate CRC of the next input byte. */
	movzbl	(%esi), %ebp
	incl	%esi
	movzbl	%al, %ecx
	xorl	%ecx, %ebp
	shrdl	$8, %edx, %eax
	xorl	(%ebx, %ebp, 8), %eax
	shrl	$8, %edx
	xorl	4(%ebx, %ebp, 8), %edx
	decl	%edi
	jmp	.L_align

.L_slice:
	/*
	 * If we get here, there's at least eight bytes of aligned input
	 * available. Make %edi multiple of four bytes. Store the possible
	 * remainder over the "size" variable in the argument stack.
	 */
	movl	%edi, 0x18(%esp)
	andl	$-4, %edi
	subl	%edi, 0x18(%esp)

	/*
	 * Let %edi be buf + size - 4 while running the main loop. This way
	 * we can compare against it to determine when to exit the loop.
	 */
	addl	%esi, %edi
	subl	$4, %edi

	/* Read in the first four aligned bytes. */
	movl	(%esi), %ecx

.L_loop:
	xorl	%eax, %ecx
	movzbl	%cl, %ebp
	movl	0x1800(%ebx, %ebp, 8), %eax
	xorl	%edx, %eax
	movl	0x1804(%ebx, %ebp, 8), %edx
	movzbl	%ch, %ebp
	xorl	0x1000(%ebx, %ebp, 8), %eax
	xorl	0x1004(%ebx, %ebp, 8), %edx
	shrl	$16, %ecx
	movzbl	%cl, %ebp
	xorl	0x0800(%ebx, %ebp, 8), %eax
	xorl	0x0804(%ebx, %ebp, 8), %edx
	movzbl	%ch, %ebp
	addl	$4, %esi
	xorl	(%ebx, %ebp, 8), %eax
	xorl	4(%ebx, %ebp, 8), %edx

	/* Check for end of aligned input. */
	cmpl	%edi, %esi

	/*
	 * Copy the next four input bytes to %ecx. It is slightly faster to
	 * read them here than at the top of the loop. movl does not touch
	 * the flags, so the result of the cmpl above is still valid.
	 */
	movl	(%esi), %ecx

	/*
	 * %esi and %edi are addresses, so the branch must be unsigned (jb).
	 * With a signed branch the loop would stop too early for a buffer
	 * that crosses the 0x80000000 boundary.
	 */
	jb	.L_loop

	/*
	 * Process the remaining four bytes, which we have already
	 * copied to %ecx.
	 */
	xorl	%eax, %ecx
	movzbl	%cl, %ebp
	movl	0x1800(%ebx, %ebp, 8), %eax
	xorl	%edx, %eax
	movl	0x1804(%ebx, %ebp, 8), %edx
	movzbl	%ch, %ebp
	xorl	0x1000(%ebx, %ebp, 8), %eax
	xorl	0x1004(%ebx, %ebp, 8), %edx
	shrl	$16, %ecx
	movzbl	%cl, %ebp
	xorl	0x0800(%ebx, %ebp, 8), %eax
	xorl	0x0804(%ebx, %ebp, 8), %edx
	movzbl	%ch, %ebp
	addl	$4, %esi
	xorl	(%ebx, %ebp, 8), %eax
	xorl	4(%ebx, %ebp, 8), %edx

	/* Copy the number of remaining bytes to %edi. */
	movl	0x18(%esp), %edi

.L_rest:
	/* Check for end of input. */
	testl	%edi, %edi
	jz	.L_return

	/* Calculate CRC of the next input byte. */
	movzbl	(%esi), %ebp
	incl	%esi
	movzbl	%al, %ecx
	xorl	%ecx, %ebp
	shrdl	$8, %edx, %eax
	xorl	(%ebx, %ebp, 8), %eax
	shrl	$8, %edx
	xorl	4(%ebx, %ebp, 8), %edx
	decl	%edi
	jmp	.L_rest

.L_return:
	/* Complement the final value. */
	notl	%eax
	notl	%edx

	popl	%ebp
	popl	%edi
	popl	%esi
	popl	%ebx
	ret

	/* The size must be attached to the symbol defined in this file. */
	.size	lzma_crc64, .-lzma_crc64
// Endian-dependent helpers shared by the CRC32 and CRC64 code.
//
// A(x), B(x), C(x) and D(x) extract the four bytes of a 32-bit word in
// memory order: A is the byte stored at the lowest address and D the byte
// stored at the highest address. The outermost extractor of each variant
// (D on little endian, A on big endian) is a plain shift without a mask,
// so it yields a single byte only when x is exactly 32 bits wide.
//
// S8(x) and S32(x) shift x by 8 or 32 bits in the direction that discards
// the bytes that come first in memory order.

#ifndef WORDS_BIGENDIAN
	// Little endian: the first byte in memory is the least
	// significant one, so consuming input means shifting right.
#	define A(x) ((x) & 0xFF)
#	define B(x) (((x) >> 8) & 0xFF)
#	define C(x) (((x) >> 16) & 0xFF)
#	define D(x) ((x) >> 24)

#	define S8(x) ((x) >> 8)
#	define S32(x) ((x) >> 32)

#else
	// Big endian: the first byte in memory is the most significant
	// one, so consuming input means shifting left.
#	include "check_byteswap.h"

#	define A(x) ((x) >> 24)
#	define B(x) (((x) >> 16) & 0xFF)
#	define C(x) (((x) >> 8) & 0xFF)
#	define D(x) ((x) & 0xFF)

#	define S8(x) ((x) << 8)
#	define S32(x) ((x) << 32)
#endif
+#define rotr_32(num, amount) ((num) >> (amount) | (num) << (32 - (amount))) + +#define blk0(i) (W[i] = data[i]) +#define blk2(i) (W[i & 15] += s1(W[(i - 2) & 15]) + W[(i - 7) & 15] \ + + s0(W[(i - 15) & 15])) + +#define Ch(x, y, z) (z ^ (x & (y ^ z))) +#define Maj(x, y, z) ((x & y) | (z & (x | y))) + +#define a(i) T[(0 - i) & 7] +#define b(i) T[(1 - i) & 7] +#define c(i) T[(2 - i) & 7] +#define d(i) T[(3 - i) & 7] +#define e(i) T[(4 - i) & 7] +#define f(i) T[(5 - i) & 7] +#define g(i) T[(6 - i) & 7] +#define h(i) T[(7 - i) & 7] + +#define R(i) \ + h(i) += S1(e(i)) + Ch(e(i), f(i), g(i)) + SHA256_K[i + j] \ + + (j ? blk2(i) : blk0(i)); \ + d(i) += h(i); \ + h(i) += S0(a(i)) + Maj(a(i), b(i), c(i)) + +#define S0(x) (rotr_32(x, 2) ^ rotr_32(x, 13) ^ rotr_32(x, 22)) +#define S1(x) (rotr_32(x, 6) ^ rotr_32(x, 11) ^ rotr_32(x, 25)) +#define s0(x) (rotr_32(x, 7) ^ rotr_32(x, 18) ^ (x >> 3)) +#define s1(x) (rotr_32(x, 17) ^ rotr_32(x, 19) ^ (x >> 10)) + + +static const uint32_t SHA256_K[64] = { + 0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5, + 0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5, + 0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3, + 0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174, + 0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC, + 0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA, + 0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7, + 0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967, + 0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13, + 0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85, + 0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3, + 0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070, + 0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5, + 0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3, + 0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208, + 0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2, +}; + + +static void +transform(uint32_t state[static 8], const uint32_t data[static 16]) +{ + uint32_t W[16]; + uint32_t T[8]; + + // Copy state[] to working vars. 
+ memcpy(T, state, sizeof(T)); + + // 64 operations, partially loop unrolled + for (unsigned int j = 0; j < 64; j += 16) { + R( 0); R( 1); R( 2); R( 3); + R( 4); R( 5); R( 6); R( 7); + R( 8); R( 9); R(10); R(11); + R(12); R(13); R(14); R(15); + } + + // Add the working vars back into state[]. + state[0] += a(0); + state[1] += b(0); + state[2] += c(0); + state[3] += d(0); + state[4] += e(0); + state[5] += f(0); + state[6] += g(0); + state[7] += h(0); +} + + +static void +process(lzma_sha256 *sha256) +{ +#ifdef WORDS_BIGENDIAN + transform(sha256->state, (uint32_t *)(sha256->buffer)); + +#else + uint32_t data[16]; + + for (size_t i = 0; i < 16; ++i) + data[i] = bswap_32(*((uint32_t*)(sha256->buffer) + i)); + + transform(sha256->state, data); +#endif + + return; +} + + +extern void +lzma_sha256_init(lzma_sha256 *sha256) +{ + static const uint32_t s[8] = { + 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, + 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19, + }; + + memcpy(sha256->state, s, sizeof(s)); + sha256->size = 0; + + return; +} + + +extern void +lzma_sha256_update(const uint8_t *buf, size_t size, lzma_sha256 *sha256) +{ + // Copy the input data into a properly aligned temporary buffer. + // This way we can be called with arbitrarily sized buffers + // (no need to be multiple of 64 bytes), and the code works also + // on architectures that don't allow unaligned memory access. + while (size > 0) { + const size_t copy_start = sha256->size & 0x3F; + size_t copy_size = 64 - copy_start; + if (copy_size > size) + copy_size = size; + + memcpy(sha256->buffer + copy_start, buf, copy_size); + + buf += copy_size; + size -= copy_size; + sha256->size += copy_size; + + if ((sha256->size & 0x3F) == 0) + process(sha256); + } + + return; +} + + +extern void +lzma_sha256_finish(lzma_sha256 *sha256) +{ + // Add padding as described in RFC 3174 (it describes SHA-1 but + // the same padding style is used for SHA-256 too). 
+ size_t pos = sha256->size & 0x3F; + sha256->buffer[pos++] = 0x80; + + while (pos != 64 - 8) { + if (pos == 64) { + process(sha256); + pos = 0; + } + + sha256->buffer[pos++] = 0x00; + } + + // Convert the message size from bytes to bits. + sha256->size *= 8; + +#ifdef WORDS_BIGENDIAN + *(uint64_t *)(sha256->buffer + 64 - 8) = sha256->size; +#else + *(uint64_t *)(sha256->buffer + 64 - 8) = bswap_64(sha256->size); +#endif + + process(sha256); + + for (size_t i = 0; i < 8; ++i) +#ifdef WORDS_BIGENDIAN + ((uint32_t *)(sha256->buffer))[i] = sha256->state[i]; +#else + ((uint32_t *)(sha256->buffer))[i] = bswap_32(sha256->state[i]); +#endif + + return; +} diff --git a/src/liblzma/common/Makefile.am b/src/liblzma/common/Makefile.am new file mode 100644 index 00000000..4eb9d54e --- /dev/null +++ b/src/liblzma/common/Makefile.am @@ -0,0 +1,94 @@ +## +## Copyright (C) 2007 Lasse Collin +## +## This library is free software; you can redistribute it and/or +## modify it under the terms of the GNU Lesser General Public +## License as published by the Free Software Foundation; either +## version 2.1 of the License, or (at your option) any later version. +## +## This library is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## Lesser General Public License for more details. 
## libcommon.la is a libtool library that is built but not installed
## (noinst_). Presumably the parent directory links it into liblzma;
## confirm against src/liblzma/Makefile.am.
noinst_LTLIBRARIES = libcommon.la

## Include paths: the public API headers and the sibling subdirectories
## of liblzma whose headers the sources below may include.
libcommon_la_CPPFLAGS = \
	-I@top_srcdir@/src/liblzma/api \
	-I@top_srcdir@/src/liblzma/check \
	-I@top_srcdir@/src/liblzma/lz \
	-I@top_srcdir@/src/liblzma/lzma \
	-I@top_srcdir@/src/liblzma/simple \
	-I@top_srcdir@/src/liblzma/subblock \
	-I@top_srcdir@/src/liblzma/rangecoder

## Sources that are always compiled, whatever conditionals are enabled.
libcommon_la_SOURCES = \
	common.h \
	sysdefs.h \
	allocator.c \
	block_private.h \
	extra.c \
	features.c \
	index.c \
	info.c \
	init.c \
	memory_limitter.c \
	memory_usage.c \
	next_coder.c \
	raw_common.c \
	raw_common.h \
	code.c \
	version.c

## The COND_* conditionals below are not defined in this file; they are
## presumably set by configure according to the enabled features.

## Copy filter.
if COND_FILTER_COPY
libcommon_la_SOURCES += \
	copy_coder.c \
	copy_coder.h
endif

## Delta filter.
if COND_FILTER_DELTA
libcommon_la_SOURCES += \
	delta_coder.c \
	delta_coder.h
endif

## Encoder side.
## NOTE(review): auto_decoder.c is listed here, under the encoder
## conditional, although its name suggests decoder code. Confirm that
## this is intended and that it builds when only the decoder is enabled.
if COND_MAIN_ENCODER
libcommon_la_SOURCES += \
	alignment.c \
	auto_decoder.c \
	alone_encoder.c \
	block_encoder.c \
	block_encoder.h \
	block_header_encoder.c \
	filter_flags_encoder.c \
	init_encoder.c \
	metadata_encoder.c \
	metadata_encoder.h \
	raw_encoder.c \
	raw_encoder.h \
	stream_common.c \
	stream_common.h \
	stream_encoder_single.c \
	stream_encoder_multi.c \
	stream_flags_encoder.c \
	vli_encoder.c
endif

## Decoder side.
if COND_MAIN_DECODER
libcommon_la_SOURCES += \
	alone_decoder.c \
	alone_decoder.h \
	block_decoder.c \
	block_decoder.h \
	block_header_decoder.c \
	filter_flags_decoder.c \
	init_decoder.c \
	metadata_decoder.c \
	metadata_decoder.h \
	raw_decoder.c \
	raw_decoder.h \
	stream_decoder.c \
	stream_flags_decoder.c \
	stream_flags_decoder.h \
	vli_decoder.c \
	vli_reverse_decoder.c
endif
+// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + + +extern LZMA_API uint32_t +lzma_alignment_input(const lzma_options_filter *filters, uint32_t guess) +{ + for (size_t i = 0; filters[i].id != LZMA_VLI_VALUE_UNKNOWN; ++i) { + switch (filters[i].id) { + case LZMA_FILTER_COPY: + case LZMA_FILTER_DELTA: + // The same as the input, check the next filter. + continue; + + case LZMA_FILTER_SUBBLOCK: + if (filters[i].options == NULL) + return LZMA_SUBBLOCK_ALIGNMENT_DEFAULT; + else + return ((const lzma_options_subblock *)( + filters[i].options))->alignment; + + case LZMA_FILTER_X86: + return 1; + + case LZMA_FILTER_ARMTHUMB: + return 2; + + case LZMA_FILTER_POWERPC: + case LZMA_FILTER_ARM: + case LZMA_FILTER_SPARC: + return 4; + + case LZMA_FILTER_IA64: + return 16; + + case LZMA_FILTER_LZMA: { + const lzma_options_lzma *lzma = filters[i].options; + return 1 << MAX(lzma->pos_bits, + lzma->literal_pos_bits); + } + + default: + return UINT32_MAX; + } + } + + return guess; +} + + +extern LZMA_API uint32_t +lzma_alignment_output(const lzma_options_filter *filters, uint32_t guess) +{ + // Check if there is only an implicit Copy filter. + if (filters[0].id == LZMA_VLI_VALUE_UNKNOWN) + return guess; + + // Find the last filter in the chain. 
+ size_t i = 0; + while (filters[i + 1].id != LZMA_VLI_VALUE_UNKNOWN) + ++i; + + do { + switch (filters[i].id) { + case LZMA_FILTER_COPY: + case LZMA_FILTER_DELTA: + // It's the same as the input alignment, so + // check the next filter. + continue; + + case LZMA_FILTER_SUBBLOCK: + if (filters[i].options == NULL) + return LZMA_SUBBLOCK_ALIGNMENT_DEFAULT; + else + return ((const lzma_options_subblock *)( + filters[i].options))->alignment; + + case LZMA_FILTER_X86: + case LZMA_FILTER_LZMA: + return 1; + + case LZMA_FILTER_ARMTHUMB: + return 2; + + case LZMA_FILTER_POWERPC: + case LZMA_FILTER_ARM: + case LZMA_FILTER_SPARC: + return 4; + + case LZMA_FILTER_IA64: + return 16; + + default: + return UINT32_MAX; + } + } while (i-- != 0); + + // If we get here, we have the same alignment as the input data. + return guess; +} diff --git a/src/liblzma/common/allocator.c b/src/liblzma/common/allocator.c new file mode 100644 index 00000000..edea0f68 --- /dev/null +++ b/src/liblzma/common/allocator.c @@ -0,0 +1,57 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file allocator.c +/// \brief Allocating and freeing memory +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + +#undef lzma_free + +extern void * lzma_attribute((malloc)) +lzma_alloc(size_t size, lzma_allocator *allocator) +{ + // Some malloc() variants return NULL if called with size == 0. + if (size == 0) + size = 1; + + void *ptr; + + if (allocator != NULL && allocator->alloc != NULL) + ptr = allocator->alloc(allocator->opaque, 1, size); + else + ptr = malloc(size); + +#if !defined(NDEBUG) && defined(HAVE_MEMSET) + // This helps to catch some stupid mistakes. + if (ptr != NULL) + memset(ptr, 0xFD, size); +#endif + + return ptr; +} + + +extern void +lzma_free(void *ptr, lzma_allocator *allocator) +{ + if (allocator != NULL && allocator->free != NULL) + allocator->free(allocator->opaque, ptr); + else + free(ptr); + + return; +} diff --git a/src/liblzma/common/alone_decoder.c b/src/liblzma/common/alone_decoder.c new file mode 100644 index 00000000..092047b4 --- /dev/null +++ b/src/liblzma/common/alone_decoder.c @@ -0,0 +1,197 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file alone_decoder.c +/// \brief Decoder for LZMA_Alone files +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "alone_decoder.h" +#include "lzma_decoder.h" + + +struct lzma_coder_s { + /// LZMA decoder that takes over once the header has been parsed + lzma_next_coder next; + + /// Which part of the LZMA_Alone header is expected next + enum { + SEQ_PROPERTIES, + SEQ_DICTIONARY_SIZE, + SEQ_UNCOMPRESSED_SIZE, + SEQ_CODER_INIT, + SEQ_CODE, + } sequence; + + /// Byte position inside the multibyte header field being read + size_t pos; + + /// Options collected from the header and passed to the LZMA decoder + lzma_options_alone options; +}; + + +/// Parses the LZMA_Alone header one byte at a time (one properties byte, +/// a four-byte little endian dictionary size and an eight-byte little +/// endian uncompressed size), initializes the LZMA decoder from it, and +/// from then on hands every call over to that decoder. +// +// NOTE(review): allocator is marked unused, but it is passed on to +// lzma_next_filter_init() and coder->next.code() below. +static lzma_ret +alone_decode(lzma_coder *coder, + lzma_allocator *allocator lzma_attribute((unused)), + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, + lzma_action action) +{ + // Header states need at least one input byte; SEQ_CODE is entered + // even without new input. + while (*out_pos < out_size + && (coder->sequence == SEQ_CODE || *in_pos < in_size)) + switch (coder->sequence) { + case SEQ_PROPERTIES: + if (lzma_lzma_decode_properties( + &coder->options.lzma, in[*in_pos])) + return LZMA_DATA_ERROR; + + coder->sequence = SEQ_DICTIONARY_SIZE; + ++*in_pos; + break; + + case SEQ_DICTIONARY_SIZE: + // Accumulate the little endian value one byte per iteration. + coder->options.lzma.dictionary_size + |= (size_t)(in[*in_pos]) << (coder->pos * 8); + + if (++coder->pos == 4) { + // A hack to ditch tons of false positives: We allow + // only dictionary sizes that are a power of two. + // LZMA_Alone didn't create other kinds of files, + // although it's not impossible that files with + // other dictionary sizes exist. Well, if someone + // complains, this will be reconsidered. + // (A dictionary size of zero has no bits set and + // therefore passes this check too.) + size_t count = 0; + for (size_t i = 0; i < 32; ++i) + if (coder->options.lzma.dictionary_size + & (UINT32_C(1) << i)) + ++count; + + if (count > 1) + return LZMA_DATA_ERROR; + + coder->pos = 0; + coder->sequence = SEQ_UNCOMPRESSED_SIZE; + } + + ++*in_pos; + break; + + case SEQ_UNCOMPRESSED_SIZE: + // Accumulate the little endian value one byte per iteration. + coder->options.uncompressed_size + |= (lzma_vli)(in[*in_pos]) << (coder->pos * 8); + + if (++coder->pos == 8) { + // Another hack to ditch false positives: Assume that + // if the uncompressed size is known, it must be less + // than 256 GiB. Again, if someone complains, this + // will be reconsidered. 
+ if (coder->options.uncompressed_size + != LZMA_VLI_VALUE_UNKNOWN + && coder->options.uncompressed_size + >= (LZMA_VLI_C(1) << 38)) + return LZMA_DATA_ERROR; + + coder->pos = 0; + coder->sequence = SEQ_CODER_INIT; + } + + ++*in_pos; + break; + + case SEQ_CODER_INIT: { + // Two is enough because there won't be implicit filters. + lzma_filter_info filters[2] = { + { + .init = &lzma_lzma_decoder_init, + .options = &coder->options.lzma, + .uncompressed_size = coder->options + .uncompressed_size, + }, { + .init = NULL, + } + }; + + const lzma_ret ret = lzma_next_filter_init(&coder->next, + allocator, filters); + if (ret != LZMA_OK) + return ret; + + coder->sequence = SEQ_CODE; + } + + // Fall through + + case SEQ_CODE: { + // The header is done; the LZMA decoder does the rest. + return coder->next.code(coder->next.coder, + allocator, in, in_pos, in_size, + out, out_pos, out_size, action); + } + + default: + return LZMA_PROG_ERROR; + } + + return LZMA_OK; +} + + +/// Frees the LZMA decoder and then the LZMA_Alone coder itself. +static void +alone_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_coder_end(&coder->next, allocator); + lzma_free(coder, allocator); + return; +} + + +/// Allocates the coder if next->coder is NULL (an existing coder is +/// reused) and resets it so that a new header is expected. +/// Returns LZMA_MEM_ERROR if the allocation fails, otherwise LZMA_OK. +static lzma_ret +alone_decoder_init(lzma_next_coder *next, lzma_allocator *allocator) +{ + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &alone_decode; + next->end = &alone_decoder_end; + next->coder->next = LZMA_NEXT_CODER_INIT; + } + + // The two sizes are built up with |= in alone_decode(), so they + // have to start from zero. + next->coder->sequence = SEQ_PROPERTIES; + next->coder->pos = 0; + next->coder->options.lzma.dictionary_size = 0; + next->coder->options.uncompressed_size = 0; + + return LZMA_OK; +} + + +/// Internal entry point used by other coders to set up *next as +/// an LZMA_Alone decoder. +extern lzma_ret +lzma_alone_decoder_init(lzma_next_coder *next, lzma_allocator *allocator) +{ + // We need to use _init2 because we don't pass any variadic args. 
+ lzma_next_coder_init2(next, allocator, alone_decoder_init, + alone_decoder_init, allocator); +} + + +/// Public entry point: initializes strm as an LZMA_Alone decoder that +/// accepts the LZMA_RUN and LZMA_SYNC_FLUSH actions. +extern LZMA_API lzma_ret +lzma_alone_decoder(lzma_stream *strm) +{ + lzma_next_strm_init2(strm, alone_decoder_init, + alone_decoder_init, strm->allocator); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; + + return LZMA_OK; +} diff --git a/src/liblzma/common/alone_decoder.h b/src/liblzma/common/alone_decoder.h new file mode 100644 index 00000000..a9b7e84b --- /dev/null +++ b/src/liblzma/common/alone_decoder.h @@ -0,0 +1,24 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file alone_decoder.h +/// \brief Decoder for LZMA_Alone files +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + + +/// Sets up *next as an LZMA_Alone decoder; defined in alone_decoder.c. +extern lzma_ret lzma_alone_decoder_init( + lzma_next_coder *next, lzma_allocator *allocator); diff --git a/src/liblzma/common/alone_encoder.c b/src/liblzma/common/alone_encoder.c new file mode 100644 index 00000000..7629aa77 --- /dev/null +++ b/src/liblzma/common/alone_encoder.c @@ -0,0 +1,167 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file alone_encoder.c +/// \brief Encoder for LZMA_Alone files +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" +#include "lzma_encoder.h" + + +struct lzma_coder_s { + /// LZMA encoder that takes over once the header has been written + lzma_next_coder next; + + /// Which part of the LZMA_Alone header is written next + enum { + SEQ_PROPERTIES, + SEQ_DICTIONARY_SIZE, + SEQ_UNCOMPRESSED_SIZE, + SEQ_CODE, + } sequence; + + /// Byte position inside the multibyte header field being written + size_t pos; + + /// Private copy of the options given to the initialization function + lzma_options_alone options; +}; + + +/// Writes the LZMA_Alone header one byte at a time (one properties byte, +/// a four-byte little endian dictionary size and an eight-byte little +/// endian uncompressed size) and then hands every call over to the +/// LZMA encoder. +// +// NOTE(review): allocator is marked unused, but it is passed on to +// coder->next.code() below. +static lzma_ret +alone_encode(lzma_coder *coder, + lzma_allocator *allocator lzma_attribute((unused)), + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, + lzma_action action) +{ + while (*out_pos < out_size) + switch (coder->sequence) { + case SEQ_PROPERTIES: + if (lzma_lzma_encode_properties( + &coder->options.lzma, out + *out_pos)) { + return LZMA_PROG_ERROR; + } + + coder->sequence = SEQ_DICTIONARY_SIZE; + ++*out_pos; + break; + + case SEQ_DICTIONARY_SIZE: + // Emit the least significant byte first. + out[*out_pos] = coder->options.lzma.dictionary_size + >> (coder->pos * 8); + + if (++coder->pos == 4) { + coder->pos = 0; + coder->sequence = SEQ_UNCOMPRESSED_SIZE; + } + + ++*out_pos; + break; + + case SEQ_UNCOMPRESSED_SIZE: + // Emit the least significant byte first. + out[*out_pos] = coder->options.uncompressed_size + >> (coder->pos * 8); + + if (++coder->pos == 8) { + coder->pos = 0; + coder->sequence = SEQ_CODE; + } + + ++*out_pos; + break; + + case SEQ_CODE: { + // The header is done; the LZMA encoder does the rest. + return coder->next.code(coder->next.coder, + allocator, in, in_pos, in_size, + out, out_pos, out_size, action); + } + + default: + return LZMA_PROG_ERROR; + } + + return LZMA_OK; +} + + +/// Frees the LZMA encoder and then the LZMA_Alone coder itself. +static void +alone_encoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_coder_end(&coder->next, allocator); + lzma_free(coder, allocator); + return; +} + + +/// Allocates the coder if next->coder is NULL, copies *options into it, +/// rejects an invalid uncompressed size with LZMA_PROG_ERROR, and +/// initializes the LZMA encoder. +// +// NOTE(review): options is dereferenced without a NULL check. +// +// At least for now, this is not used by any internal function. 
+static lzma_ret +alone_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_options_alone *options) +{ + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &alone_encode; + next->end = &alone_encoder_end; + next->coder->next = LZMA_NEXT_CODER_INIT; + } + + // Initialize the LZMA_Alone coder variables. + next->coder->sequence = SEQ_PROPERTIES; + next->coder->pos = 0; + next->coder->options = *options; + + // Verify uncompressed_size since the other functions assume that + // it is valid. + if (!lzma_vli_is_valid(next->coder->options.uncompressed_size)) + return LZMA_PROG_ERROR; + + // Initialize the LZMA encoder. + const lzma_filter_info filters[2] = { + { + .init = &lzma_lzma_encoder_init, + .options = &next->coder->options.lzma, + .uncompressed_size = next->coder->options + .uncompressed_size, + }, { + .init = NULL, + } + }; + + return lzma_next_filter_init(&next->coder->next, allocator, filters); +} + + +/* +extern lzma_ret +lzma_alone_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_options_alone *options) +{ + lzma_next_coder_init(alone_encoder_init, next, allocator, options); +} +*/ + + +/// Public entry point: initializes strm as an LZMA_Alone encoder that +/// accepts the LZMA_RUN and LZMA_FINISH actions. +extern LZMA_API lzma_ret +lzma_alone_encoder(lzma_stream *strm, const lzma_options_alone *options) +{ + lzma_next_strm_init(strm, alone_encoder_init, options); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} diff --git a/src/liblzma/common/auto_decoder.c b/src/liblzma/common/auto_decoder.c new file mode 100644 index 00000000..7e92df9a --- /dev/null +++ b/src/liblzma/common/auto_decoder.c @@ -0,0 +1,113 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file auto_decoder.c +/// \brief Autodetect between .lzma Stream and LZMA_Alone formats +// +// Copyright (C) 2007 Lasse Collin +// +// This 
library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" +#include "alone_decoder.h" + + +struct lzma_coder_s { + /// The decoder chosen by auto_decode() + lzma_next_coder next; + + /// Passed through to lzma_stream_decoder_init() + lzma_extra **header; + + /// Passed through to lzma_stream_decoder_init() + lzma_extra **footer; + + /// True once the actual decoder has been chosen and initialized + bool initialized; +}; + + +/// On the first call that has input available, picks the decoder from the +/// first input byte (0xFF selects the .lzma Stream decoder, anything else +/// the LZMA_Alone decoder) and initializes it. The byte is not consumed +/// here. Every call then forwards to the chosen decoder. +static lzma_ret +auto_decode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + if (!coder->initialized) { + // Nothing can be detected without at least one input byte. + if (*in_pos >= in_size) + return LZMA_OK; + + lzma_ret ret; + + if (in[*in_pos] == 0xFF) + ret = lzma_stream_decoder_init(&coder->next, allocator, + coder->header, coder->footer); + else + ret = lzma_alone_decoder_init(&coder->next, allocator); + + if (ret != LZMA_OK) + return ret; + + coder->initialized = true; + } + + return coder->next.code(coder->next.coder, allocator, + in, in_pos, in_size, out, out_pos, out_size, action); +} + + +/// Frees the chosen decoder and then the autodetection coder itself. +static void +auto_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_coder_end(&coder->next, allocator); + lzma_free(coder, allocator); + return; +} + + +/// Allocates the coder if next->coder is NULL (an existing coder is +/// reused), stores header and footer for later use, and marks the coder +/// as not yet initialized so that the format is detected again. +static lzma_ret +auto_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + lzma_extra **header, lzma_extra **footer) +{ + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &auto_decode; + next->end = 
&auto_decoder_end; + next->coder->next = LZMA_NEXT_CODER_INIT; + } + + next->coder->header = header; + next->coder->footer = footer; + next->coder->initialized = false; + + return LZMA_OK; +} + + +/* +extern lzma_ret +lzma_auto_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + lzma_extra **header, lzma_extra **footer) +{ + lzma_next_coder_init( + auto_decoder_init, next, allocator, header, footer); +} +*/ + + +/// Public entry point: initializes strm as an autodetecting decoder that +/// accepts the LZMA_RUN and LZMA_SYNC_FLUSH actions. +extern LZMA_API lzma_ret +lzma_auto_decoder(lzma_stream *strm, lzma_extra **header, lzma_extra **footer) +{ + lzma_next_strm_init(strm, auto_decoder_init, header, footer); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; + + return LZMA_OK; +} diff --git a/src/liblzma/common/block_decoder.c b/src/liblzma/common/block_decoder.c new file mode 100644 index 00000000..b9dcde49 --- /dev/null +++ b/src/liblzma/common/block_decoder.c @@ -0,0 +1,405 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_decoder.c +/// \brief Decodes .lzma Blocks +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "block_decoder.h" +#include "block_private.h" +#include "raw_decoder.h" +#include "check.h" + + +struct lzma_coder_s { + /// Part of the Block that is currently being decoded + enum { + SEQ_CODE, + SEQ_CHECK, + SEQ_UNCOMPRESSED_SIZE, + SEQ_BACKWARD_SIZE, + SEQ_PADDING, + SEQ_END, + } sequence; + + /// The filters in the chain; initialized with lzma_raw_decoder_init(). + lzma_next_coder next; + + /// Decoding options; we also write Total Size, Compressed Size, and + /// Uncompressed Size back to this structure when the decoding has + /// been finished. + lzma_options_block *options; + + /// Position in variable-length integers (and in some other places). + size_t pos; + + /// Check of the uncompressed data + lzma_check check; + + /// Total Size calculated while decoding + lzma_vli total_size; + + /// Compressed Size calculated while decoding + lzma_vli compressed_size; + + /// Uncompressed Size calculated while decoding + lzma_vli uncompressed_size; + + /// Maximum allowed total_size + lzma_vli total_limit; + + /// Maximum allowed uncompressed_size + lzma_vli uncompressed_limit; + + /// Temporary location for the Uncompressed Size and Backward Size + /// fields in Block Footer. + lzma_vli tmp; + + /// Size of the Backward Size field - This is needed so that we + /// can verify the Backward Size and still keep updating total_size. 
+ size_t size_of_backward_size; +}; + + +/// Moves coder->sequence on to the next Block Footer field that is enabled +/// in the options and returns LZMA_OK. When no field is left, the sizes +/// calculated so far are compared against the ones in the options +/// (LZMA_DATA_ERROR on mismatch), copied into coder->options, and +/// LZMA_STREAM_END is returned. +static lzma_ret +update_sequence(lzma_coder *coder) +{ + switch (coder->sequence) { + case SEQ_CODE: + if (coder->options->check != LZMA_CHECK_NONE) { + lzma_check_finish(&coder->check, + coder->options->check); + coder->sequence = SEQ_CHECK; + break; + } + + // Fall through + + case SEQ_CHECK: + if (coder->options->has_uncompressed_size_in_footer) { + coder->sequence = SEQ_UNCOMPRESSED_SIZE; + break; + } + + // Fall through + + case SEQ_UNCOMPRESSED_SIZE: + if (coder->options->has_backward_size) { + coder->sequence = SEQ_BACKWARD_SIZE; + break; + } + + // Fall through + + case SEQ_BACKWARD_SIZE: + if (coder->options->handle_padding) { + coder->sequence = SEQ_PADDING; + break; + } + + // Fall through + + case SEQ_PADDING: + if (!is_size_valid(coder->total_size, + coder->options->total_size) + || !is_size_valid(coder->compressed_size, + coder->options->compressed_size) + || !is_size_valid(coder->uncompressed_size, + coder->options->uncompressed_size)) + return LZMA_DATA_ERROR; + + // Copy the values into coder->options. The caller + // may use this information to construct Index. + coder->options->total_size = coder->total_size; + coder->options->compressed_size = coder->compressed_size; + coder->options->uncompressed_size = coder->uncompressed_size; + + return LZMA_STREAM_END; + + default: + assert(0); + return LZMA_PROG_ERROR; + } + + return LZMA_OK; +} + + +/// Runs the filter chain and afterwards reads and verifies the Block Footer +/// fields selected by the options (Check, Uncompressed Size, Backward Size, +/// padding), while keeping the size counters of the coder up to date. +static lzma_ret +block_decode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + // Special case when the Block has only Block Header. + if (coder->sequence == SEQ_END) + return LZMA_STREAM_END; + + // FIXME: Termination condition should work but could be cleaner. 
+ while (*out_pos < out_size && (*in_pos < in_size + || coder->sequence == SEQ_CODE)) + switch (coder->sequence) { + case SEQ_CODE: { + const size_t in_start = *in_pos; + const size_t out_start = *out_pos; + + lzma_ret ret = coder->next.code(coder->next.coder, + allocator, in, in_pos, in_size, + out, out_pos, out_size, action); + + const size_t in_used = *in_pos - in_start; + const size_t out_used = *out_pos - out_start; + + // Account for what the filter chain consumed and produced + // before looking at its return value. + if (update_size(&coder->total_size, in_used, + coder->total_limit) + || update_size(&coder->compressed_size, + in_used, + coder->options->compressed_size) + || update_size(&coder->uncompressed_size, + out_used, coder->uncompressed_limit)) + return LZMA_DATA_ERROR; + + lzma_check_update(&coder->check, coder->options->check, + out + out_start, out_used); + + if (ret != LZMA_STREAM_END) + return ret; + + ret = update_sequence(coder); + if (ret != LZMA_OK) + return ret; + + break; + } + + case SEQ_CHECK: + // Compare one byte of the calculated check against the input. + switch (coder->options->check) { + case LZMA_CHECK_CRC32: + if (((coder->check.crc32 >> (coder->pos * 8)) + & 0xFF) != in[*in_pos]) + return LZMA_DATA_ERROR; + break; + + case LZMA_CHECK_CRC64: + if (((coder->check.crc64 >> (coder->pos * 8)) + & 0xFF) != in[*in_pos]) + return LZMA_DATA_ERROR; + break; + + case LZMA_CHECK_SHA256: + if (coder->check.sha256.buffer[coder->pos] + != in[*in_pos]) + return LZMA_DATA_ERROR; + break; + + default: + // Other check types are skipped without verification. + assert(coder->options->check != LZMA_CHECK_NONE); + assert(coder->options->check <= LZMA_CHECK_ID_MAX); + break; + } + + if (update_size(&coder->total_size, 1, coder->total_limit)) + return LZMA_DATA_ERROR; + + ++*in_pos; + + if (++coder->pos == lzma_check_sizes[coder->options->check]) { + const lzma_ret ret = update_sequence(coder); + if (ret != LZMA_OK) + return ret; + + coder->pos = 0; + } + + break; + + case SEQ_UNCOMPRESSED_SIZE: { + const size_t in_start = *in_pos; + + lzma_ret ret = lzma_vli_decode(&coder->tmp, + &coder->pos, in, in_pos, in_size); + + if (update_size(&coder->total_size, *in_pos - 
in_start, + coder->total_limit)) + return LZMA_DATA_ERROR; + + if (ret != LZMA_STREAM_END) + return ret; + + // The stored size must match what was actually decoded. + if (coder->tmp != coder->uncompressed_size) + return LZMA_DATA_ERROR; + + coder->pos = 0; + coder->tmp = 0; + + ret = update_sequence(coder); + if (ret != LZMA_OK) + return ret; + + break; + } + + case SEQ_BACKWARD_SIZE: { + const size_t in_start = *in_pos; + + lzma_ret ret = lzma_vli_decode(&coder->tmp, + &coder->pos, in, in_pos, in_size); + + const size_t in_used = *in_pos - in_start; + + if (update_size(&coder->total_size, in_used, + coder->total_limit)) + return LZMA_DATA_ERROR; + + coder->size_of_backward_size += in_used; + + if (ret != LZMA_STREAM_END) + return ret; + + // total_size already includes the Backward Size field itself, + // so its own size is subtracted before comparing. + if (coder->tmp != coder->total_size + - coder->size_of_backward_size) + return LZMA_DATA_ERROR; + + ret = update_sequence(coder); + if (ret != LZMA_OK) + return ret; + + break; + } + + case SEQ_PADDING: + // Consume zero bytes; the first non-zero byte ends the Block + // and is left in the input. + if (in[*in_pos] == 0x00) { + if (update_size(&coder->total_size, 1, + coder->total_limit)) + return LZMA_DATA_ERROR; + + ++*in_pos; + break; + } + + return update_sequence(coder); + + default: + return LZMA_PROG_ERROR; + } + + return LZMA_OK; +} + + +/// Frees the filter chain and then the Block decoder itself. +static void +block_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_coder_end(&coder->next, allocator); + lzma_free(coder, allocator); + return; +} + + +/// Allocates the coder if next->coder is NULL, validates the options, +/// initializes the check and the filter chain, and resets the counters. +/// Returns LZMA_MEM_ERROR or LZMA_PROG_ERROR on failure, or whatever +/// error lzma_check_init() or lzma_raw_decoder_init() reports. +extern lzma_ret +lzma_block_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + lzma_options_block *options) +{ + // This is pretty similar to lzma_block_encoder_init(). + // See comments there. 
+ + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &block_decode; + next->end = &block_decoder_end; + next->coder->next = LZMA_NEXT_CODER_INIT; + } + + // Validate the options. + // NOTE(review): total_size is tested twice in this condition, and + // options itself is not checked for NULL here although + // block_encoder_init() does check it. + if (!lzma_vli_is_valid(options->total_size) + || !lzma_vli_is_valid(options->compressed_size) + || !lzma_vli_is_valid(options->uncompressed_size) + || !lzma_vli_is_valid(options->total_size) + || !lzma_vli_is_valid(options->total_limit) + || !lzma_vli_is_valid(options->uncompressed_limit) + || (options->uncompressed_size + != LZMA_VLI_VALUE_UNKNOWN + && options->uncompressed_size + > options->uncompressed_limit) + || (options->total_size != LZMA_VLI_VALUE_UNKNOWN + && options->total_size + > options->total_limit) + || (!options->has_eopm && options->uncompressed_size + == LZMA_VLI_VALUE_UNKNOWN) + || options->header_size > options->total_size + || (options->handle_padding + && (options->has_uncompressed_size_in_footer + || options->has_backward_size))) + return LZMA_PROG_ERROR; + + { + const lzma_ret ret = lzma_check_init( + &next->coder->check, options->check); + if (ret != LZMA_OK) + return ret; + } + + // Without End of Payload Marker and with Uncompressed Size of zero + // there is no Compressed Data; start directly from the first + // enabled field after it. + if (!options->has_eopm && options->uncompressed_size == 0) { + if (!is_size_valid(0, options->compressed_size)) + return LZMA_PROG_ERROR; + + if (options->check != LZMA_CHECK_NONE) { + lzma_check_finish(&next->coder->check, options->check); + next->coder->sequence = SEQ_CHECK; + } else if (options->handle_padding) { + next->coder->sequence = SEQ_PADDING; + } else { + next->coder->sequence = SEQ_END; + } + } else { + next->coder->sequence = SEQ_CODE; + } + + { + const lzma_ret ret = lzma_raw_decoder_init( + &next->coder->next, allocator, + options->filters, options->has_eopm + ? 
LZMA_VLI_VALUE_UNKNOWN + : options->uncompressed_size, + true); + if (ret != LZMA_OK) + return ret; + } + + next->coder->options = options; + next->coder->pos = 0; + next->coder->total_size = options->header_size; + next->coder->compressed_size = 0; + next->coder->uncompressed_size = 0; + next->coder->total_limit + = MIN(options->total_size, options->total_limit); + next->coder->uncompressed_limit = MIN(options->uncompressed_size, + options->uncompressed_limit); + next->coder->tmp = 0; + next->coder->size_of_backward_size = 0; + + return LZMA_OK; +} + + +/// Public entry point: initializes strm as a Block decoder that accepts +/// the LZMA_RUN and LZMA_SYNC_FLUSH actions. +extern LZMA_API lzma_ret +lzma_block_decoder(lzma_stream *strm, lzma_options_block *options) +{ + lzma_next_strm_init(strm, lzma_block_decoder_init, options); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; + + return LZMA_OK; +} diff --git a/src/liblzma/common/block_decoder.h b/src/liblzma/common/block_decoder.h new file mode 100644 index 00000000..af71128d --- /dev/null +++ b/src/liblzma/common/block_decoder.h @@ -0,0 +1,29 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_decoder.h +/// \brief Decodes .lzma Blocks +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_BLOCK_DECODER_H +#define LZMA_BLOCK_DECODER_H + +#include "common.h" + + +/// Sets up *next as a Block decoder; defined in block_decoder.c. +extern lzma_ret lzma_block_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, lzma_options_block *options); + +#endif diff --git a/src/liblzma/common/block_encoder.c b/src/liblzma/common/block_encoder.c new file mode 100644 index 00000000..77ff78ea --- /dev/null +++ b/src/liblzma/common/block_encoder.c @@ -0,0 +1,375 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_encoder.c +/// \brief Encodes .lzma Blocks +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "block_encoder.h" +#include "block_private.h" +#include "raw_encoder.h" +#include "check.h" + + +struct lzma_coder_s { + /// The filters in the chain; initialized with lzma_raw_encoder_init(). + lzma_next_coder next; + + /// Encoding options; we also write Total Size, Compressed Size, and + /// Uncompressed Size back to this structure when the encoding has + /// been finished. + lzma_options_block *options; + + /// Part of the Block that is currently being encoded + enum { + SEQ_CODE, + SEQ_CHECK_FINISH, + SEQ_CHECK_COPY, + SEQ_UNCOMPRESSED_SIZE, + SEQ_BACKWARD_SIZE, + SEQ_PADDING, + } sequence; + + /// Position in .header and .check. 
+ size_t pos; + + /// Check of the uncompressed data + lzma_check check; + + /// Total Size calculated while encoding + lzma_vli total_size; + + /// Compressed Size calculated while encoding + lzma_vli compressed_size; + + /// Uncompressed Size calculated while encoding + lzma_vli uncompressed_size; + + /// Maximum allowed total_size + lzma_vli total_limit; + + /// Maximum allowed uncompressed_size + lzma_vli uncompressed_limit; + + /// Backward Size - This is a copy of total_size right before + /// the Backward Size field. + lzma_vli backward_size; +}; + + +/// Runs the filter chain and afterwards writes the Block Footer fields +/// selected by the options (Check, Uncompressed Size, Backward Size, +/// padding), while keeping the size counters of the coder up to date. +/// Returns LZMA_STREAM_END once the whole Block has been written. +static lzma_ret +block_encode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + // Check that our amount of input stays in proper limits. + if (coder->options->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) { + if (action == LZMA_FINISH) { + // When finishing, the remaining input must be exactly + // what is still missing from the promised size. + if (coder->options->uncompressed_size + - coder->uncompressed_size + != (lzma_vli)(in_size - *in_pos)) + return LZMA_DATA_ERROR; + } else { + if (coder->options->uncompressed_size + - coder->uncompressed_size + < (lzma_vli)(in_size - *in_pos)) + return LZMA_DATA_ERROR; + } + } else if (LZMA_VLI_VALUE_MAX - coder->uncompressed_size + < (lzma_vli)(in_size - *in_pos)) { + return LZMA_DATA_ERROR; + } + + // Main loop + while (*out_pos < out_size + && (*in_pos < in_size || action == LZMA_FINISH)) + switch (coder->sequence) { + case SEQ_CODE: { + const size_t in_start = *in_pos; + const size_t out_start = *out_pos; + + const lzma_ret ret = coder->next.code(coder->next.coder, + allocator, in, in_pos, in_size, + out, out_pos, out_size, action); + + const size_t in_used = *in_pos - in_start; + const size_t out_used = *out_pos - out_start; + + if (update_size(&coder->total_size, out_used, + coder->total_limit) + || update_size(&coder->compressed_size, + out_used, + coder->options->compressed_size)) + return LZMA_DATA_ERROR; + 
+ // No need to check for overflow because we have already + // checked it at the beginning of this function. + coder->uncompressed_size += in_used; + + lzma_check_update(&coder->check, coder->options->check, + in + in_start, in_used); + + if (ret != LZMA_STREAM_END) + return ret; + + assert(*in_pos == in_size); + + // Compressed and Uncompressed Sizes are now at their final + // values. Verify that they match the values given to us. + if (!is_size_valid(coder->compressed_size, + coder->options->compressed_size) + || !is_size_valid(coder->uncompressed_size, + coder->options->uncompressed_size)) + return LZMA_DATA_ERROR; + + coder->sequence = SEQ_CHECK_FINISH; + break; + } + + case SEQ_CHECK_FINISH: + if (coder->options->check == LZMA_CHECK_NONE) { + coder->sequence = SEQ_UNCOMPRESSED_SIZE; + break; + } + + lzma_check_finish(&coder->check, coder->options->check); + coder->sequence = SEQ_CHECK_COPY; + + // Fall through + + case SEQ_CHECK_COPY: + assert(lzma_check_sizes[coder->options->check] > 0); + + // Write one byte of the finished check per iteration. + switch (coder->options->check) { + case LZMA_CHECK_CRC32: + out[*out_pos] = coder->check.crc32 >> (coder->pos * 8); + break; + + case LZMA_CHECK_CRC64: + out[*out_pos] = coder->check.crc64 >> (coder->pos * 8); + break; + + case LZMA_CHECK_SHA256: + out[*out_pos] = coder->check.sha256.buffer[coder->pos]; + break; + + default: + assert(0); + return LZMA_PROG_ERROR; + } + + ++*out_pos; + + if (update_size(&coder->total_size, 1, coder->total_limit)) + return LZMA_DATA_ERROR; + + if (++coder->pos == lzma_check_sizes[coder->options->check]) { + coder->pos = 0; + coder->sequence = SEQ_UNCOMPRESSED_SIZE; + } + + break; + + case SEQ_UNCOMPRESSED_SIZE: + if (coder->options->has_uncompressed_size_in_footer) { + const size_t out_start = *out_pos; + + const lzma_ret ret = lzma_vli_encode( + coder->uncompressed_size, + &coder->pos, 1, + out, out_pos, out_size); + + // Updating the size this way instead of doing in a + // single chunk using lzma_vli_size(), because this + // way we 
detect when exactly we are going out of + // our limits. + if (update_size(&coder->total_size, + *out_pos - out_start, + coder->total_limit)) + return LZMA_DATA_ERROR; + + if (ret != LZMA_STREAM_END) + return ret; + + coder->pos = 0; + } + + // Remember the size so far; it is the value of Backward Size. + coder->backward_size = coder->total_size; + coder->sequence = SEQ_BACKWARD_SIZE; + break; + + case SEQ_BACKWARD_SIZE: + if (coder->options->has_backward_size) { + const size_t out_start = *out_pos; + + const lzma_ret ret = lzma_vli_encode( + coder->backward_size, &coder->pos, 1, + out, out_pos, out_size); + + if (update_size(&coder->total_size, + *out_pos - out_start, + coder->total_limit)) + return LZMA_DATA_ERROR; + + if (ret != LZMA_STREAM_END) + return ret; + } + + coder->sequence = SEQ_PADDING; + break; + + case SEQ_PADDING: + if (coder->options->handle_padding) { + assert(!coder->options + ->has_uncompressed_size_in_footer); + assert(!coder->options->has_backward_size); + assert(coder->options->total_size != LZMA_VLI_VALUE_UNKNOWN); + + // Pad with zero bytes until the requested Total Size + // has been reached. + if (coder->total_size < coder->options->total_size) { + out[*out_pos] = 0x00; + ++*out_pos; + + if (update_size(&coder->total_size, 1, + coder->total_limit)) + return LZMA_DATA_ERROR; + + break; + } + } + + // Now also Total Size is known. Verify it. + if (!is_size_valid(coder->total_size, + coder->options->total_size)) + return LZMA_DATA_ERROR; + + // Copy the values into coder->options. The caller + // may use this information to construct Index. 
+ coder->options->total_size = coder->total_size; + coder->options->compressed_size = coder->compressed_size; + coder->options->uncompressed_size = coder->uncompressed_size; + + return LZMA_STREAM_END; + + default: + return LZMA_PROG_ERROR; + } + + return LZMA_OK; +} + + +/// Frees the filter chain and then the Block encoder itself. +static void +block_encoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_coder_end(&coder->next, allocator); + lzma_free(coder, allocator); + return; +} + + +/// Validates the options, allocates the coder if next->coder is NULL, +/// initializes the check, resets the counters, and initializes the filter +/// chain. Returns LZMA_PROG_ERROR for invalid options and LZMA_MEM_ERROR +/// if the allocation fails. +static lzma_ret +block_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + lzma_options_block *options) +{ + // Validate some options. + // NOTE(review): total_size is tested twice in this condition. + if (options == NULL + || !lzma_vli_is_valid(options->total_size) + || !lzma_vli_is_valid(options->compressed_size) + || !lzma_vli_is_valid(options->uncompressed_size) + || !lzma_vli_is_valid(options->total_size) + || !lzma_vli_is_valid(options->total_limit) + || !lzma_vli_is_valid(options->uncompressed_limit) + || (options->uncompressed_size + != LZMA_VLI_VALUE_UNKNOWN + && options->uncompressed_size + > options->uncompressed_limit) + || (options->total_size != LZMA_VLI_VALUE_UNKNOWN + && options->total_size + > options->total_limit) + || (!options->has_eopm && options->uncompressed_size + == LZMA_VLI_VALUE_UNKNOWN) + || (options->handle_padding && (options->total_size + == LZMA_VLI_VALUE_UNKNOWN + || options->has_uncompressed_size_in_footer + || options->has_backward_size)) + || options->header_size > options->total_size) + return LZMA_PROG_ERROR; + + // Allocate and initialize *next->coder if needed. + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &block_encode; + next->end = &block_encoder_end; + next->coder->next = LZMA_NEXT_CODER_INIT; + } + + // Initialize the check. + return_if_error(lzma_check_init(&next->coder->check, options->check)); + + // If End of Payload Marker is not used and Uncompressed Size is zero, + // Compressed Data is empty. 
That is, we don't call the encoder at all. + // We initialize it though; it allows detecting invalid options. + if (!options->has_eopm && options->uncompressed_size == 0) { + // Compressed Size must also be zero if it has been + // given to us. + // NOTE(review): the other is_size_valid() calls in this file + // pass the calculated size first and the value from the + // options second; the order is reversed here. Confirm against + // is_size_valid() in block_private.h that this is intended. + if (!is_size_valid(options->compressed_size, 0)) + return LZMA_PROG_ERROR; + + next->coder->sequence = SEQ_CHECK_FINISH; + } else { + next->coder->sequence = SEQ_CODE; + } + + // Other initializations + next->coder->options = options; + next->coder->pos = 0; + next->coder->total_size = options->header_size; + next->coder->compressed_size = 0; + next->coder->uncompressed_size = 0; + next->coder->total_limit + = MIN(options->total_size, options->total_limit); + next->coder->uncompressed_limit = MIN(options->uncompressed_size, + options->uncompressed_limit); + + // Initialize the requested filters. + return lzma_raw_encoder_init(&next->coder->next, allocator, + options->filters, options->has_eopm + ? LZMA_VLI_VALUE_UNKNOWN + : options->uncompressed_size, + true); +} + + +/// Internal entry point used by other coders to set up *next as +/// a Block encoder. +extern lzma_ret +lzma_block_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + lzma_options_block *options) +{ + lzma_next_coder_init(block_encoder_init, next, allocator, options); +} + + +/// Public entry point: initializes strm as a Block encoder that accepts +/// the LZMA_RUN and LZMA_FINISH actions. +extern LZMA_API lzma_ret +lzma_block_encoder(lzma_stream *strm, lzma_options_block *options) +{ + lzma_next_strm_init(strm, block_encoder_init, options); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} diff --git a/src/liblzma/common/block_encoder.h b/src/liblzma/common/block_encoder.h new file mode 100644 index 00000000..eafcc618 --- /dev/null +++ b/src/liblzma/common/block_encoder.h @@ -0,0 +1,29 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_encoder.h +/// \brief Encodes .lzma Blocks +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it 
under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_BLOCK_ENCODER_H +#define LZMA_BLOCK_ENCODER_H + +#include "common.h" + + +extern lzma_ret lzma_block_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, lzma_options_block *options); + +#endif diff --git a/src/liblzma/common/block_header_decoder.c b/src/liblzma/common/block_header_decoder.c new file mode 100644 index 00000000..7676c795 --- /dev/null +++ b/src/liblzma/common/block_header_decoder.c @@ -0,0 +1,373 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_header_decoder.c +/// \brief Decodes Block Header from .lzma files +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" +#include "check.h" + + +/// State of the Block Header decoder +struct lzma_coder_s { + /// Block options that are filled in from the decoded header + lzma_options_block *options; + + /// Header field that block_header_decode() processes next + enum { + SEQ_FLAGS_1, + SEQ_FLAGS_2, + SEQ_COMPRESSED_SIZE, + SEQ_UNCOMPRESSED_SIZE, + SEQ_FILTER_FLAGS_INIT, + SEQ_FILTER_FLAGS_DECODE, + SEQ_CRC32, + SEQ_PADDING + } sequence; + + /// Position in variable-length integers. The same counter is also + /// used as the index into options->filters[] and as the byte + /// counter of the CRC32 and Header Padding fields. + size_t pos; + + /// CRC32 of the Block Header + uint32_t crc32; + + /// Decoder used for one Filter Flags field at a time + lzma_next_coder filter_flags_decoder; +}; + + +/// \brief Selects the header field to decode after the current one +/// +/// Fields that are absent according to coder->options are skipped by +/// falling through to the next case. +/// +/// \return false if coder->sequence was set to the next field; true if +/// there is nothing more to decode (the callers then return +/// LZMA_STREAM_END). true is also returned, after assert(0), +/// for a sequence value this function is not meant to see. +static bool +update_sequence(lzma_coder *coder) +{ + switch (coder->sequence) { + case SEQ_FLAGS_2: + if (coder->options->compressed_size + != LZMA_VLI_VALUE_UNKNOWN) { + coder->pos = 0; + coder->sequence = SEQ_COMPRESSED_SIZE; + break; + } + + // Fall through + + case SEQ_COMPRESSED_SIZE: + if (coder->options->uncompressed_size + != LZMA_VLI_VALUE_UNKNOWN) { + coder->pos = 0; + coder->sequence = SEQ_UNCOMPRESSED_SIZE; + break; + } + + // Fall through + + case SEQ_UNCOMPRESSED_SIZE: + coder->pos = 0; + + // Fall through + + case SEQ_FILTER_FLAGS_DECODE: + if (coder->options->filters[coder->pos].id + != LZMA_VLI_VALUE_UNKNOWN) { + coder->sequence = SEQ_FILTER_FLAGS_INIT; + break; + } + + if (coder->options->has_crc32) { + coder->pos = 0; + coder->sequence = SEQ_CRC32; + break; + } + + // Fall through: no more filters and no CRC32 field, so only + // Header Padding can remain. + + case SEQ_CRC32: + if (coder->options->padding != 0) { + coder->pos = 0; + coder->sequence = SEQ_PADDING; + break; + } + + return true; + + default: + assert(0); + return true; + } + + return false; +} + + +static lzma_ret +block_header_decode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out lzma_attribute((unused)), + size_t *restrict out_pos lzma_attribute((unused)), + size_t out_size lzma_attribute((unused)), + lzma_action action lzma_attribute((unused))) +{ + while (*in_pos < in_size) + switch (coder->sequence) { + case SEQ_FLAGS_1: + // Check that the reserved bit is
unset. Use HEADER_ERROR + // because newer version of liblzma may support the reserved + // bit, although it is likely that this is just a broken file. + if (in[*in_pos] & 0x40) + return LZMA_HEADER_ERROR; + + // Number of filters: we prepare appropriate amount of + // variables for variable-length integer parsing. The + // initialization function has already reset the rest + // of the values to LZMA_VLI_VALUE_UNKNOWN, which allows + // us to later know how many filters there are. + for (int i = (int)(in[*in_pos] & 0x07) - 1; i >= 0; --i) + coder->options->filters[i].id = 0; + + // End of Payload Marker flag + coder->options->has_eopm = (in[*in_pos] & 0x08) != 0; + + // Compressed Size: Prepare for variable-length integer + // parsing if it is known. + if (in[*in_pos] & 0x10) + coder->options->compressed_size = 0; + + // Uncompressed Size: the same. + if (in[*in_pos] & 0x20) + coder->options->uncompressed_size = 0; + + // Is Metadata Block flag + coder->options->is_metadata = (in[*in_pos] & 0x80) != 0; + + // We need at least one: Uncompressed Size or EOPM. + if (coder->options->uncompressed_size == LZMA_VLI_VALUE_UNKNOWN + && !coder->options->has_eopm) + return LZMA_DATA_ERROR; + + // Update header CRC32. + coder->crc32 = lzma_crc32(in + *in_pos, 1, coder->crc32); + + ++*in_pos; + coder->sequence = SEQ_FLAGS_2; + break; + + case SEQ_FLAGS_2: + // Check that the reserved bits are unset. + if (in[*in_pos] & 0xE0) + return LZMA_DATA_ERROR; + + // Get the size of Header Padding. + coder->options->padding = in[*in_pos] & 0x1F; + + coder->crc32 = lzma_crc32(in + *in_pos, 1, coder->crc32); + + ++*in_pos; + + if (update_sequence(coder)) + return LZMA_STREAM_END; + + break; + + case SEQ_COMPRESSED_SIZE: { + // Store the old input position to be used when + // updating coder->header_crc32. 
+ const size_t in_start = *in_pos; + + const lzma_ret ret = lzma_vli_decode( + &coder->options->compressed_size, + &coder->pos, in, in_pos, in_size); + + const size_t in_used = *in_pos - in_start; + + coder->options->compressed_reserve += in_used; + assert(coder->options->compressed_reserve + <= LZMA_VLI_BYTES_MAX); + + coder->options->header_size += in_used; + + coder->crc32 = lzma_crc32(in + in_start, in_used, + coder->crc32); + + if (ret != LZMA_STREAM_END) + return ret; + + if (update_sequence(coder)) + return LZMA_STREAM_END; + + break; + } + + case SEQ_UNCOMPRESSED_SIZE: { + const size_t in_start = *in_pos; + + const lzma_ret ret = lzma_vli_decode( + &coder->options->uncompressed_size, + &coder->pos, in, in_pos, in_size); + + const size_t in_used = *in_pos - in_start; + + coder->options->uncompressed_reserve += in_used; + assert(coder->options->uncompressed_reserve + <= LZMA_VLI_BYTES_MAX); + + coder->options->header_size += in_used; + + coder->crc32 = lzma_crc32(in + in_start, in_used, + coder->crc32); + + if (ret != LZMA_STREAM_END) + return ret; + + if (update_sequence(coder)) + return LZMA_STREAM_END; + + break; + } + + case SEQ_FILTER_FLAGS_INIT: { + assert(coder->options->filters[coder->pos].id + != LZMA_VLI_VALUE_UNKNOWN); + + const lzma_ret ret = lzma_filter_flags_decoder_init( + &coder->filter_flags_decoder, allocator, + &coder->options->filters[coder->pos]); + if (ret != LZMA_OK) + return ret; + + coder->sequence = SEQ_FILTER_FLAGS_DECODE; + } + + // Fall through + + case SEQ_FILTER_FLAGS_DECODE: { + const size_t in_start = *in_pos; + + const lzma_ret ret = coder->filter_flags_decoder.code( + coder->filter_flags_decoder.coder, + allocator, in, in_pos, in_size, + NULL, NULL, 0, LZMA_RUN); + + const size_t in_used = *in_pos - in_start; + coder->options->header_size += in_used; + coder->crc32 = lzma_crc32(in + in_start, + in_used, coder->crc32); + + if (ret != LZMA_STREAM_END) + return ret; + + ++coder->pos; + + if (update_sequence(coder)) + return 
LZMA_STREAM_END; + + break; + } + + case SEQ_CRC32: + assert(coder->options->has_crc32); + + if (in[*in_pos] != ((coder->crc32 >> (coder->pos * 8)) & 0xFF)) + return LZMA_DATA_ERROR; + + ++*in_pos; + ++coder->pos; + + // Check if we reached end of the CRC32 field. + if (coder->pos == 4) { + coder->options->header_size += 4; + + if (update_sequence(coder)) + return LZMA_STREAM_END; + } + + break; + + case SEQ_PADDING: + if (in[*in_pos] != 0x00) + return LZMA_DATA_ERROR; + + ++*in_pos; + ++coder->options->header_size; + ++coder->pos; + + if (coder->pos < (size_t)(coder->options->padding)) + break; + + return LZMA_STREAM_END; + + default: + return LZMA_PROG_ERROR; + } + + return LZMA_OK; +} + + +static void +block_header_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_coder_end(&coder->filter_flags_decoder, allocator); + lzma_free(coder, allocator); + return; +} + + +extern lzma_ret +lzma_block_header_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, lzma_options_block *options) +{ + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &block_header_decode; + next->end = &block_header_decoder_end; + next->coder->filter_flags_decoder = LZMA_NEXT_CODER_INIT; + } + + // Assume that Compressed Size and Uncompressed Size are unknown. + options->compressed_size = LZMA_VLI_VALUE_UNKNOWN; + options->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; + + // We will calculate the sizes of these fields too so that the + // application may rewrite the header if it wishes so. + options->compressed_reserve = 0; + options->uncompressed_reserve = 0; + + // The Block Flags field is always present, so include its size here + // and we don't need to worry about it in block_header_decode(). + options->header_size = 2; + + // Reset filters[] to indicate empty list of filters. + // See SEQ_FLAGS_1 in block_header_decode() for reasoning of this. 
+ for (size_t i = 0; i < 8; ++i) { + options->filters[i].id = LZMA_VLI_VALUE_UNKNOWN; + options->filters[i].options = NULL; + } + + next->coder->options = options; + next->coder->sequence = SEQ_FLAGS_1; + next->coder->pos = 0; + next->coder->crc32 = 0; + + return LZMA_OK; +} + + +extern LZMA_API lzma_ret +lzma_block_header_decoder(lzma_stream *strm, + lzma_options_block *options) +{ + lzma_next_strm_init(strm, lzma_block_header_decoder_init, options); + + strm->internal->supported_actions[LZMA_RUN] = true; + + return LZMA_OK; +} diff --git a/src/liblzma/common/block_header_encoder.c b/src/liblzma/common/block_header_encoder.c new file mode 100644 index 00000000..594b4fc0 --- /dev/null +++ b/src/liblzma/common/block_header_encoder.c @@ -0,0 +1,211 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_header_encoder.c +/// \brief Encodes Block Header for .lzma files +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" +#include "check.h" + + +extern LZMA_API lzma_ret +lzma_block_header_size(lzma_options_block *options) +{ + // Block Flags take two bytes. + size_t size = 2; + + // Compressed Size + if (!lzma_vli_is_valid(options->compressed_size)) { + return LZMA_PROG_ERROR; + + } else if (options->compressed_reserve != 0) { + // Make sure that the known Compressed Size fits into the + // reserved space. 
Note that lzma_vli_size() will return zero + // if options->compressed_size is LZMA_VLI_VALUE_UNKNOWN, so + // we don't need to handle that special case separately. + if (options->compressed_reserve > LZMA_VLI_BYTES_MAX + || lzma_vli_size(options->compressed_size) + > (size_t)(options->compressed_reserve)) + return LZMA_PROG_ERROR; + + size += options->compressed_reserve; + + } else if (options->compressed_size != LZMA_VLI_VALUE_UNKNOWN) { + // Compressed Size is known. We have already checked + // that is is a valid VLI, and since it isn't + // LZMA_VLI_VALUE_UNKNOWN, we can be sure that + // lzma_vli_size() will succeed. + size += lzma_vli_size(options->compressed_size); + } + + // Uncompressed Size + if (!lzma_vli_is_valid(options->uncompressed_size)) { + return LZMA_PROG_ERROR; + + } else if (options->uncompressed_reserve != 0) { + if (options->uncompressed_reserve > LZMA_VLI_BYTES_MAX + || lzma_vli_size(options->uncompressed_size) + > (size_t)(options->uncompressed_reserve)) + return LZMA_PROG_ERROR; + + size += options->uncompressed_reserve; + + } else if (options->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) { + size += lzma_vli_size(options->uncompressed_size); + } + + // List of Filter Flags + for (size_t i = 0; options->filters[i].id != LZMA_VLI_VALUE_UNKNOWN; + ++i) { + // Don't allow too many filters. 
+ if (i == 7) + return LZMA_PROG_ERROR; + + uint32_t tmp; + const lzma_ret ret = lzma_filter_flags_size(&tmp, + options->filters + i); + if (ret != LZMA_OK) + return ret; + + size += tmp; + } + + // CRC32 + if (options->has_crc32) + size += 4; + + // Padding + int32_t padding; + if (options->padding == LZMA_BLOCK_HEADER_PADDING_AUTO) { + const uint32_t preferred = lzma_alignment_output( + options->filters, 1); + const uint32_t unaligned = size + options->alignment; + padding = (int32_t)(unaligned % preferred); + if (padding != 0) + padding = preferred - padding; + } else if (options->padding >= LZMA_BLOCK_HEADER_PADDING_MIN + && options->padding <= LZMA_BLOCK_HEADER_PADDING_MAX) { + padding = options->padding; + } else { + return LZMA_PROG_ERROR; + } + + // All success. Copy the calculated values to the options structure. + options->padding = padding; + options->header_size = size + (size_t)(padding); + + return LZMA_OK; +} + + +extern LZMA_API lzma_ret +lzma_block_header_encode(uint8_t *out, const lzma_options_block *options) +{ + // We write the Block Flags later. + if (options->header_size < 2) + return LZMA_PROG_ERROR; + + const size_t out_size = options->header_size; + size_t out_pos = 2; + + // Compressed Size + if (options->compressed_size != LZMA_VLI_VALUE_UNKNOWN + || options->compressed_reserve != 0) { + const lzma_vli size = options->compressed_size + != LZMA_VLI_VALUE_UNKNOWN + ? options->compressed_size : 0; + size_t vli_pos = 0; + if (lzma_vli_encode( + size, &vli_pos, options->compressed_reserve, + out, &out_pos, out_size) != LZMA_STREAM_END) + return LZMA_PROG_ERROR; + + } + + // Uncompressed Size + if (options->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN + || options->uncompressed_reserve != 0) { + const lzma_vli size = options->uncompressed_size + != LZMA_VLI_VALUE_UNKNOWN + ? 
options->uncompressed_size : 0; + size_t vli_pos = 0; + if (lzma_vli_encode( + size, &vli_pos, options->uncompressed_reserve, + out, &out_pos, out_size) != LZMA_STREAM_END) + return LZMA_PROG_ERROR; + + } + + // Filter Flags + size_t filter_count; + for (filter_count = 0; options->filters[filter_count].id + != LZMA_VLI_VALUE_UNKNOWN; ++filter_count) { + // There can be at maximum of seven filters. + if (filter_count == 7) + return LZMA_PROG_ERROR; + + const lzma_ret ret = lzma_filter_flags_encode(out, &out_pos, + out_size, options->filters + filter_count); + // FIXME: Don't return LZMA_BUF_ERROR. + if (ret != LZMA_OK) + return ret; + } + + // Block Flags 1 + out[0] = filter_count; + + if (options->has_eopm) + out[0] |= 0x08; + else if (options->uncompressed_size == LZMA_VLI_VALUE_UNKNOWN) + return LZMA_PROG_ERROR; + + if (options->compressed_size != LZMA_VLI_VALUE_UNKNOWN + || options->compressed_reserve != 0) + out[0] |= 0x10; + + if (options->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN + || options->uncompressed_reserve != 0) + out[0] |= 0x20; + + if (options->is_metadata) + out[0] |= 0x80; + + // Block Flags 2 + if (options->padding < LZMA_BLOCK_HEADER_PADDING_MIN + || options->padding > LZMA_BLOCK_HEADER_PADDING_MAX) + return LZMA_PROG_ERROR; + + out[1] = (uint8_t)(options->padding); + + // CRC32 + if (options->has_crc32) { + if (out_size - out_pos < 4) + return LZMA_PROG_ERROR; + + const uint32_t crc = lzma_crc32(out, out_pos, 0); + for (size_t i = 0; i < 4; ++i) + out[out_pos++] = crc >> (i * 8); + } + + // Padding - the amount of available space must now match with + // the size of the Padding field. 
+ if (out_size - out_pos != (size_t)(options->padding)) + return LZMA_PROG_ERROR; + + memzero(out + out_pos, (size_t)(options->padding)); + + return LZMA_OK; +} diff --git a/src/liblzma/common/block_private.h b/src/liblzma/common/block_private.h new file mode 100644 index 00000000..8e2db319 --- /dev/null +++ b/src/liblzma/common/block_private.h @@ -0,0 +1,46 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_private.h +/// \brief Common stuff for Block encoder and decoder +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_BLOCK_COMMON_H +#define LZMA_BLOCK_COMMON_H + +#include "common.h" + +/// \brief Adds add to *size unless the result would exceed limit +/// +/// limit is first clamped to LZMA_VLI_VALUE_MAX. +/// +/// \return false if *size was increased by add; true if *size is already +/// above the limit or adding would go past it, in which case +/// *size is left unchanged. Note that true means failure. +static inline bool +update_size(lzma_vli *size, lzma_vli add, lzma_vli limit) +{ + if (limit > LZMA_VLI_VALUE_MAX) + limit = LZMA_VLI_VALUE_MAX; + + // Comparing against the remaining room (limit - *size) instead of + // computing *size + add keeps the check itself from overflowing. + if (limit < *size || limit - *size < add) + return true; + + *size += add; + + return false; +} + + +/// \brief Checks size against an optional reference value +/// +/// \return true if reference is LZMA_VLI_VALUE_UNKNOWN (nothing to +/// compare against) or equal to size; false otherwise. +static inline bool +is_size_valid(lzma_vli size, lzma_vli reference) +{ + return reference == LZMA_VLI_VALUE_UNKNOWN || reference == size; +} + +#endif diff --git a/src/liblzma/common/chunk_size.c b/src/liblzma/common/chunk_size.c new file mode 100644 index 00000000..042201d2 --- /dev/null +++ b/src/liblzma/common/chunk_size.c @@ -0,0 +1,74 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file chunk_size.c +/// \brief Finds out the minimal reasonable chunk size for a filter chain +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + + +/** + * \brief Finds out the minimal reasonable chunk size for a filter chain + * + * This function helps determining the Uncompressed Sizes of the Blocks when + * doing multi-threaded encoding.
+ * + * When compressing a large file on a system having multiple CPUs or CPU + * cores, the file can be split into smaller chunks that are compressed + * independently into separate Blocks in the same .lzma Stream. + * + * The filters are examined in chain order, and the first LZMA filter or + * unknown Filter ID that is met decides the result. + * + * \return Minimum reasonable Uncompressed Size of a Block. The + * recommended minimum Uncompressed Size is between this value + * and the value times two. For a chain with the LZMA filter, + * this is the dictionary size taken from the LZMA options. + * + * Zero if an unknown Filter ID is met before any LZMA filter. + * + * SIZE_MAX if the chain ends without an LZMA filter, which + * indicates that splitting would be useless. + */ +extern LZMA_API size_t +lzma_chunk_size(const lzma_options_filter *filters) +{ + while (filters->id != LZMA_VLI_VALUE_UNKNOWN) { + switch (filters->id) { + // TODO LZMA_FILTER_SPARSE + + case LZMA_FILTER_COPY: + case LZMA_FILTER_SUBBLOCK: + case LZMA_FILTER_X86: + case LZMA_FILTER_POWERPC: + case LZMA_FILTER_IA64: + case LZMA_FILTER_ARM: + case LZMA_FILTER_ARMTHUMB: + case LZMA_FILTER_SPARC: + // These are very fast, thus there is no point in + // splitting the data in smaller blocks. + break; + + case LZMA_FILTER_LZMA: + // The block sizes of the possible next filters in + // the chain are irrelevant after the LZMA filter. + return ((lzma_options_lzma *)(filters->options)) + ->dictionary_size; + + default: + // Unknown filters + return 0; + } + + ++filters; + } + + // Indicate that splitting would be useless. + return SIZE_MAX; +} diff --git a/src/liblzma/common/code.c b/src/liblzma/common/code.c new file mode 100644 index 00000000..0e3929b6 --- /dev/null +++ b/src/liblzma/common/code.c @@ -0,0 +1,203 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file code.c +/// \brief zlib-like API wrapper for liblzma's internal API +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version.
+// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + + +LZMA_API const lzma_stream LZMA_STREAM_INIT_VAR = { + .next_in = NULL, + .avail_in = 0, + .total_in = 0, + .next_out = NULL, + .avail_out = 0, + .total_out = 0, + .allocator = NULL, + .internal = NULL, +}; + + +/// \brief Prepares strm->internal for a new coding session +/// +/// Allocates strm->internal on first use, otherwise reuses the existing +/// structure, and resets the per-session state: the supported actions, +/// the sequence, the LZMA_BUF_ERROR bookkeeping and the total counters. +/// +/// \return LZMA_PROG_ERROR if strm is NULL, LZMA_MEM_ERROR if allocating +/// strm->internal fails, LZMA_OK otherwise. +extern lzma_ret +lzma_strm_init(lzma_stream *strm) +{ + if (strm == NULL) + return LZMA_PROG_ERROR; + + if (strm->internal == NULL) { + strm->internal = lzma_alloc(sizeof(lzma_internal), + strm->allocator); + if (strm->internal == NULL) + return LZMA_MEM_ERROR; + + strm->internal->next = LZMA_NEXT_CODER_INIT; + } + + strm->internal->supported_actions[LZMA_RUN] = false; + strm->internal->supported_actions[LZMA_SYNC_FLUSH] = false; + strm->internal->supported_actions[LZMA_FULL_FLUSH] = false; + strm->internal->supported_actions[LZMA_FINISH] = false; + strm->internal->sequence = ISEQ_RUN; + + // lzma_code() reads allow_buf_error before it ever writes it, so the + // flag must neither stay unset after the allocation above nor carry + // over from an earlier use of the same lzma_stream. Otherwise the + // first call that makes no progress could return LZMA_BUF_ERROR. + strm->internal->allow_buf_error = false; + + strm->total_in = 0; + strm->total_out = 0; + + return LZMA_OK; +} + + +extern LZMA_API lzma_ret +lzma_code(lzma_stream *strm, lzma_action action) +{ + // Sanity checks + if ((strm->next_in == NULL && strm->avail_in != 0) + || (strm->next_out == NULL && strm->avail_out != 0) + || strm->internal == NULL + || strm->internal->next.code == NULL + || (unsigned int)(action) > LZMA_FINISH + || !strm->internal->supported_actions[action]) + return LZMA_PROG_ERROR; + + switch (strm->internal->sequence) { + case ISEQ_RUN: + switch (action) { + case LZMA_RUN: + break; + + case LZMA_SYNC_FLUSH: + strm->internal->sequence = ISEQ_SYNC_FLUSH; + break; + + case LZMA_FULL_FLUSH: + strm->internal->sequence = ISEQ_FULL_FLUSH; + break; + + case LZMA_FINISH: + strm->internal->sequence = ISEQ_FINISH; + break; + } + + break; + +
case ISEQ_SYNC_FLUSH: + if (action != LZMA_SYNC_FLUSH) + return LZMA_PROG_ERROR; + + // Check that application doesn't change avail_in once + // LZMA_SYNC_FLUSH has been used. + if (strm->internal->avail_in != strm->avail_in) + return LZMA_DATA_ERROR; + + break; + + case ISEQ_FULL_FLUSH: + if (action != LZMA_FULL_FLUSH) + return LZMA_PROG_ERROR; + + // Check that application doesn't change avail_in once + // LZMA_FULL_FLUSH has been used. + if (strm->internal->avail_in != strm->avail_in) + return LZMA_DATA_ERROR; + + break; + + case ISEQ_FINISH: + if (action != LZMA_FINISH) + return LZMA_PROG_ERROR; + + if (strm->internal->avail_in != strm->avail_in) + return LZMA_DATA_ERROR; + + break; + + case ISEQ_END: + return LZMA_STREAM_END; + + case ISEQ_ERROR: + default: + return LZMA_PROG_ERROR; + } + + size_t in_pos = 0; + size_t out_pos = 0; + lzma_ret ret = strm->internal->next.code( + strm->internal->next.coder, strm->allocator, + strm->next_in, &in_pos, strm->avail_in, + strm->next_out, &out_pos, strm->avail_out, action); + + strm->next_in += in_pos; + strm->avail_in -= in_pos; + strm->total_in += in_pos; + + strm->next_out += out_pos; + strm->avail_out -= out_pos; + strm->total_out += out_pos; + + strm->internal->avail_in = strm->avail_in; + + switch (ret) { + case LZMA_OK: + // Don't return LZMA_BUF_ERROR when it happens the first time. + // This is to avoid returning LZMA_BUF_ERROR when avail_out + // was zero but still there was no more data left to written + // to next_out. 
+ if (out_pos == 0 && in_pos == 0) { + if (strm->internal->allow_buf_error) + ret = LZMA_BUF_ERROR; + else + strm->internal->allow_buf_error = true; + } else { + strm->internal->allow_buf_error = false; + } + break; + + case LZMA_STREAM_END: + if (strm->internal->sequence == ISEQ_SYNC_FLUSH + || strm->internal->sequence == ISEQ_FULL_FLUSH) + strm->internal->sequence = ISEQ_RUN; + else + strm->internal->sequence = ISEQ_END; + break; + + case LZMA_UNSUPPORTED_CHECK: + strm->internal->allow_buf_error = false; + break; + + default: + // All the other errors are fatal; coding cannot be continued. + strm->internal->sequence = ISEQ_ERROR; + break; + } + + return ret; +} + + +extern LZMA_API void +lzma_end(lzma_stream *strm) +{ + if (strm != NULL && strm->internal != NULL) { + if (strm->internal->next.end != NULL) + strm->internal->next.end(strm->internal->next.coder, + strm->allocator); + + lzma_free(strm->internal, strm->allocator); + strm->internal = NULL; + } + + return; +} diff --git a/src/liblzma/common/common.h b/src/liblzma/common/common.h new file mode 100644 index 00000000..ca9c2f23 --- /dev/null +++ b/src/liblzma/common/common.h @@ -0,0 +1,271 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file common.h +/// \brief Definitions common to the whole liblzma library +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_COMMON_H +#define LZMA_COMMON_H + +#include "sysdefs.h" + +// Don't use ifdef... +#if HAVE_VISIBILITY +# define LZMA_API __attribute__((__visibility__("default"))) +#else +# define LZMA_API +#endif + + +/// Size of temporary buffers needed in some filters +#define LZMA_BUFFER_SIZE 4096 + + +/// Internal helper filter used by Subblock decoder. It is mapped to an +/// otherwise invalid Filter ID, which is impossible to get from any input +/// file (even if malicious file). +#define LZMA_FILTER_SUBBLOCK_HELPER (UINT64_MAX - 2) + + +/////////// +// Types // +/////////// + +typedef struct lzma_coder_s lzma_coder; + +typedef struct lzma_next_coder_s lzma_next_coder; + +typedef struct lzma_filter_info_s lzma_filter_info; + + +typedef lzma_ret (*lzma_init_function)( + lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters); + +typedef lzma_ret (*lzma_code_function)( + lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, + lzma_action action); + +typedef void (*lzma_end_function)( + lzma_coder *coder, lzma_allocator *allocator); + + +/// Hold data and function pointers of the next filter in the chain. +struct lzma_next_coder_s { + /// Pointer to coder-specific data + lzma_coder *coder; + + /// "Pointer" to init function. This is never called here. + /// We need only to detect if we are initializing a coder + /// that was allocated earlier. See code.c and next_coder.c. 
+ uintptr_t init; + + /// Pointer to function to do the actual coding + lzma_code_function code; + + /// Pointer to function to free lzma_next_coder.coder + lzma_end_function end; +}; + +#define LZMA_NEXT_CODER_INIT \ + (lzma_next_coder){ \ + .coder = NULL, \ + .init = 0, \ + .code = NULL, \ + .end = NULL, \ + } + + +struct lzma_internal_s { + lzma_next_coder next; + + enum { + ISEQ_RUN, + ISEQ_SYNC_FLUSH, + ISEQ_FULL_FLUSH, + ISEQ_FINISH, + ISEQ_END, + ISEQ_ERROR, + } sequence; + + bool supported_actions[4]; + bool allow_buf_error; + size_t avail_in; +}; + + +struct lzma_filter_info_s { + /// Pointer to function used to initialize the filter. + /// This is NULL to indicate end of array. + lzma_init_function init; + + /// Pointer to filter's options structure + void *options; + + /// Uncompressed size of the filter, or LZMA_VLI_VALUE_UNKNOWN + /// if unknown. + lzma_vli uncompressed_size; +}; + + +/* +typedef struct { + lzma_init_function init; + uint32_t (*input_alignment)(lzma_vli id, const void *options); + uint32_t (*output_alignment)(lzma_vli id, const void *options); + bool changes_uncompressed_size; + bool supports_eopm; +} lzma_filter_hook; +*/ + + +/////////////// +// Functions // +/////////////// + +/// Allocates memory +extern void *lzma_alloc(size_t size, lzma_allocator *allocator) + lzma_attribute((malloc)); + +/// Frees memory +extern void lzma_free(void *ptr, lzma_allocator *allocator); + +/// Initializes lzma_stream FIXME desc +extern lzma_ret lzma_strm_init(lzma_stream *strm); + +/// +extern lzma_ret lzma_next_filter_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +/// +extern void lzma_next_coder_end(lzma_next_coder *next, + lzma_allocator *allocator); + + +extern lzma_ret lzma_filter_flags_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, lzma_options_filter *options); + +extern lzma_ret lzma_block_header_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, lzma_options_block 
*options); + +extern lzma_ret lzma_stream_encoder_single_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_options_stream *options); + +extern lzma_ret lzma_stream_decoder_init( + lzma_next_coder *next, lzma_allocator *allocator, + lzma_extra **header, lzma_extra **footer); + + +/// \brief Wrapper for memcpy() +/// +/// This function copies as much data as possible from in[] to out[] and +/// updates *in_pos and *out_pos accordingly. The amount copied is the +/// smaller of the space left in in[] and the space left in out[]. +/// +/// \return Number of bytes copied +/// +static inline size_t +bufcpy(const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, + uint8_t *restrict out, size_t *restrict out_pos, + size_t out_size) +{ + const size_t in_avail = in_size - *in_pos; + const size_t out_avail = out_size - *out_pos; + const size_t copy_size = MIN(in_avail, out_avail); + + memcpy(out + *out_pos, in + *in_pos, copy_size); + + *in_pos += copy_size; + *out_pos += copy_size; + + return copy_size; +} + + +/// \brief Initializing the next coder +/// +/// lzma_next_coder can point to different types of coders. The existing +/// coder may be different than what we are initializing now. In that case +/// we must get rid of the old coder first. Otherwise we reuse the existing +/// coder structure. +/// +/// The address of cmpfunc is what gets compared against and stored in +/// next->init. If func fails, the coder is freed. +/// +/// Note that this macro always returns from the calling function with +/// the return value of func. +/// +#define lzma_next_coder_init2(next, allocator, cmpfunc, func, ...) \ +do { \ + if ((uintptr_t)(&cmpfunc) != (next)->init) \ + lzma_next_coder_end(next, allocator); \ + const lzma_ret ret = func(next, __VA_ARGS__); \ + if (ret == LZMA_OK) { \ + (next)->init = (uintptr_t)(&cmpfunc); \ + assert((next)->code != NULL); \ + assert((next)->end != NULL); \ + } else { \ + lzma_next_coder_end(next, allocator); \ + } \ + return ret; \ +} while (0) + +/// \brief Initializing lzma_next_coder +/// +/// Call the initialization function, which must take at least one +/// argument in addition to lzma_next_coder and lzma_allocator. +#define lzma_next_coder_init(func, next, allocator, ...)
/// \brief      Initializing lzma_stream
///
/// lzma_strm initialization with more detailed options.
///
/// The expansion performs these steps in order:
///  1. Calls lzma_strm_init(strm); on failure the enclosing function
///     returns that error.
///  2. If (strm)->internal->next.init does not hold the address of cmpfunc,
///     the existing coder is ended with lzma_next_coder_end().
///  3. Calls func(&(strm)->internal->next, ...). On failure lzma_end(strm)
///     is called and the enclosing function returns the error.
///  4. Stores the address of cmpfunc into next.init and asserts that func
///     set both next.code and next.end.
///
/// This macro contains return statements, so it can be used only inside
/// a function returning lzma_ret. On success the expansion does not
/// return; execution continues with the statement after the macro.
/// The strm argument is evaluated several times.
#define lzma_next_strm_init2(strm, cmpfunc, func, ...) \
do { \
	lzma_ret ret = lzma_strm_init(strm); \
	if (ret != LZMA_OK) \
		return ret; \
	if ((uintptr_t)(&cmpfunc) != (strm)->internal->next.init) \
		lzma_next_coder_end(\
				&(strm)->internal->next, (strm)->allocator); \
	ret = func(&(strm)->internal->next, __VA_ARGS__); \
	if (ret != LZMA_OK) { \
		lzma_end(strm); \
		return ret; \
	} \
	(strm)->internal->next.init = (uintptr_t)(&cmpfunc); \
	assert((strm)->internal->next.code != NULL); \
	assert((strm)->internal->next.end != NULL); \
} while (0)
+/// +#define return_if_error(expr) \ +do { \ + const lzma_ret ret_ = expr; \ + if (ret_ != LZMA_OK) \ + return ret_; \ +} while (0) + +#endif diff --git a/src/liblzma/common/copy_coder.c b/src/liblzma/common/copy_coder.c new file mode 100644 index 00000000..64864f60 --- /dev/null +++ b/src/liblzma/common/copy_coder.c @@ -0,0 +1,143 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file copy_coder.c +/// \brief The Copy filter encoder and decoder +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "copy_coder.h" + + +struct lzma_coder_s { + lzma_next_coder next; + lzma_vli uncompressed_size; + bool is_encoder; +}; + + +static lzma_ret +copy_code(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + // If we aren't the last filter in the chain, the Copy filter + // is totally useless. Note that it is job of the next coder to + // take care of Uncompressed Size, so we don't need to update our + // coder->uncompressed_size at all. + if (coder->next.code != NULL) + return coder->next.code(coder->next.coder, allocator, + in, in_pos, in_size, out, out_pos, out_size, + action); + + // If we get here, we are the last filter in the chain. 
+ + const size_t in_avail = in_size - *in_pos; + + if (coder->is_encoder) { + // Check that we don't have too much input. + if ((lzma_vli)(in_avail) > coder->uncompressed_size) + return LZMA_DATA_ERROR; + + // Check that once LZMA_FINISH has been given, the + // amount of input matches uncompressed_size if it + // is known. + if (action == LZMA_FINISH && coder->uncompressed_size + != LZMA_VLI_VALUE_UNKNOWN + && coder->uncompressed_size + != (lzma_vli)(in_avail)) + return LZMA_DATA_ERROR; + + } else { + // Limit in_size so that we don't copy too much. + if ((lzma_vli)(in_avail) > coder->uncompressed_size) + in_size = *in_pos + (size_t)(coder->uncompressed_size); + } + + // Store the old input position, which is needed to update + // coder->uncompressed_size. + const size_t in_start = *in_pos; + + // We are the last coder in the chain. + // Just copy as much data as possible. + bufcpy(in, in_pos, in_size, out, out_pos, out_size); + + // Update uncompressed_size if it is known. + if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) + coder->uncompressed_size -= *in_pos - in_start; + + // action can be LZMA_FINISH only in the encoder. + if ((action == LZMA_FINISH && *in_pos == in_size) + || coder->uncompressed_size == 0) + return LZMA_STREAM_END; + + return LZMA_OK; +} + + +static void +copy_coder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_coder_end(&coder->next, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +copy_coder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters, bool is_encoder) +{ + // Allocate memory for the decoder if needed. + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = ©_code; + next->end = ©_coder_end; + next->coder->next = LZMA_NEXT_CODER_INIT; + } + + // Copy Uncompressed Size which is used to limit the output size. 
#ifdef HAVE_ENCODER
/// \brief      Initializes the Copy filter as an encoder
///
/// The whole body is one lzma_next_coder_init() expansion. That macro ends
/// with a return statement, which is why no explicit return is written
/// here. copy_coder_init() gets called with is_encoder set to true.
extern lzma_ret
lzma_copy_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
		const lzma_filter_info *filters)
{
	lzma_next_coder_init(copy_coder_init, next, allocator, filters, true);
}
#endif
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_COPY_CODER_H +#define LZMA_COPY_CODER_H + +#include "common.h" + +extern lzma_ret lzma_copy_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +extern lzma_ret lzma_copy_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +#endif diff --git a/src/liblzma/common/delta_coder.c b/src/liblzma/common/delta_coder.c new file mode 100644 index 00000000..ec8c6d59 --- /dev/null +++ b/src/liblzma/common/delta_coder.c @@ -0,0 +1,210 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file delta_coder.c +/// \brief Encoder and decoder for the Delta filter +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "delta_coder.h" + + +struct lzma_coder_s { + /// Next coder in the chain + lzma_next_coder next; + + /// Uncompressed size - This is needed when we are the last + /// filter in the chain. 
+ lzma_vli uncompressed_size; + + /// Delta distance + size_t distance; + + /// True if we are encoding; false if decoding + bool is_encoder; + + /// Position in history[] + uint8_t pos; + + /// Buffer to hold history of the original data + uint8_t history[LZMA_DELTA_DISTANCE_MAX]; +}; + + +static void +encode_buffer(lzma_coder *coder, uint8_t *buffer, size_t size) +{ + const size_t distance = coder->distance; + + for (size_t i = 0; i < size; ++i) { + const uint8_t tmp = coder->history[ + (distance + coder->pos) & 0xFF]; + coder->history[coder->pos--] = buffer[i]; + buffer[i] -= tmp; + } + + return; +} + + +static void +decode_buffer(lzma_coder *coder, uint8_t *buffer, size_t size) +{ + const size_t distance = coder->distance; + + for (size_t i = 0; i < size; ++i) { + buffer[i] += coder->history[(distance + coder->pos) & 0xFF]; + coder->history[coder->pos--] = buffer[i]; + } + + return; +} + + +static lzma_ret +delta_code(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + const size_t out_start = *out_pos; + size_t size; + lzma_ret ret; + + if (coder->next.code == NULL) { + const size_t in_avail = in_size - *in_pos; + + if (coder->is_encoder) { + // Check that we don't have too much input. + if ((lzma_vli)(in_avail) > coder->uncompressed_size) + return LZMA_DATA_ERROR; + + // Check that once LZMA_FINISH has been given, the + // amount of input matches uncompressed_size if it + // is known. + if (action == LZMA_FINISH && coder->uncompressed_size + != LZMA_VLI_VALUE_UNKNOWN + && coder->uncompressed_size + != (lzma_vli)(in_avail)) + return LZMA_DATA_ERROR; + + } else { + // Limit in_size so that we don't copy too much. 
+ if ((lzma_vli)(in_avail) > coder->uncompressed_size) + in_size = *in_pos + (size_t)( + coder->uncompressed_size); + } + + size = bufcpy(in, in_pos, in_size, out, out_pos, out_size); + + if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) + coder->uncompressed_size -= size; + + // action can be LZMA_FINISH only in the encoder. + ret = (action == LZMA_FINISH && *in_pos == in_size) + || coder->uncompressed_size == 0 + ? LZMA_STREAM_END : LZMA_OK; + + } else { + ret = coder->next.code(coder->next.coder, allocator, + in, in_pos, in_size, out, out_pos, out_size, + action); + if (ret != LZMA_OK && ret != LZMA_STREAM_END) + return ret; + + size = *out_pos - out_start; + } + + if (coder->is_encoder) + encode_buffer(coder, out + out_start, size); + else + decode_buffer(coder, out + out_start, size); + + return ret; +} + + +static void +delta_coder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_coder_end(&coder->next, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +delta_coder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters, bool is_encoder) +{ + // Allocate memory for the decoder if needed. + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &delta_code; + next->end = &delta_coder_end; + next->coder->next = LZMA_NEXT_CODER_INIT; + } + + // Copy Uncompressed Size which is used to limit the output size. + next->coder->uncompressed_size = filters[0].uncompressed_size; + + // The coder acts slightly differently as encoder and decoder. + next->coder->is_encoder = is_encoder; + + // Set the delta distance. 
+ if (filters[0].options == NULL) + return LZMA_PROG_ERROR; + next->coder->distance = ((lzma_options_delta *)(filters[0].options)) + ->distance; + if (next->coder->distance < LZMA_DELTA_DISTANCE_MIN + || next->coder->distance > LZMA_DELTA_DISTANCE_MAX) + return LZMA_HEADER_ERROR; + + // Initialize the rest of the variables. + next->coder->pos = 0; + memzero(next->coder->history, LZMA_DELTA_DISTANCE_MAX); + + // Initialize the next decoder in the chain, if any. + { + const lzma_ret ret = lzma_next_filter_init(&next->coder->next, + allocator, filters + 1); + if (ret != LZMA_OK) + return ret; + } + + return LZMA_OK; +} + + +#ifdef HAVE_ENCODER +extern lzma_ret +lzma_delta_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + return delta_coder_init(next, allocator, filters, true); +} +#endif + + +#ifdef HAVE_DECODER +extern lzma_ret +lzma_delta_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + return delta_coder_init(next, allocator, filters, false); +} +#endif diff --git a/src/liblzma/common/delta_coder.h b/src/liblzma/common/delta_coder.h new file mode 100644 index 00000000..60cea95c --- /dev/null +++ b/src/liblzma/common/delta_coder.h @@ -0,0 +1,31 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file delta_coder.h +/// \brief The Delta filter encoder and decoder +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_DELTA_CODER_H +#define LZMA_DELTA_CODER_H + +#include "common.h" + +extern lzma_ret lzma_delta_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +extern lzma_ret lzma_delta_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +#endif diff --git a/src/liblzma/common/extra.c b/src/liblzma/common/extra.c new file mode 100644 index 00000000..b743a439 --- /dev/null +++ b/src/liblzma/common/extra.c @@ -0,0 +1,33 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file extra.c +/// \brief Handling of Extra in Metadata +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + + +extern LZMA_API void +lzma_extra_free(lzma_extra *extra, lzma_allocator *allocator) +{ + while (extra != NULL) { + lzma_extra *tmp = extra->next; + lzma_free(extra, allocator); + extra = tmp; + } + + return; +} diff --git a/src/liblzma/common/features.c b/src/liblzma/common/features.c new file mode 100644 index 00000000..33b2e0a2 --- /dev/null +++ b/src/liblzma/common/features.c @@ -0,0 +1,70 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file features.c +/// \brief Information about features enabled at compile time +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
/// Filter IDs of the filters that were enabled at compile time. An entry
/// is present only when the matching HAVE_FILTER_* macro is defined.
/// The list is terminated with LZMA_VLI_VALUE_UNKNOWN.
static const lzma_vli filters[] = {
#ifdef HAVE_FILTER_COPY
	LZMA_FILTER_COPY,
#endif

#ifdef HAVE_FILTER_SUBBLOCK
	LZMA_FILTER_SUBBLOCK,
#endif

#ifdef HAVE_FILTER_X86
	LZMA_FILTER_X86,
#endif

#ifdef HAVE_FILTER_POWERPC
	LZMA_FILTER_POWERPC,
#endif

#ifdef HAVE_FILTER_IA64
	LZMA_FILTER_IA64,
#endif

#ifdef HAVE_FILTER_ARM
	LZMA_FILTER_ARM,
#endif

#ifdef HAVE_FILTER_ARMTHUMB
	LZMA_FILTER_ARMTHUMB,
#endif

#ifdef HAVE_FILTER_SPARC
	LZMA_FILTER_SPARC,
#endif

#ifdef HAVE_FILTER_DELTA
	LZMA_FILTER_DELTA,
#endif

#ifdef HAVE_FILTER_LZMA
	LZMA_FILTER_LZMA,
#endif

	// End-of-list marker
	LZMA_VLI_VALUE_UNKNOWN
};


// Both exported lists point to the same array: encoder and decoder
// availability are not distinguished here.
LZMA_API const lzma_vli *const lzma_available_filter_encoders = filters;

LZMA_API const lzma_vli *const lzma_available_filter_decoders = filters;
#ifdef HAVE_FILTER_SUBBLOCK
/// \brief      Handles Filter Properties of the Subblock filter
///
/// The Subblock filter has no Filter Properties, so a non-zero size is
/// rejected and no input is read. A lzma_options_subblock structure is
/// allocated and stored into coder->options->options with allow_subfilters
/// set to true; no other member of the structure is written here.
///
/// \return     LZMA_STREAM_END on success, LZMA_HEADER_ERROR if
///             properties_size is non-zero, LZMA_MEM_ERROR if the
///             allocation fails.
static lzma_ret
properties_subblock(lzma_coder *coder, lzma_allocator *allocator,
		const uint8_t *in lzma_attribute((unused)),
		size_t *in_pos lzma_attribute((unused)),
		size_t in_size lzma_attribute((unused)))
{
	if (coder->properties_size != 0)
		return LZMA_HEADER_ERROR;

	lzma_options_subblock *const subblock = lzma_alloc(
			sizeof(lzma_options_subblock), allocator);

	// Publish the pointer first; it is NULL if the allocation failed.
	coder->options->options = subblock;
	if (subblock == NULL)
		return LZMA_MEM_ERROR;

	subblock->allow_subfilters = true;

	return LZMA_STREAM_END;
}
#endif
#ifdef HAVE_FILTER_DELTA
/// \brief      Decodes Filter Properties of the Delta filter
///
/// The properties are exactly one byte, which holds the delta distance
/// minus one. The decoded options structure is allocated here and stored
/// into coder->options->options.
///
/// \return     LZMA_STREAM_END once the byte has been decoded, LZMA_OK if
///             no input is available yet, LZMA_HEADER_ERROR if
///             properties_size is not one, LZMA_MEM_ERROR if the
///             allocation fails.
static lzma_ret
properties_delta(lzma_coder *coder, lzma_allocator *allocator,
		const uint8_t *in, size_t *in_pos, size_t in_size)
{
	if (coder->properties_size != 1)
		return LZMA_HEADER_ERROR;

	// Nothing to read yet; the caller comes back with more input.
	if (*in_pos == in_size)
		return LZMA_OK;

	lzma_options_delta *const delta = lzma_alloc(
			sizeof(lzma_options_delta), allocator);
	if (delta == NULL)
		return LZMA_MEM_ERROR;

	coder->options->options = delta;

	// The stored value is distance - 1. The input byte is consumed
	// only after the allocation has succeeded.
	const uint8_t stored = in[*in_pos];
	++*in_pos;
	delta->distance = (uint32_t)(stored) + 1;

	return LZMA_STREAM_END;
}
#endif
/// \brief      Decodes a Filter Flags field, possibly over several calls
///
/// This is a state machine driven by coder->sequence:
///  - SEQ_MISC reads the Misc byte, which holds either the Filter ID and
///    the Size of Filter Properties, or tells that they are stored
///    separately.
///  - SEQ_ID reads the external Filter ID as a variable-length integer.
///  - SEQ_SIZE reads the external Size of Filter Properties as a
///    variable-length integer.
///  - SEQ_PROPERTIES passes control to the filter-specific function that
///    decodes the Filter Properties.
///
/// The decoded values are stored into *coder->options. Only the input
/// buffer is used; out, out_pos, out_size, and action are ignored.
///
/// \return     LZMA_OK when the input ran out before the field was complete.
///             In SEQ_PROPERTIES the value of the filter-specific function
///             is returned as is (those visible in this file return
///             LZMA_STREAM_END when done). LZMA_HEADER_ERROR is returned
///             for a Filter ID with no compiled-in support, and
///             LZMA_PROG_ERROR if coder->sequence is invalid. A value other
///             than LZMA_STREAM_END from lzma_vli_decode() is returned
///             as is.
static lzma_ret
filter_flags_decode(lzma_coder *coder, lzma_allocator *allocator,
		const uint8_t *restrict in, size_t *restrict in_pos,
		size_t in_size, uint8_t *restrict out lzma_attribute((unused)),
		size_t *restrict out_pos lzma_attribute((unused)),
		size_t out_size lzma_attribute((unused)),
		lzma_action action lzma_attribute((unused)))
{
	// SEQ_PROPERTIES must be entered even without input, because
	// Filter Properties may be empty (for example the Copy filter
	// case below returns without reading anything).
	while (*in_pos < in_size || coder->sequence == SEQ_PROPERTIES)
	switch (coder->sequence) {
	case SEQ_MISC:
		// Determine the Filter ID and Size of Filter Properties.
		if (in[*in_pos] >= 0xE0) {
			// Using External ID. Prepare the ID
			// for variable-length integer parsing.
			coder->options->id = 0;

			if (in[*in_pos] == 0xFF) {
				// Mark that Size of Filter Properties is
				// unknown, so we know later that there is
				// external Size of Filter Properties present.
				coder->properties_size
						= LZMA_VLI_VALUE_UNKNOWN;
			} else {
				// Take Size of Filter Properties from Misc.
				// Misc is 0xE0 .. 0xFE here, so the size
				// is 0 .. 30.
				coder->properties_size = in[*in_pos] - 0xE0;
			}

			coder->sequence = SEQ_ID;

		} else {
			// The Filter ID is the same as Misc.
			coder->options->id = in[*in_pos];

			// The Size of Filter Properties can be calculated
			// from Misc too.
			coder->properties_size = in[*in_pos] / 0x20;

			coder->sequence = SEQ_PROPERTIES;
		}

		++*in_pos;
		break;

	case SEQ_ID: {
		// coder->pos keeps the position inside the integer so that
		// decoding can continue in a later call.
		const lzma_ret ret = lzma_vli_decode(&coder->options->id,
				&coder->pos, in, in_pos, in_size);
		if (ret != LZMA_STREAM_END)
			return ret;

		if (coder->properties_size == LZMA_VLI_VALUE_UNKNOWN) {
			// We have also external Size of Filter
			// Properties. Prepare the size for
			// variable-length integer parsing.
			coder->properties_size = 0;
			coder->sequence = SEQ_SIZE;
		} else {
			coder->sequence = SEQ_PROPERTIES;
		}

		// Reset pos for its next job.
		coder->pos = 0;
		break;
	}

	case SEQ_SIZE: {
		const lzma_ret ret = lzma_vli_decode(&coder->properties_size,
				&coder->pos, in, in_pos, in_size);
		if (ret != LZMA_STREAM_END)
			return ret;

		// pos is reused by the properties decoders as a byte counter.
		coder->pos = 0;
		coder->sequence = SEQ_PROPERTIES;
		break;
	}

	case SEQ_PROPERTIES: {
		// Filter-specific decoder chosen by the Filter ID
		lzma_ret (*get_properties)(lzma_coder *coder,
				lzma_allocator *allocator, const uint8_t *in,
				size_t *in_pos, size_t in_size);

		switch (coder->options->id) {
#ifdef HAVE_FILTER_COPY
		case LZMA_FILTER_COPY:
			// The Copy filter takes no properties at all.
			return coder->properties_size > 0
					? LZMA_HEADER_ERROR : LZMA_STREAM_END;
#endif
#ifdef HAVE_FILTER_SUBBLOCK
		case LZMA_FILTER_SUBBLOCK:
			get_properties = &properties_subblock;
			break;
#endif
#ifdef HAVE_FILTER_SIMPLE
#	ifdef HAVE_FILTER_X86
		case LZMA_FILTER_X86:
#	endif
#	ifdef HAVE_FILTER_POWERPC
		case LZMA_FILTER_POWERPC:
#	endif
#	ifdef HAVE_FILTER_IA64
		case LZMA_FILTER_IA64:
#	endif
#	ifdef HAVE_FILTER_ARM
		case LZMA_FILTER_ARM:
#	endif
#	ifdef HAVE_FILTER_ARMTHUMB
		case LZMA_FILTER_ARMTHUMB:
#	endif
#	ifdef HAVE_FILTER_SPARC
		case LZMA_FILTER_SPARC:
#	endif
			// All the simple filters share one properties format.
			get_properties = &properties_simple;
			break;
#endif
#ifdef HAVE_FILTER_DELTA
		case LZMA_FILTER_DELTA:
			get_properties = &properties_delta;
			break;
#endif
#ifdef HAVE_FILTER_LZMA
		case LZMA_FILTER_LZMA:
			get_properties = &properties_lzma;
			break;
#endif
		default:
			// Unknown Filter ID or support not compiled in
			return LZMA_HEADER_ERROR;
		}

		// The result is returned directly: the sequence stays at
		// SEQ_PROPERTIES, so a later call with more input comes
		// back here.
		return get_properties(coder, allocator, in, in_pos, in_size);
	}

	default:
		return LZMA_PROG_ERROR;
	}

	// Input ran out before the field was complete.
	return LZMA_OK;
}
a/src/liblzma/common/filter_flags_encoder.c b/src/liblzma/common/filter_flags_encoder.c new file mode 100644 index 00000000..d8f260a1 --- /dev/null +++ b/src/liblzma/common/filter_flags_encoder.c @@ -0,0 +1,359 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file filter_flags_encoder.c +/// \brief Encodes a Filter Flags field +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" +#include "lzma_encoder.h" + + +/// \brief Calculates the size of the Filter Properties field +/// +/// Returns LZMA_OK when the Filter ID is supported, LZMA_HEADER_ERROR when +/// the Filter ID is a valid VLI but support for it isn't available, and +/// LZMA_PROG_ERROR when the Filter ID isn't a valid VLI. 
+static lzma_ret +get_properties_size(uint32_t *size, const lzma_options_filter *options) +{ + lzma_ret ret = LZMA_OK; + + switch (options->id) { +#ifdef HAVE_FILTER_COPY + case LZMA_FILTER_COPY: + *size = 0; + break; +#endif + +#ifdef HAVE_FILTER_SUBBLOCK + case LZMA_FILTER_SUBBLOCK: + *size = 0; + break; +#endif + +#ifdef HAVE_FILTER_SIMPLE +# ifdef HAVE_FILTER_X86 + case LZMA_FILTER_X86: +# endif +# ifdef HAVE_FILTER_POWERPC + case LZMA_FILTER_POWERPC: +# endif +# ifdef HAVE_FILTER_IA64 + case LZMA_FILTER_IA64: +# endif +# ifdef HAVE_FILTER_ARM + case LZMA_FILTER_ARM: +# endif +# ifdef HAVE_FILTER_ARMTHUMB + case LZMA_FILTER_ARMTHUMB: +# endif +# ifdef HAVE_FILTER_SPARC + case LZMA_FILTER_SPARC: +# endif + if (options->options == NULL || ((const lzma_options_simple *)( + options->options))->start_offset == 0) + *size = 0; + else + *size = 4; + break; +#endif + +#ifdef HAVE_FILTER_DELTA + case LZMA_FILTER_DELTA: + *size = 1; + break; +#endif + +#ifdef HAVE_FILTER_LZMA + case LZMA_FILTER_LZMA: + *size = 2; + break; +#endif + + default: + // Unknown filter - if the Filter ID is a proper VLI, + // return LZMA_HEADER_ERROR instead of LZMA_PROG_ERROR, + // because it's possible that we just don't have support + // compiled in for the requested filter. + ret = options->id <= LZMA_VLI_VALUE_MAX + ? LZMA_HEADER_ERROR : LZMA_PROG_ERROR; + break; + } + + return ret; +} + + +extern LZMA_API lzma_ret +lzma_filter_flags_size(uint32_t *size, const lzma_options_filter *options) +{ + // Get size of Filter Properties. + uint32_t prop_size; + const lzma_ret ret = get_properties_size(&prop_size, options); + if (ret != LZMA_OK) + return ret; + + // Size of Filter ID field if it exists. + size_t id_size; + size_t prop_size_size; + if (options->id < 0xE0 + && (lzma_vli)(prop_size) == options->id / 0x20) { + // ID and Size of Filter Properties fit into Misc. + id_size = 0; + prop_size_size = 0; + + } else { + // At least Filter ID is stored using the External ID field. 
#ifdef HAVE_FILTER_SIMPLE
/// \brief      Encodes Filter Properties of the so called simple filters
///
/// Nothing is written when options is NULL or the start offset is zero.
/// Otherwise start_offset is written as four bytes, least significant
/// byte first, and *out_pos is advanced by four.
///
/// \return     LZMA_OK on success, LZMA_BUF_ERROR if fewer than four bytes
///             of output space are available.
static lzma_ret
properties_simple(uint8_t *out, size_t *out_pos, size_t out_size,
		const lzma_options_simple *options)
{
	const bool nothing_to_store
			= options == NULL || options->start_offset == 0;
	if (nothing_to_store)
		return LZMA_OK;

	if (out_size - *out_pos < 4)
		return LZMA_BUF_ERROR;

	uint8_t *dest = out + *out_pos;
	for (unsigned int shift = 0; shift < 32; shift += 8)
		*dest++ = options->start_offset >> shift;

	*out_pos += 4;

	return LZMA_OK;
}
#endif
+ if (options->distance < LZMA_DELTA_DISTANCE_MIN + || options->distance > LZMA_DELTA_DISTANCE_MAX) + return LZMA_HEADER_ERROR; + + if (out_size - *out_pos < 1) + return LZMA_BUF_ERROR; + + out[*out_pos] = options->distance - LZMA_DELTA_DISTANCE_MIN; + ++*out_pos; + + return LZMA_OK; +} +#endif + + +#ifdef HAVE_FILTER_LZMA +/// Encodes LZMA Properties and Dictionary Flags (two bytes) +static lzma_ret +properties_lzma(uint8_t *out, size_t *out_pos, size_t out_size, + const lzma_options_lzma *options) +{ + if (options == NULL) + return LZMA_PROG_ERROR; + + if (out_size - *out_pos < 2) + return LZMA_BUF_ERROR; + + // LZMA Properties + if (lzma_lzma_encode_properties(options, out + *out_pos)) + return LZMA_HEADER_ERROR; + + ++*out_pos; + + // Dictionary flags + // + // Dictionary size is encoded using six bits of + // which one is mantissa and five are exponent. + // + // There are some limits that must hold to keep + // this coding working. +# if LZMA_DICTIONARY_SIZE_MAX > UINT32_MAX / 2 +# error LZMA_DICTIONARY_SIZE_MAX is too big. +# endif +# if LZMA_DICTIONARY_SIZE_MIN < 1 +# error LZMA_DICTIONARY_SIZE_MIN cannot be zero. +# endif + + // Validate it: + if (options->dictionary_size < LZMA_DICTIONARY_SIZE_MIN + || options->dictionary_size > LZMA_DICTIONARY_SIZE_MAX) + return LZMA_HEADER_ERROR; + + if (options->dictionary_size == 1) { + // Special case + out[*out_pos] = 0x00; + } else { + // TODO This could be more elegant. + uint32_t i = 1; + while (((2 | ((i + 1) & 1)) << ((i - 1) / 2)) + < options->dictionary_size) + ++i; + out[*out_pos] = i; + } + + ++*out_pos; + + return LZMA_OK; +} +#endif + + +extern LZMA_API lzma_ret +lzma_filter_flags_encode(uint8_t *out, size_t *out_pos, size_t out_size, + const lzma_options_filter *options) +{ + // Minimum output is one byte (everything fits into Misc). + // The caller should have checked that there is enough output space, + // so we return LZMA_PROG_ERROR instead of LZMA_BUF_ERROR. 
+ if (*out_pos >= out_size) + return LZMA_PROG_ERROR; + + // Get size of Filter Properties. + uint32_t prop_size; + lzma_ret ret = get_properties_size(&prop_size, options); + if (ret != LZMA_OK) + return ret; + + // Misc, External ID, and Size of Properties + if (options->id < 0xE0 + && (lzma_vli)(prop_size) == options->id / 0x20) { + // ID and Size of Filter Properties fit into Misc: the Misc + // byte is the Filter ID itself. + out[*out_pos] = options->id; + ++*out_pos; + + } else if (prop_size <= 30) { + // Size of Filter Properties fits into Misc as 0xE0 + size. + out[*out_pos] = prop_size + 0xE0; + ++*out_pos; + + // External ID is used to encode the Filter ID. If encoding + // the VLI fails, it's because the caller has given us too + // little output space, which it should have checked already. + // So return LZMA_PROG_ERROR, not LZMA_BUF_ERROR. + size_t dummy = 0; + if (lzma_vli_encode(options->id, &dummy, 1, + out, out_pos, out_size) != LZMA_STREAM_END) + return LZMA_PROG_ERROR; + + } else { + // Nothing fits into Misc: write 0xFF, then both external + // fields. + out[*out_pos] = 0xFF; + ++*out_pos; + + // External ID is used to encode the Filter ID. + size_t dummy = 0; + if (lzma_vli_encode(options->id, &dummy, 1, + out, out_pos, out_size) != LZMA_STREAM_END) + return LZMA_PROG_ERROR; + + // External Size of Filter Properties + dummy = 0; + if (lzma_vli_encode(prop_size, &dummy, 1, + out, out_pos, out_size) != LZMA_STREAM_END) + return LZMA_PROG_ERROR; + } + + // Filter Properties + switch (options->id) { +#ifdef HAVE_FILTER_COPY + case LZMA_FILTER_COPY: + assert(prop_size == 0); + ret = options->options == NULL ?
LZMA_OK : LZMA_HEADER_ERROR; + break; +#endif + +#ifdef HAVE_FILTER_SUBBLOCK + case LZMA_FILTER_SUBBLOCK: + // No Filter Properties are written for the Subblock filter. + assert(prop_size == 0); + ret = LZMA_OK; + break; +#endif + +#ifdef HAVE_FILTER_SIMPLE +# ifdef HAVE_FILTER_X86 + case LZMA_FILTER_X86: +# endif +# ifdef HAVE_FILTER_POWERPC + case LZMA_FILTER_POWERPC: +# endif +# ifdef HAVE_FILTER_IA64 + case LZMA_FILTER_IA64: +# endif +# ifdef HAVE_FILTER_ARM + case LZMA_FILTER_ARM: +# endif +# ifdef HAVE_FILTER_ARMTHUMB + case LZMA_FILTER_ARMTHUMB: +# endif +# ifdef HAVE_FILTER_SPARC + case LZMA_FILTER_SPARC: +# endif + // All simple filters share one Filter Properties encoding. + ret = properties_simple(out, out_pos, out_size, + options->options); + break; +#endif + +#ifdef HAVE_FILTER_DELTA + case LZMA_FILTER_DELTA: + ret = properties_delta(out, out_pos, out_size, + options->options); + break; +#endif + +#ifdef HAVE_FILTER_LZMA + case LZMA_FILTER_LZMA: + ret = properties_lzma(out, out_pos, out_size, + options->options); + break; +#endif + + default: + // Filter ID without an encoder in this build. + assert(0); + ret = LZMA_PROG_ERROR; + break; + } + + return ret; +} diff --git a/src/liblzma/common/index.c b/src/liblzma/common/index.c new file mode 100644 index 00000000..6816b37a --- /dev/null +++ b/src/liblzma/common/index.c @@ -0,0 +1,140 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index.c +/// \brief Handling of Index in Metadata +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details.
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + + +/** + * \brief Duplicates an Index list + * + * \return A copy of the Index list, or NULL if memory allocation + * failed or the original Index was empty. + */ +extern LZMA_API lzma_index * +lzma_index_dup(const lzma_index *old_current, lzma_allocator *allocator) +{ + lzma_index *new_head = NULL; + lzma_index *new_current = NULL; + + while (old_current != NULL) { + lzma_index *i = lzma_alloc(sizeof(lzma_index), allocator); + if (i == NULL) { + // Release the partial copy before reporting failure. + lzma_index_free(new_head, allocator); + return NULL; + } + + i->total_size = old_current->total_size; + i->uncompressed_size = old_current->uncompressed_size; + i->next = NULL; + + // Append the new Record to the tail of the copy. + if (new_head == NULL) + new_head = i; + else + new_current->next = i; + + new_current = i; + old_current = old_current->next; + } + + return new_head; +} + + +/** + * \brief Frees an Index list + * + * All Index Records in the list are freed. This function is convenient when + * getting rid of lzma_metadata structures containing an Index.
+ */ +extern LZMA_API void +lzma_index_free(lzma_index *i, lzma_allocator *allocator) +{ + while (i != NULL) { + // Read the next pointer before the Record is freed. + lzma_index *tmp = i->next; + lzma_free(i, allocator); + i = tmp; + } + + return; +} + + +/** + * \brief Calculates properties of an Index list + * + * Stores the number of Records to *count and the sums of their Total Size + * and Uncompressed Size fields to *total_size and *uncompressed_size. + * A sum is set to LZMA_VLI_VALUE_UNKNOWN once any Record has that field + * unknown. + * + * \return LZMA_OK on success; LZMA_PROG_ERROR if a size or a sum exceeds + * LZMA_VLI_VALUE_MAX; LZMA_HEADER_ERROR if either sum is unknown. + */ +extern LZMA_API lzma_ret +lzma_index_count(const lzma_index *i, size_t *count, + lzma_vli *lzma_restrict total_size, + lzma_vli *lzma_restrict uncompressed_size) +{ + *count = 0; + *total_size = 0; + *uncompressed_size = 0; + + while (i != NULL) { + if (i->total_size == LZMA_VLI_VALUE_UNKNOWN) { + *total_size = LZMA_VLI_VALUE_UNKNOWN; + } else if (i->total_size > LZMA_VLI_VALUE_MAX) { + return LZMA_PROG_ERROR; + } else if (*total_size != LZMA_VLI_VALUE_UNKNOWN) { + *total_size += i->total_size; + if (*total_size > LZMA_VLI_VALUE_MAX) + return LZMA_PROG_ERROR; + } + + if (i->uncompressed_size == LZMA_VLI_VALUE_UNKNOWN) { + *uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; + } else if (i->uncompressed_size > LZMA_VLI_VALUE_MAX) { + return LZMA_PROG_ERROR; + } else if (*uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) { + *uncompressed_size += i->uncompressed_size; + if (*uncompressed_size > LZMA_VLI_VALUE_MAX) + return LZMA_PROG_ERROR; + } + + ++*count; + i = i->next; + } + + // FIXME ?
+ if (*total_size == LZMA_VLI_VALUE_UNKNOWN + || *uncompressed_size == LZMA_VLI_VALUE_UNKNOWN) + return LZMA_HEADER_ERROR; + + return LZMA_OK; +} + + + +/** + * \brief Compares two Index lists + * + * \return True if both lists have the same number of Records and each + * pair of Records has equal Total Size and Uncompressed Size. + */ +extern LZMA_API lzma_bool +lzma_index_is_equal(const lzma_index *a, const lzma_index *b) +{ + while (a != NULL && b != NULL) { + if (a->total_size != b->total_size || a->uncompressed_size + != b->uncompressed_size) + return false; + + a = a->next; + b = b->next; + } + + // At least one pointer is NULL here; the lists are equal only if + // both ended at the same time. + return a == b; +} diff --git a/src/liblzma/common/info.c b/src/liblzma/common/info.c new file mode 100644 index 00000000..2a59a029 --- /dev/null +++ b/src/liblzma/common/info.c @@ -0,0 +1,823 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file info.c +/// \brief Collects and verifies integrity of Stream size information +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + + +struct lzma_info_s { + struct { + /// Known Size of Header Metadata Block; there are two + /// special values: + /// - LZMA_VLI_VALUE_UNKNOWN indicates that we don't know + /// if Header Metadata Block is present. + /// - 0 indicates that Header Metadata Block is not present.
+ lzma_vli header_metadata_size; + + /// Known Total Size of the Data Blocks in the Stream + lzma_vli total_size; + + /// Known Uncompressed Size of the Data Blocks in the Stream + lzma_vli uncompressed_size; + + /// Known Size of Footer Metadata Block + lzma_vli footer_metadata_size; + } known; + + struct { + /// Sum of Total Size fields stored to the Index so far + lzma_vli total_size; + + /// Sum of Uncompressed Size fields stored to the Index so far + lzma_vli uncompressed_size; + + /// First Index Record in the list, or NULL if Index is empty. + lzma_index *head; + + /// Number of Index Records + size_t record_count; + + /// Number of Index Records in which Total Size or + /// Uncompressed Size is still unknown + size_t incomplete_count; + + /// True when we know that no more Records will get added + /// to the Index. + bool is_final; + } index; + + /// Start offset of the Stream. This is needed to calculate + /// lzma_info_iter.stream_offset. + lzma_vli stream_start_offset; + + /// True if Index is present in Header Metadata Block + bool has_index_in_header_metadata; +}; + + +////////////////////// +// Create/Reset/End // +////////////////////// + +/// Resets the Index bookkeeping to an empty, non-final Index. The existing +/// Index Records are not freed here. +static void +index_init(lzma_info *info) +{ + info->index.total_size = 0; + info->index.uncompressed_size = 0; + info->index.head = NULL; + info->index.record_count = 0; + info->index.incomplete_count = 0; + info->index.is_final = false; + return; +} + + +/// Marks every known size as unknown, clears the Stream start offset, and +/// resets the Index bookkeeping. +static void +info_init(lzma_info *info) +{ + info->known.header_metadata_size = LZMA_VLI_VALUE_UNKNOWN; + info->known.total_size = LZMA_VLI_VALUE_UNKNOWN; + info->known.uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; + info->known.footer_metadata_size = LZMA_VLI_VALUE_UNKNOWN; + info->stream_start_offset = 0; + info->has_index_in_header_metadata = false; + + index_init(info); + + return; +} + + +/// Allocates a new lzma_info when info is NULL; otherwise frees the Index of +/// the given structure so that it can be reused. In both cases the structure +/// is reset. Returns NULL if the allocation fails. +extern LZMA_API lzma_info * +lzma_info_init(lzma_info *info, lzma_allocator *allocator) +{ + if (info == NULL) + info = lzma_alloc(sizeof(lzma_info), allocator); + else + lzma_index_free(info->index.head, allocator); + + if (info != NULL)
+ info_init(info); + + return info; +} + + +/// Frees the Index list and the lzma_info structure itself. Like free(), +/// this accepts NULL, so cleanup paths that run after a failed +/// lzma_info_init() don't need a separate check. +extern LZMA_API void +lzma_info_free(lzma_info *info, lzma_allocator *allocator) +{ + if (info == NULL) + return; + + lzma_index_free(info->index.head, allocator); + lzma_free(info, allocator); + return; +} + + +///////// +// Set // +///////// + +/// Stores new_size to *known_size if it was unknown; otherwise verifies that +/// the two match. index_size is the amount already accounted for in the +/// Index, and it must not exceed new_size. +static lzma_ret +set_size(lzma_vli new_size, lzma_vli *known_size, lzma_vli index_size, + bool forbid_zero) +{ + assert(new_size <= LZMA_VLI_VALUE_MAX); + + lzma_ret ret = LZMA_OK; + + if (forbid_zero && new_size == 0) + ret = LZMA_PROG_ERROR; + else if (index_size > new_size) + ret = LZMA_DATA_ERROR; + else if (*known_size == LZMA_VLI_VALUE_UNKNOWN) + *known_size = new_size; + else if (*known_size != new_size) + ret = LZMA_DATA_ERROR; + + return ret; +} + + +/// Sets the size selected by type. The Stream start offset is simply +/// overwritten; the other sizes go through set_size() and thus must agree +/// with any value that is already known. +extern LZMA_API lzma_ret +lzma_info_size_set(lzma_info *info, lzma_info_size type, lzma_vli size) +{ + if (size > LZMA_VLI_VALUE_MAX) + return LZMA_PROG_ERROR; + + switch (type) { + case LZMA_INFO_STREAM_START: + info->stream_start_offset = size; + return LZMA_OK; + + case LZMA_INFO_HEADER_METADATA: + return set_size(size, &info->known.header_metadata_size, + 0, false); + + case LZMA_INFO_TOTAL: + return set_size(size, &info->known.total_size, + info->index.total_size, true); + + case LZMA_INFO_UNCOMPRESSED: + return set_size(size, &info->known.uncompressed_size, + info->index.uncompressed_size, false); + + case LZMA_INFO_FOOTER_METADATA: + return set_size(size, &info->known.footer_metadata_size, + 0, true); + } + + return LZMA_PROG_ERROR; +} + + +extern LZMA_API lzma_ret +lzma_info_index_set(lzma_info *info, lzma_allocator *allocator, + lzma_index *i_new, lzma_bool eat_index) +{ + if (i_new == NULL) + return LZMA_PROG_ERROR; + + lzma_index *i_old = info->index.head; + + if (i_old != NULL) { + while (true) { + // If the new Index has fewer Records than the old one, + // the new Index cannot be valid. + if (i_new == NULL) + return LZMA_DATA_ERROR; + + // The new Index must be complete i.e. no unknown + // values.
+ if (i_new->total_size > LZMA_VLI_VALUE_MAX + || i_new->uncompressed_size + > LZMA_VLI_VALUE_MAX) { + if (eat_index) + lzma_index_free(i_new, allocator); + + return LZMA_PROG_ERROR; + } + + // Compare the values from the new Index with the old + // Index. The old Index may be incomplete; in that + // case we + // - use the value from the new Index as is; + // - update the appropriate info->index.foo_size; and + // - decrease the count of incomplete Index Records. + bool was_incomplete = false; + + if (i_old->total_size == LZMA_VLI_VALUE_UNKNOWN) { + assert(!info->index.is_final); + was_incomplete = true; + + i_old->total_size = i_new->total_size; + + if (lzma_vli_add(info->index.total_size, + i_new->total_size)) { + if (eat_index) + lzma_index_free(i_new, + allocator); + + return LZMA_PROG_ERROR; + } + } else if (i_old->total_size != i_new->total_size) { + if (eat_index) + lzma_index_free(i_new, allocator); + + return LZMA_DATA_ERROR; + } + + if (i_old->uncompressed_size + == LZMA_VLI_VALUE_UNKNOWN) { + assert(!info->index.is_final); + was_incomplete = true; + + i_old->uncompressed_size + = i_new->uncompressed_size; + + if (lzma_vli_add(info->index.uncompressed_size, + i_new->uncompressed_size)) { + if (eat_index) + lzma_index_free(i_new, + allocator); + + return LZMA_PROG_ERROR; + } + } else if (i_old->uncompressed_size + != i_new->uncompressed_size) { + if (eat_index) + lzma_index_free(i_new, allocator); + + return LZMA_DATA_ERROR; + } + + // A Record counts as incomplete only once, even if + // both of its sizes were unknown. + if (was_incomplete) { + assert(!info->index.is_final); + assert(info->index.incomplete_count > 0); + --info->index.incomplete_count; + } + + // Get rid of *i_new. It's now identical to *i_old. + lzma_index *tmp = i_new->next; + if (eat_index) + lzma_free(i_new, allocator); + + i_new = tmp; + + // We want to leave i_old pointing to the last + // Index Record in the old Index. This way we can + // concatenate the possible new Records from i_new.
+ if (i_old->next == NULL) + break; + + i_old = i_old->next; + } + } + + assert(info->index.incomplete_count == 0); + + // If Index was already known to be final, i_new must be NULL now. + // The new Index cannot contain more Records than we already have. + if (info->index.is_final) { + assert(info->index.head != NULL); + + if (i_new != NULL) { + if (eat_index) + lzma_index_free(i_new, allocator); + + return LZMA_DATA_ERROR; + } + + return LZMA_OK; + } + + // The rest of the new Index is merged to the old Index. Keep the + // current i_new pointer available in i_start. We need it when merging + // the new Index with the old one, and if an error occurs so we can + // get rid of the broken part of the new Index. + lzma_index *i_start = i_new; + while (i_new != NULL) { + // The new Index must be complete i.e. no unknown values. + if (i_new->total_size > LZMA_VLI_VALUE_MAX + || i_new->uncompressed_size + > LZMA_VLI_VALUE_MAX) { + if (eat_index) + lzma_index_free(i_start, allocator); + + return LZMA_PROG_ERROR; + } + + // Update info->index.foo_sizes. + if (lzma_vli_add(info->index.total_size, i_new->total_size) + || lzma_vli_add(info->index.uncompressed_size, + i_new->uncompressed_size)) { + if (eat_index) + lzma_index_free(i_start, allocator); + + return LZMA_PROG_ERROR; + } + + ++info->index.record_count; + i_new = i_new->next; + } + + // All the Records in the new Index are good, and info->index.foo_sizes + // were successfully updated. + if (lzma_info_index_finish(info) != LZMA_OK) { + if (eat_index) + lzma_index_free(i_start, allocator); + + return LZMA_DATA_ERROR; + } + + // The Index is ready to be merged. If we aren't supposed to eat + // the Index, make a copy of it first. + // + // NOTE(review): if the copy fails, the sums, record_count, and + // is_final have already been updated although the Records were not + // attached -- confirm that callers discard info after LZMA_MEM_ERROR. + if (!eat_index && i_start != NULL) { + i_start = lzma_index_dup(i_start, allocator); + if (i_start == NULL) + return LZMA_MEM_ERROR; + } + + // Concatenate the new Index with the old one. Note that it is + // possible that we don't have any old Index.
+ if (info->index.head == NULL) + info->index.head = i_start; + else + i_old->next = i_start; + + return LZMA_OK; +} + + +extern LZMA_API lzma_ret +lzma_info_metadata_set(lzma_info *info, lzma_allocator *allocator, + lzma_metadata *metadata, lzma_bool is_header_metadata, + lzma_bool eat_index) +{ + // Validate *metadata. + if (!lzma_vli_is_valid(metadata->header_metadata_size) + || !lzma_vli_is_valid(metadata->total_size) + || !lzma_vli_is_valid(metadata->uncompressed_size)) { + if (eat_index) { + lzma_index_free(metadata->index, allocator); + metadata->index = NULL; + } + + return LZMA_PROG_ERROR; + } + + // Index + if (metadata->index != NULL) { + if (is_header_metadata) + info->has_index_in_header_metadata = true; + + const lzma_ret ret = lzma_info_index_set( + info, allocator, metadata->index, eat_index); + if (ret != LZMA_OK) + return ret; + + } else if (!is_header_metadata + && (metadata->total_size == LZMA_VLI_VALUE_UNKNOWN + || !info->has_index_in_header_metadata)) { + // Either Total Size or Index must be present in Footer + // Metadata Block. If Index is not present, it must have + // already been in the Header Metadata Block. Since we + // got here, these conditions weren't met. + return LZMA_DATA_ERROR; + } + + // Size of Header Metadata + if (!is_header_metadata) { + // If it is marked unknown in Metadata, it means that + // it's not present. + const lzma_vli size = metadata->header_metadata_size + != LZMA_VLI_VALUE_UNKNOWN + ? 
metadata->header_metadata_size : 0; + const lzma_ret ret = lzma_info_size_set( + info, LZMA_INFO_HEADER_METADATA, size); + if (ret != LZMA_OK) + return ret; + } + + // Total Size + if (metadata->total_size != LZMA_VLI_VALUE_UNKNOWN) { + const lzma_ret ret = lzma_info_size_set(info, + LZMA_INFO_TOTAL, metadata->total_size); + if (ret != LZMA_OK) + return ret; + } + + // Uncompressed Size + if (metadata->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) { + const lzma_ret ret = lzma_info_size_set(info, + LZMA_INFO_UNCOMPRESSED, + metadata->uncompressed_size); + if (ret != LZMA_OK) + return ret; + } + + return LZMA_OK; +} + + +///////// +// Get // +///////// + +/// Returns the stored value selected by type, or LZMA_VLI_VALUE_UNKNOWN if +/// type is not one of the handled lzma_info_size values. +extern LZMA_API lzma_vli +lzma_info_size_get(const lzma_info *info, lzma_info_size type) +{ + switch (type) { + case LZMA_INFO_STREAM_START: + return info->stream_start_offset; + + case LZMA_INFO_HEADER_METADATA: + return info->known.header_metadata_size; + + case LZMA_INFO_TOTAL: + return info->known.total_size; + + case LZMA_INFO_UNCOMPRESSED: + return info->known.uncompressed_size; + + case LZMA_INFO_FOOTER_METADATA: + return info->known.footer_metadata_size; + } + + return LZMA_VLI_VALUE_UNKNOWN; +} + + +/// Returns the first Index Record. With detach, the Index bookkeeping in +/// info is reset without freeing the Records, so info no longer refers to +/// the returned list. +extern LZMA_API lzma_index * +lzma_info_index_get(lzma_info *info, lzma_bool detach) +{ + lzma_index *i = info->index.head; + + if (detach) + index_init(info); + + return i; +} + + +/// Returns the number of Index Records currently counted in info. +extern LZMA_API size_t +lzma_info_index_count_get(const lzma_info *info) +{ + return info->index.record_count; +} + + +///////////////// +// Incremental // +///////////////// + +/// Slots of lzma_info_iter.internal[] +enum { + ITER_INFO, + ITER_INDEX, + ITER_RESERVED_1, + ITER_RESERVED_2, +}; + + +/// Shorthands for the internal slots. They expand to expressions that use +/// a variable named iter, which must be in scope. +#define iter_info ((lzma_info *)(iter->internal[ITER_INFO])) + +#define iter_index ((lzma_index *)(iter->internal[ITER_INDEX])) + + +/// Binds *iter to info. All sizes and offsets start as unknown, and no +/// Index Record is selected until lzma_info_iter_next() is called. +extern LZMA_API void +lzma_info_iter_begin(lzma_info *info, lzma_info_iter *iter) +{ + *iter = (lzma_info_iter){ + .total_size = LZMA_VLI_VALUE_UNKNOWN, + .uncompressed_size = LZMA_VLI_VALUE_UNKNOWN, + .stream_offset = LZMA_VLI_VALUE_UNKNOWN, +
.uncompressed_offset = LZMA_VLI_VALUE_UNKNOWN, + .internal = { info, NULL, NULL, NULL }, + }; + + return; +} + + +/// Advances *iter to the next Index Record and updates its offsets. When +/// the end of a non-final Index is reached, a new Record with unknown sizes +/// is allocated and appended. +extern LZMA_API lzma_ret +lzma_info_iter_next(lzma_info_iter *iter, lzma_allocator *allocator) +{ + if (iter_index == NULL) { + // The first call after lzma_info_iter_begin(). + if (iter_info->known.header_metadata_size + == LZMA_VLI_VALUE_UNKNOWN) + iter->stream_offset = LZMA_VLI_VALUE_UNKNOWN; + else if (lzma_vli_sum3(iter->stream_offset, + iter_info->stream_start_offset, + LZMA_STREAM_HEADER_SIZE, + iter_info->known.header_metadata_size)) + return LZMA_PROG_ERROR; + + iter->uncompressed_offset = 0; + + if (iter_info->index.head != NULL) { + // The first Index Record has already been allocated. + iter->internal[ITER_INDEX] = iter_info->index.head; + iter->total_size = iter_index->total_size; + iter->uncompressed_size + = iter_index->uncompressed_size; + return LZMA_OK; + } + } else { + // Update iter->*_offsets. + if (iter->stream_offset != LZMA_VLI_VALUE_UNKNOWN) { + if (iter_index->total_size == LZMA_VLI_VALUE_UNKNOWN) + iter->stream_offset = LZMA_VLI_VALUE_UNKNOWN; + else if (lzma_vli_add(iter->stream_offset, + iter_index->total_size)) + return LZMA_DATA_ERROR; + } + + if (iter->uncompressed_offset != LZMA_VLI_VALUE_UNKNOWN) { + if (iter_index->uncompressed_size + == LZMA_VLI_VALUE_UNKNOWN) + iter->uncompressed_offset + = LZMA_VLI_VALUE_UNKNOWN; + else if (lzma_vli_add(iter->uncompressed_offset, + iter_index->uncompressed_size)) + return LZMA_DATA_ERROR; + } + + if (iter_index->next != NULL) { + // The next Record has already been allocated. + // iter_index reads iter->internal[], so after the + // assignment it refers to the new current Record. + iter->internal[ITER_INDEX] = iter_index->next; + iter->total_size = iter_index->total_size; + iter->uncompressed_size + = iter_index->uncompressed_size; + return LZMA_OK; + } + } + + // Don't add new Records to a final Index. + if (iter_info->index.is_final) + return LZMA_DATA_ERROR; + + // Allocate and initialize a new Index Record.
+ lzma_index *i = lzma_alloc(sizeof(lzma_index), allocator); + if (i == NULL) + return LZMA_MEM_ERROR; + + i->total_size = LZMA_VLI_VALUE_UNKNOWN; + i->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; + i->next = NULL; + + iter->total_size = LZMA_VLI_VALUE_UNKNOWN; + iter->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; + + // Decide where to put the new Index Record. + if (iter_info->index.head == NULL) + iter_info->index.head = i; + + if (iter_index != NULL) + iter_index->next = i; + + iter->internal[ITER_INDEX] = i; + + ++iter_info->index.record_count; + ++iter_info->index.incomplete_count; + + return LZMA_OK; +} + + +/// Stores sizes for the current Index Record. A size that is already known +/// in the Record must match the given value. Passing LZMA_VLI_VALUE_UNKNOWN +/// leaves the corresponding field untouched. +extern LZMA_API lzma_ret +lzma_info_iter_set(lzma_info_iter *iter, + lzma_vli total_size, lzma_vli uncompressed_size) +{ + if (iter_index == NULL || !lzma_vli_is_valid(total_size) + || !lzma_vli_is_valid(uncompressed_size)) + return LZMA_PROG_ERROR; + + const bool was_incomplete = iter_index->total_size + == LZMA_VLI_VALUE_UNKNOWN + || iter_index->uncompressed_size + == LZMA_VLI_VALUE_UNKNOWN; + + if (total_size != LZMA_VLI_VALUE_UNKNOWN) { + if (iter_index->total_size == LZMA_VLI_VALUE_UNKNOWN) { + iter_index->total_size = total_size; + + if (lzma_vli_add(iter_info->index.total_size, + total_size) + || iter_info->index.total_size + > iter_info->known.total_size) + return LZMA_DATA_ERROR; + + } else if (iter_index->total_size != total_size) { + return LZMA_DATA_ERROR; + } + } + + if (uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) { + if (iter_index->uncompressed_size == LZMA_VLI_VALUE_UNKNOWN) { + iter_index->uncompressed_size = uncompressed_size; + + if (lzma_vli_add(iter_info->index.uncompressed_size, + uncompressed_size) + || iter_info->index.uncompressed_size + > iter_info->known.uncompressed_size) + return LZMA_DATA_ERROR; + + } else if (iter_index->uncompressed_size + != uncompressed_size) { + return LZMA_DATA_ERROR; + } + } + + // Check if the new information we got managed to
finish this + // Index Record. If so, update the count of incomplete Index Records. + if (was_incomplete && iter_index->total_size + != LZMA_VLI_VALUE_UNKNOWN + && iter_index->uncompressed_size + != LZMA_VLI_VALUE_UNKNOWN) { + assert(iter_info->index.incomplete_count > 0); + --iter_info->index.incomplete_count; + } + + // Make sure that the known sizes are now available in *iter. + iter->total_size = iter_index->total_size; + iter->uncompressed_size = iter_index->uncompressed_size; + + return LZMA_OK; +} + + +/// Marks the Index as final. Fails with LZMA_DATA_ERROR if the Index is +/// empty, still has incomplete Records, or its sums cannot be stored as the +/// known Total Size and Uncompressed Size. +extern LZMA_API lzma_ret +lzma_info_index_finish(lzma_info *info) +{ + // NOTE(review): the lzma_ret values are used as booleans here, which + // assumes that LZMA_OK is zero -- confirm. + if (info->index.record_count == 0 || info->index.incomplete_count > 0 + || lzma_info_size_set(info, LZMA_INFO_TOTAL, + info->index.total_size) + || lzma_info_size_set(info, LZMA_INFO_UNCOMPRESSED, + info->index.uncompressed_size)) + return LZMA_DATA_ERROR; + + info->index.is_final = true; + + return LZMA_OK; +} + + +////////////// +// Locating // +////////////// + +/// Returns the Stream offset of the Header or Footer Metadata Block, or +/// LZMA_VLI_VALUE_UNKNOWN if it cannot be calculated. +extern LZMA_API lzma_vli +lzma_info_metadata_locate(const lzma_info *info, lzma_bool is_header_metadata) +{ + bool error = false; + lzma_vli size = 0; + + if (info->known.header_metadata_size == LZMA_VLI_VALUE_UNKNOWN) { + // We don't know if Header Metadata Block is present, thus + // we cannot locate it either. + // + // Well, one could just assume that it is present. + // I'm not sure if this is useful. But it can be useful to + // be able to use this function and get LZMA_VLI_VALUE_UNKNOWN + // to detect that Header Metadata Block wasn't present. + error = true; + } else if (is_header_metadata) { + error = lzma_vli_sum(size, info->stream_start_offset, + LZMA_STREAM_HEADER_SIZE); + } else if (!info->index.is_final) { + // Since we don't know if we have all the Index Records yet, + // we cannot know where the Footer Metadata Block is.
+ error = true; + } else { + error = lzma_vli_sum4(size, info->stream_start_offset, + LZMA_STREAM_HEADER_SIZE, + info->known.header_metadata_size, + info->known.total_size); + } + + return error ? LZMA_VLI_VALUE_UNKNOWN : size; +} + + +/// Returns the offset of the Header or Footer Metadata Block converted to +/// uint32_t, for use in alignment calculations. +/// +/// NOTE(review): unlike lzma_info_metadata_locate(), the known sizes are +/// added without checking for LZMA_VLI_VALUE_UNKNOWN, and the sum is +/// assigned to a uint32_t -- confirm that callers use this only once the +/// sizes are known and need only the low bits. +extern LZMA_API uint32_t +lzma_info_metadata_alignment_get( + const lzma_info *info, lzma_bool is_header_metadata) +{ + uint32_t alignment; + + if (is_header_metadata) { + alignment = info->stream_start_offset + + LZMA_STREAM_HEADER_SIZE; + } else { + alignment = info->stream_start_offset + LZMA_STREAM_HEADER_SIZE + + info->known.header_metadata_size + + info->known.total_size; + } + + return alignment; +} + + +/// Positions *iter on the Index Record that covers uncompressed_offset, +/// searching from the beginning of the Index. If the Index ends before the +/// offset is reached and allow_alloc is true, a new Record is appended with +/// lzma_info_iter_next(). +extern LZMA_API lzma_ret +lzma_info_iter_locate(lzma_info_iter *iter, lzma_allocator *allocator, + lzma_vli uncompressed_offset, lzma_bool allow_alloc) +{ + if (iter == NULL || uncompressed_offset > LZMA_VLI_VALUE_MAX) + return LZMA_PROG_ERROR; + + // Quick check in case Index is final. + if (iter_info->index.is_final) { + assert(iter_info->known.uncompressed_size + == iter_info->index.uncompressed_size); + if (uncompressed_offset >= iter_info->index.uncompressed_size) + return LZMA_DATA_ERROR; + } + + // TODO: Optimize so that it uses existing info from *iter when + // seeking forward. + + // Initialize *iter + if (iter_info->known.header_metadata_size != LZMA_VLI_VALUE_UNKNOWN) { + if (lzma_vli_sum3(iter->stream_offset, + iter_info->stream_start_offset, + LZMA_STREAM_HEADER_SIZE, + iter_info->known.header_metadata_size)) + return LZMA_PROG_ERROR; + } else { + // We don't know the Size of Header Metadata Block, thus + // we cannot calculate the Stream offset either. + iter->stream_offset = LZMA_VLI_VALUE_UNKNOWN; + } + + iter->uncompressed_offset = 0; + + // If we have no Index Records, it's obvious that we need to + // add a new one.
+ if (iter_info->index.head == NULL) { + assert(!iter_info->index.is_final); + if (!allow_alloc) + return LZMA_DATA_ERROR; + + return lzma_info_iter_next(iter, allocator); + } + + // Locate an appropriate Index Record. + lzma_index *i = iter_info->index.head; + while (true) { + // - If Uncompressed Size in the Record is unknown, + // we have no chance to search further. + // - If the next Record would go past the requested offset, + // we have found our target Data Block. + if (i->uncompressed_size == LZMA_VLI_VALUE_UNKNOWN + || iter->uncompressed_offset + + i->uncompressed_size > uncompressed_offset) { + iter->total_size = i->total_size; + iter->uncompressed_size = i->uncompressed_size; + iter->internal[ITER_INDEX] = i; + return LZMA_OK; + } + + // If this is the last Record, let lzma_info_iter_next() + // step over it and append a new Record. It adds the sizes + // of the current Record to the offsets itself, so this must + // happen before the offsets are updated below; otherwise + // the last Record would be counted twice. + if (i->next == NULL) { + assert(!iter_info->index.is_final); + if (!allow_alloc) + return LZMA_DATA_ERROR; + + iter->internal[ITER_INDEX] = i; + return lzma_info_iter_next(iter, allocator); + } + + // Update the stream offset. It may be unknown if we didn't + // know the size of Header Metadata Block. + if (iter->stream_offset != LZMA_VLI_VALUE_UNKNOWN) + if (lzma_vli_add(iter->stream_offset, i->total_size)) + return LZMA_PROG_ERROR; + + // Update the uncompressed offset. This cannot overflow since + // the Index is known to be valid. + iter->uncompressed_offset += i->uncompressed_size; + + // Move to the next Block. + i = i->next; + } +} diff --git a/src/liblzma/common/init.c b/src/liblzma/common/init.c new file mode 100644 index 00000000..fb377f5a --- /dev/null +++ b/src/liblzma/common/init.c @@ -0,0 +1,39 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file init.c +/// \brief Static internal initializations +/// +/// The initializations have been split into so many small files to prevent +/// an application needing only decoder functions from statically linking +/// also the encoder functions.
+// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + + +/// Runs the encoder and decoder initializations that were compiled in. +extern LZMA_API void +lzma_init(void) +{ +#ifdef HAVE_ENCODER + lzma_init_encoder(); +#endif + +#ifdef HAVE_DECODER + lzma_init_decoder(); +#endif + + return; +} diff --git a/src/liblzma/common/init_decoder.c b/src/liblzma/common/init_decoder.c new file mode 100644 index 00000000..2d61b451 --- /dev/null +++ b/src/liblzma/common/init_decoder.c @@ -0,0 +1,33 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file init_decoder.c +/// \brief Static internal initializations +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + + +/// Runs the initializations needed by the decoder. Currently this only +/// calls lzma_init_check() when HAVE_CHECK is defined. +extern LZMA_API void +lzma_init_decoder(void) +{ + // So far there's no decoder-specific stuff to initialize.
+ +#ifdef HAVE_CHECK + lzma_init_check(); +#endif + + return; +} diff --git a/src/liblzma/common/init_encoder.c b/src/liblzma/common/init_encoder.c new file mode 100644 index 00000000..4d3da506 --- /dev/null +++ b/src/liblzma/common/init_encoder.c @@ -0,0 +1,44 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file init_encoder.c +/// \brief Static internal initializations +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" +#include "range_encoder.h" +#include "lzma_encoder.h" + + +/// Runs the initializations needed by the encoder. Only the first call does +/// any work; later calls return immediately. +/// +/// NOTE(review): already_initialized is a plain static bool with no +/// synchronization visible here -- confirm that concurrent first calls are +/// not expected. +extern LZMA_API void +lzma_init_encoder(void) +{ + static bool already_initialized = false; + if (already_initialized) + return; + +#ifdef HAVE_CHECK + lzma_init_check(); +#endif + +// FIXME TODO Create precalculated tables.
+#if defined(HAVE_ENCODER) && defined(HAVE_FILTER_LZMA) + lzma_rc_init(); + lzma_fastpos_init(); +#endif + + already_initialized = true; + return; +} diff --git a/src/liblzma/common/memory_limitter.c b/src/liblzma/common/memory_limitter.c new file mode 100644 index 00000000..19cdefc2 --- /dev/null +++ b/src/liblzma/common/memory_limitter.c @@ -0,0 +1,200 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file memory_limitter.c +/// \brief Limiting memory usage +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + + +/// Rounds an unsigned integer upwards to the next multiple. A value that is +/// already a multiple is returned unchanged. Both arguments are evaluated +/// more than once, so they must not have side effects. +#define my_ceil(num, multiple) \ + ((num) + (((multiple) - ((num) % (multiple))) % (multiple))) + + +/// Rounds upwards to the next multiple of 2 * sizeof(void*). +/// malloc() tends to align allocations this way.
+#define malloc_ceil(num) my_ceil(num, 2 * sizeof(void *)) + + +typedef struct lzma_memlimit_list_s lzma_memlimit_list; +struct lzma_memlimit_list_s { + lzma_memlimit_list *next; + void *ptr; + size_t size; +}; + + +struct lzma_memlimit_s { + size_t used; + size_t limit; + lzma_memlimit_list *list; +}; + + +extern LZMA_API lzma_memlimit * +lzma_memlimit_create(size_t limit) +{ + if (limit < sizeof(lzma_memlimit)) + return NULL; + + lzma_memlimit *mem = malloc(sizeof(lzma_memlimit)); + + if (mem != NULL) { + mem->used = sizeof(lzma_memlimit); + mem->limit = limit; + mem->list = NULL; + } + + return mem; +} + + +extern LZMA_API void +lzma_memlimit_set(lzma_memlimit *mem, size_t limit) +{ + mem->limit = limit; + return; +} + + +extern LZMA_API size_t +lzma_memlimit_get(const lzma_memlimit *mem) +{ + return mem->limit; +} + + +extern LZMA_API size_t +lzma_memlimit_used(const lzma_memlimit *mem) +{ + return mem->used; +} + + +extern LZMA_API void +lzma_memlimit_end(lzma_memlimit *mem, lzma_bool free_allocated) +{ + if (mem == NULL) + return; + + lzma_memlimit_list *record = mem->list; + while (record != NULL) { + if (free_allocated) + free(record->ptr); + + lzma_memlimit_list *tmp = record; + record = record->next; + free(tmp); + } + + free(mem); + + return; +} + + +extern LZMA_API void * +lzma_memlimit_alloc(lzma_memlimit *mem, size_t nmemb, size_t size) +{ + // While liblzma always sets nmemb to one, do this multiplication + // to make these functions usable e.g. with zlib and libbzip2. + // Making sure that this doesn't overflow is up to the application. + size *= nmemb; + + // Some malloc() implementations return NULL on malloc(0). We like + // to get a non-NULL value. + if (size == 0) + size = 1; + + // Calculate how much memory we are going to allocate in reality. + // TODO: We should add some rough estimate how much malloc() needs + // for its internal structures. 
+ const size_t total_size = malloc_ceil(size) + + malloc_ceil(sizeof(lzma_memlimit_list)); + + // Integer overflow protection + if (SIZE_MAX - size <= total_size) + return NULL; + + if (mem->limit < mem->used || mem->limit - mem->used < total_size) + return NULL; + + lzma_memlimit_list *record = malloc(sizeof(lzma_memlimit_list)); + void *ptr = malloc(size); + + if (record == NULL || ptr == NULL) { + free(record); + free(ptr); + return NULL; + } + + // Add the new entry to the beginning of the list. This should be + // more efficient when freeing memory, because usually it is + // "last allocated, first freed". + record->next = mem->list; + record->ptr = ptr; + record->size = total_size; + + mem->list = record; + mem->used += total_size; + + return ptr; +} + + +extern LZMA_API void +lzma_memlimit_detach(lzma_memlimit *mem, void *ptr) +{ + if (ptr == NULL || mem->list == NULL) + return; + + lzma_memlimit_list *record = mem->list; + lzma_memlimit_list *prev = NULL; + + while (record->ptr != ptr) { + prev = record; + record = record->next; + if (record == NULL) + return; + } + + if (prev != NULL) + prev->next = record->next; + else + mem->list = record->next; + + assert(mem->used >= record->size); + mem->used -= record->size; + + free(record); + + return; +} + + +extern LZMA_API void +lzma_memlimit_free(lzma_memlimit *mem, void *ptr) +{ + if (ptr == NULL) + return; + + lzma_memlimit_detach(mem, ptr); + + free(ptr); + + return; +} diff --git a/src/liblzma/common/memory_usage.c b/src/liblzma/common/memory_usage.c new file mode 100644 index 00000000..b6f27957 --- /dev/null +++ b/src/liblzma/common/memory_usage.c @@ -0,0 +1,113 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file memory_usage.c +/// \brief Calculate rough amount of memory required by filters +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General 
Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" +#include "lz_encoder.h" +#include "lzma_literal.h" + + +static uint64_t +get_usage(const lzma_options_filter *filter, bool is_encoder) +{ + uint64_t ret; + + switch (filter->id) { + case LZMA_FILTER_COPY: + case LZMA_FILTER_X86: + case LZMA_FILTER_POWERPC: + case LZMA_FILTER_IA64: + case LZMA_FILTER_ARM: + case LZMA_FILTER_ARMTHUMB: + case LZMA_FILTER_SPARC: + case LZMA_FILTER_DELTA: + // These don't require any significant amount of memory. + ret = 0; + break; + + case LZMA_FILTER_SUBBLOCK: + if (is_encoder) { + const lzma_options_subblock *options = filter->options; + ret = options->subblock_data_size; + } else { + ret = 0; + } + break; + +#ifdef HAVE_FILTER_LZMA + case LZMA_FILTER_LZMA: { + const lzma_options_lzma *options = filter->options; + + // Literal coder - this can be signficant if both values are + // big, or if sizeof(probability) is big. + ret = literal_states(options->literal_context_bits, + options->literal_pos_bits) * LIT_SIZE + * sizeof(probability); + + // Dictionary base size + ret += options->dictionary_size; + + if (is_encoder) { +# ifdef HAVE_ENCODER + // This is rough, but should be accurate enough + // in practice. 
+ ret += options->dictionary_size / 2; + + uint32_t dummy1; + uint32_t dummy2; + uint32_t num_items; + if (lzma_lz_encoder_hash_properties( + options->match_finder, + options->dictionary_size, + &dummy1, &dummy2, &num_items)) + return UINT64_MAX; + + ret += (uint64_t)(num_items) * sizeof(uint32_t); +# else + return UINT64_MAX; +# endif + } + + break; + } +#endif + + default: + return UINT64_MAX; + } + + return ret; +} + + +extern LZMA_API uint32_t +lzma_memory_usage(const lzma_options_filter *filters, lzma_bool is_encoder) +{ + uint64_t usage = 0; + + for (size_t i = 0; filters[i].id != UINT64_MAX; ++i) { + const uint64_t ret = get_usage(filters + i, is_encoder); + if (ret == UINT64_MAX) + return UINT32_MAX; + + usage += ret; + } + + // Convert to mebibytes with rounding. + return usage / (1024 * 1024) + (usage % (1024 * 1024) >= 512 ? 1 : 0); +} diff --git a/src/liblzma/common/metadata_decoder.c b/src/liblzma/common/metadata_decoder.c new file mode 100644 index 00000000..f2ac6c1d --- /dev/null +++ b/src/liblzma/common/metadata_decoder.c @@ -0,0 +1,555 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file metadata_decoder.c +/// \brief Decodes metadata stored in Metadata Blocks +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "metadata_decoder.h" +#include "block_decoder.h" + + +/// Maximum size of a single Extra Record. Again, this is mostly to make +/// sure that the parsed lzma_vli fits into size_t. Still, maybe this should +/// be smaller. +#define EXTRA_SIZE_MAX (SIZE_MAX / 4) + + +struct lzma_coder_s { + enum { + SEQ_FLAGS, + SEQ_HEADER_METADATA_SIZE, + SEQ_TOTAL_SIZE, + SEQ_UNCOMPRESSED_SIZE, + SEQ_INDEX_COUNT, + SEQ_INDEX_ALLOC, + SEQ_INDEX_TOTAL_SIZE, + SEQ_INDEX_UNCOMPRESSED_SIZE, + SEQ_EXTRA_PREPARE, + SEQ_EXTRA_ALLOC, + SEQ_EXTRA_ID, + SEQ_EXTRA_SIZE, + SEQ_EXTRA_DATA_ALLOC, + SEQ_EXTRA_DATA_COPY, + SEQ_EXTRA_DUMMY_ALLOC, + SEQ_EXTRA_DUMMY_ID, + SEQ_EXTRA_DUMMY_SIZE, + SEQ_EXTRA_DUMMY_COPY, + } sequence; + + /// Number of "things" left to be parsed. If we hit end of input + /// when this isn't zero, we have corrupt Metadata Block. + size_t todo_count; + + /// Position in variable-length integers + size_t pos; + + /// Temporary variable needed to decode variables whose type + /// is size_t instead of lzma_vli. + lzma_vli tmp; + + /// Pointer to target structure to hold the parsed results. + lzma_metadata *metadata; + + /// The Index Record we currently are parsing + lzma_index *index_current; + + /// Number of Records in Index + size_t index_count; + + /// Sum of Total Size fields in the Index + lzma_vli index_total_size; + + /// Sum of Uncompressed Size fields in the Index + lzma_vli index_uncompressed_size; + + /// True if Extra is present. + bool has_extra; + + /// True if we have been requested to store the Extra to *metadata. + bool want_extra; + + /// Pointer to the end of the Extra Record list. + lzma_extra *extra_tail; + + /// Dummy Extra Record used when only verifying integrity of Extra + /// (not storing it to RAM). + lzma_extra extra_dummy; + + /// Block decoder + lzma_next_coder block_decoder; + + /// buffer[buffer_pos] is the next byte to process. 
+ size_t buffer_pos; + + /// buffer[buffer_size] is the first byte to not process. + size_t buffer_size; + + /// Temporary buffer to which encoded Metadata is read before + /// it is parsed. + uint8_t buffer[LZMA_BUFFER_SIZE]; +}; + + +/// Reads a variable-length integer to coder->num. +#define read_vli(num) \ +do { \ + const lzma_ret ret = lzma_vli_decode( \ + &num, &coder->pos, \ + coder->buffer, &coder->buffer_pos, \ + coder->buffer_size); \ + if (ret != LZMA_STREAM_END) \ + return ret; \ + \ + coder->pos = 0; \ +} while (0) + + +static lzma_ret +process(lzma_coder *coder, lzma_allocator *allocator) +{ + while (coder->buffer_pos < coder->buffer_size) + switch (coder->sequence) { + case SEQ_FLAGS: + // Reserved bits must be unset. + if (coder->buffer[coder->buffer_pos] & 0x70) + return LZMA_HEADER_ERROR; + + // If Size of Header Metadata is present, prepare the + // variable for variable-length integer decoding. Otherwise + // set it to LZMA_VLI_VALUE_UNKNOWN to indicate that the + // field isn't present. + if (coder->buffer[coder->buffer_pos] & 0x01) { + coder->metadata->header_metadata_size = 0; + ++coder->todo_count; + } + + if (coder->buffer[coder->buffer_pos] & 0x02) { + coder->metadata->total_size = 0; + ++coder->todo_count; + } + + if (coder->buffer[coder->buffer_pos] & 0x04) { + coder->metadata->uncompressed_size = 0; + ++coder->todo_count; + } + + if (coder->buffer[coder->buffer_pos] & 0x08) { + // Setting index_count to 1 is just to indicate that + // Index is present. The real size is parsed later. 
+ coder->index_count = 1; + ++coder->todo_count; + } + + coder->has_extra = (coder->buffer[coder->buffer_pos] & 0x80) + != 0; + + ++coder->buffer_pos; + coder->sequence = SEQ_HEADER_METADATA_SIZE; + break; + + case SEQ_HEADER_METADATA_SIZE: + if (coder->metadata->header_metadata_size + != LZMA_VLI_VALUE_UNKNOWN) { + read_vli(coder->metadata->header_metadata_size); + + if (coder->metadata->header_metadata_size == 0) + return LZMA_DATA_ERROR; + + --coder->todo_count; + } + + coder->sequence = SEQ_TOTAL_SIZE; + break; + + case SEQ_TOTAL_SIZE: + if (coder->metadata->total_size != LZMA_VLI_VALUE_UNKNOWN) { + read_vli(coder->metadata->total_size); + + if (coder->metadata->total_size == 0) + return LZMA_DATA_ERROR; + + --coder->todo_count; + } + + coder->sequence = SEQ_UNCOMPRESSED_SIZE; + break; + + case SEQ_UNCOMPRESSED_SIZE: + if (coder->metadata->uncompressed_size + != LZMA_VLI_VALUE_UNKNOWN) { + read_vli(coder->metadata->uncompressed_size); + --coder->todo_count; + } + + coder->sequence = SEQ_INDEX_COUNT; + break; + + case SEQ_INDEX_COUNT: + if (coder->index_count == 0) { + coder->sequence = SEQ_EXTRA_PREPARE; + break; + } + + read_vli(coder->tmp); + + // Index must not be empty nor far too big (wouldn't fit + // in RAM). 
+ if (coder->tmp == 0 || coder->tmp + >= SIZE_MAX / sizeof(lzma_index)) + return LZMA_DATA_ERROR; + + coder->index_count = (size_t)(coder->tmp); + coder->tmp = 0; + + coder->sequence = SEQ_INDEX_ALLOC; + break; + + case SEQ_INDEX_ALLOC: { + lzma_index *i = lzma_alloc(sizeof(lzma_index), allocator); + if (i == NULL) + return LZMA_MEM_ERROR; + + i->total_size = 0; + i->uncompressed_size = 0; + i->next = NULL; + + if (coder->metadata->index == NULL) + coder->metadata->index = i; + else + coder->index_current->next = i; + + coder->index_current = i; + + coder->sequence = SEQ_INDEX_TOTAL_SIZE; + } + + // Fall through + + case SEQ_INDEX_TOTAL_SIZE: { + read_vli(coder->index_current->total_size); + + coder->index_total_size += coder->index_current->total_size; + if (coder->index_total_size > LZMA_VLI_VALUE_MAX) + return LZMA_DATA_ERROR; + + // No Block can have Total Size of zero bytes. + if (coder->index_current->total_size == 0) + return LZMA_DATA_ERROR; + + if (--coder->index_count == 0) { + // If Total Size is present, it must match the sum + // of Total Sizes in Index. 
+ if (coder->metadata->total_size + != LZMA_VLI_VALUE_UNKNOWN + && coder->metadata->total_size + != coder->index_total_size) + return LZMA_DATA_ERROR; + + coder->index_current = coder->metadata->index; + coder->sequence = SEQ_INDEX_UNCOMPRESSED_SIZE; + } else { + coder->sequence = SEQ_INDEX_ALLOC; + } + + break; + } + + case SEQ_INDEX_UNCOMPRESSED_SIZE: { + read_vli(coder->index_current->uncompressed_size); + + coder->index_uncompressed_size + += coder->index_current->uncompressed_size; + if (coder->index_uncompressed_size > LZMA_VLI_VALUE_MAX) + return LZMA_DATA_ERROR; + + coder->index_current = coder->index_current->next; + if (coder->index_current == NULL) { + if (coder->metadata->uncompressed_size + != LZMA_VLI_VALUE_UNKNOWN + && coder->metadata->uncompressed_size + != coder->index_uncompressed_size) + return LZMA_DATA_ERROR; + + --coder->todo_count; + coder->sequence = SEQ_EXTRA_PREPARE; + } + + break; + } + + case SEQ_EXTRA_PREPARE: + assert(coder->todo_count == 0); + + // If we get here, we have at least one byte of input left. + // If "Extra is present" flag is unset in Metadata Flags, + // it means that there is some garbage and we return an error. 
+ if (!coder->has_extra) + return LZMA_DATA_ERROR; + + if (!coder->want_extra) { + coder->extra_tail = &coder->extra_dummy; + coder->sequence = SEQ_EXTRA_DUMMY_ALLOC; + break; + } + + coder->sequence = SEQ_EXTRA_ALLOC; + + // Fall through + + case SEQ_EXTRA_ALLOC: { + lzma_extra *e = lzma_alloc(sizeof(lzma_extra), allocator); + if (e == NULL) + return LZMA_MEM_ERROR; + + e->next = NULL; + e->id = 0; + e->size = 0; + e->data = NULL; + + if (coder->metadata->extra == NULL) + coder->metadata->extra = e; + else + coder->extra_tail->next = e; + + coder->extra_tail = e; + + coder->todo_count = 1; + coder->sequence = SEQ_EXTRA_ID; + } + + // Fall through + + case SEQ_EXTRA_ID: + case SEQ_EXTRA_DUMMY_ID: + read_vli(coder->extra_tail->id); + + if (coder->extra_tail->id == 0) { + coder->extra_tail->size = 0; + coder->extra_tail->data = NULL; + coder->todo_count = 0; + --coder->sequence; + } else { + ++coder->sequence; + } + + break; + + case SEQ_EXTRA_SIZE: + case SEQ_EXTRA_DUMMY_SIZE: + read_vli(coder->tmp); + ++coder->sequence; + break; + + case SEQ_EXTRA_DATA_ALLOC: { + if (coder->tmp > EXTRA_SIZE_MAX) + return LZMA_DATA_ERROR; + + coder->extra_tail->size = (size_t)(coder->tmp); + coder->tmp = 0; + + uint8_t *d = lzma_alloc((size_t)(coder->extra_tail->size), + allocator); + if (d == NULL) + return LZMA_MEM_ERROR; + + coder->extra_tail->data = d; + coder->sequence = SEQ_EXTRA_DATA_COPY; + } + + // Fall through + + case SEQ_EXTRA_DATA_COPY: + bufcpy(coder->buffer, &coder->buffer_pos, coder->buffer_size, + coder->extra_tail->data, &coder->pos, + (size_t)(coder->extra_tail->size)); + + if ((size_t)(coder->extra_tail->size) == coder->pos) { + coder->pos = 0; + coder->todo_count = 0; + coder->sequence = SEQ_EXTRA_ALLOC; + } + + break; + + case SEQ_EXTRA_DUMMY_ALLOC: + // Not really alloc, just initialize the dummy entry. 
+ coder->extra_dummy = (lzma_extra){ + .next = NULL, + .id = 0, + .size = 0, + .data = NULL, + }; + + coder->todo_count = 1; + coder->sequence = SEQ_EXTRA_DUMMY_ID; + break; + + case SEQ_EXTRA_DUMMY_COPY: { + // Simply skip as many bytes as indicated by Extra Record Size. + // We don't check lzma_extra_size_max because we don't + // allocate any memory to hold the data. + const size_t in_avail = coder->buffer_size - coder->buffer_pos; + const size_t skip = MIN((lzma_vli)(in_avail), coder->tmp); + coder->buffer_pos += skip; + coder->tmp -= skip; + + if (coder->tmp == 0) { + coder->todo_count = 0; + coder->sequence = SEQ_EXTRA_DUMMY_ALLOC; + } + + break; + } + + default: + return LZMA_PROG_ERROR; + } + + return LZMA_OK; +} + + +static lzma_ret +metadata_decode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out lzma_attribute((unused)), + size_t *restrict out_pos lzma_attribute((unused)), + size_t out_size lzma_attribute((unused)), + lzma_action action lzma_attribute((unused))) +{ + bool end_was_reached = false; + + while (true) { + // Fill the buffer if it is empty. + if (coder->buffer_pos == coder->buffer_size) { + coder->buffer_pos = 0; + coder->buffer_size = 0; + + const lzma_ret ret = coder->block_decoder.code( + coder->block_decoder.coder, allocator, + in, in_pos, in_size, coder->buffer, + &coder->buffer_size, LZMA_BUFFER_SIZE, + LZMA_RUN); + + switch (ret) { + case LZMA_OK: + // Return immediatelly if we got no new data. + if (coder->buffer_size == 0) + return LZMA_OK; + + break; + + case LZMA_STREAM_END: + end_was_reached = true; + break; + + default: + return ret; + } + } + + // Process coder->buffer. + const lzma_ret ret = process(coder, allocator); + if (ret != LZMA_OK) + return ret; + + // On success, process() eats all the input. 
+ assert(coder->buffer_pos == coder->buffer_size); + + if (end_was_reached) { + // Check that the sequence is not in the + // middle of anything. + if (coder->todo_count != 0) + return LZMA_DATA_ERROR; + + return LZMA_STREAM_END; + } + } +} + + +static void +metadata_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +metadata_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + lzma_options_block *options, lzma_metadata *metadata, + bool want_extra) +{ + if (options == NULL || metadata == NULL) + return LZMA_PROG_ERROR; + + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &metadata_decode; + next->end = &metadata_decoder_end; + next->coder->block_decoder = LZMA_NEXT_CODER_INIT; + } + + metadata->header_metadata_size = LZMA_VLI_VALUE_UNKNOWN; + metadata->total_size = LZMA_VLI_VALUE_UNKNOWN; + metadata->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; + metadata->index = NULL; + metadata->extra = NULL; + + next->coder->sequence = SEQ_FLAGS; + next->coder->todo_count = 0; + next->coder->pos = 0; + next->coder->tmp = 0; + next->coder->metadata = metadata; + next->coder->index_current = NULL; + next->coder->index_count = 0; + next->coder->index_total_size = 0; + next->coder->index_uncompressed_size = 0; + next->coder->want_extra = want_extra; + next->coder->extra_tail = NULL; + next->coder->buffer_pos = 0; + next->coder->buffer_size = 0; + + return lzma_block_decoder_init( + &next->coder->block_decoder, allocator, options); +} + + +extern lzma_ret +lzma_metadata_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + lzma_options_block *options, lzma_metadata *metadata, + bool want_extra) +{ + lzma_next_coder_init(metadata_decoder_init, next, allocator, + options, metadata, want_extra); +} + + +extern LZMA_API lzma_ret +lzma_metadata_decoder(lzma_stream *strm, 
lzma_options_block *options, + lzma_metadata *metadata, lzma_bool want_extra) +{ + lzma_next_strm_init(strm, lzma_metadata_decoder_init, + options, metadata, want_extra); + + strm->internal->supported_actions[LZMA_RUN] = true; + + return LZMA_OK; +} diff --git a/src/liblzma/common/metadata_decoder.h b/src/liblzma/common/metadata_decoder.h new file mode 100644 index 00000000..1fba2179 --- /dev/null +++ b/src/liblzma/common/metadata_decoder.h @@ -0,0 +1,31 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file metadata_decoder.h +/// \brief Decodes metadata stored in Metadata Blocks +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_METADATA_DECODER_H +#define LZMA_METADATA_DECODER_H + +#include "common.h" + + +extern lzma_ret lzma_metadata_decoder_init( + lzma_next_coder *next, lzma_allocator *allocator, + lzma_options_block *options, lzma_metadata *metadata, + bool want_extra); + +#endif diff --git a/src/liblzma/common/metadata_encoder.c b/src/liblzma/common/metadata_encoder.c new file mode 100644 index 00000000..17587c5c --- /dev/null +++ b/src/liblzma/common/metadata_encoder.c @@ -0,0 +1,436 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file metadata_encoder.c +/// \brief Encodes metadata to be stored into Metadata Blocks +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "metadata_encoder.h" +#include "block_encoder.h" + + +struct lzma_coder_s { + enum { + SEQ_FLAGS, + SEQ_HEADER_METADATA_SIZE, + SEQ_TOTAL_SIZE, + SEQ_UNCOMPRESSED_SIZE, + SEQ_INDEX_COUNT, + SEQ_INDEX_TOTAL, + SEQ_INDEX_UNCOMPRESSED, + SEQ_EXTRA_ID, + SEQ_EXTRA_SIZE, + SEQ_EXTRA_DATA, + SEQ_END, + } sequence; + + /// Position in variable-length integers + size_t pos; + + /// Local copy of the Metadata structure. Note that we keep + /// a copy only of the main structure, not Index or Extra Records. 
+ lzma_metadata metadata; + + /// Number of Records in Index + size_t index_count; + + /// Index Record currently being processed + const lzma_index *index_current; + + /// Block encoder for the encoded Metadata + lzma_next_coder block_encoder; + + /// True once everything except compression has been done. + bool end_was_reached; + + /// buffer[buffer_pos] is the first byte that needs to be compressed. + size_t buffer_pos; + + /// buffer[buffer_size] is the next position where a byte will be + /// written by process(). + size_t buffer_size; + + /// Temporary buffer to which encoded Metadata is written before + /// it is compressed. + uint8_t buffer[LZMA_BUFFER_SIZE]; +}; + + +#define write_vli(num) \ +do { \ + const lzma_ret ret = lzma_vli_encode(num, &coder->pos, 1, \ + coder->buffer, &coder->buffer_size, \ + LZMA_BUFFER_SIZE); \ + if (ret != LZMA_STREAM_END) \ + return ret; \ + coder->pos = 0; \ +} while (0) + + +static lzma_ret +process(lzma_coder *coder) +{ + while (coder->buffer_size < LZMA_BUFFER_SIZE) + switch (coder->sequence) { + case SEQ_FLAGS: + coder->buffer[coder->buffer_size] = 0; + + if (coder->metadata.header_metadata_size + != LZMA_VLI_VALUE_UNKNOWN) + coder->buffer[coder->buffer_size] |= 0x01; + + if (coder->metadata.total_size != LZMA_VLI_VALUE_UNKNOWN) + coder->buffer[coder->buffer_size] |= 0x02; + + if (coder->metadata.uncompressed_size + != LZMA_VLI_VALUE_UNKNOWN) + coder->buffer[coder->buffer_size] |= 0x04; + + if (coder->index_count > 0) + coder->buffer[coder->buffer_size] |= 0x08; + + if (coder->metadata.extra != NULL) + coder->buffer[coder->buffer_size] |= 0x80; + + ++coder->buffer_size; + coder->sequence = SEQ_HEADER_METADATA_SIZE; + break; + + case SEQ_HEADER_METADATA_SIZE: + if (coder->metadata.header_metadata_size + != LZMA_VLI_VALUE_UNKNOWN) + write_vli(coder->metadata.header_metadata_size); + + coder->sequence = SEQ_TOTAL_SIZE; + break; + + case SEQ_TOTAL_SIZE: + if (coder->metadata.total_size != LZMA_VLI_VALUE_UNKNOWN) + 
write_vli(coder->metadata.total_size); + + coder->sequence = SEQ_UNCOMPRESSED_SIZE; + break; + + case SEQ_UNCOMPRESSED_SIZE: + if (coder->metadata.uncompressed_size + != LZMA_VLI_VALUE_UNKNOWN) + write_vli(coder->metadata.uncompressed_size); + + coder->sequence = SEQ_INDEX_COUNT; + break; + + case SEQ_INDEX_COUNT: + if (coder->index_count == 0) { + if (coder->metadata.extra == NULL) { + coder->sequence = SEQ_END; + return LZMA_STREAM_END; + } + + coder->sequence = SEQ_EXTRA_ID; + break; + } + + write_vli(coder->index_count); + coder->sequence = SEQ_INDEX_TOTAL; + break; + + case SEQ_INDEX_TOTAL: + write_vli(coder->index_current->total_size); + + coder->index_current = coder->index_current->next; + if (coder->index_current == NULL) { + coder->index_current = coder->metadata.index; + coder->sequence = SEQ_INDEX_UNCOMPRESSED; + } + + break; + + case SEQ_INDEX_UNCOMPRESSED: + write_vli(coder->index_current->uncompressed_size); + + coder->index_current = coder->index_current->next; + if (coder->index_current != NULL) + break; + + if (coder->metadata.extra != NULL) { + coder->sequence = SEQ_EXTRA_ID; + break; + } + + coder->sequence = SEQ_END; + return LZMA_STREAM_END; + + case SEQ_EXTRA_ID: { + const lzma_ret ret = lzma_vli_encode( + coder->metadata.extra->id, &coder->pos, 1, + coder->buffer, &coder->buffer_size, + LZMA_BUFFER_SIZE); + switch (ret) { + case LZMA_OK: + break; + + case LZMA_STREAM_END: + coder->pos = 0; + + // Handle the special ID 0. 
+ if (coder->metadata.extra->id == 0) { + coder->metadata.extra + = coder->metadata.extra->next; + if (coder->metadata.extra == NULL) { + coder->sequence = SEQ_END; + return LZMA_STREAM_END; + } + + coder->sequence = SEQ_EXTRA_ID; + + } else { + coder->sequence = SEQ_EXTRA_SIZE; + } + + break; + + default: + return ret; + } + + break; + } + + case SEQ_EXTRA_SIZE: + if (coder->metadata.extra->size >= (lzma_vli)(SIZE_MAX)) + return LZMA_HEADER_ERROR; + + write_vli(coder->metadata.extra->size); + coder->sequence = SEQ_EXTRA_DATA; + break; + + case SEQ_EXTRA_DATA: + bufcpy(coder->metadata.extra->data, &coder->pos, + coder->metadata.extra->size, + coder->buffer, &coder->buffer_size, + LZMA_BUFFER_SIZE); + + if ((size_t)(coder->metadata.extra->size) == coder->pos) { + coder->metadata.extra = coder->metadata.extra->next; + if (coder->metadata.extra == NULL) { + coder->sequence = SEQ_END; + return LZMA_STREAM_END; + } + + coder->pos = 0; + coder->sequence = SEQ_EXTRA_ID; + } + + break; + + case SEQ_END: + // Everything is encoded. Let the compression code finish + // its work now. + return LZMA_STREAM_END; + } + + return LZMA_OK; +} + + +static lzma_ret +metadata_encode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in lzma_attribute((unused)), + size_t *restrict in_pos lzma_attribute((unused)), + size_t in_size lzma_attribute((unused)), uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, + lzma_action action lzma_attribute((unused))) +{ + while (!coder->end_was_reached) { + // Flush coder->buffer if it isn't empty. 
+ if (coder->buffer_size > 0) { + const lzma_ret ret = coder->block_encoder.code( + coder->block_encoder.coder, allocator, + coder->buffer, &coder->buffer_pos, + coder->buffer_size, + out, out_pos, out_size, LZMA_RUN); + if (coder->buffer_pos < coder->buffer_size + || ret != LZMA_OK) + return ret; + + coder->buffer_pos = 0; + coder->buffer_size = 0; + } + + const lzma_ret ret = process(coder); + + switch (ret) { + case LZMA_OK: + break; + + case LZMA_STREAM_END: + coder->end_was_reached = true; + break; + + default: + return ret; + } + } + + // Finish + return coder->block_encoder.code(coder->block_encoder.coder, allocator, + coder->buffer, &coder->buffer_pos, coder->buffer_size, + out, out_pos, out_size, LZMA_FINISH); +} + + +static void +metadata_encoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_coder_end(&coder->block_encoder, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +metadata_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + lzma_options_block *options, const lzma_metadata *metadata) +{ + if (options == NULL || metadata == NULL) + return LZMA_PROG_ERROR; + + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &metadata_encode; + next->end = &metadata_encoder_end; + next->coder->block_encoder = LZMA_NEXT_CODER_INIT; + } + + next->coder->sequence = SEQ_FLAGS; + next->coder->pos = 0; + next->coder->metadata = *metadata; + next->coder->index_count = 0; + next->coder->index_current = metadata->index; + next->coder->end_was_reached = false; + next->coder->buffer_pos = 0; + next->coder->buffer_size = 0; + + // Count and validate the Index Records. 
+ { + const lzma_index *i = metadata->index; + while (i != NULL) { + if (i->total_size > LZMA_VLI_VALUE_MAX + || i->uncompressed_size + > LZMA_VLI_VALUE_MAX) + return LZMA_PROG_ERROR; + + ++next->coder->index_count; + i = i->next; + } + } + + // Initialize the Block encoder. + return lzma_block_encoder_init( + &next->coder->block_encoder, allocator, options); +} + + +extern lzma_ret +lzma_metadata_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + lzma_options_block *options, const lzma_metadata *metadata) +{ + lzma_next_coder_init(metadata_encoder_init, next, allocator, + options, metadata); +} + + +extern LZMA_API lzma_ret +lzma_metadata_encoder(lzma_stream *strm, lzma_options_block *options, + const lzma_metadata *metadata) +{ + lzma_next_strm_init(strm, metadata_encoder_init, options, metadata); + + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} + + +extern LZMA_API lzma_vli +lzma_metadata_size(const lzma_metadata *metadata) +{ + lzma_vli size = 1; // Metadata Flags + + // Validate header_metadata_size, total_size, and uncompressed_size. + if (!lzma_vli_is_valid(metadata->header_metadata_size) + || !lzma_vli_is_valid(metadata->total_size) + || !lzma_vli_is_valid(metadata->uncompressed_size)) + return 0; + + // Add the sizes of these three fields. 
+ if (metadata->header_metadata_size != LZMA_VLI_VALUE_UNKNOWN) + size += lzma_vli_size(metadata->header_metadata_size); + + if (metadata->total_size != LZMA_VLI_VALUE_UNKNOWN) + size += lzma_vli_size(metadata->total_size); + + if (metadata->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) + size += lzma_vli_size(metadata->uncompressed_size); + + // Index + if (metadata->index != NULL) { + const lzma_index *i = metadata->index; + size_t count = 1; + + do { + const size_t x = lzma_vli_size(i->total_size); + const size_t y = lzma_vli_size(i->uncompressed_size); + if (x == 0 || y == 0) + return 0; + + size += x + y; + ++count; + i = i->next; + + } while (i != NULL); + + const size_t tmp = lzma_vli_size(count); + if (tmp == 0) + return 0; + + size += tmp; + } + + // Extra + { + const lzma_extra *e = metadata->extra; + while (e != NULL) { + // Validate the numbers. + if (e->id > LZMA_VLI_VALUE_MAX + || e->size >= (lzma_vli)(SIZE_MAX)) + return 0; + + // Add the sizes. + size += lzma_vli_size(e->id); + if (e->id != 0) { + size += lzma_vli_size(e->size); + size += e->size; + } + + e = e->next; + } + } + + return size; +} diff --git a/src/liblzma/common/metadata_encoder.h b/src/liblzma/common/metadata_encoder.h new file mode 100644 index 00000000..20357fe6 --- /dev/null +++ b/src/liblzma/common/metadata_encoder.h @@ -0,0 +1,30 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file metadata_encoder.h +/// \brief Encodes metadata to be stored into Metadata Blocks +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. 
+// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_METADATA_ENCODER_H +#define LZMA_METADATA_ENCODER_H + +#include "common.h" + + +extern lzma_ret lzma_metadata_encoder_init( + lzma_next_coder *next, lzma_allocator *allocator, + lzma_options_block *options, const lzma_metadata *metadata); + +#endif diff --git a/src/liblzma/common/next_coder.c b/src/liblzma/common/next_coder.c new file mode 100644 index 00000000..c10fe24d --- /dev/null +++ b/src/liblzma/common/next_coder.c @@ -0,0 +1,65 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file next_coder.c +/// \brief Initializing and freeing the next coder in the chain +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + +extern lzma_ret +lzma_next_filter_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + lzma_ret ret = LZMA_OK; + + // Free the existing coder if it is different than the current one. 
+ if ((uintptr_t)(filters[0].init) != next->init) + lzma_next_coder_end(next, allocator); + + if (filters[0].init != NULL) { + // Initialize the new coder. + ret = filters[0].init(next, allocator, filters); + + // Set the init function pointer if initialization was + // successful. next->code and next->end are set by the + // initialization function itself. + if (ret == LZMA_OK) { + next->init = (uintptr_t)(filters[0].init); + assert(next->code != NULL); + assert(next->end != NULL); + } else { + lzma_next_coder_end(next, allocator); + } + } + + return ret; +} + + +extern void +lzma_next_coder_end(lzma_next_coder *next, lzma_allocator *allocator) +{ + if (next != NULL) { + if (next->end != NULL) + next->end(next->coder, allocator); + + // Reset the variables so the we don't accidentally think + // that it is an already initialized coder. + *next = LZMA_NEXT_CODER_INIT; + } + + return; +} diff --git a/src/liblzma/common/raw_common.c b/src/liblzma/common/raw_common.c new file mode 100644 index 00000000..394903bc --- /dev/null +++ b/src/liblzma/common/raw_common.c @@ -0,0 +1,175 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file raw_common.c +/// \brief Stuff shared between raw encoder and raw decoder +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "raw_common.h" + + +/// \brief Prepares the filter chain +/// +/// Prepares the filter chain by setting uncompressed sizes for each filter, +/// and adding implicit Subblock filter when needed. +/// +/// \return true if error occurred, false on success. +/// +static bool +prepare(lzma_vli *id, lzma_vli *uncompressed_size, bool implicit) +{ + bool needs_end_of_input = false; + + switch (id[0]) { + case LZMA_FILTER_COPY: + case LZMA_FILTER_X86: + case LZMA_FILTER_POWERPC: + case LZMA_FILTER_IA64: + case LZMA_FILTER_ARM: + case LZMA_FILTER_ARMTHUMB: + case LZMA_FILTER_SPARC: + case LZMA_FILTER_DELTA: + uncompressed_size[1] = uncompressed_size[0]; + needs_end_of_input = true; + break; + + case LZMA_FILTER_SUBBLOCK: + case LZMA_FILTER_LZMA: + // These change the size of the data unpredictably. + uncompressed_size[1] = LZMA_VLI_VALUE_UNKNOWN; + break; + + case LZMA_FILTER_SUBBLOCK_HELPER: + uncompressed_size[1] = uncompressed_size[0]; + break; + + default: + // Unknown filter. + return true; + } + + // Is this the last filter in the chain? + if (id[1] == LZMA_VLI_VALUE_UNKNOWN) { + if (!needs_end_of_input || !implicit || uncompressed_size[0] + != LZMA_VLI_VALUE_UNKNOWN) + return false; + + // Add implicit Subblock filter. + id[1] = LZMA_FILTER_SUBBLOCK; + uncompressed_size[1] = LZMA_VLI_VALUE_UNKNOWN; + id[2] = LZMA_VLI_VALUE_UNKNOWN; + } + + return prepare(id + 1, uncompressed_size + 1, implicit); +} + + +extern lzma_ret +lzma_raw_coder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_options_filter *options, lzma_vli uncompressed_size, + lzma_init_function (*get_function)(lzma_vli id), + bool allow_implicit, bool is_encoder) +{ + if (options == NULL || !lzma_vli_is_valid(uncompressed_size)) + return LZMA_PROG_ERROR; + + // Count the number of filters in the chain. 
+ size_t count = 0; + while (options[count].id != LZMA_VLI_VALUE_UNKNOWN) + ++count; + + // Allocate enough space from the stack for IDs and uncompressed + // sizes. We need two extra: possible implicit Subblock and end + // of array indicator. + lzma_vli ids[count + 2]; + lzma_vli uncompressed_sizes[count + 2]; + bool using_implicit = false; + + uncompressed_sizes[0] = uncompressed_size; + + if (count == 0) { + if (!allow_implicit) + return LZMA_PROG_ERROR; + + count = 1; + using_implicit = true; + + // Special case: no filters were specified, so an implicit + // Copy or Subblock filter is used. + if (uncompressed_size == LZMA_VLI_VALUE_UNKNOWN) + ids[0] = LZMA_FILTER_SUBBLOCK; + else + ids[0] = LZMA_FILTER_COPY; + + ids[1] = LZMA_VLI_VALUE_UNKNOWN; + + } else { + // Prepare the ids[] and uncompressed_sizes[]. + for (size_t i = 0; i < count; ++i) + ids[i] = options[i].id; + + ids[count] = LZMA_VLI_VALUE_UNKNOWN; + + if (prepare(ids, uncompressed_sizes, allow_implicit)) + return LZMA_HEADER_ERROR; + + // Check if implicit Subblock filter was added. + if (ids[count] != LZMA_VLI_VALUE_UNKNOWN) { + assert(ids[count] == LZMA_FILTER_SUBBLOCK); + ++count; + using_implicit = true; + } + } + + // Set the filter functions, and copy uncompressed sizes and options. + lzma_filter_info filters[count + 1]; + if (is_encoder) { + for (size_t i = 0; i < count; ++i) { + // The order of the filters is reversed in the + // encoder. It allows more efficient handling + // of the uncompressed data. 
+ const size_t j = count - i - 1; + + filters[j].init = get_function(ids[i]); + if (filters[j].init == NULL) + return LZMA_HEADER_ERROR; + + filters[j].options = options[i].options; + filters[j].uncompressed_size = uncompressed_sizes[i]; + } + + if (using_implicit) + filters[0].options = NULL; + + } else { + for (size_t i = 0; i < count; ++i) { + filters[i].init = get_function(ids[i]); + if (filters[i].init == NULL) + return LZMA_HEADER_ERROR; + + filters[i].options = options[i].options; + filters[i].uncompressed_size = uncompressed_sizes[i]; + } + + if (using_implicit) + filters[count - 1].options = NULL; + } + + // Terminate the array. + filters[count].init = NULL; + + // Initialize the filters. + return lzma_next_filter_init(next, allocator, filters); +} diff --git a/src/liblzma/common/raw_common.h b/src/liblzma/common/raw_common.h new file mode 100644 index 00000000..172223cb --- /dev/null +++ b/src/liblzma/common/raw_common.h @@ -0,0 +1,31 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file raw_common.h +/// \brief Stuff shared between raw encoder and raw decoder +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_RAW_COMMON_H +#define LZMA_RAW_COMMON_H + +#include "common.h" + +extern lzma_ret lzma_raw_coder_init(lzma_next_coder *next, + lzma_allocator *allocator, + const lzma_options_filter *options, lzma_vli uncompressed_size, + lzma_init_function (*get_function)(lzma_vli id), + bool allow_implicit, bool is_encoder); + +#endif diff --git a/src/liblzma/common/raw_decoder.c b/src/liblzma/common/raw_decoder.c new file mode 100644 index 00000000..a11cb5c4 --- /dev/null +++ b/src/liblzma/common/raw_decoder.c @@ -0,0 +1,127 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file raw_decoder.c +/// \brief Raw decoder initialization API +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "raw_decoder.h" +#include "copy_coder.h" +#include "simple_coder.h" +#include "subblock_decoder.h" +#include "subblock_decoder_helper.h" +#include "delta_coder.h" +#include "lzma_decoder.h" +#include "metadata_decoder.h" + + +static lzma_init_function +get_function(lzma_vli id) +{ + switch (id) { +#ifdef HAVE_FILTER_COPY + case LZMA_FILTER_COPY: + return &lzma_copy_decoder_init; +#endif + +#ifdef HAVE_FILTER_SUBBLOCK + case LZMA_FILTER_SUBBLOCK: + return &lzma_subblock_decoder_init; +#endif + +#ifdef HAVE_FILTER_X86 + case LZMA_FILTER_X86: + return &lzma_simple_x86_decoder_init; +#endif + +#ifdef HAVE_FILTER_POWERPC + case LZMA_FILTER_POWERPC: + return &lzma_simple_powerpc_decoder_init; +#endif + +#ifdef HAVE_FILTER_IA64 + case LZMA_FILTER_IA64: + return &lzma_simple_ia64_decoder_init; +#endif + +#ifdef HAVE_FILTER_ARM + case LZMA_FILTER_ARM: + return &lzma_simple_arm_decoder_init; +#endif + +#ifdef HAVE_FILTER_ARMTHUMB + case LZMA_FILTER_ARMTHUMB: + return &lzma_simple_armthumb_decoder_init; +#endif + +#ifdef HAVE_FILTER_SPARC + case LZMA_FILTER_SPARC: + return &lzma_simple_sparc_decoder_init; +#endif + +#ifdef HAVE_FILTER_DELTA + case LZMA_FILTER_DELTA: + return &lzma_delta_decoder_init; +#endif + +#ifdef HAVE_FILTER_LZMA + case LZMA_FILTER_LZMA: + return &lzma_lzma_decoder_init; +#endif + +#ifdef HAVE_FILTER_SUBBLOCK + case LZMA_FILTER_SUBBLOCK_HELPER: + return &lzma_subblock_decoder_helper_init; +#endif + } + + return NULL; +} + + +extern lzma_ret +lzma_raw_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_options_filter *options, + lzma_vli uncompressed_size, bool allow_implicit) +{ + const lzma_ret ret = lzma_raw_coder_init(next, allocator, + options, uncompressed_size, &get_function, + allow_implicit, false); + + if (ret != LZMA_OK) + lzma_next_coder_end(next, allocator); + + return ret; +} + + +extern LZMA_API lzma_ret 
+lzma_raw_decoder(lzma_stream *strm, const lzma_options_filter *options, + lzma_vli uncompressed_size, lzma_bool allow_implicit) +{ + return_if_error(lzma_strm_init(strm)); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; + + const lzma_ret ret = lzma_raw_coder_init(&strm->internal->next, + strm->allocator, options, uncompressed_size, + &get_function, allow_implicit, false); + + if (ret != LZMA_OK) + lzma_end(strm); + + return ret; +} diff --git a/src/liblzma/common/raw_decoder.h b/src/liblzma/common/raw_decoder.h new file mode 100644 index 00000000..9d48074b --- /dev/null +++ b/src/liblzma/common/raw_decoder.h @@ -0,0 +1,30 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file raw_decoder.h +/// \brief Raw decoder initialization API +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_RAW_DECODER_H +#define LZMA_RAW_DECODER_H + +#include "raw_common.h" + + +extern lzma_ret lzma_raw_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_options_filter *options, + lzma_vli uncompressed_size, bool implicit); + +#endif diff --git a/src/liblzma/common/raw_encoder.c b/src/liblzma/common/raw_encoder.c new file mode 100644 index 00000000..c2cd0a51 --- /dev/null +++ b/src/liblzma/common/raw_encoder.c @@ -0,0 +1,124 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file raw_encoder.c +/// \brief Raw encoder initialization API +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "raw_encoder.h" +#include "copy_coder.h" +#include "simple_coder.h" +#include "subblock_encoder.h" +#include "delta_coder.h" +#include "lzma_encoder.h" + + +static lzma_init_function +get_function(lzma_vli id) +{ + switch (id) { +#ifdef HAVE_FILTER_COPY + case LZMA_FILTER_COPY: + return &lzma_copy_encoder_init; +#endif + +#ifdef HAVE_FILTER_SUBBLOCK + case LZMA_FILTER_SUBBLOCK: + return &lzma_subblock_encoder_init; +#endif + +#ifdef HAVE_FILTER_X86 + case LZMA_FILTER_X86: + return &lzma_simple_x86_encoder_init; +#endif + +#ifdef HAVE_FILTER_POWERPC + case LZMA_FILTER_POWERPC: + return &lzma_simple_powerpc_encoder_init; +#endif + +#ifdef HAVE_FILTER_IA64 + case LZMA_FILTER_IA64: + return &lzma_simple_ia64_encoder_init; +#endif + +#ifdef HAVE_FILTER_ARM + case LZMA_FILTER_ARM: + return &lzma_simple_arm_encoder_init; +#endif + +#ifdef HAVE_FILTER_ARMTHUMB + case LZMA_FILTER_ARMTHUMB: + return &lzma_simple_armthumb_encoder_init; +#endif + +#ifdef HAVE_FILTER_SPARC + case LZMA_FILTER_SPARC: + return &lzma_simple_sparc_encoder_init; +#endif + +#ifdef HAVE_FILTER_DELTA + case LZMA_FILTER_DELTA: + return &lzma_delta_encoder_init; +#endif + +#ifdef HAVE_FILTER_LZMA + case LZMA_FILTER_LZMA: + return &lzma_lzma_encoder_init; +#endif + } + + return NULL; +} + + +extern lzma_ret +lzma_raw_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_options_filter *options, + lzma_vli uncompressed_size, bool allow_implicit) +{ + // lzma_raw_coder_init() accesses get_function() via function pointer, + // because this way linker doesn't statically link both encoder and + // decoder functions if user needs only encoder or decoder. 
+ const lzma_ret ret = lzma_raw_coder_init(next, allocator, + options, uncompressed_size, &get_function, + allow_implicit, true); + + if (ret != LZMA_OK) + lzma_next_coder_end(next, allocator); + + return ret; +} + + +extern LZMA_API lzma_ret +lzma_raw_encoder(lzma_stream *strm, const lzma_options_filter *options, + lzma_vli uncompressed_size, lzma_bool allow_implicit) +{ + return_if_error(lzma_strm_init(strm)); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + const lzma_ret ret = lzma_raw_coder_init(&strm->internal->next, + strm->allocator, options, uncompressed_size, + &get_function, allow_implicit, true); + + if (ret != LZMA_OK) + lzma_end(strm); + + return ret; +} diff --git a/src/liblzma/common/raw_encoder.h b/src/liblzma/common/raw_encoder.h new file mode 100644 index 00000000..b0aab61a --- /dev/null +++ b/src/liblzma/common/raw_encoder.h @@ -0,0 +1,30 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file raw_encoder.h +/// \brief Raw encoder initialization API +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_RAW_ENCODER_H +#define LZMA_RAW_ENCODER_H + +#include "raw_common.h" + + +extern lzma_ret lzma_raw_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_options_filter *options, + lzma_vli uncompressed_size, bool allow_implicit); + +#endif diff --git a/src/liblzma/common/stream_common.c b/src/liblzma/common/stream_common.c new file mode 100644 index 00000000..121a6674 --- /dev/null +++ b/src/liblzma/common/stream_common.c @@ -0,0 +1,23 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file stream_common.c +/// \brief Common stuff for Stream coders +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
#include "stream_common.h"

/// Six magic bytes for the header: 0xFF, the ASCII letters "LZMA",
/// and a zero byte.
const uint8_t lzma_header_magic[6] = { 0xFF, 0x4C, 0x5A, 0x4D, 0x41, 0x00 };

/// Two magic bytes for the footer: the ASCII letters "YZ".
const uint8_t lzma_footer_magic[2] = { 0x59, 0x5A };
/// State of the Stream decoder.
struct lzma_coder_s {
	/// Which part of the Stream is being decoded. The *_INIT states
	/// set up a sub-decoder and the *_CODE states run it.
	enum {
		SEQ_STREAM_HEADER_CODE,
		SEQ_BLOCK_HEADER_INIT,
		SEQ_BLOCK_HEADER_CODE,
		SEQ_METADATA_CODE,
		SEQ_DATA_CODE,
		SEQ_STREAM_TAIL_INIT,
		SEQ_STREAM_TAIL_CODE,
	} sequence;

	/// Position in variable-length integers and in some other things.
	// NOTE(review): in this file the field is only reset to zero by
	// stream_decoder_init(); it looks unused otherwise -- confirm.
	size_t pos;

	/// Block or Metadata decoder. It is initialized with
	/// lzma_metadata_decoder_init() for Metadata Blocks and with
	/// lzma_block_decoder_init() for Data Blocks.
	lzma_next_coder block_decoder;

	/// Block Header decoder; this is kept separate from block_decoder.
	/// It is initialized again with lzma_block_header_decoder_init()
	/// for every Block Header.
	lzma_next_coder block_header_decoder;

	/// Options shared by the Block Header decoder and the Block and
	/// Metadata decoders.
	lzma_options_block block_options;

	/// Information about the sizes of the Blocks
	lzma_info *info;

	/// Current Block in *info
	lzma_info_iter iter;

	/// Number of bytes not yet processed from Data Blocks in the Stream.
	/// This can be LZMA_VLI_VALUE_UNKNOWN. If it is known, it is
	/// decremented while decoding and verified to match the reality.
	lzma_vli total_left;

	/// Like total_left above but for uncompressed data from
	/// Data Blocks.
	lzma_vli uncompressed_left;

	/// Stream Flags from Stream Header
	lzma_stream_flags header_flags;

	/// Stream Flags from Stream tail
	lzma_stream_flags tail_flags;

	/// Decoder for Stream Header and Stream tail. This takes very
	/// little memory and the same data structure can be used for
	/// both Header and tail, so it's a good idea to have a separate
	/// lzma_next_coder structure for it.
	lzma_next_coder flags_decoder;

	/// Temporary destination for the decoded Metadata.
	lzma_metadata metadata;

	/// Pointer to application-supplied pointer where to store the list
	/// of Extra Records from the Header Metadata Block. NULL if the
	/// application didn't ask for them.
	lzma_extra **header_extra;

	/// Same as above but Footer Metadata Block
	lzma_extra **footer_extra;
};
+ coder->block_options.uncompressed_limit = LZMA_VLI_C(1) << 23; + + lzma_info_size size_type; + bool want_extra; + + // If we haven't decoded any Data Blocks yet, this is Header + // Metadata Block. + if (lzma_info_index_count_get(coder->info) == 0) { + coder->block_options.has_backward_size = false; + coder->block_options.handle_padding = true; + size_type = LZMA_INFO_HEADER_METADATA; + want_extra = coder->header_extra != NULL; + } else { + if (lzma_info_index_finish(coder->info)) + return LZMA_DATA_ERROR; + + coder->block_options.has_backward_size = true; + coder->block_options.handle_padding = false; + size_type = LZMA_INFO_FOOTER_METADATA; + want_extra = coder->footer_extra != NULL; + } + + coder->block_options.has_uncompressed_size_in_footer = false; + coder->block_options.total_size = lzma_info_size_get( + coder->info, size_type); + + coder->sequence = SEQ_METADATA_CODE; + + return lzma_metadata_decoder_init(&coder->block_decoder, allocator, + &coder->block_options, &coder->metadata, want_extra); +} + + +static lzma_ret +data_init(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_ret ret = lzma_info_iter_next(&coder->iter, allocator); + if (ret != LZMA_OK) + return ret; + + ret = lzma_info_iter_set(&coder->iter, LZMA_VLI_VALUE_UNKNOWN, + coder->block_options.uncompressed_size); + if (ret != LZMA_OK) + return ret; + + coder->block_options.total_size = coder->iter.total_size; + coder->block_options.uncompressed_size = coder->iter.uncompressed_size; + coder->block_options.total_limit = coder->total_left; + coder->block_options.uncompressed_limit = coder->uncompressed_left; + + if (coder->header_flags.is_multi) { + coder->block_options.has_uncompressed_size_in_footer = false; + coder->block_options.has_backward_size = false; + coder->block_options.handle_padding = true; + } else { + coder->block_options.has_uncompressed_size_in_footer + = coder->iter.uncompressed_size + == LZMA_VLI_VALUE_UNKNOWN; + coder->block_options.has_backward_size = true; + 
/// \brief      Decodes a Stream: header, Blocks, and tail
///
/// State machine driven by coder->sequence. The loop keeps going only
/// while there is room in the output buffer and either unread input is
/// available or a Data Block is being decoded; when that no longer holds,
/// LZMA_OK is returned so that the caller can supply more buffer space.
///
/// \return     LZMA_STREAM_END once the Stream tail has been decoded and
///             its flags equal those of the Stream Header;
///             LZMA_DATA_ERROR if the flags differ;
///             LZMA_UNSUPPORTED_CHECK if the Check type of the Stream
///             isn't supported; LZMA_PROG_ERROR for an invalid state or
///             a failed lzma_info_size_set(); otherwise LZMA_OK or the
///             code returned by the sub-decoder or helper that stopped.
///
static lzma_ret
stream_decode(lzma_coder *coder, lzma_allocator *allocator,
		const uint8_t *restrict in, size_t *restrict in_pos,
		size_t in_size, uint8_t *restrict out,
		size_t *restrict out_pos, size_t out_size, lzma_action action)
{
	while (*out_pos < out_size && (*in_pos < in_size
			|| coder->sequence == SEQ_DATA_CODE))
	switch (coder->sequence) {
	case SEQ_STREAM_HEADER_CODE: {
		const lzma_ret ret = coder->flags_decoder.code(
				coder->flags_decoder.coder,
				allocator, in, in_pos, in_size,
				NULL, NULL, 0, LZMA_RUN);
		if (ret != LZMA_STREAM_END)
			return ret;

		// The state is advanced before the Check type is tested, so
		// the state machine is already at the first Block Header when
		// LZMA_UNSUPPORTED_CHECK is returned below.
		coder->sequence = SEQ_BLOCK_HEADER_INIT;

		// Detect if the Check type is supported and give appropriate
		// warning if it isn't. We don't warn every time a new Block
		// is started.
		lzma_check tmp;
		if (lzma_check_init(&tmp, coder->header_flags.check))
			return LZMA_UNSUPPORTED_CHECK;

		break;
	}

	case SEQ_BLOCK_HEADER_INIT: {
		// Every Block uses the Check type and CRC32 setting that
		// were read from the Stream Header.
		coder->block_options.check = coder->header_flags.check;
		coder->block_options.has_crc32 = coder->header_flags.has_crc32;

		const lzma_ret ret = lzma_block_header_decoder_init(
				&coder->block_header_decoder, allocator,
				&coder->block_options);
		if (ret != LZMA_OK)
			return ret;

		coder->sequence = SEQ_BLOCK_HEADER_CODE;
	}

	// Fall through

	case SEQ_BLOCK_HEADER_CODE: {
		lzma_ret ret = coder->block_header_decoder.code(
				coder->block_header_decoder.coder,
				allocator, in, in_pos, in_size,
				NULL, NULL, 0, LZMA_RUN);

		if (ret != LZMA_STREAM_END)
			return ret;

		// Both init functions set coder->sequence to the matching
		// *_CODE state.
		if (coder->block_options.is_metadata)
			ret = metadata_init(coder, allocator);
		else
			ret = data_init(coder, allocator);

		if (ret != LZMA_OK)
			return ret;

		break;
	}

	case SEQ_METADATA_CODE: {
		// Metadata Blocks produce no output for the application, so
		// no output buffer is passed to the decoder.
		lzma_ret ret = coder->block_decoder.code(
				coder->block_decoder.coder, allocator,
				in, in_pos, in_size, NULL, NULL, 0, LZMA_RUN);
		if (ret != LZMA_STREAM_END)
			return ret;

		// Same test as in metadata_init(): no Data Blocks decoded
		// yet means that this was the Header Metadata Block.
		const bool is_header_metadata = lzma_info_index_count_get(
				coder->info) == 0;

		if (is_header_metadata) {
			// Hand the Extra Records over to the application.
			if (coder->header_extra != NULL) {
				*coder->header_extra = coder->metadata.extra;
				coder->metadata.extra = NULL;
			}

			if (lzma_info_size_set(coder->info,
					LZMA_INFO_HEADER_METADATA,
					coder->block_options.total_size)
					!= LZMA_OK)
				return LZMA_PROG_ERROR;

			coder->sequence = SEQ_BLOCK_HEADER_INIT;
		} else {
			if (coder->footer_extra != NULL) {
				*coder->footer_extra = coder->metadata.extra;
				coder->metadata.extra = NULL;
			}

			coder->sequence = SEQ_STREAM_TAIL_INIT;
		}

		assert(coder->metadata.extra == NULL);

		ret = lzma_info_metadata_set(coder->info, allocator,
				&coder->metadata, is_header_metadata, true);
		if (ret != LZMA_OK)
			return ret;

		// Initialize coder->total_left and coder->uncompressed_left
		// from Header Metadata.
		if (is_header_metadata) {
			coder->total_left = lzma_info_size_get(
					coder->info, LZMA_INFO_TOTAL);
			coder->uncompressed_left = lzma_info_size_get(
					coder->info, LZMA_INFO_UNCOMPRESSED);
		}

		break;
	}

	case SEQ_DATA_CODE: {
		// This is the only state that writes to the output buffer
		// and that gets the action given by the application.
		lzma_ret ret = coder->block_decoder.code(
				coder->block_decoder.coder, allocator,
				in, in_pos, in_size, out, out_pos, out_size,
				action);

		if (ret != LZMA_STREAM_END)
			return ret;

		// Record the final sizes of the Block that just ended.
		ret = lzma_info_iter_set(&coder->iter,
				coder->block_options.total_size,
				coder->block_options.uncompressed_size);
		if (ret != LZMA_OK)
			return ret;

		// These won't overflow since lzma_info_iter_set() succeeded.
		if (coder->total_left != LZMA_VLI_VALUE_UNKNOWN)
			coder->total_left -= coder->block_options.total_size;
		if (coder->uncompressed_left != LZMA_VLI_VALUE_UNKNOWN)
			coder->uncompressed_left -= coder->block_options
					.uncompressed_size;

		// A Single-Block Stream ends right after its only Data Block.
		// NOTE(review): SEQ_STREAM_TAIL_INIT calls
		// lzma_info_index_finish() again -- confirm that finishing
		// the Index twice is harmless.
		if (!coder->header_flags.is_multi) {
			ret = lzma_info_index_finish(coder->info);
			if (ret != LZMA_OK)
				return ret;

			coder->sequence = SEQ_STREAM_TAIL_INIT;
			break;
		}

		coder->sequence = SEQ_BLOCK_HEADER_INIT;
		break;
	}

	case SEQ_STREAM_TAIL_INIT: {
		lzma_ret ret = lzma_info_index_finish(coder->info);
		if (ret != LZMA_OK)
			return ret;

		// flags_decoder is reused; it decoded the Stream Header
		// earlier.
		ret = lzma_stream_tail_decoder_init(&coder->flags_decoder,
				allocator, &coder->tail_flags);
		if (ret != LZMA_OK)
			return ret;

		coder->sequence = SEQ_STREAM_TAIL_CODE;
	}

	// Fall through

	case SEQ_STREAM_TAIL_CODE: {
		const lzma_ret ret = coder->flags_decoder.code(
				coder->flags_decoder.coder, allocator,
				in, in_pos, in_size, NULL, NULL, 0, LZMA_RUN);
		if (ret != LZMA_STREAM_END)
			return ret;

		// The Stream Flags in the tail must repeat those in the
		// Stream Header.
		if (!lzma_stream_flags_is_equal(
				coder->header_flags, coder->tail_flags))
			return LZMA_DATA_ERROR;

		return LZMA_STREAM_END;
	}

	default:
		return LZMA_PROG_ERROR;
	}

	return LZMA_OK;
}
= &stream_decoder_end; + + next->coder->block_decoder = LZMA_NEXT_CODER_INIT; + next->coder->block_header_decoder = LZMA_NEXT_CODER_INIT; + next->coder->info = NULL; + next->coder->flags_decoder = LZMA_NEXT_CODER_INIT; + next->coder->metadata.index = NULL; + next->coder->metadata.extra = NULL; + } else { + lzma_index_free(next->coder->metadata.index, allocator); + next->coder->metadata.index = NULL; + + lzma_extra_free(next->coder->metadata.extra, allocator); + next->coder->metadata.extra = NULL; + } + + next->coder->info = lzma_info_init(next->coder->info, allocator); + if (next->coder->info == NULL) + return LZMA_MEM_ERROR; + + lzma_info_iter_begin(next->coder->info, &next->coder->iter); + + // Initialize Stream Header decoder. + return_if_error(lzma_stream_header_decoder_init( + &next->coder->flags_decoder, allocator, + &next->coder->header_flags)); + + // Reset the *foo_extra pointers to NULL. This way the caller knows + // if there were no Extra Records. (We don't support appending + // Records to Extra list.) + if (header != NULL) + *header = NULL; + if (footer != NULL) + *footer = NULL; + + // Reset some variables. 
+ next->coder->sequence = SEQ_STREAM_HEADER_CODE; + next->coder->pos = 0; + next->coder->uncompressed_left = LZMA_VLI_VALUE_UNKNOWN; + next->coder->total_left = LZMA_VLI_VALUE_UNKNOWN; + next->coder->header_extra = header; + next->coder->footer_extra = footer; + + return LZMA_OK; +} + + +extern lzma_ret +lzma_stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + lzma_extra **header, lzma_extra **footer) +{ + lzma_next_coder_init( + stream_decoder_init, next, allocator, header, footer); +} + + +extern LZMA_API lzma_ret +lzma_stream_decoder(lzma_stream *strm, + lzma_extra **header, lzma_extra **footer) +{ + lzma_next_strm_init(strm, stream_decoder_init, header, footer); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; + + return LZMA_OK; +} diff --git a/src/liblzma/common/stream_encoder_multi.c b/src/liblzma/common/stream_encoder_multi.c new file mode 100644 index 00000000..5955f858 --- /dev/null +++ b/src/liblzma/common/stream_encoder_multi.c @@ -0,0 +1,460 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file stream_encoder_multi.c +/// \brief Encodes Multi-Block .lzma files +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
//
///////////////////////////////////////////////////////////////////////////////

#include "stream_common.h"
#include "block_encoder.h"
#include "metadata_encoder.h"


/// State of the Multi-Block Stream encoder.
struct lzma_coder_s {
	/// Position in the encoding state machine. The *_COPY states flush
	/// the buffer in `header` to the output, the *_INIT states prepare
	/// the next Block (or the Stream Footer), and the *_CODE states run
	/// the coder stored in `next`.
	///
	/// stream_encode() moves forward with ++sequence from several of
	/// these states, so the order of the enumerators is significant.
	enum {
		SEQ_STREAM_HEADER_COPY,
		SEQ_HEADER_METADATA_INIT,
		SEQ_HEADER_METADATA_COPY,
		SEQ_HEADER_METADATA_CODE,
		SEQ_DATA_INIT,
		SEQ_DATA_COPY,
		SEQ_DATA_CODE,
		SEQ_FOOTER_METADATA_INIT,
		SEQ_FOOTER_METADATA_COPY,
		SEQ_FOOTER_METADATA_CODE,
		SEQ_STREAM_FOOTER_INIT,
		SEQ_STREAM_FOOTER_COPY,
	} sequence;

	/// Block or Metadata encoder
	lzma_next_coder next;

	/// Options for the Block encoder
	lzma_options_block block_options;

	/// Information about the Stream
	lzma_info *info;

	/// Information about the current Data Block
	lzma_info_iter iter;

	/// Pointer to user-supplied options structure. We don't write to
	/// it, only read instructions from the application, thus this is
	/// const even though the user-supplied pointer from
	/// lzma_options_filter structure isn't.
	const lzma_options_stream *stream_options;

	/// Stream Header, Block Header, or Stream Footer in encoded form.
	/// Owned by the coder: allocated with lzma_alloc() and released
	/// once it has been copied to the output completely.
	uint8_t *header;

	/// Number of bytes of `header` already copied to the output
	size_t header_pos;

	/// Number of valid bytes in `header`
	size_t header_size;
};


/// Kind of Block whose Block Header is being encoded.
typedef enum {
	BLOCK_HEADER_METADATA,
	BLOCK_DATA,
	BLOCK_FOOTER_METADATA,
} block_type;


/// \brief      Prepares coder->block_options and encodes a Block Header
///
/// Fills coder->block_options for a Block of the given type, then encodes
/// the Block Header into a newly allocated buffer stored in coder->header
/// (coder->header_size bytes, coder->header_pos reset to zero).
///
/// coder->header must be NULL on entry (asserted).
///
/// \return     LZMA_OK on success, LZMA_MEM_ERROR if the buffer cannot be
///             allocated, or the error returned by lzma_block_header_size()
///             or lzma_block_header_encode(). If the latter fails, the
///             buffer stays in coder->header and is released later by
///             stream_encoder_end() or by re-initialization.
static lzma_ret
block_header_encode(lzma_coder *coder, lzma_allocator *allocator,
		lzma_vli uncompressed_size, block_type type)
{
	assert(coder->header == NULL);

	coder->block_options = (lzma_options_block){
		.check = coder->stream_options->check,
		.has_crc32 = coder->stream_options->has_crc32,
		.has_eopm = true,
		.is_metadata = type != BLOCK_DATA,
		.has_uncompressed_size_in_footer = false,
		.has_backward_size = type == BLOCK_FOOTER_METADATA,
		.handle_padding = false,
		.total_size = LZMA_VLI_VALUE_UNKNOWN,
		.compressed_size = LZMA_VLI_VALUE_UNKNOWN,
		.uncompressed_size = uncompressed_size,
		.compressed_reserve = 0,
		.uncompressed_reserve = 0,
		.total_limit = LZMA_VLI_VALUE_UNKNOWN,
		.uncompressed_limit = LZMA_VLI_VALUE_UNKNOWN,
		.padding = LZMA_BLOCK_HEADER_PADDING_AUTO,
	};

	if (type == BLOCK_DATA) {
		// Data Blocks use the user's filter chain and are aligned
		// according to the current position in the Stream.
		memcpy(coder->block_options.filters,
				coder->stream_options->filters,
				sizeof(coder->stream_options->filters));
		coder->block_options.alignment = coder->iter.stream_offset;
	} else {
		// NOTE(review): the size is taken from `filters`, not from
		// `metadata_filters`; this is only correct if both arrays
		// have the same size -- confirm against lzma_options_stream.
		memcpy(coder->block_options.filters,
				coder->stream_options->metadata_filters,
				sizeof(coder->stream_options->filters));
		coder->block_options.alignment
				= lzma_info_metadata_alignment_get(
				coder->info, type == BLOCK_HEADER_METADATA);
	}

	// Calculate the size of the Block Header; the result is stored
	// into coder->block_options.header_size.
	lzma_ret ret = lzma_block_header_size(&coder->block_options);
	if (ret != LZMA_OK)
		return ret;

	coder->header_size = coder->block_options.header_size;
	coder->header = lzma_alloc(coder->header_size, allocator);
	if (coder->header == NULL)
		return LZMA_MEM_ERROR;

	ret = lzma_block_header_encode(coder->header, &coder->block_options);
	if (ret != LZMA_OK)
		return ret;

	coder->header_pos = 0;
	return LZMA_OK;
}


/// \brief      Prepares encoding of a Header or Footer Metadata Block
///
/// Stores the Metadata into coder->info, encodes the Block Header using
/// the size reported by lzma_metadata_size() as the Uncompressed Size,
/// and initializes coder->next as a Metadata encoder.
///
/// \return     LZMA_PROG_ERROR if lzma_metadata_size() returns zero,
///             otherwise the first error from the called functions or
///             the return value of lzma_metadata_encoder_init().
static lzma_ret
metadata_encoder_init(lzma_coder *coder, lzma_allocator *allocator,
		lzma_metadata *metadata, block_type type)
{
	lzma_ret ret = lzma_info_metadata_set(coder->info, allocator,
			metadata, type == BLOCK_HEADER_METADATA, false);
	if (ret != LZMA_OK)
		return ret;

	const lzma_vli metadata_size = lzma_metadata_size(metadata);
	if (metadata_size == 0)
		return LZMA_PROG_ERROR;

	ret = block_header_encode(coder, allocator, metadata_size, type);
	if (ret != LZMA_OK)
		return ret;

	return lzma_metadata_encoder_init(&coder->next, allocator,
			&coder->block_options, metadata);
}


/// \brief      Prepares encoding of the next Data Block
///
/// Advances coder->iter to the next Data Block, encodes a Block Header
/// with unknown Uncompressed Size, and initializes coder->next as
/// a Block encoder.
static lzma_ret
data_encoder_init(lzma_coder *coder, lzma_allocator *allocator)
{
	lzma_ret ret = lzma_info_iter_next(&coder->iter, allocator);
	if (ret != LZMA_OK)
		return ret;

	ret = block_header_encode(coder, allocator,
			LZMA_VLI_VALUE_UNKNOWN, BLOCK_DATA);
	if (ret != LZMA_OK)
		return ret;

	return lzma_block_encoder_init(&coder->next, allocator,
			&coder->block_options);
}


/// \brief      Main coding function of the Multi-Block Stream encoder
///
/// Runs the state machine in coder->sequence for as long as there is
/// space in the output buffer.
///
/// \return     LZMA_STREAM_END once the Stream Footer has been copied
///             out completely; LZMA_OK when more output space or more
///             input is needed; LZMA_PROG_ERROR on an unknown state;
///             otherwise the value returned by the failing helper or
///             by the coder in coder->next.
static lzma_ret
stream_encode(lzma_coder *coder, lzma_allocator *allocator,
		const uint8_t *restrict in, size_t *restrict in_pos,
		size_t in_size, uint8_t *restrict out,
		size_t *restrict out_pos, size_t out_size, lzma_action action)
{
	// Main loop
	while (*out_pos < out_size)
	switch (coder->sequence) {
	case SEQ_STREAM_HEADER_COPY:
	case SEQ_HEADER_METADATA_COPY:
	case SEQ_DATA_COPY:
	case SEQ_FOOTER_METADATA_COPY:
	case SEQ_STREAM_FOOTER_COPY:
		// All *_COPY states share the same code: copy as much of
		// the pre-encoded header as fits into the output buffer.
		bufcpy(coder->header, &coder->header_pos, coder->header_size,
				out, out_pos, out_size);
		if (coder->header_pos < coder->header_size)
			return LZMA_OK;

		// Everything was copied; the buffer is not needed anymore.
		lzma_free(coder->header, allocator);
		coder->header = NULL;

		switch (coder->sequence) {
		case SEQ_STREAM_HEADER_COPY:
			// Write Header Metadata Block if we have Extra for it
			// or known Uncompressed Size.
			if (coder->stream_options->header != NULL
					|| coder->stream_options
						->uncompressed_size
						!= LZMA_VLI_VALUE_UNKNOWN) {
				coder->sequence = SEQ_HEADER_METADATA_INIT;
			} else {
				// Mark that Header Metadata Block doesn't
				// exist.
				if (lzma_info_size_set(coder->info,
						LZMA_INFO_HEADER_METADATA, 0)
						!= LZMA_OK)
					return LZMA_PROG_ERROR;

				coder->sequence = SEQ_DATA_INIT;
			}
			break;

		case SEQ_HEADER_METADATA_COPY:
		case SEQ_DATA_COPY:
		case SEQ_FOOTER_METADATA_COPY:
			// The matching *_CODE state follows each of these
			// in the enumeration.
			++coder->sequence;
			break;

		case SEQ_STREAM_FOOTER_COPY:
			return LZMA_STREAM_END;

		default:
			assert(0);
		}

		break;

	case SEQ_HEADER_METADATA_INIT: {
		lzma_metadata metadata = {
			.header_metadata_size = LZMA_VLI_VALUE_UNKNOWN,
			.total_size = LZMA_VLI_VALUE_UNKNOWN,
			.uncompressed_size = coder->stream_options
					->uncompressed_size,
			.index = NULL,
			.extra = coder->stream_options->header,
		};

		const lzma_ret ret = metadata_encoder_init(coder, allocator,
				&metadata, BLOCK_HEADER_METADATA);
		if (ret != LZMA_OK)
			return ret;

		coder->sequence = SEQ_HEADER_METADATA_COPY;
		break;
	}

	case SEQ_FOOTER_METADATA_INIT: {
		lzma_metadata metadata = {
			.header_metadata_size
					= lzma_info_size_get(coder->info,
						LZMA_INFO_HEADER_METADATA),
			.total_size = LZMA_VLI_VALUE_UNKNOWN,
			.uncompressed_size = LZMA_VLI_VALUE_UNKNOWN,
			.index = lzma_info_index_get(coder->info, false),
			.extra = coder->stream_options->footer,
		};

		const lzma_ret ret = metadata_encoder_init(coder, allocator,
				&metadata, BLOCK_FOOTER_METADATA);
		if (ret != LZMA_OK)
			return ret;

		coder->sequence = SEQ_FOOTER_METADATA_COPY;
		break;
	}

	case SEQ_HEADER_METADATA_CODE:
	case SEQ_FOOTER_METADATA_CODE: {
		// The Metadata encoder takes no input from the application,
		// thus it is given an empty input buffer.
		size_t dummy = 0;
		lzma_ret ret = coder->next.code(coder->next.coder,
				allocator, NULL, &dummy, 0,
				out, out_pos, out_size, LZMA_RUN);
		if (ret != LZMA_STREAM_END)
			return ret;

		// Record the Total Size of the finished Metadata Block.
		ret = lzma_info_size_set(coder->info,
				coder->sequence == SEQ_HEADER_METADATA_CODE
					? LZMA_INFO_HEADER_METADATA
					: LZMA_INFO_FOOTER_METADATA,
				coder->block_options.total_size);
		if (ret != LZMA_OK)
			return ret;

		// SEQ_HEADER_METADATA_CODE -> SEQ_DATA_INIT
		// SEQ_FOOTER_METADATA_CODE -> SEQ_STREAM_FOOTER_INIT
		++coder->sequence;
		break;
	}

	case SEQ_DATA_INIT: {
		// Don't create an empty Block unless it would be
		// the only Data Block.
		if (*in_pos == in_size) {
			if (action != LZMA_FINISH)
				return LZMA_OK;

			if (lzma_info_index_count_get(coder->info) != 0) {
				if (lzma_info_index_finish(coder->info))
					return LZMA_DATA_ERROR;

				coder->sequence = SEQ_FOOTER_METADATA_INIT;
				break;
			}
		}

		const lzma_ret ret = data_encoder_init(coder, allocator);
		if (ret != LZMA_OK)
			return ret;

		coder->sequence = SEQ_DATA_COPY;
		break;
	}

	case SEQ_DATA_CODE: {
		// Maps the action given by the application to the action
		// passed to the Block encoder; indexed by the numeric value
		// of `action`.
		// NOTE(review): presumably the index order is LZMA_RUN,
		// LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, LZMA_FINISH, so that
		// a full flush finishes the current Block -- confirm against
		// the definition of lzma_action.
		static const lzma_action convert[4] = {
			LZMA_RUN,
			LZMA_SYNC_FLUSH,
			LZMA_FINISH,
			LZMA_FINISH,
		};

		lzma_ret ret = coder->next.code(coder->next.coder,
				allocator, in, in_pos, in_size,
				out, out_pos, out_size, convert[action]);
		if (ret != LZMA_STREAM_END || action == LZMA_SYNC_FLUSH)
			return ret;

		// The Block is finished; store its sizes.
		ret = lzma_info_iter_set(&coder->iter,
				coder->block_options.total_size,
				coder->block_options.uncompressed_size);
		if (ret != LZMA_OK)
			return ret;

		coder->sequence = SEQ_DATA_INIT;
		break;
	}

	case SEQ_STREAM_FOOTER_INIT: {
		assert(coder->header == NULL);

		lzma_stream_flags flags = {
			.check = coder->stream_options->check,
			.has_crc32 = coder->stream_options->has_crc32,
			.is_multi = true,
		};

		coder->header = lzma_alloc(LZMA_STREAM_TAIL_SIZE, allocator);
		if (coder->header == NULL)
			return LZMA_MEM_ERROR;

		const lzma_ret ret = lzma_stream_tail_encode(
				coder->header, &flags);
		if (ret != LZMA_OK)
			return ret;

		coder->header_size = LZMA_STREAM_TAIL_SIZE;
		coder->header_pos = 0;

		coder->sequence = SEQ_STREAM_FOOTER_COPY;
		break;
	}

	default:
		return LZMA_PROG_ERROR;
	}

	return LZMA_OK;
}


/// Frees the Multi-Block Stream encoder and everything it owns.
static void
stream_encoder_end(lzma_coder *coder, lzma_allocator *allocator)
{
	lzma_next_coder_end(&coder->next, allocator);
	lzma_info_free(coder->info, allocator);
	lzma_free(coder->header, allocator);
	lzma_free(coder, allocator);
	return;
}


/// \brief      Allocates (if needed) and initializes the Multi-Block
///             Stream encoder
///
/// When next->coder already exists it is reused. The Stream Header is
/// encoded into coder->header right away, so the first state of the
/// state machine only needs to copy it out.
///
/// \return     LZMA_OK on success, LZMA_PROG_ERROR if options is NULL or
///             the Stream start offset cannot be set, LZMA_MEM_ERROR on
///             allocation failure. A failure of lzma_stream_header_encode()
///             is handled by the return_if_error() macro.
static lzma_ret
stream_encoder_init(lzma_next_coder *next,
		lzma_allocator *allocator, const lzma_options_stream *options)
{
	if (options == NULL)
		return LZMA_PROG_ERROR;

	if (next->coder == NULL) {
		next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
		if (next->coder == NULL)
			return LZMA_MEM_ERROR;

		next->code = &stream_encode;
		next->end = &stream_encoder_end;

		next->coder->next = LZMA_NEXT_CODER_INIT;
		next->coder->info = NULL;
	} else {
		// Free the buffer possibly left over from the previous use.
		lzma_free(next->coder->header, allocator);
	}

	next->coder->header = NULL;

	next->coder->info = lzma_info_init(next->coder->info, allocator);
	if (next->coder->info == NULL)
		return LZMA_MEM_ERROR;

	next->coder->sequence = SEQ_STREAM_HEADER_COPY;
	next->coder->stream_options = options;

	// Encode Stream Flags
	{
		lzma_stream_flags flags = {
			.check = options->check,
			.has_crc32 = options->has_crc32,
			.is_multi = true,
		};

		next->coder->header = lzma_alloc(LZMA_STREAM_HEADER_SIZE,
				allocator);
		if (next->coder->header == NULL)
			return LZMA_MEM_ERROR;

		return_if_error(lzma_stream_header_encode(
				next->coder->header, &flags));

		next->coder->header_pos = 0;
		next->coder->header_size = LZMA_STREAM_HEADER_SIZE;
	}

	if (lzma_info_size_set(next->coder->info, LZMA_INFO_STREAM_START,
			options->alignment) != LZMA_OK)
		return LZMA_PROG_ERROR;

	lzma_info_iter_begin(next->coder->info, &next->coder->iter);

	return LZMA_OK;
}


/*
extern lzma_ret
lzma_stream_encoder_multi_init(lzma_next_coder *next,
		lzma_allocator *allocator, const lzma_options_stream *options)
{
	lzma_next_coder_init(stream_encoder_init, next, allocator, options);
}
*/


/// \brief      Initializes strm as a Multi-Block Stream encoder
///
/// All four actions (LZMA_RUN, LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH and
/// LZMA_FINISH) are marked as supported.
///
/// NOTE(review): lzma_next_strm_init() is a macro defined elsewhere;
/// presumably it returns from this function if initialization fails --
/// confirm against its definition.
extern LZMA_API lzma_ret
lzma_stream_encoder_multi(
		lzma_stream *strm, const lzma_options_stream *options)
{
	lzma_next_strm_init(strm, stream_encoder_init, options);

	strm->internal->supported_actions[LZMA_RUN] = true;
	strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true;
	strm->internal->supported_actions[LZMA_FULL_FLUSH] = true;
	strm->internal->supported_actions[LZMA_FINISH] = true;

	return LZMA_OK;
}
diff --git a/src/liblzma/common/stream_encoder_single.c b/src/liblzma/common/stream_encoder_single.c new file mode 100644 index 00000000..e8efd004 --- /dev/null +++ b/src/liblzma/common/stream_encoder_single.c @@ -0,0 +1,220 @@
///////////////////////////////////////////////////////////////////////////////
//
/// \file stream_encoder_single.c
/// \brief Encodes Single-Block .lzma files
//
// Copyright (C) 2007 Lasse Collin
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
///////////////////////////////////////////////////////////////////////////////

#include "stream_common.h"
#include "block_encoder.h"


/// State of the Single-Block Stream encoder.
struct lzma_coder_s {
	/// Uncompressed Size, Backward Size, and Footer Magic Bytes are
	/// part of Block in the file format specification, but it is simpler
	/// to implement them as part of Stream.
	enum {
		SEQ_HEADERS,
		SEQ_DATA,
		SEQ_FOOTER,
	} sequence;

	/// Block encoder
	lzma_next_coder block_encoder;

	/// Block encoder options
	lzma_options_block block_options;

	/// Stream Flags; we need to have these in this struct so that we
	/// can encode Stream Footer.
	lzma_stream_flags stream_flags;

	/// Stream Header + Block Header, or Stream Footer. The same buffer
	/// is reused for the footer once the headers have been written.
	uint8_t *header;

	/// Number of bytes of `header` already copied to the output
	size_t header_pos;

	/// Number of valid bytes in `header`
	size_t header_size;
};


/// \brief      Main coding function of the Single-Block Stream encoder
///
/// Copies out the pre-encoded Stream Header and Block Header, runs the
/// Block encoder, and finally encodes and copies out the Stream Footer.
///
/// \return     LZMA_STREAM_END once the footer has been written
///             completely; LZMA_OK when more output space is needed;
///             LZMA_PROG_ERROR on an unknown state; otherwise the value
///             returned by the Block encoder or lzma_stream_tail_encode().
static lzma_ret
stream_encode(lzma_coder *coder, lzma_allocator *allocator,
		const uint8_t *restrict in, size_t *restrict in_pos,
		size_t in_size, uint8_t *restrict out, size_t *out_pos,
		size_t out_size, lzma_action action)
{
	// NOTE: We don't check if the amount of input is in the proper limits,
	// because the Block encoder will do it for us.

	while (*out_pos < out_size)
	switch (coder->sequence) {
	case SEQ_HEADERS:
		bufcpy(coder->header, &coder->header_pos, coder->header_size,
				out, out_pos, out_size);

		if (coder->header_pos == coder->header_size) {
			// Reset the position already here, because the
			// buffer is reused for the Stream Footer.
			coder->header_pos = 0;
			coder->sequence = SEQ_DATA;
		}

		break;

	case SEQ_DATA: {
		lzma_ret ret = coder->block_encoder.code(
				coder->block_encoder.coder, allocator,
				in, in_pos, in_size,
				out, out_pos, out_size, action);
		if (ret != LZMA_STREAM_END || action == LZMA_SYNC_FLUSH)
			return ret;

		assert(*in_pos == in_size);

		// The buffer that held Stream Header + Block Header must be
		// big enough for the Stream Footer.
		assert(coder->header_size >= LZMA_STREAM_TAIL_SIZE);
		coder->header_size = LZMA_STREAM_TAIL_SIZE;

		ret = lzma_stream_tail_encode(
				coder->header, &coder->stream_flags);
		if (ret != LZMA_OK)
			return ret;

		coder->sequence = SEQ_FOOTER;
		break;
	}

	case SEQ_FOOTER:
		bufcpy(coder->header, &coder->header_pos, coder->header_size,
				out, out_pos, out_size);

		return coder->header_pos == coder->header_size
				? LZMA_STREAM_END : LZMA_OK;

	default:
		return LZMA_PROG_ERROR;
	}

	return LZMA_OK;
}


/// Frees the Single-Block Stream encoder and everything it owns.
static void
stream_encoder_end(lzma_coder *coder, lzma_allocator *allocator)
{
	lzma_next_coder_end(&coder->block_encoder, allocator);
	lzma_free(coder->header, allocator);
	lzma_free(coder, allocator);
	return;
}


/// \brief      Allocates (if needed) and initializes the Single-Block
///             Stream encoder
///
/// Encodes Stream Header and Block Header into one buffer and then
/// initializes the Block encoder.
///
/// \return     LZMA_PROG_ERROR if options is NULL, LZMA_MEM_ERROR on
///             allocation failure, otherwise the return value of
///             lzma_block_encoder_init(). Failures of the header size
///             calculation and of the header encoders are handled by
///             the return_if_error() macro.
static lzma_ret
stream_encoder_init(lzma_next_coder *next,
		lzma_allocator *allocator, const lzma_options_stream *options)
{
	if (options == NULL)
		return LZMA_PROG_ERROR;

	if (next->coder == NULL) {
		next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
		if (next->coder == NULL)
			return LZMA_MEM_ERROR;

		next->code = &stream_encode;
		next->end = &stream_encoder_end;
		next->coder->block_encoder = LZMA_NEXT_CODER_INIT;
	} else {
		// Free the previous buffer, if any.
		lzma_free(next->coder->header, allocator);
	}

	// At this point, next->coder->header points to nothing useful.
	next->coder->header = NULL;

	// Basic initializations
	next->coder->sequence = SEQ_HEADERS;
	next->coder->header_pos = 0;

	// Initialize next->coder->stream_flags.
	next->coder->stream_flags = (lzma_stream_flags){
		.check = options->check,
		.has_crc32 = options->has_crc32,
		.is_multi = false,
	};

	// Initialize next->coder->block_options.
	// End of Payload Marker and Uncompressed Size in the footer are
	// used only when the Uncompressed Size isn't known in advance.
	next->coder->block_options = (lzma_options_block){
		.check = options->check,
		.has_crc32 = options->has_crc32,
		.has_eopm = options->uncompressed_size
				== LZMA_VLI_VALUE_UNKNOWN,
		.is_metadata = false,
		.has_uncompressed_size_in_footer = options->uncompressed_size
				== LZMA_VLI_VALUE_UNKNOWN,
		.has_backward_size = true,
		.handle_padding = false,
		.compressed_size = LZMA_VLI_VALUE_UNKNOWN,
		.uncompressed_size = options->uncompressed_size,
		.compressed_reserve = 0,
		.uncompressed_reserve = 0,
		.total_size = LZMA_VLI_VALUE_UNKNOWN,
		.total_limit = LZMA_VLI_VALUE_UNKNOWN,
		.uncompressed_limit = LZMA_VLI_VALUE_UNKNOWN,
		.padding = LZMA_BLOCK_HEADER_PADDING_AUTO,
		.alignment = options->alignment + LZMA_STREAM_HEADER_SIZE,
	};
	memcpy(next->coder->block_options.filters, options->filters,
			sizeof(options->filters));

	return_if_error(lzma_block_header_size(&next->coder->block_options));

	// Encode Stream Flags and Block Header into next->coder->header.
	next->coder->header_size = (size_t)(LZMA_STREAM_HEADER_SIZE)
			+ next->coder->block_options.header_size;
	next->coder->header = lzma_alloc(next->coder->header_size, allocator);
	if (next->coder->header == NULL)
		return LZMA_MEM_ERROR;

	return_if_error(lzma_stream_header_encode(next->coder->header,
			&next->coder->stream_flags));

	return_if_error(lzma_block_header_encode(
			next->coder->header + LZMA_STREAM_HEADER_SIZE,
			&next->coder->block_options));

	// Initialize the Block encoder.
	return lzma_block_encoder_init(&next->coder->block_encoder, allocator,
			&next->coder->block_options);
}


/*
extern lzma_ret
lzma_stream_encoder_single_init(lzma_next_coder *next,
		lzma_allocator *allocator, const lzma_options_stream *options)
{
	lzma_next_coder_init(stream_encoder_init, next, allocator, options);
}
*/


/// \brief      Initializes strm as a Single-Block Stream encoder
///
/// Only LZMA_RUN and LZMA_FINISH are marked as supported actions.
///
/// NOTE(review): lzma_next_strm_init() is a macro defined elsewhere;
/// presumably it returns from this function if initialization fails --
/// confirm against its definition.
extern LZMA_API lzma_ret
lzma_stream_encoder_single(
		lzma_stream *strm, const lzma_options_stream *options)
{
	lzma_next_strm_init(strm, stream_encoder_init, options);

	strm->internal->supported_actions[LZMA_RUN] = true;
	strm->internal->supported_actions[LZMA_FINISH] = true;

	return LZMA_OK;
}
diff --git a/src/liblzma/common/stream_flags_decoder.c b/src/liblzma/common/stream_flags_decoder.c new file mode 100644 index 00000000..d9c847ac --- /dev/null +++ b/src/liblzma/common/stream_flags_decoder.c @@ -0,0 +1,258 @@
///////////////////////////////////////////////////////////////////////////////
//
/// \file stream_flags_decoder.c
/// \brief Decodes Stream Header and tail from .lzma files
//
// Copyright (C) 2007 Lasse Collin
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
///////////////////////////////////////////////////////////////////////////////

#include "stream_flags_decoder.h"
#include "stream_common.h"


////////////
// Common //
////////////

/// State shared by the Stream Header decoder and the Stream tail decoder.
struct lzma_coder_s {
	/// Field that is decoded next. The SEQ_HEADER_* values are used by
	/// stream_header_decode() and the SEQ_FOOTER_* values by
	/// stream_tail_decode().
	enum {
		SEQ_HEADER_MAGIC,
		SEQ_HEADER_FLAGS,
		SEQ_HEADER_CRC32,

		SEQ_FOOTER_FLAGS,
		SEQ_FOOTER_MAGIC,
	} sequence;

	/// Byte position inside the field being decoded (magic bytes or
	/// CRC32)
	size_t pos;

	/// CRC32 calculated from the Stream Flags byte of the Stream Header
	uint32_t crc32;

	/// Decoded Stream Flags are stored here.
	lzma_stream_flags *options;
};


/// Frees the coder. This is used by both the header and the tail decoder.
static void
stream_header_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
{
	lzma_free(coder, allocator);
	return;
}


/// \brief      Decodes the Stream Flags byte
///
/// Bits 0-2 hold the Check type, bit 3 (0x08) tells if Block Headers
/// have CRC32, and bit 4 (0x10) tells if the Stream is Multi-Block.
///
/// \return     false on success; true if any of the reserved bits (0xE0)
///             is set, in which case *options is not modified.
static bool
stream_flags_decode(const uint8_t *in, lzma_stream_flags *options)
{
	// Reserved bits must be unset.
	if (*in & 0xE0)
		return true;

	options->check = *in & 0x07;
	options->has_crc32 = (*in & 0x08) != 0;
	options->is_multi = (*in & 0x10) != 0;

	return false;
}


////////////
// Header //
////////////

/// \brief      Decodes the Stream Header one byte at a time
///
/// The header consists of the Header Magic Bytes, the Stream Flags byte,
/// and the CRC32 of the Stream Flags byte. Only the input buffer is
/// used; the output and action arguments are ignored.
///
/// \return     LZMA_STREAM_END when the whole header has been decoded and
///             verified; LZMA_OK when more input is needed;
///             LZMA_DATA_ERROR if the magic bytes or the CRC32 don't
///             match; LZMA_HEADER_ERROR if reserved bits are set in the
///             Stream Flags; LZMA_PROG_ERROR on an unknown state.
static lzma_ret
stream_header_decode(lzma_coder *coder,
		lzma_allocator *allocator lzma_attribute((unused)),
		const uint8_t *restrict in, size_t *restrict in_pos,
		size_t in_size, uint8_t *restrict out lzma_attribute((unused)),
		size_t *restrict out_pos lzma_attribute((unused)),
		size_t out_size lzma_attribute((unused)),
		lzma_action action lzma_attribute((unused)))
{
	while (*in_pos < in_size)
	switch (coder->sequence) {
	case SEQ_HEADER_MAGIC:
		if (in[*in_pos] != lzma_header_magic[coder->pos])
			return LZMA_DATA_ERROR;

		++*in_pos;

		if (++coder->pos == sizeof(lzma_header_magic)) {
			// Reset the position; it is used again when
			// verifying the CRC32.
			coder->pos = 0;
			coder->sequence = SEQ_HEADER_FLAGS;
		}

		break;

	case SEQ_HEADER_FLAGS:
		if (stream_flags_decode(in + *in_pos, coder->options))
			return LZMA_HEADER_ERROR;

		// The CRC32 is calculated from the Stream Flags byte only.
		coder->crc32 = lzma_crc32(in + *in_pos, 1, 0);

		++*in_pos;
		coder->sequence = SEQ_HEADER_CRC32;
		break;

	case SEQ_HEADER_CRC32:
		// The CRC32 is stored the least significant byte first.
		if (in[*in_pos] != ((coder->crc32 >> (coder->pos * 8)) & 0xFF))
			return LZMA_DATA_ERROR;

		++*in_pos;

		if (++coder->pos == 4)
			return LZMA_STREAM_END;

		break;

	default:
		return LZMA_PROG_ERROR;
	}

	return LZMA_OK;
}


/// \brief      Allocates (if needed) and initializes the Stream Header
///             decoder
///
/// \return     LZMA_OK on success, LZMA_PROG_ERROR if options is NULL,
///             or LZMA_MEM_ERROR if the coder cannot be allocated.
static lzma_ret
stream_header_decoder_init(lzma_next_coder *next,
		lzma_allocator *allocator, lzma_stream_flags *options)
{
	if (options == NULL)
		return LZMA_PROG_ERROR;

	if (next->coder == NULL) {
		next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
		if (next->coder == NULL)
			return LZMA_MEM_ERROR;
	}

	// Set the function pointers unconditionally, because they may
	// have been pointing to footer decoder too.
	next->code = &stream_header_decode;
	next->end = &stream_header_decoder_end;

	next->coder->sequence = SEQ_HEADER_MAGIC;
	next->coder->pos = 0;
	next->coder->crc32 = 0;
	next->coder->options = options;

	return LZMA_OK;
}


/// Initializes *next as a Stream Header decoder; this is the entry point
/// for the other parts of the library.
///
/// NOTE(review): lzma_next_coder_init() is a macro defined elsewhere;
/// presumably it provides the return statement of this function --
/// confirm against its definition.
extern lzma_ret
lzma_stream_header_decoder_init(lzma_next_coder *next,
		lzma_allocator *allocator, lzma_stream_flags *options)
{
	lzma_next_coder_init(
			stream_header_decoder_init, next, allocator, options);
}


/// Initializes strm as a Stream Header decoder. Only LZMA_RUN is marked
/// as a supported action.
extern LZMA_API lzma_ret
lzma_stream_header_decoder(lzma_stream *strm, lzma_stream_flags *options)
{
	lzma_next_strm_init(strm, stream_header_decoder_init, options);

	strm->internal->supported_actions[LZMA_RUN] = true;

	return LZMA_OK;
}


//////////
// Tail //
//////////

/// \brief      Decodes the Stream tail one byte at a time
///
/// The tail consists of the Stream Flags byte followed by the Footer
/// Magic Bytes. Only the input buffer is used; the output and action
/// arguments are ignored.
///
/// \return     LZMA_STREAM_END when the whole tail has been decoded;
///             LZMA_OK when more input is needed; LZMA_HEADER_ERROR if
///             reserved bits are set in the Stream Flags;
///             LZMA_DATA_ERROR if the magic bytes don't match;
///             LZMA_PROG_ERROR on an unknown state.
static lzma_ret
stream_tail_decode(lzma_coder *coder,
		lzma_allocator *allocator lzma_attribute((unused)),
		const uint8_t *restrict in, size_t *restrict in_pos,
		size_t in_size, uint8_t *restrict out lzma_attribute((unused)),
		size_t *restrict out_pos lzma_attribute((unused)),
		size_t out_size lzma_attribute((unused)),
		lzma_action action lzma_attribute((unused)))
{
	while (*in_pos < in_size)
	switch (coder->sequence) {
	case SEQ_FOOTER_FLAGS:
		if (stream_flags_decode(in + *in_pos, coder->options))
			return LZMA_HEADER_ERROR;

		++*in_pos;
		coder->sequence = SEQ_FOOTER_MAGIC;
		break;

	case SEQ_FOOTER_MAGIC:
		if (in[*in_pos] != lzma_footer_magic[coder->pos])
			return LZMA_DATA_ERROR;

		++*in_pos;

		if (++coder->pos == sizeof(lzma_footer_magic))
			return LZMA_STREAM_END;

		break;

	default:
		return LZMA_PROG_ERROR;
	}

	return LZMA_OK;
}


/// \brief      Allocates (if needed) and initializes the Stream tail
///             decoder
///
/// \return     LZMA_OK on success, LZMA_PROG_ERROR if options is NULL,
///             or LZMA_MEM_ERROR if the coder cannot be allocated.
static lzma_ret
stream_tail_decoder_init(lzma_next_coder *next,
		lzma_allocator *allocator, lzma_stream_flags *options)
{
	if (options == NULL)
		return LZMA_PROG_ERROR;

	if (next->coder == NULL) {
		next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
		if (next->coder == NULL)
			return LZMA_MEM_ERROR;
	}

	// Set the function pointers unconditionally, because they may
	// have been pointing to header decoder too. The end function is
	// shared with the header decoder, because both use the same
	// coder structure.
	next->code = &stream_tail_decode;
	next->end = &stream_header_decoder_end;

	next->coder->sequence = SEQ_FOOTER_FLAGS;
	next->coder->pos = 0;
	next->coder->options = options;

	return LZMA_OK;
}


/// Initializes *next as a Stream tail decoder; this is the entry point
/// for the other parts of the library.
///
/// NOTE(review): lzma_next_coder_init2() is a macro defined elsewhere.
/// It is given stream_header_decoder_init in addition to the tail init
/// function, presumably to identify the coder type so that an existing
/// header decoder can be reused -- confirm against its definition.
extern lzma_ret
lzma_stream_tail_decoder_init(lzma_next_coder *next,
		lzma_allocator *allocator, lzma_stream_flags *options)
{
	lzma_next_coder_init2(next, allocator, stream_header_decoder_init,
			stream_tail_decoder_init, allocator, options);
}


/// Initializes strm as a Stream tail decoder. Only LZMA_RUN is marked
/// as a supported action.
extern LZMA_API lzma_ret
lzma_stream_tail_decoder(lzma_stream *strm, lzma_stream_flags *options)
{
	lzma_next_strm_init2(strm, stream_header_decoder_init,
			stream_tail_decoder_init, strm->allocator, options);

	strm->internal->supported_actions[LZMA_RUN] = true;

	return LZMA_OK;
}
diff --git a/src/liblzma/common/stream_flags_decoder.h b/src/liblzma/common/stream_flags_decoder.h new file mode 100644 index 00000000..e4b8e3c5 --- /dev/null +++ b/src/liblzma/common/stream_flags_decoder.h @@ -0,0 +1,31 @@
///////////////////////////////////////////////////////////////////////////////
//
/// \file stream_flags_decoder.h
/// \brief Decodes Stream Header and Footer from .lzma files
//
// Copyright (C) 2007 Lasse Collin
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the
// License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
///////////////////////////////////////////////////////////////////////////////

#ifndef LZMA_STREAM_FLAGS_DECODER_H
#define LZMA_STREAM_FLAGS_DECODER_H

#include "common.h"

/// Initializes *next as a decoder of the Stream Header. The decoded
/// Stream Flags are stored into *options.
extern lzma_ret lzma_stream_header_decoder_init(lzma_next_coder *next,
		lzma_allocator *allocator, lzma_stream_flags *options);

/// Initializes *next as a decoder of the Stream tail (Stream Flags
/// followed by the Footer Magic Bytes). The decoded Stream Flags are
/// stored into *options.
extern lzma_ret lzma_stream_tail_decoder_init(lzma_next_coder *next,
		lzma_allocator *allocator, lzma_stream_flags *options);

#endif
diff --git a/src/liblzma/common/stream_flags_encoder.c b/src/liblzma/common/stream_flags_encoder.c new file mode 100644 index 00000000..55468580 --- /dev/null +++ b/src/liblzma/common/stream_flags_encoder.c @@ -0,0 +1,75 @@
///////////////////////////////////////////////////////////////////////////////
//
/// \file stream_flags_encoder.c
/// \brief Encodes Stream Header and Footer for .lzma files
//
// Copyright (C) 2007 Lasse Collin
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "stream_common.h" + + +static bool +stream_flags_encode(uint8_t *flags_byte, const lzma_stream_flags *options) +{ + // Check type + if ((unsigned int)(options->check) > LZMA_CHECK_ID_MAX) + return true; + + *flags_byte = options->check; + + // Usage of CRC32 in Block Headers + if (options->has_crc32) + *flags_byte |= 0x08; + + // Single- or Multi-Block + if (options->is_multi) + *flags_byte |= 0x10; + + return false; +} + + +extern LZMA_API lzma_ret +lzma_stream_header_encode(uint8_t *out, const lzma_stream_flags *options) +{ + // Magic + memcpy(out, lzma_header_magic, sizeof(lzma_header_magic)); + + // Stream Flags + if (stream_flags_encode(out + sizeof(lzma_header_magic), options)) + return LZMA_PROG_ERROR;; + + // CRC32 of the Stream Header + const uint32_t crc = lzma_crc32(out + sizeof(lzma_header_magic), 1, 0); + + for (size_t i = 0; i < 4; ++i) + out[sizeof(lzma_header_magic) + 1 + i] = crc >> (i * 8); + + return LZMA_OK; +} + + +extern LZMA_API lzma_ret +lzma_stream_tail_encode(uint8_t *out, const lzma_stream_flags *options) +{ + // Stream Flags + if (stream_flags_encode(out, options)) + return LZMA_PROG_ERROR; + + // Magic + memcpy(out + 1, lzma_footer_magic, sizeof(lzma_footer_magic)); + + return LZMA_OK; +} diff --git a/src/liblzma/common/sysdefs.h b/src/liblzma/common/sysdefs.h new file mode 120000 index 00000000..c6cb6768 --- /dev/null +++ b/src/liblzma/common/sysdefs.h @@ -0,0 +1 @@ +../../common/sysdefs.h
\ No newline at end of file
diff --git a/src/liblzma/common/version.c b/src/liblzma/common/version.c new file mode 100644 index 00000000..dffec7ff --- /dev/null +++ b/src/liblzma/common/version.c @@ -0,0 +1,25 @@
///////////////////////////////////////////////////////////////////////////////
//
/// \file version.c
/// \brief liblzma version number
//
// Copyright (C) 2007 Lasse Collin
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
///////////////////////////////////////////////////////////////////////////////

#include "common.h"


/// Version of the library as an integer; the value comes from the
/// LZMA_VERSION macro at the time the library was compiled.
LZMA_API const uint32_t lzma_version_number = LZMA_VERSION;

/// Version of the library as a string; the value comes from the
/// PACKAGE_VERSION macro at the time the library was compiled.
LZMA_API const char *const lzma_version_string = PACKAGE_VERSION;
diff --git a/src/liblzma/common/vli_decoder.c b/src/liblzma/common/vli_decoder.c new file mode 100644 index 00000000..2b89c1a7 --- /dev/null +++ b/src/liblzma/common/vli_decoder.c @@ -0,0 +1,69 @@
///////////////////////////////////////////////////////////////////////////////
//
/// \file vli_decoder.c
/// \brief Decodes variable-length integers
//
// Copyright (C) 2007 Lasse Collin
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
+// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + + +extern LZMA_API lzma_ret +lzma_vli_decode(lzma_vli *restrict vli, size_t *restrict vli_pos, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size) +{ + if (*vli > LZMA_VLI_VALUE_MAX || *vli_pos >= 9 + || (*vli >> (7 * *vli_pos)) != 0) + return LZMA_PROG_ERROR; + + if (*in_pos >= in_size) + return LZMA_BUF_ERROR; + + if (*vli_pos == 0) { + *vli_pos = 1; + + if (in[*in_pos] <= 0x7F) { + // Single-byte integer + *vli = in[*in_pos]; + ++*in_pos; + return LZMA_STREAM_END; + } + + *vli = in[*in_pos] & 0x7F; + ++*in_pos; + } + + while (*in_pos < in_size) { + // Read in the next byte. + *vli |= (lzma_vli)(in[*in_pos] & 0x7F) << (*vli_pos * 7); + ++*vli_pos; + + // Check if this is the last byte of a multibyte integer. + if (in[*in_pos] & 0x80) { + ++*in_pos; + return LZMA_STREAM_END; + } + + // Limit variable-length representation to nine bytes. + if (*vli_pos == 9) + return LZMA_DATA_ERROR; + + // Increment input position only when the byte was accepted. 
+ ++*in_pos; + } + + return LZMA_OK; +} diff --git a/src/liblzma/common/vli_encoder.c b/src/liblzma/common/vli_encoder.c new file mode 100644 index 00000000..1ecdb0d2 --- /dev/null +++ b/src/liblzma/common/vli_encoder.c @@ -0,0 +1,81 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file vli_encoder.c +/// \brief Encodes variable-length integers +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + + +extern LZMA_API lzma_ret +lzma_vli_encode(lzma_vli vli, size_t *restrict vli_pos, size_t vli_size, + uint8_t *restrict out, size_t *restrict out_pos, + size_t out_size) +{ + if (vli > LZMA_VLI_VALUE_MAX || *vli_pos >= 9 || vli_size > 9 + || (vli != 0 && (vli >> (7 * *vli_pos)) == 0)) + return LZMA_PROG_ERROR; + + if (*out_pos >= out_size) + return LZMA_BUF_ERROR; + + if (*vli_pos == 0) { + *vli_pos = 1; + + if (vli <= 0x7F && *vli_pos >= vli_size) { + // Single-byte integer + out[(*out_pos)++] = vli; + return LZMA_STREAM_END; + } + + // First byte of a multibyte integer + out[(*out_pos)++] = (vli & 0x7F) | 0x80; + } + + while (*out_pos < out_size) { + const lzma_vli b = vli >> (7 * *vli_pos); + ++*vli_pos; + + if (b <= 0x7F && *vli_pos >= vli_size) { + // Last byte of a multibyte integer + out[(*out_pos)++] = (b & 0xFF) | 0x80; + return LZMA_STREAM_END; + } + + // Middle byte of a multibyte integer + 
out[(*out_pos)++] = b & 0x7F; + } + + // vli is not yet completely written out. + return LZMA_OK; +} + + +extern LZMA_API size_t +lzma_vli_size(lzma_vli vli) +{ + if (vli > LZMA_VLI_VALUE_MAX) + return 0; + + size_t i = 0; + do { + vli >>= 7; + ++i; + } while (vli != 0); + + assert(i <= 9); + return i; +} diff --git a/src/liblzma/common/vli_reverse_decoder.c b/src/liblzma/common/vli_reverse_decoder.c new file mode 100644 index 00000000..68ca6a42 --- /dev/null +++ b/src/liblzma/common/vli_reverse_decoder.c @@ -0,0 +1,55 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file vli_reverse_decoder.c +/// \brief Decodes variable-length integers starting at end of the buffer +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + + +extern LZMA_API lzma_ret +lzma_vli_reverse_decode(lzma_vli *vli, const uint8_t *in, size_t *in_size) +{ + if (*in_size == 0) + return LZMA_BUF_ERROR; + + size_t i = *in_size - 1; + *vli = in[i] & 0x7F; + + if (!(in[i] & 0x80)) { + *in_size = i; + return LZMA_OK; + } + + const size_t end = *in_size > LZMA_VLI_BYTES_MAX + ? 
*in_size - LZMA_VLI_BYTES_MAX : 0; + + do { + if (i-- == end) { + if (*in_size < LZMA_VLI_BYTES_MAX) + return LZMA_BUF_ERROR; + + return LZMA_DATA_ERROR; + } + + *vli <<= 7; + *vli = in[i] & 0x7F; + + } while (!(in[i] & 0x80)); + + *in_size = i; + return LZMA_OK; +} diff --git a/src/liblzma/lz/Makefile.am b/src/liblzma/lz/Makefile.am new file mode 100644 index 00000000..5c27e2f2 --- /dev/null +++ b/src/liblzma/lz/Makefile.am @@ -0,0 +1,63 @@ +## +## Copyright (C) 2007 Lasse Collin +## +## This library is free software; you can redistribute it and/or +## modify it under the terms of the GNU Lesser General Public +## License as published by the Free Software Foundation; either +## version 2.1 of the License, or (at your option) any later version. +## +## This library is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## Lesser General Public License for more details. 
+## + +noinst_LTLIBRARIES = liblz.la +liblz_la_CPPFLAGS = \ + -I@top_srcdir@/src/liblzma/api \ + -I@top_srcdir@/src/liblzma/common \ + -I@top_srcdir@/src/liblzma/check +liblz_la_SOURCES = + + +if COND_MAIN_ENCODER +liblz_la_SOURCES += \ + lz_encoder.c \ + lz_encoder.h \ + lz_encoder_private.h \ + match_c.h \ + match_h.h + +if COND_MF_HC3 +liblz_la_SOURCES += hc3.c hc3.h +liblz_la_CPPFLAGS += -DHAVE_HC3 +endif + +if COND_MF_HC4 +liblz_la_SOURCES += hc4.c hc4.h +liblz_la_CPPFLAGS += -DHAVE_HC4 +endif + +if COND_MF_BT2 +liblz_la_SOURCES += bt2.c bt2.h +liblz_la_CPPFLAGS += -DHAVE_BT2 +endif + +if COND_MF_BT3 +liblz_la_SOURCES += bt3.c bt3.h +liblz_la_CPPFLAGS += -DHAVE_BT3 +endif + +if COND_MF_BT4 +liblz_la_SOURCES += bt4.c bt4.h +liblz_la_CPPFLAGS += -DHAVE_BT4 +endif + +endif + + +if COND_MAIN_DECODER +liblz_la_SOURCES += \ + lz_decoder.c \ + lz_decoder.h +endif diff --git a/src/liblzma/lz/bt2.c b/src/liblzma/lz/bt2.c new file mode 100644 index 00000000..7dc4cb80 --- /dev/null +++ b/src/liblzma/lz/bt2.c @@ -0,0 +1,27 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file bt2.c +/// \brief Binary Tree 2 +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "bt2.h" + +#undef IS_HASH_CHAIN +#undef HASH_ARRAY_2 +#undef HASH_ARRAY_3 + +#include "match_c.h" diff --git a/src/liblzma/lz/bt2.h b/src/liblzma/lz/bt2.h new file mode 100644 index 00000000..33cb52cd --- /dev/null +++ b/src/liblzma/lz/bt2.h @@ -0,0 +1,31 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file bt2.h +/// \brief Binary Tree 2 +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_BT2_H +#define LZMA_BT2_H + +#undef LZMA_MATCH_FINDER_NAME_LOWER +#undef LZMA_MATCH_FINDER_NAME_UPPER +#define LZMA_MATCH_FINDER_NAME_LOWER bt2 +#define LZMA_MATCH_FINDER_NAME_UPPER BT2 + +#include "match_h.h" + +#endif diff --git a/src/liblzma/lz/bt3.c b/src/liblzma/lz/bt3.c new file mode 100644 index 00000000..d44310f3 --- /dev/null +++ b/src/liblzma/lz/bt3.c @@ -0,0 +1,29 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file bt3.c +/// \brief Binary Tree 3 +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "bt3.h" + +#undef IS_HASH_CHAIN +#undef HASH_ARRAY_2 +#undef HASH_ARRAY_3 + +#define HASH_ARRAY_2 + +#include "match_c.h" diff --git a/src/liblzma/lz/bt3.h b/src/liblzma/lz/bt3.h new file mode 100644 index 00000000..247c7e5f --- /dev/null +++ b/src/liblzma/lz/bt3.h @@ -0,0 +1,31 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file bt3.h +/// \brief Binary Tree 3 +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_BT3_H +#define LZMA_BT3_H + +#undef LZMA_MATCH_FINDER_NAME_LOWER +#undef LZMA_MATCH_FINDER_NAME_UPPER +#define LZMA_MATCH_FINDER_NAME_LOWER bt3 +#define LZMA_MATCH_FINDER_NAME_UPPER BT3 + +#include "match_h.h" + +#endif diff --git a/src/liblzma/lz/bt4.c b/src/liblzma/lz/bt4.c new file mode 100644 index 00000000..6e1042c9 --- /dev/null +++ b/src/liblzma/lz/bt4.c @@ -0,0 +1,30 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file bt4.c +/// \brief Binary Tree 4 +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "bt4.h" + +#undef IS_HASH_CHAIN +#undef HASH_ARRAY_2 +#undef HASH_ARRAY_3 + +#define HASH_ARRAY_2 +#define HASH_ARRAY_3 + +#include "match_c.h" diff --git a/src/liblzma/lz/bt4.h b/src/liblzma/lz/bt4.h new file mode 100644 index 00000000..e3fcf6ac --- /dev/null +++ b/src/liblzma/lz/bt4.h @@ -0,0 +1,31 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file bt4.h +/// \brief Binary Tree 4 +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_BT4_H +#define LZMA_BT4_H + +#undef LZMA_MATCH_FINDER_NAME_LOWER +#undef LZMA_MATCH_FINDER_NAME_UPPER +#define LZMA_MATCH_FINDER_NAME_LOWER bt4 +#define LZMA_MATCH_FINDER_NAME_UPPER BT4 + +#include "match_h.h" + +#endif diff --git a/src/liblzma/lz/hc3.c b/src/liblzma/lz/hc3.c new file mode 100644 index 00000000..22b5689b --- /dev/null +++ b/src/liblzma/lz/hc3.c @@ -0,0 +1,30 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file hc3.c +/// \brief Hash Chain 3 +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "hc3.h" + +#undef IS_HASH_CHAIN +#undef HASH_ARRAY_2 +#undef HASH_ARRAY_3 + +#define IS_HASH_CHAIN +#define HASH_ARRAY_2 + +#include "match_c.h" diff --git a/src/liblzma/lz/hc3.h b/src/liblzma/lz/hc3.h new file mode 100644 index 00000000..97be0b1d --- /dev/null +++ b/src/liblzma/lz/hc3.h @@ -0,0 +1,31 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file hc3.h +/// \brief Hash Chain 3 +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_HC3_H +#define LZMA_HC3_H + +#undef LZMA_MATCH_FINDER_NAME_LOWER +#undef LZMA_MATCH_FINDER_NAME_UPPER +#define LZMA_MATCH_FINDER_NAME_LOWER hc3 +#define LZMA_MATCH_FINDER_NAME_UPPER HC3 + +#include "match_h.h" + +#endif diff --git a/src/liblzma/lz/hc4.c b/src/liblzma/lz/hc4.c new file mode 100644 index 00000000..a55cfd09 --- /dev/null +++ b/src/liblzma/lz/hc4.c @@ -0,0 +1,31 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file hc4.c +/// \brief Hash Chain 4 +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "hc4.h" + +#undef IS_HASH_CHAIN +#undef HASH_ARRAY_2 +#undef HASH_ARRAY_3 + +#define IS_HASH_CHAIN +#define HASH_ARRAY_2 +#define HASH_ARRAY_3 + +#include "match_c.h" diff --git a/src/liblzma/lz/hc4.h b/src/liblzma/lz/hc4.h new file mode 100644 index 00000000..dc072e2f --- /dev/null +++ b/src/liblzma/lz/hc4.h @@ -0,0 +1,31 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file hc4.h +/// \brief Hash Chain 4 +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_HC4_H +#define LZMA_HC4_H + +#undef LZMA_MATCH_FINDER_NAME_LOWER +#undef LZMA_MATCH_FINDER_NAME_UPPER +#define LZMA_MATCH_FINDER_NAME_LOWER hc4 +#define LZMA_MATCH_FINDER_NAME_UPPER HC4 + +#include "match_h.h" + +#endif diff --git a/src/liblzma/lz/lz_decoder.c b/src/liblzma/lz/lz_decoder.c new file mode 100644 index 00000000..9c110dec --- /dev/null +++ b/src/liblzma/lz/lz_decoder.c @@ -0,0 +1,462 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lz_decoder.c +/// \brief LZ out window +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "lz_decoder.h" + + +/// Minimum size of allocated dictionary +#define DICT_SIZE_MIN 8192 + +/// When there is less than this amount of data available for decoding, +/// it is moved to the temporary buffer which +/// - protects from reads past the end of the buffer; and +/// - stored the incomplete data between lzma_code() calls. +/// +/// \note TEMP_LIMIT must be at least as much as +/// REQUIRED_IN_BUFFER_SIZE defined in lzma_decoder.c. +#define TEMP_LIMIT 32 + +// lzma_lz_decoder.dict[] must be three times the size of TEMP_LIMIT. 
/// Copy pending uncompressed data from the history buffer to out[].
///
/// - Copy as much data as possible from lz->dict[] to out[].
/// - Update *out_pos, lz->start, and lz->end accordingly.
/// - Wrap lz->pos to the beginning of lz->dict[] if there is a danger that
///   it may go past the end of the buffer (lz->pos >= lz->must_flush_pos).
///
/// \return     true if out[] is full after flushing (*out_pos == out_size)
static inline bool
flush(lzma_lz_decoder *restrict lz, uint8_t *restrict out,
		size_t *restrict out_pos, size_t out_size)
{
	// Flush uncompressed data from the history buffer to
	// the output buffer. This is done in two phases.

	assert(lz->start <= lz->end);

	// Phase 1: flush if pos < start < end, that is, the write position
	// has already wrapped and unflushed data remains between start and
	// end.
	if (lz->pos < lz->start && lz->start < lz->end) {
		bufcpy(lz->dict, &lz->start, lz->end, out, out_pos, out_size);

		// If we reached end of the data in history buffer,
		// wrap to the beginning.
		if (lz->start == lz->end)
			lz->start = 0;
	}

	// Phase 2: flush if start < pos. This is not an `else' branch of
	// the previous `if', because the previous one may wrap lz->start to
	// zero and thereby make this condition true.
	if (lz->start < lz->pos) {
		bufcpy(lz->dict, &lz->start,
				lz->pos, out, out_pos, out_size);

		if (lz->pos >= lz->must_flush_pos) {
			// Wrap the flushing position if we have
			// flushed the whole history buffer.
			if (lz->pos == lz->start)
				lz->start = 0;

			// Wrap the write position and store to lz->end
			// how much data there is in the buffer.
			lz->end = lz->pos;
			lz->pos = 0;
			lz->is_full = true;
		}
	}

	assert(lz->pos < lz->must_flush_pos);

	return *out_pos == out_size;
}
If no safe value can be found, +/// set lz->limit to zero. When flushing, only as little data will be +/// decoded as is needed to fill the output buffer (lowers both latency +/// and throughput). +/// +/// \return true if there is no space for new uncompressed data. +/// +static inline bool +set_limit(lzma_lz_decoder *lz, size_t out_avail, bool flushing) +{ + // Set the limit so that writing to dict[limit + match_max_len - 1] + // doesn't overwrite any unflushed data and doesn't write past the + // end of the dict buffer. + if (lz->start <= lz->pos) { + // We can fill the buffer from pos till the end + // of the dict buffer. + lz->limit = lz->must_flush_pos; + } else if (lz->pos + lz->match_max_len < lz->start) { + // There's some unflushed data between pos and end of the + // buffer. Limit so that we don't overwrite the unflushed data. + lz->limit = lz->start - lz->match_max_len; + } else { + // Buffer is too full. + lz->limit = 0; + return true; + } + + // Finetune the limit a bit if it isn't zero. + + assert(lz->limit > lz->pos); + const size_t dict_avail = lz->limit - lz->pos; + + if (lz->uncompressed_size < dict_avail) { + // Finishing a stream that doesn't have + // an end of stream marker. + lz->limit = lz->pos + lz->uncompressed_size; + + } else if (flushing && out_avail < dict_avail) { + // Flushing enabled, decoding only as little as needed to + // fill the out buffer (if there's enough input, of course). + lz->limit = lz->pos + out_avail; + } + + return lz->limit == lz->pos; +} + + +/// Takes care of wrapping the data into temporary buffer when needed, +/// and calls the actual decoder. 
+/// +/// \return true if error occurred +/// +static inline bool +call_process(lzma_coder *restrict coder, const uint8_t *restrict in, + size_t *restrict in_pos, size_t in_size) +{ + // It would be nice and simple if we could just give in[] to the + // decoder, but the requirement of zlib-like API forces us to be + // able to make *in_pos == in_size whenever there is enough output + // space. If needed, we will append a few bytes from in[] to + // a temporary buffer and decode enough to reach the part that + // was copied from in[]. Then we can continue with the real in[]. + + bool error; + const size_t dict_old_pos = coder->lz.pos; + const size_t in_avail = in_size - *in_pos; + + if (coder->lz.temp_size + in_avail < 2 * TEMP_LIMIT) { + // Copy all the available input from in[] to temp[]. + memcpy(coder->lz.temp + coder->lz.temp_size, + in + *in_pos, in_avail); + coder->lz.temp_size += in_avail; + *in_pos += in_avail; + assert(*in_pos == in_size); + + // Decode as much as possible. + size_t temp_used = 0; + error = coder->lz.process(coder, coder->lz.temp, &temp_used, + coder->lz.temp_size, true); + assert(temp_used <= coder->lz.temp_size); + + // Move the remaining data to the beginning of temp[]. + coder->lz.temp_size -= temp_used; + memmove(coder->lz.temp, coder->lz.temp + temp_used, + coder->lz.temp_size); + + } else if (coder->lz.temp_size > 0) { + // Fill temp[] unless it is already full because we aren't + // the last filter in the chain. + size_t copy_size = 0; + if (coder->lz.temp_size < 2 * TEMP_LIMIT) { + assert(*in_pos < in_size); + copy_size = 2 * TEMP_LIMIT - coder->lz.temp_size; + memcpy(coder->lz.temp + coder->lz.temp_size, + in + *in_pos, copy_size); + // NOTE: We don't update lz.temp_size or *in_pos yet. + } + + size_t temp_used = 0; + error = coder->lz.process(coder, coder->lz.temp, &temp_used, + coder->lz.temp_size + copy_size, false); + + if (temp_used < coder->lz.temp_size) { + // Only very little input data was consumed. 
/// Decode from in[] into the history buffer and flush it to out[] until
/// the output is full, the input cannot be used any further, or the end
/// of the payload is reached.
///
/// \param      flushing    Passed on to set_limit(); when true, only as
///                         much data is decoded as fits into out[].
///
/// \return     - LZMA_STREAM_END: End of Payload Marker was detected or
///               uncompressed_size reached zero, and everything in the
///               history buffer has been flushed (lz.start == lz.pos).
///             - LZMA_DATA_ERROR: call_process() reported an error.
///             - LZMA_OK: otherwise.
static lzma_ret
decode_buffer(lzma_coder *coder,
		const uint8_t *restrict in, size_t *restrict in_pos,
		size_t in_size, uint8_t *restrict out,
		size_t *restrict out_pos, size_t out_size,
		bool flushing)
{
	bool stop = false;

	while (true) {
		// Flush from coder->lz.dict to out[]. The return value is
		// not needed because *out_pos is checked right below.
		flush(&coder->lz, out, out_pos, out_size);

		// All done?
		if (*out_pos == out_size
				|| stop
				|| coder->lz.eopm_detected
				|| coder->lz.uncompressed_size == 0)
			break;

		// Set write limit in the dictionary.
		if (set_limit(&coder->lz, out_size - *out_pos, flushing))
			break;

		// Decode more data.
		if (call_process(coder, in, in_pos, in_size))
			return LZMA_DATA_ERROR;

		// Set stop to true if we must not call call_process() again
		// during this function call.
		// FIXME: Can this make the loop exit too early? It wouldn't
		// cause data corruption so not a critical problem. It can
		// happen if dictionary gets full and lz.temp still contains
		// a few bytes data that we could decode right now.
		if (*in_pos == in_size && coder->lz.temp_size <= TEMP_LIMIT
				&& coder->lz.pos < coder->lz.limit)
			stop = true;
	}

	// If we have decoded everything (EOPM detected or uncompressed_size
	// bytes were processed) to the history buffer, and also flushed
	// everything from the history buffer, our job is done.
	if ((coder->lz.eopm_detected
			|| coder->lz.uncompressed_size == 0)
			&& coder->lz.start == coder->lz.pos)
		return LZMA_STREAM_END;

	return LZMA_OK;
}
+ const bool flushing = action == LZMA_SYNC_FLUSH; + while (*out_pos < out_size) { + if (!coder->lz.next_finished + && coder->lz.temp_size < LZMA_BUFFER_SIZE) { + const lzma_ret ret = coder->next.code( + coder->next.coder, + allocator, in, in_pos, in_size, + coder->lz.temp, &coder->lz.temp_size, + LZMA_BUFFER_SIZE, action); + + if (ret == LZMA_STREAM_END) + coder->lz.next_finished = true; + else if (coder->lz.temp_size < LZMA_BUFFER_SIZE + || ret != LZMA_OK) + return ret; + } + + if (coder->lz.this_finished) { + if (coder->lz.temp_size != 0) + return LZMA_DATA_ERROR; + + if (coder->lz.next_finished) + return LZMA_STREAM_END; + + return LZMA_OK; + } + + size_t dummy = 0; + const lzma_ret ret = decode_buffer(coder, NULL, &dummy, 0, + out, out_pos, out_size, flushing); + + if (ret == LZMA_STREAM_END) + coder->lz.this_finished = true; + else if (ret != LZMA_OK) + return ret; + else if (coder->lz.next_finished && *out_pos < out_size) + return LZMA_DATA_ERROR; + } + + return LZMA_OK; +} + + +/// \brief Initializes LZ part of the LZMA decoder or Inflate +/// +/// \param history_size Number of bytes the LZ out window is +/// supposed keep available from the output +/// history. +/// \param match_max_len Number of bytes a single decoding loop +/// can advance the write position (lz->pos) +/// in the history buffer (lz->dict). +/// +/// \note This function is called by LZMA decoder and Inflate init()s. +/// It's up to those functions allocate *lz and initialize it +/// with LZMA_LZ_DECODER_INIT. +extern lzma_ret +lzma_lz_decoder_reset(lzma_lz_decoder *lz, lzma_allocator *allocator, + bool (*process)(lzma_coder *restrict coder, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, bool has_safe_buffer), + lzma_vli uncompressed_size, + size_t history_size, size_t match_max_len) +{ + // Set uncompressed size. + lz->uncompressed_size = uncompressed_size; + + // Limit the history size to roughly sane values. This is primarily + // to prevent integer overflows. 
+ if (history_size > UINT32_MAX / 2) + return LZMA_HEADER_ERROR; + + // Store the value actually requested. We use it for sanity checks + // when repeating data from the history buffer. + lz->requested_size = history_size; + + // Avoid tiny history buffer sizes for performance reasons. + // TODO: Test if this actually helps... + if (history_size < DICT_SIZE_MIN) + history_size = DICT_SIZE_MIN; + + // The real size of the history buffer is a bit bigger than + // requested by our caller. This allows us to do some optimizations, + // which help not only speed but simplicity of the code; specifically, + // we can make sure that there is always at least match_max_len + // bytes immediatelly available for writing without a need to wrap + // the history buffer. + const size_t dict_real_size = history_size + 2 * match_max_len + 1; + + // Reallocate memory if needed. + if (history_size != lz->size || match_max_len != lz->match_max_len) { + // Destroy the old buffer. + lzma_lz_decoder_end(lz, allocator); + + lz->size = history_size; + lz->match_max_len = match_max_len; + lz->must_flush_pos = history_size + match_max_len + 1; + + lz->dict = lzma_alloc(dict_real_size, allocator); + if (lz->dict == NULL) + return LZMA_MEM_ERROR; + } + + // Clean up the buffers to make it very sure that there are + // no information leaks when multiple steams are decoded + // with the same decoder structures. + memzero(lz->dict, dict_real_size); + memzero(lz->temp, LZMA_BUFFER_SIZE); + + // Reset the variables so that lz_get_byte(lz, 0) will return '\0'. + lz->pos = 0; + lz->start = 0; + lz->end = dict_real_size; + lz->is_full = false; + lz->eopm_detected = false; + lz->next_finished = false; + lz->this_finished = false; + + // Set the process function pointer. 
+ lz->process = process; + + return LZMA_OK; +} + + +extern void +lzma_lz_decoder_end(lzma_lz_decoder *lz, lzma_allocator *allocator) +{ + lzma_free(lz->dict, allocator); + lz->dict = NULL; + lz->size = 0; + lz->match_max_len = 0; + return; +} diff --git a/src/liblzma/lz/lz_decoder.h b/src/liblzma/lz/lz_decoder.h new file mode 100644 index 00000000..a8a585cd --- /dev/null +++ b/src/liblzma/lz/lz_decoder.h @@ -0,0 +1,214 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lz_decoder.h +/// \brief LZ out window +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZ_OUT_H +#define LZMA_LZ_OUT_H + +#include "common.h" + + +/// Get a byte from the history buffer. +#define lz_get_byte(lz, distance) \ + ((distance) < (lz).pos \ + ? (lz).dict[(lz).pos - (distance) - 1] \ + : (lz).dict[(lz).pos - (distance) - 1 + (lz).end]) + + +#define LZMA_LZ_DECODER_INIT \ + (lzma_lz_decoder){ .dict = NULL, .size = 0, .match_max_len = 0 } + + +typedef struct { + /// Function to do the actual decoding (LZMA or Inflate) + bool (*process)(lzma_coder *restrict coder, const uint8_t *restrict in, + size_t *restrict in_pos, size_t size_in, + bool has_safe_buffer); + + /// Pointer to dictionary (history) buffer. + /// \note Not 'restrict' because can alias next_out. 
+ uint8_t *dict; + + /// Next write goes to dict[pos]. + size_t pos; + + /// Next byte to flush is buffer[start]. + size_t start; + + /// First byte to not flush is buffer[end]. + size_t end; + + /// First position to which data must not be written. + size_t limit; + + /// True if dictionary has needed wrapping. + bool is_full; + + /// True if process() has detected End of Payload Marker. + bool eopm_detected; + + /// True if the next coder in the chain has returned LZMA_STREAM_END. + bool next_finished; + + /// True if the LZ decoder (e.g. LZMA) has detected End of Payload + /// Marker. This may become true before next_finished becomes true. + bool this_finished; + + /// When pos >= must_flush_pos, we must not call process(). + size_t must_flush_pos; + + /// Maximum number of bytes that a single decoding loop inside + /// process() can produce data into dict. This amount is kept + /// always available at dict + pos i.e. it is safe to write a byte + /// to dict[pos + match_max_len - 1]. + size_t match_max_len; + + /// Number of bytes allocated to dict. + size_t size; + + /// Requested size of the dictionary. This is needed because we avoid + /// using extremely tiny history buffers. + size_t requested_size; + + /// Uncompressed Size or LZMA_VLI_VALUE_UNKNOWN if unknown. + lzma_vli uncompressed_size; + + /// Number of bytes currently in temp[]. + size_t temp_size; + + /// Temporary buffer needed when + /// 1) we cannot make the input buffer completely empty; or + /// 2) we are not the last filter in the chain. 
+ uint8_t temp[LZMA_BUFFER_SIZE]; + +} lzma_lz_decoder; + + +///////////////////////// +// Function prototypes // +///////////////////////// + +extern lzma_ret lzma_lz_decoder_reset(lzma_lz_decoder *lz, + lzma_allocator *allocator, bool (*process)( + lzma_coder *restrict coder, const uint8_t *restrict in, + size_t *restrict in_pos, size_t in_size, + bool has_safe_buffer), + lzma_vli uncompressed_size, + size_t history_size, size_t match_max_len); + +extern lzma_ret lzma_lz_decode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, + lzma_action action); + +/// Deallocates the history buffer if one exists. +extern void lzma_lz_decoder_end( + lzma_lz_decoder *lz, lzma_allocator *allocator); + +////////////////////// +// Inline functions // +////////////////////// + +// Repeat a block of data from the history. Because memcpy() is faster +// than copying byte by byte in a loop, the copying process gets split +// into three cases: +// 1. distance < length +// Source and target areas overlap, thus we can't use memcpy() +// (nor memmove()) safely. +// TODO: If this is common enough, it might be worth optimizing this +// more e.g. by checking if distance > sizeof(uint8_t*) and using +// memcpy in small chunks. +// 2. distance < pos +// This is the easiest and the fastest case. The block being copied +// is a contiguous piece in the history buffer. The buffer offset +// doesn't need wrapping. +// 3. distance >= pos +// We need to wrap the position, because otherwise we would try copying +// behind the first byte of the allocated buffer. It is possible that +// the block is fragmented into two pieces, thus we might need to call +// memcpy() twice. 
+// NOTE: The function using this macro must ensure that length is positive +// and that distance is FIXME +static inline bool +lzma_lz_out_repeat(lzma_lz_decoder *lz, size_t distance, size_t length) +{ + // Validate offset of the block to be repeated. It doesn't + // make sense to copy data behind the beginning of the stream. + // Leaving this check out would lead to a security problem, + // in which e.g. the data of the previously decoded file(s) + // would be leaked (or whatever happens to be in unused + // part of the dictionary buffer). + if (distance >= lz->pos && !lz->is_full) + return false; + + // It also doesn't make sense to copy data from farther back than + // the dictionary size. + if (distance >= lz->requested_size) + return false; + + // The caller must have checked these! + assert(distance <= lz->size); + assert(length > 0); + assert(length <= lz->match_max_len); + + // Copy the amount of data requested by the decoder. + if (distance < length) { + // Source and target areas overlap, thus we can't use + // memcpy() nor even memmove() safely. :-( + // TODO: Copying byte by byte is slow. It might be + // worth optimizing this more if this case is common. + do { + lz->dict[lz->pos] = lz_get_byte(*lz, distance); + ++lz->pos; + } while (--length > 0); + + } else if (distance < lz->pos) { + // The easiest and fastest case + memcpy(lz->dict + lz->pos, + lz->dict + lz->pos - distance - 1, + length); + lz->pos += length; + + } else { + // The bigger the dictionary, the more rare this + // case occurs. We need to "wrap" the dict, thus + // we might need two memcpy() to copy all the data. 
+ assert(lz->is_full); + const uint32_t copy_pos = lz->pos - distance - 1 + lz->end; + uint32_t copy_size = lz->end - copy_pos; + + if (copy_size < length) { + memcpy(lz->dict + lz->pos, lz->dict + copy_pos, + copy_size); + lz->pos += copy_size; + copy_size = length - copy_size; + memcpy(lz->dict + lz->pos, lz->dict, copy_size); + lz->pos += copy_size; + } else { + memcpy(lz->dict + lz->pos, lz->dict + copy_pos, + length); + lz->pos += length; + } + } + + return true; +} + +#endif diff --git a/src/liblzma/lz/lz_encoder.c b/src/liblzma/lz/lz_encoder.c new file mode 100644 index 00000000..bc38cae2 --- /dev/null +++ b/src/liblzma/lz/lz_encoder.c @@ -0,0 +1,481 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lz_encoder.c +/// \brief LZ in window +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "lz_encoder_private.h" + +// Hash Chains +#ifdef HAVE_HC3 +# include "hc3.h" +#endif +#ifdef HAVE_HC4 +# include "hc4.h" +#endif + +// Binary Trees +#ifdef HAVE_BT2 +# include "bt2.h" +#endif +#ifdef HAVE_BT3 +# include "bt3.h" +#endif +#ifdef HAVE_BT4 +# include "bt4.h" +#endif + + +/// This is needed in two places so provide a macro. 
+#define get_cyclic_buffer_size(history_size) ((history_size) + 1) + + +/// Calculate certain match finder properties and validate the calculated +/// values. This is as its own function, because *num_items is needed to +/// calculate memory requirements in common/memory.c. Returns true if match_finder is not supported, false on success. +extern uint32_t +lzma_lz_encoder_hash_properties(lzma_match_finder match_finder, + uint32_t history_size, uint32_t *restrict hash_mask, + uint32_t *restrict hash_size_sum, uint32_t *restrict num_items) +{ + uint32_t fix_hash_size; + uint32_t sons; + + switch (match_finder) { +#ifdef HAVE_HC3 + case LZMA_MF_HC3: + fix_hash_size = LZMA_HC3_FIX_HASH_SIZE; + sons = 1; + break; +#endif +#ifdef HAVE_HC4 + case LZMA_MF_HC4: + fix_hash_size = LZMA_HC4_FIX_HASH_SIZE; + sons = 1; + break; +#endif +#ifdef HAVE_BT2 + case LZMA_MF_BT2: + fix_hash_size = LZMA_BT2_FIX_HASH_SIZE; + sons = 2; + break; +#endif +#ifdef HAVE_BT3 + case LZMA_MF_BT3: + fix_hash_size = LZMA_BT3_FIX_HASH_SIZE; + sons = 2; + break; +#endif +#ifdef HAVE_BT4 + case LZMA_MF_BT4: + fix_hash_size = LZMA_BT4_FIX_HASH_SIZE; + sons = 2; + break; +#endif + default: + return true; + } + + uint32_t hs; + +#ifdef HAVE_BT2 + if (match_finder == LZMA_MF_BT2) { + // NOTE: hash_mask is not used by the BT2 match finder, + // but it is initialized just in case. 
+ hs = LZMA_BT2_HASH_SIZE; + *hash_mask = 0; + } else +#endif + { + hs = history_size - 1; + hs |= (hs >> 1); + hs |= (hs >> 2); + hs |= (hs >> 4); + hs |= (hs >> 8); + hs >>= 1; + hs |= 0xFFFF; + + if (hs > (UINT32_C(1) << 24)) { + if (match_finder == LZMA_MF_HC4 + || match_finder == LZMA_MF_BT4) + hs >>= 1; + else + hs = (1 << 24) - 1; + } + + *hash_mask = hs; + ++hs; + } + + *hash_size_sum = hs + fix_hash_size; + + *num_items = *hash_size_sum + + get_cyclic_buffer_size(history_size) * sons; + + return false; +} + + +extern lzma_ret +lzma_lz_encoder_reset(lzma_lz_encoder *lz, lzma_allocator *allocator, + bool (*process)(lzma_coder *coder, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size), + lzma_vli uncompressed_size, + size_t history_size, size_t additional_buffer_before, + size_t match_max_len, size_t additional_buffer_after, + lzma_match_finder match_finder, uint32_t match_finder_cycles, + const uint8_t *preset_dictionary, + size_t preset_dictionary_size) +{ + // Set uncompressed size. + lz->uncompressed_size = uncompressed_size; + + /////////////// + // In Window // + /////////////// + + // Validate history size. + if (history_size < LZMA_DICTIONARY_SIZE_MIN + || history_size > LZMA_DICTIONARY_SIZE_MAX) { + lzma_lz_encoder_end(lz, allocator); + return LZMA_HEADER_ERROR; + } + + assert(history_size <= MAX_VAL_FOR_NORMALIZE - 256); + assert(LZMA_DICTIONARY_SIZE_MAX <= MAX_VAL_FOR_NORMALIZE - 256); + + // Calculate the size of the history buffer to allocate. + // TODO: Get a reason for magic constant of 256. + const size_t size_reserv = (history_size + additional_buffer_before + + match_max_len + additional_buffer_after) / 2 + 256; + + lz->keep_size_before = history_size + additional_buffer_before; + lz->keep_size_after = match_max_len + additional_buffer_after; + + const size_t buffer_size = lz->keep_size_before + lz->keep_size_after + + size_reserv; + + // Allocate history buffer if its size has changed. 
+ if (buffer_size != lz->size) { + lzma_free(lz->buffer, allocator); + lz->buffer = lzma_alloc(buffer_size, allocator); + if (lz->buffer == NULL) { + lzma_lz_encoder_end(lz, allocator); + return LZMA_MEM_ERROR; + } + } + + // Allocation successful. Store the new size and calculate + // must_move_pos. + lz->size = buffer_size; + lz->must_move_pos = lz->size - lz->keep_size_after; + + // Reset in window variables. + lz->offset = 0; + lz->read_pos = 0; + lz->read_limit = 0; + lz->write_pos = 0; + lz->stream_end_was_reached = false; + + + ////////////////// + // Match Finder // + ////////////////// + + // Validate match_finder, set function pointers and a few match + // finder specific variables. + switch (match_finder) { +#ifdef HAVE_HC3 + case LZMA_MF_HC3: + lz->get_matches = &lzma_hc3_get_matches; + lz->skip = &lzma_hc3_skip; + lz->cut_value = 8 + (match_max_len >> 2); + break; +#endif +#ifdef HAVE_HC4 + case LZMA_MF_HC4: + lz->get_matches = &lzma_hc4_get_matches; + lz->skip = &lzma_hc4_skip; + lz->cut_value = 8 + (match_max_len >> 2); + break; +#endif +#ifdef HAVE_BT2 + case LZMA_MF_BT2: + lz->get_matches = &lzma_bt2_get_matches; + lz->skip = &lzma_bt2_skip; + lz->cut_value = 16 + (match_max_len >> 1); + break; +#endif +#ifdef HAVE_BT3 + case LZMA_MF_BT3: + lz->get_matches = &lzma_bt3_get_matches; + lz->skip = &lzma_bt3_skip; + lz->cut_value = 16 + (match_max_len >> 1); + break; +#endif +#ifdef HAVE_BT4 + case LZMA_MF_BT4: + lz->get_matches = &lzma_bt4_get_matches; + lz->skip = &lzma_bt4_skip; + lz->cut_value = 16 + (match_max_len >> 1); + break; +#endif + default: + lzma_lz_encoder_end(lz, allocator); + return LZMA_HEADER_ERROR; + } + + // Check if we have been requested to use a non-default cut_value. 
+ if (match_finder_cycles > 0) + lz->cut_value = match_finder_cycles; + + lz->match_max_len = match_max_len; + lz->cyclic_buffer_size = get_cyclic_buffer_size(history_size); + + uint32_t hash_size_sum; + uint32_t num_items; + if (lzma_lz_encoder_hash_properties(match_finder, history_size, + &lz->hash_mask, &hash_size_sum, &num_items)) { + lzma_lz_encoder_end(lz, allocator); + return LZMA_HEADER_ERROR; + } + + if (num_items != lz->num_items) { +#if UINT32_MAX >= SIZE_MAX / 4 + // Check for integer overflow. (Huge dictionaries are not + // possible on 32-bit CPU.) + if (num_items > SIZE_MAX / sizeof(uint32_t)) { + lzma_lz_encoder_end(lz, allocator); + return LZMA_MEM_ERROR; + } +#endif + + const size_t size_in_bytes + = (size_t)(num_items) * sizeof(uint32_t); + + lzma_free(lz->hash, allocator); + lz->hash = lzma_alloc(size_in_bytes, allocator); + if (lz->hash == NULL) { + lzma_lz_encoder_end(lz, allocator); + return LZMA_MEM_ERROR; + } + + lz->num_items = num_items; + } + + lz->son = lz->hash + hash_size_sum; + + // Reset the hash table to empty hash values. + { + uint32_t *restrict items = lz->hash; + + for (uint32_t i = 0; i < hash_size_sum; ++i) + items[i] = EMPTY_HASH_VALUE; + } + + lz->cyclic_buffer_pos = 0; + + // Because zero is used as empty hash value, make the first byte + // appear at buffer[1 - offset]. + ++lz->offset; + + // If we are using a preset dictionary, read it now. + // TODO: This isn't implemented yet so return LZMA_HEADER_ERROR. + if (preset_dictionary != NULL && preset_dictionary_size > 0) { + lzma_lz_encoder_end(lz, allocator); + return LZMA_HEADER_ERROR; + } + + // Set the process function pointer. 
+ lz->process = process; + + return LZMA_OK; +} + + +extern void +lzma_lz_encoder_end(lzma_lz_encoder *lz, lzma_allocator *allocator) +{ + lzma_free(lz->hash, allocator); + lz->hash = NULL; + lz->num_items = 0; + + lzma_free(lz->buffer, allocator); + lz->buffer = NULL; + lz->size = 0; + + return; +} + + +/// \brief Moves the data in the input window to free space for new data +/// +/// lz->buffer is a sliding input window, which keeps lz->keep_size_before +/// bytes of input history available all the time. Now and then we need to +/// "slide" the buffer to make space for the new data to the end of the +/// buffer. At the same time, data older than keep_size_before is dropped. +/// +static void +move_window(lzma_lz_encoder *lz) +{ + // buffer[move_offset] will become buffer[0]. read_pos must exceed keep_size_before so that the subtraction below cannot wrap around. + assert(lz->read_pos > lz->keep_size_before); + size_t move_offset = lz->read_pos - lz->keep_size_before; + + // We need one additional byte, since move_pos() moves on 1 byte. + // TODO: Clean up? At least document more. + if (move_offset > 0) + --move_offset; + + assert(lz->write_pos > move_offset); + const size_t move_size = lz->write_pos - move_offset; + + assert(move_offset + move_size <= lz->size); + + memmove(lz->buffer, lz->buffer + move_offset, move_size); + + lz->offset += move_offset; + lz->read_pos -= move_offset; + lz->read_limit -= move_offset; + lz->write_pos -= move_offset; + + return; +} + + +/// \brief Tries to fill the input window (lz->buffer) +/// +/// If we are the last encoder in the chain, our input data is in in[]. +/// Otherwise we call the next filter in the chain to process in[] and +/// write its output to lz->buffer. +/// +/// This function must not be called once it has returned LZMA_STREAM_END. +/// +static lzma_ret +fill_window(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *in, + size_t *in_pos, size_t in_size, lzma_action action) +{ + assert(coder->lz.read_pos <= coder->lz.write_pos); + lzma_ret ret; + + // Move the sliding window if needed. 
+ if (coder->lz.read_pos >= coder->lz.must_move_pos) + move_window(&coder->lz); + + if (coder->next.code == NULL) { + // Not using a filter, simply memcpy() as much as possible. + bufcpy(in, in_pos, in_size, coder->lz.buffer, + &coder->lz.write_pos, coder->lz.size); + + if (action == LZMA_FINISH && *in_pos == in_size) + ret = LZMA_STREAM_END; + else + ret = LZMA_OK; + + } else { + ret = coder->next.code(coder->next.coder, allocator, + in, in_pos, in_size, + coder->lz.buffer, &coder->lz.write_pos, + coder->lz.size, action); + } + + // If end of stream has been reached, we allow the encoder to process + // all the input (that is, read_pos is allowed to reach write_pos). + // Otherwise we keep keep_size_after bytes available as prebuffer. + if (ret == LZMA_STREAM_END) { + coder->lz.stream_end_was_reached = true; + coder->lz.read_limit = coder->lz.write_pos; + + } else if (coder->lz.write_pos > coder->lz.keep_size_after) { + // This needs to be done conditionally, because if we got + // only little new input, there may be too little input + // to do any encoding yet. + coder->lz.read_limit = coder->lz.write_pos + - coder->lz.keep_size_after; + } + + return ret; +} + + +extern lzma_ret +lzma_lz_encode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, + uint8_t *restrict out, size_t *restrict out_pos, + size_t out_size, lzma_action action) +{ + while (*out_pos < out_size + && (*in_pos < in_size || action == LZMA_FINISH)) { + // Fill the input window if there is no more usable data. 
+ if (!coder->lz.stream_end_was_reached && coder->lz.read_pos + >= coder->lz.read_limit) { + const lzma_ret ret = fill_window(coder, allocator, + in, in_pos, in_size, action); + if (ret != LZMA_OK && ret != LZMA_STREAM_END) + return ret; + } + + // Encode + if (coder->lz.process(coder, out, out_pos, out_size)) + return LZMA_STREAM_END; + } + + return LZMA_OK; +} + + +/// \brief Normalizes hash values +/// +/// lzma_lz_normalize is called when lz->pos hits MAX_VAL_FOR_NORMALIZE, +/// which currently happens once every 2 GiB of input data (to be exact, +/// after the first 2 GiB it happens once every 2 GiB minus dictionary_size +/// bytes). lz->pos is incremented by lzma_lz_move_pos(). +/// +/// lz->hash contains big amount of offsets relative to lz->buffer. +/// The offsets are stored as uint32_t, which is the only reasonable +/// datatype for these offsets; uint64_t would waste far too much RAM +/// and uint16_t would limit the dictionary to 64 KiB (far too small). +/// +/// When compressing files over 2 GiB, lz->buffer needs to be moved forward +/// to avoid integer overflows. We scan the lz->hash array and fix every +/// value to match the updated lz->buffer. +extern void +lzma_lz_encoder_normalize(lzma_lz_encoder *lz) +{ + const uint32_t subvalue = lz->read_pos - lz->cyclic_buffer_size; + assert(subvalue <= INT32_MAX); + + { + const uint32_t num_items = lz->num_items; + uint32_t *restrict items = lz->hash; + + for (uint32_t i = 0; i < num_items; ++i) { + // If the distance is greater than the dictionary + // size, we can simply mark the item as empty. + if (items[i] <= subvalue) + items[i] = EMPTY_HASH_VALUE; + else + items[i] -= subvalue; + } + } + + // Update offset to match the new locations. 
+ lz->offset -= subvalue; + + return; +} diff --git a/src/liblzma/lz/lz_encoder.h b/src/liblzma/lz/lz_encoder.h new file mode 100644 index 00000000..b39c88e5 --- /dev/null +++ b/src/liblzma/lz/lz_encoder.h @@ -0,0 +1,161 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lz_encoder.h +/// \brief LZ in window and match finder API +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZ_ENCODER_H +#define LZMA_LZ_ENCODER_H + +#include "common.h" + + +typedef struct lzma_lz_encoder_s lzma_lz_encoder; +struct lzma_lz_encoder_s { + enum { + SEQ_INIT, + SEQ_RUN, + SEQ_FINISH, + SEQ_END + } sequence; + + bool (*process)(lzma_coder *coder, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size); + + lzma_vli uncompressed_size; + + /////////////// + // In Window // + /////////////// + + /// Pointer to buffer with data to be compressed + uint8_t *buffer; + + /// Total size of the allocated buffer (that is, including all + /// the extra space) + size_t size; + + /// Match finders store locations of matches using 32-bit integers. + /// To avoid adjusting several megabytes of integers every time the + /// input window is moved with move_window(), we only adjust the + /// offset of the buffer. 
Thus, buffer[match_finder_pos - offset] + /// is the byte pointed by match_finder_pos. + size_t offset; + + /// buffer[read_pos] is the current byte. + size_t read_pos; + + /// As long as read_pos is less than read_limit, there is enough + /// input available in buffer for at least one encoding loop. + /// + /// Because of the stateful API, read_limit may and will get greater + /// than read_pos quite often. This is taken into account when + /// calculating the value for keep_size_after. + size_t read_limit; + + /// buffer[write_pos] is the first byte that doesn't contain valid + /// uncompressed data; that is, the next input byte will be copied + /// to buffer[write_pos]. + size_t write_pos; + + /// When read_pos >= must_move_pos, move_window() must be called + /// to make more space for the input data. + size_t must_move_pos; + + /// Number of bytes that must be kept available in our input history. + /// That is, once keep_size_before bytes have been processed, + /// buffer[read_pos - keep_size_before] is the oldest byte that + /// must be available for reading. + size_t keep_size_before; + + /// Number of bytes that must be kept in buffer after read_pos. + /// That is, read_pos <= write_pos - keep_size_after as long as + /// stream_end_was_reached is false (once it is true, read_pos + /// is allowed to reach write_pos). + size_t keep_size_after; + + /// This is set to true once the last byte of the input data has + /// been copied to buffer. + bool stream_end_was_reached; + + ////////////////// + // Match Finder // + ////////////////// + + // Pointers to match finder functions + void (*get_matches)(lzma_lz_encoder *restrict lz, + uint32_t *restrict distances); + void (*skip)(lzma_lz_encoder *restrict lz, uint32_t num); + + // Match finder data + uint32_t *hash; // TODO: Check if hash aliases son + uint32_t *son; // and add 'restrict' if possible. + uint32_t cyclic_buffer_pos; + uint32_t cyclic_buffer_size; // Must be dictionary_size + 1. 
+ uint32_t hash_mask; + uint32_t cut_value; + uint32_t hash_size_sum; + uint32_t num_items; + uint32_t match_max_len; +}; + + +#define LZMA_LZ_ENCODER_INIT \ + (lzma_lz_encoder){ \ + .buffer = NULL, \ + .size = 0, \ + .hash = NULL, \ + .num_items = 0, \ + } + + +/// Calculates the match finder hash properties for the given match finder and history size: stores the results in *hash_mask, *hash_size_sum and *num_items, and returns true if match_finder is not supported (false on success). +extern uint32_t lzma_lz_encoder_hash_properties(lzma_match_finder match_finder, + uint32_t history_size, uint32_t *restrict hash_mask, + uint32_t *restrict hash_size_sum, + uint32_t *restrict num_items); + +// NOTE: liblzma doesn't use callback API like LZMA SDK does. The caller +// must make sure that keep_size_after is big enough for single encoding pass +// i.e. keep_size_after >= maximum number of bytes possibly needed after +// the current position between calls to lzma_lz_read(). +extern lzma_ret lzma_lz_encoder_reset(lzma_lz_encoder *lz, + lzma_allocator *allocator, + bool (*process)(lzma_coder *coder, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size), + lzma_vli uncompressed_size, + size_t history_size, size_t additional_buffer_before, + size_t match_max_len, size_t additional_buffer_after, + lzma_match_finder match_finder, uint32_t match_finder_cycles, + const uint8_t *preset_dictionary, + size_t preset_dictionary_size); + +/// Frees memory allocated for in window and match finder buffers. +extern void lzma_lz_encoder_end( + lzma_lz_encoder *lz, lzma_allocator *allocator); + +extern lzma_ret lzma_lz_encode(lzma_coder *coder, + lzma_allocator *allocator lzma_attribute((unused)), + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, + lzma_action action); + +/// This should not be called directly, but only via move_pos() macro. 
+extern void lzma_lz_encoder_normalize(lzma_lz_encoder *lz); + +#endif diff --git a/src/liblzma/lz/lz_encoder_private.h b/src/liblzma/lz/lz_encoder_private.h new file mode 100644 index 00000000..638fcb2d --- /dev/null +++ b/src/liblzma/lz/lz_encoder_private.h @@ -0,0 +1,40 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lz_encoder_private.h +/// \brief Private definitions for LZ encoder +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZ_ENCODER_PRIVATE_H +#define LZMA_LZ_ENCODER_PRIVATE_H + +#include "lz_encoder.h" + +/// Value used to indicate unused slot +#define EMPTY_HASH_VALUE 0 + +/// When the dictionary and hash variables need to be adjusted to prevent +/// integer overflows. Since we use uint32_t to store the offsets, half +/// of it is the biggest safe limit. 
+#define MAX_VAL_FOR_NORMALIZE (UINT32_MAX / 2) + + +struct lzma_coder_s { + lzma_next_coder next; + lzma_lz_encoder lz; +}; + +#endif diff --git a/src/liblzma/lz/match_c.h b/src/liblzma/lz/match_c.h new file mode 100644 index 00000000..68766385 --- /dev/null +++ b/src/liblzma/lz/match_c.h @@ -0,0 +1,401 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file match_c.h +/// \brief Template for different match finders +/// +/// This file is included by hc3.c, hc4.c, bt2.c, bt3.c and bt4.c. Each file +/// sets slightly different #defines, resulting in the different match finders. +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +////////////// +// Includes // +////////////// + +#include "check.h" + + +/////////////// +// Constants // +/////////////// + +#define START_MAX_LEN 1 + +#ifdef HASH_ARRAY_2 +# define NUM_HASH_DIRECT_BYTES 0 +# define HASH_2_SIZE (1 << 10) +# ifdef HASH_ARRAY_3 +# define NUM_HASH_BYTES 4 +# define HASH_3_SIZE (1 << 16) +# define HASH_3_OFFSET HASH_2_SIZE +# define FIX_HASH_SIZE (HASH_2_SIZE + HASH_3_SIZE) +# else +# define NUM_HASH_BYTES 3 +# define FIX_HASH_SIZE HASH_2_SIZE +# endif +# define HASH_SIZE 0 +# define MIN_MATCH_CHECK NUM_HASH_BYTES +#else +# define NUM_HASH_DIRECT_BYTES 2 +# define NUM_HASH_BYTES 2 +# define HASH_SIZE (1 << (8 * NUM_HASH_BYTES)) +# define MIN_MATCH_CHECK (NUM_HASH_BYTES + 1) +# define FIX_HASH_SIZE 0 +#endif + + +//////////// +// Macros // +//////////// + +#ifdef HASH_ARRAY_2 +# ifdef HASH_ARRAY_3 +# define HASH_CALC() \ + do { \ + const uint32_t temp = lzma_crc32_table[0][ \ + cur[0]] ^ cur[1]; \ + hash_2_value = temp & (HASH_2_SIZE - 1); \ + hash_3_value = (temp ^ ((uint32_t)(cur[2]) << 8)) \ + & (HASH_3_SIZE - 1); \ + hash_value = (temp ^ ((uint32_t)(cur[2]) << 8) \ + ^ (lzma_crc32_table[0][cur[3]] << 5)) \ + & lz->hash_mask; \ + } while (0) +# else +# define HASH_CALC() \ + do { \ + const uint32_t temp = lzma_crc32_table[0][ \ + cur[0]] ^ cur[1]; \ + hash_2_value = temp & (HASH_2_SIZE - 1); \ + hash_value = (temp ^ ((uint32_t)(cur[2]) << 8)) \ + & lz->hash_mask; \ + } while (0) +# endif +#else +# define HASH_CALC() hash_value = cur[0] ^ ((uint32_t)(cur[1]) << 8) +#endif + + +// Moves the current read position forward by one byte. In LZMA SDK, +// CLZInWindow::MovePos() can read more input data if needed, because of +// the callback style API. In liblzma we must have ensured earlier, that +// there is enough data available in lz->buffer. 
+// Advance the match finder by one byte. Expects a variable named `lz` in
+// the expanding scope. Steps cyclic_buffer_pos (wrapping to zero when it
+// reaches cyclic_buffer_size), increments read_pos, and calls
+// lzma_lz_encoder_normalize() when read_pos reaches MAX_VAL_FOR_NORMALIZE.
+#define move_pos() \
+do { \
+	if (++lz->cyclic_buffer_pos == lz->cyclic_buffer_size) \
+		lz->cyclic_buffer_pos = 0; \
+	++lz->read_pos; \
+	assert(lz->read_pos <= lz->write_pos); \
+	if (lz->read_pos == MAX_VAL_FOR_NORMALIZE) \
+		lzma_lz_encoder_normalize(lz); \
+} while (0)
+
+
+//////////////////////
+// Global constants //
+//////////////////////
+
+// Definitions of the per-match-finder size constants. The type and the
+// identifier are produced by the LZMA_HASH_SIZE()/LZMA_FIX_HASH_SIZE()
+// macros; only the initializers are written here.
+LZMA_HASH_SIZE(LZMA_MATCH_FINDER_NAME_UPPER) = HASH_SIZE;
+LZMA_FIX_HASH_SIZE(LZMA_MATCH_FINDER_NAME_UPPER) = FIX_HASH_SIZE;
+
+
+///////////////////
+// API functions //
+///////////////////
+
+// Find matches for the data at lz->read_pos, then advance by one byte.
+//
+// The function header (name and the parameters `lz` and `distances`) is
+// produced by the LZMA_GET_MATCHES() macro.
+//
+// Output layout: distances[0] holds the number of values stored after it;
+// the values are (length, distance - 1) pairs starting at distances[1].
+// The search loop stores a pair only when its length exceeds the longest
+// length recorded so far. If fewer than MIN_MATCH_CHECK bytes are
+// available, distances[0] is set to 0 and the position is still advanced.
+//
+// The body is a template: HASH_ARRAY_2, HASH_ARRAY_3, IS_HASH_CHAIN and
+// NUM_HASH_DIRECT_BYTES select the variant (presumably set by the file
+// that includes this template - confirm against the includers).
+LZMA_GET_MATCHES(LZMA_MATCH_FINDER_NAME_LOWER)
+{
+	// Longest match length that may be examined at this position.
+	uint32_t len_limit;
+	if (lz->read_pos + lz->match_max_len <= lz->write_pos) {
+		len_limit = lz->match_max_len;
+	} else {
+		assert(lz->stream_end_was_reached);
+		len_limit = lz->write_pos - lz->read_pos;
+		if (len_limit < MIN_MATCH_CHECK) {
+			distances[0] = 0;
+			move_pos();
+			return;
+		}
+	}
+
+	// Next free index in distances[]; index 0 is reserved for the count.
+	int32_t offset = 1;
+	// Candidates at or below this position are rejected by the checks
+	// below (cur_match <= match_min_pos ends the search).
+	const uint32_t match_min_pos
+		= lz->read_pos + lz->offset > lz->cyclic_buffer_size
+		? lz->read_pos + lz->offset - lz->cyclic_buffer_size
+		: 0;
+	const uint8_t *cur = lz->buffer + lz->read_pos;
+	uint32_t max_len = START_MAX_LEN; // to avoid items for len < hash_size
+
+#ifdef HASH_ARRAY_2
+	uint32_t hash_2_value;
+# ifdef HASH_ARRAY_3
+	uint32_t hash_3_value;
+# endif
+#endif
+	uint32_t hash_value;
+	// HASH_CALC() is defined outside this view; the code below relies on
+	// it having assigned hash_value (and hash_2_value/hash_3_value when
+	// those are declared).
+	HASH_CALC();
+
+	// Read the previous positions stored for these hashes, then store the
+	// current position (read_pos + offset) in their place.
+	uint32_t cur_match = lz->hash[FIX_HASH_SIZE + hash_value];
+#ifdef HASH_ARRAY_2
+	uint32_t cur_match2 = lz->hash[hash_2_value];
+# ifdef HASH_ARRAY_3
+	uint32_t cur_match3 = lz->hash[HASH_3_OFFSET + hash_3_value];
+# endif
+	lz->hash[hash_2_value] = lz->read_pos + lz->offset;
+
+	// Candidate from the 2-byte hash: only the first byte is compared
+	// before a length-2 pair is recorded.
+	if (cur_match2 > match_min_pos) {
+		if (lz->buffer[cur_match2 - lz->offset] == cur[0]) {
+			max_len = 2;
+			distances[offset++] = 2;
+			distances[offset++] = lz->read_pos + lz->offset
+					- cur_match2 - 1;
+		}
+	}
+
+# ifdef HASH_ARRAY_3
+	lz->hash[HASH_3_OFFSET + hash_3_value] = lz->read_pos + lz->offset;
+	if (cur_match3 > match_min_pos) {
+		if (lz->buffer[cur_match3 - lz->offset] == cur[0]) {
+			// Same position as the 2-byte candidate: rewind the
+			// index so the length-3 pair overwrites the length-2
+			// pair.
+			if (cur_match3 == cur_match2)
+				offset -= 2;
+
+			max_len = 3;
+			distances[offset++] = 3;
+			distances[offset++] = lz->read_pos + lz->offset
+					- cur_match3 - 1;
+			cur_match2 = cur_match3;
+		}
+	}
+# endif
+
+	// The last short-hash candidate is the same position as the main-hash
+	// candidate: drop its pair and reset max_len (presumably so that the
+	// search loop below records that position itself).
+	if (offset != 1 && cur_match2 == cur_match) {
+		offset -= 2;
+		max_len = START_MAX_LEN;
+	}
+#endif
+
+	lz->hash[FIX_HASH_SIZE + hash_value] = lz->read_pos + lz->offset;
+
+#ifdef IS_HASH_CHAIN
+	// Hash chain: one link per position, pointing at the previous
+	// position that had the same hash.
+	lz->son[lz->cyclic_buffer_pos] = cur_match;
+#else
+	// Two links per position: ptr1 is slot 2 * pos, ptr0 is slot
+	// 2 * pos + 1. They are filled in while walking the candidates below.
+	uint32_t *ptr0 = lz->son + (lz->cyclic_buffer_pos << 1) + 1;
+	uint32_t *ptr1 = lz->son + (lz->cyclic_buffer_pos << 1);
+
+	uint32_t len0 = NUM_HASH_DIRECT_BYTES;
+	uint32_t len1 = NUM_HASH_DIRECT_BYTES;
+#endif
+
+#if NUM_HASH_DIRECT_BYTES != 0
+	if (cur_match > match_min_pos) {
+		if (lz->buffer[cur_match + NUM_HASH_DIRECT_BYTES - lz->offset]
+				!= cur[NUM_HASH_DIRECT_BYTES]) {
+			max_len = NUM_HASH_DIRECT_BYTES;
+			distances[offset++] = NUM_HASH_DIRECT_BYTES;
+			distances[offset++] = lz->read_pos + lz->offset
+					- cur_match - 1;
+		}
+	}
+#endif
+
+	// Upper bound on the number of candidates examined.
+	uint32_t count = lz->cut_value;
+
+	while (true) {
+		if (cur_match <= match_min_pos || count-- == 0) {
+#ifndef IS_HASH_CHAIN
+			*ptr0 = EMPTY_HASH_VALUE;
+			*ptr1 = EMPTY_HASH_VALUE;
+#endif
+			break;
+		}
+
+		// delta is how far back the candidate is; cyclic_pos is the
+		// candidate's index in the cyclic `son` array.
+		const uint32_t delta = lz->read_pos + lz->offset - cur_match;
+		const uint32_t cyclic_pos = delta <= lz->cyclic_buffer_pos
+				? lz->cyclic_buffer_pos - delta
+				: lz->cyclic_buffer_pos - delta
+					+ lz->cyclic_buffer_size;
+		uint32_t *pair = lz->son +
+#ifdef IS_HASH_CHAIN
+				cyclic_pos;
+#else
+				(cyclic_pos << 1);
+#endif
+
+		const uint8_t *pb = lz->buffer + cur_match - lz->offset;
+		uint32_t len =
+#ifdef IS_HASH_CHAIN
+				NUM_HASH_DIRECT_BYTES;
+		// Hash chain only: this test guards the comparison below, so
+		// a candidate is compared in full only if it also matches at
+		// index max_len.
+		if (pb[max_len] == cur[max_len])
+#else
+				MIN(len0, len1);
+#endif
+
+		if (pb[len] == cur[len]) {
+			while (++len != len_limit)
+				if (pb[len] != cur[len])
+					break;
+
+			if (max_len < len) {
+				max_len = len;
+				distances[offset++] = len;
+				distances[offset++] = delta - 1;
+				// Reached the limit: stop searching. In the
+				// two-link variant the candidate's links are
+				// copied into the current position first.
+				if (len == len_limit) {
+#ifndef IS_HASH_CHAIN
+					*ptr1 = pair[0];
+					*ptr0 = pair[1];
+#endif
+					break;
+				}
+			}
+		}
+
+#ifdef IS_HASH_CHAIN
+		cur_match = *pair;
+#else
+		// Attach the candidate to one of the two pending links
+		// depending on how its first differing byte compares, and
+		// continue from the candidate's corresponding link.
+		if (pb[len] < cur[len]) {
+			*ptr1 = cur_match;
+			ptr1 = pair + 1;
+			cur_match = *ptr1;
+			len1 = len;
+		} else {
+			*ptr0 = cur_match;
+			ptr0 = pair;
+			cur_match = *ptr0;
+			len0 = len;
+		}
+#endif
+	}
+
+	// Number of values written after distances[0].
+	distances[0] = offset - 1;
+
+	move_pos();
+
+	return;
+}
+
+
+// Advance the match finder by `num` positions without reporting matches.
+// The hash tables and the `son` links are updated at each position the
+// same way the get_matches function above updates them.
+//
+// The function header (name and the parameters `lz` and `num`) is produced
+// by the LZMA_SKIP() macro.
+//
+// NOTE: the loop is do { ... } while (--num != 0), so num must be nonzero;
+// with num == 0 the unsigned counter would wrap around instead of stopping.
+LZMA_SKIP(LZMA_MATCH_FINDER_NAME_LOWER)
+{
+	do {
+#ifdef IS_HASH_CHAIN
+		// Too few bytes left to compute the hash: just advance.
+		if (lz->write_pos - lz->read_pos < NUM_HASH_BYTES) {
+			move_pos();
+			continue;
+		}
+#else
+		uint32_t len_limit;
+		if (lz->read_pos + lz->match_max_len <= lz->write_pos) {
+			len_limit = lz->match_max_len;
+		} else {
+			assert(lz->stream_end_was_reached == true);
+			len_limit = lz->write_pos - lz->read_pos;
+			if (len_limit < MIN_MATCH_CHECK) {
+				move_pos();
+				continue;
+			}
+		}
+		const uint32_t match_min_pos
+			= lz->read_pos + lz->offset > lz->cyclic_buffer_size
+			? lz->read_pos + lz->offset - lz->cyclic_buffer_size
+			: 0;
+#endif
+
+		const uint8_t *cur = lz->buffer + lz->read_pos;
+
+		// Store the current position in every hash table in use.
+#ifdef HASH_ARRAY_2
+		uint32_t hash_2_value;
+# ifdef HASH_ARRAY_3
+		uint32_t hash_3_value;
+		uint32_t hash_value;
+		HASH_CALC();
+		lz->hash[HASH_3_OFFSET + hash_3_value]
+				= lz->read_pos + lz->offset;
+# else
+		uint32_t hash_value;
+		HASH_CALC();
+# endif
+		lz->hash[hash_2_value] = lz->read_pos + lz->offset;
+#else
+		uint32_t hash_value;
+		HASH_CALC();
+#endif
+
+		uint32_t cur_match = lz->hash[FIX_HASH_SIZE + hash_value];
+		lz->hash[FIX_HASH_SIZE + hash_value]
+				= lz->read_pos + lz->offset;
+
+#ifdef IS_HASH_CHAIN
+		lz->son[lz->cyclic_buffer_pos] = cur_match;
+#else
+		uint32_t *ptr0 = lz->son + (lz->cyclic_buffer_pos << 1) + 1;
+		uint32_t *ptr1 = lz->son + (lz->cyclic_buffer_pos << 1);
+
+		uint32_t len0 = NUM_HASH_DIRECT_BYTES;
+		uint32_t len1 = NUM_HASH_DIRECT_BYTES;
+		uint32_t count = lz->cut_value;
+
+		// Same candidate walk as in the get_matches function, minus
+		// the writes to distances[].
+		while (true) {
+			if (cur_match <= match_min_pos || count-- == 0) {
+				*ptr0 = EMPTY_HASH_VALUE;
+				*ptr1 = EMPTY_HASH_VALUE;
+				break;
+			}
+
+			const uint32_t delta = lz->read_pos
+					+ lz->offset - cur_match;
+			const uint32_t cyclic_pos
+					= delta <= lz->cyclic_buffer_pos
+					? lz->cyclic_buffer_pos - delta
+					: lz->cyclic_buffer_pos - delta
+						+ lz->cyclic_buffer_size;
+			uint32_t *pair = lz->son + (cyclic_pos << 1);
+
+			const uint8_t *pb = lz->buffer + cur_match
+					- lz->offset;
+			uint32_t len = MIN(len0, len1);
+
+			if (pb[len] == cur[len]) {
+				while (++len != len_limit)
+					if (pb[len] != cur[len])
+						break;
+
+				if (len == len_limit) {
+					*ptr1 = pair[0];
+					*ptr0 = pair[1];
+					break;
+				}
+			}
+
+			if (pb[len] < cur[len]) {
+				*ptr1 = cur_match;
+				ptr1 = pair + 1;
+				cur_match = *ptr1;
+				len1 = len;
+			} else {
+				*ptr0 = cur_match;
+				ptr0 = pair;
+				cur_match = *ptr0;
+				len0 = len;
+			}
+		}
+#endif
+
+		move_pos();
+
+	} while (--num != 0);
+
+	return;
+}
diff --git a/src/liblzma/lz/match_h.h b/src/liblzma/lz/match_h.h
new file mode 100644
index 00000000..2eae90ba
--- /dev/null
+++ b/src/liblzma/lz/match_h.h
@@ -0,0 +1,69 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file match_h.h
+/// \brief Header template for different match finders
+//
+// Copyright (C) 1999-2006 Igor Pavlov
+// Copyright (C) 2007 Lasse Collin
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "lz_encoder_private.h"
+
+
+//////////////////////
+// Global constants //
+//////////////////////
+
+// The helper macros are #undef'd before being defined, so this header can
+// be processed more than once in a translation unit (presumably once per
+// match finder, with LZMA_MATCH_FINDER_NAME_UPPER/LOWER set differently
+// each time - confirm against the includers).
+#undef LZMA_HASH_SIZE
+#undef LZMA_FIX_HASH_SIZE
+#undef LZMA_HASH_SIZE_C
+#undef LZMA_FIX_HASH_SIZE_C
+
+// Two macro levels are used so that an argument which is itself a macro
+// (such as LZMA_MATCH_FINDER_NAME_UPPER) is expanded before the inner
+// *_C macro pastes it into an identifier with ##.
+#define LZMA_HASH_SIZE(mf_name) LZMA_HASH_SIZE_C(mf_name)
+#define LZMA_FIX_HASH_SIZE(mf_name) LZMA_FIX_HASH_SIZE_C(mf_name)
+
+// Expands to `const uint32_t LZMA_<mf_name>_HASH_SIZE` (no semicolon, no
+// initializer), usable for both the declaration and the definition.
+#define LZMA_HASH_SIZE_C(mf_name) \
+	const uint32_t LZMA_ ## mf_name ## _HASH_SIZE
+
+// Expands to `const uint32_t LZMA_<mf_name>_FIX_HASH_SIZE`.
+#define LZMA_FIX_HASH_SIZE_C(mf_name) \
+	const uint32_t LZMA_ ## mf_name ## _FIX_HASH_SIZE
+
+extern LZMA_HASH_SIZE(LZMA_MATCH_FINDER_NAME_UPPER);
+extern LZMA_FIX_HASH_SIZE(LZMA_MATCH_FINDER_NAME_UPPER);
+
+
+///////////////
+// Functions //
+///////////////
+
+#undef LZMA_GET_MATCHES
+#undef LZMA_SKIP
+#undef LZMA_GET_MATCHES_C
+#undef LZMA_SKIP_C
+
+// Same two-level scheme as above, here producing function headers.
+#define LZMA_GET_MATCHES(mf_name) LZMA_GET_MATCHES_C(mf_name)
+#define LZMA_SKIP(mf_name) LZMA_SKIP_C(mf_name)
+
+// Header of lzma_<mf_name>_get_matches(lz, distances); there is no
+// trailing semicolon, so the macro can be followed by `;` or by a body.
+#define LZMA_GET_MATCHES_C(mf_name) \
+	extern void lzma_ ## mf_name ## _get_matches( \
+			lzma_lz_encoder *restrict lz, \
+			uint32_t *restrict distances)
+
+// Header of lzma_<mf_name>_skip(lz, num).
+#define LZMA_SKIP_C(mf_name) \
+	extern void lzma_ ## mf_name ## _skip( \
+			lzma_lz_encoder *lz, uint32_t num)
+
+LZMA_GET_MATCHES(LZMA_MATCH_FINDER_NAME_LOWER);
+
+LZMA_SKIP(LZMA_MATCH_FINDER_NAME_LOWER);
diff --git a/src/liblzma/lzma.pc.in b/src/liblzma/lzma.pc.in
new file mode 100644
index 00000000..5bf9bb10
--- /dev/null
+++ b/src/liblzma/lzma.pc.in
@@ -0,0 +1,11 @@
+# pkg-config metadata template for liblzma; the @...@ placeholders are
+# presumably substituted at configure time.
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: liblzma
+Description: LZMA compression library
+URL: http://tukaani.org/lzma/
+Version: @PACKAGE_VERSION@
+Cflags: -I${includedir}
+Libs: -L${libdir} -llzma
diff --git a/src/liblzma/lzma/Makefile.am b/src/liblzma/lzma/Makefile.am
new file mode 100644
index 00000000..48f3bb23
--- /dev/null
+++ b/src/liblzma/lzma/Makefile.am
@@ -0,0 +1,43 @@
+##
+## Copyright (C) 2007 Lasse Collin
+##
+## This library is free software; you can redistribute it and/or
+## modify it under the terms of the GNU Lesser General Public
+## License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+##
+## This library is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+## Lesser General Public License for more details.
+##
+
+## Libtool library that is built but not installed (noinst).
+noinst_LTLIBRARIES = liblzma4.la
+## Header search path: the api, common, lz and rangecoder directories of
+## liblzma.
+liblzma4_la_CPPFLAGS = \
+	-I@top_srcdir@/src/liblzma/api \
+	-I@top_srcdir@/src/liblzma/common \
+	-I@top_srcdir@/src/liblzma/lz \
+	-I@top_srcdir@/src/liblzma/rangecoder
+
+## Sources that are always compiled.
+liblzma4_la_SOURCES = \
+	lzma_common.h \
+	lzma_literal.c \
+	lzma_literal.h
+
+## Encoder sources, added only when COND_MAIN_ENCODER is true.
+if COND_MAIN_ENCODER
+liblzma4_la_SOURCES += \
+	lzma_encoder.h \
+	lzma_encoder.c \
+	lzma_encoder_presets.c \
+	lzma_encoder_private.h \
+	lzma_encoder_init.c \
+	lzma_encoder_features.c \
+	lzma_encoder_getoptimum.c \
+	lzma_encoder_getoptimumfast.c
+endif
+
+## Decoder sources, added only when COND_MAIN_DECODER is true.
+if COND_MAIN_DECODER
+liblzma4_la_SOURCES += \
+	lzma_decoder.c \
+	lzma_decoder.h
+endif
diff --git a/src/liblzma/lzma/lzma_common.h b/src/liblzma/lzma/lzma_common.h
new file mode 100644
index 00000000..4ff59ae6
--- /dev/null
+++ b/src/liblzma/lzma/lzma_common.h
@@ -0,0 +1,128 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file lzma_common.h
+/// \brief Private definitions common to LZMA encoder and decoder
+//
+// Copyright (C) 1999-2006 Igor Pavlov
+// Copyright (C) 2007 Lasse Collin
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_LZMA_COMMON_H
+#define LZMA_LZMA_COMMON_H
+
+#include "common.h"
+#include "lzma_literal.h"
+#include "range_common.h"
+
+
+///////////////
+// Constants //
+///////////////
+
+// Number of remembered match distances (the decoder keeps rep0..rep3).
+#define REP_DISTANCES 4
+// Number of coder states; states below LIT_STATES are the ones
+// is_char_state() reports as true.
+#define STATES 12
+#define LIT_STATES 7
+
+#define POS_SLOT_BITS 6
+#define DICT_LOG_SIZE_MAX 30
+#define DIST_TABLE_SIZE_MAX (DICT_LOG_SIZE_MAX * 2)
+// Compile-time check that the log2 limit above agrees with the public
+// dictionary size limit.
+#if (UINT32_C(1) << DICT_LOG_SIZE_MAX) != LZMA_DICTIONARY_SIZE_MAX
+# error DICT_LOG_SIZE_MAX is inconsistent with LZMA_DICTIONARY_SIZE_MAX
+#endif
+
+// 2 is for speed optimization
+#define LEN_TO_POS_STATES_BITS 2
+#define LEN_TO_POS_STATES (1 << LEN_TO_POS_STATES_BITS)
+
+// Shortest match length that can be coded.
+#define MATCH_MIN_LEN 2
+
+#define ALIGN_BITS 4
+#define ALIGN_TABLE_SIZE (1 << ALIGN_BITS)
+#define ALIGN_MASK (ALIGN_TABLE_SIZE - 1)
+
+#define START_POS_MODEL_INDEX 4
+#define END_POS_MODEL_INDEX 14
+#define POS_MODELS (END_POS_MODEL_INDEX - START_POS_MODEL_INDEX)
+
+// 1 << 7 == 128
+#define FULL_DISTANCES (1 << (END_POS_MODEL_INDEX / 2))
+
+#define LIT_POS_STATES_BITS_MAX LZMA_LITERAL_POS_BITS_MAX
+#define LIT_CONTEXT_BITS_MAX LZMA_LITERAL_CONTEXT_BITS_MAX
+
+#define POS_STATES_BITS_MAX LZMA_POS_BITS_MAX
+#define POS_STATES_MAX (1 << POS_STATES_BITS_MAX)
+
+
+// Length coder & Length price table encoder
+//
+// A length is coded in one of three ranges (low, mid, high) with
+// 8, 8 and 256 symbols respectively, 272 symbols in total.
+#define LEN_LOW_BITS 3
+#define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS)
+#define LEN_MID_BITS 3
+#define LEN_MID_SYMBOLS (1 << LEN_MID_BITS)
+#define LEN_HIGH_BITS 8
+#define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS)
+#define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS)
+// Number of symbols in the low and mid ranges together. This previously
+// referred to LOW_LOW_SYMBOLS and LEN_MID_LEN_SYMBOLS, which are not
+// defined; any use of the macro would have failed to compile.
+#define LEN_SPEC_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS)
+// Longest match length that can be coded: 2 + 272 - 1 == 273.
+#define MATCH_MAX_LEN (MATCH_MIN_LEN + LEN_SYMBOLS - 1)
+
+// Total number of probs in a Len Encoder
+// NOTE(review): LEN_HIGH_CODER is not defined in this header; confirm it
+// is defined elsewhere before using this macro.
+#define LEN_CODER_TOTAL_PROBS (LEN_HIGH_CODER + LEN_HIGH_SYMBOLS)
+
+// Price table size of Len Encoder
+#define LEN_PRICES (LEN_SYMBOLS << POS_STATES_BITS_MAX)
+
+
+// Optimal - Number of entries in the optimum array.
+#define OPTS (1 << 12)
+
+
+// Miscellaneous
+#define INFINITY_PRICE 0x0FFFFFFF
+
+
+////////////
+// Macros //
+////////////
+
+// Map a match length to the index used to select a pos_slot coder:
+// len - MATCH_MIN_LEN for the shortest LEN_TO_POS_STATES lengths, and
+// LEN_TO_POS_STATES - 1 for every longer length.
+#define get_len_to_pos_state(len) \
+	((len) < LEN_TO_POS_STATES + MATCH_MIN_LEN \
+		? (len) - MATCH_MIN_LEN \
+		: LEN_TO_POS_STATES - 1)
+
+
+///////////
+// State //
+///////////
+
+// Used for updating strm->data->state in both encoder and decoder.
+//
+// Each update_*() macro assigns the new state to its argument, so the
+// argument must be a modifiable lvalue; it is evaluated more than once.
+
+// State after a literal: 0..3 -> 0, 4..9 -> index - 3, otherwise
+// index - 6. For states 0..11 the result is always below LIT_STATES.
+#define update_char(index) \
+	index = ((index) < 4 \
+			? 0 \
+			: ((index) < 10 \
+				? (index) - 3 \
+				: (index) - 6))
+
+// State after a match: 7 if the previous state was a char state, else 10.
+#define update_match(index) \
+	index = ((index) < LIT_STATES ? 7 : 10)
+
+// State after a repeated match: 8 from a char state, else 11.
+#define update_rep(index) \
+	index = ((index) < LIT_STATES ? 8 : 11)
+
+// State after a one-byte repeated match: 9 from a char state, else 11.
+#define update_short_rep(index) \
+	index = ((index) < LIT_STATES ? 9 : 11)
+
+// True for the states below LIT_STATES, i.e. the states that
+// update_char() produces.
+#define is_char_state(index) \
+	((index) < LIT_STATES)
+
+#endif
diff --git a/src/liblzma/lzma/lzma_decoder.c b/src/liblzma/lzma/lzma_decoder.c
new file mode 100644
index 00000000..6e2c166d
--- /dev/null
+++ b/src/liblzma/lzma/lzma_decoder.c
@@ -0,0 +1,844 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file lzma_decoder.c
+/// \brief LZMA decoder
+//
+// Copyright (C) 1999-2006 Igor Pavlov
+// Copyright (C) 2007 Lasse Collin
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "lzma_common.h" +#include "lzma_decoder.h" +#include "lz_decoder.h" +#include "range_decoder.h" + + +/// REQUIRED_IN_BUFFER_SIZE is the number of required input bytes +/// for the worst case: longest match with longest distance. +/// LZMA_IN_BUFFER_SIZE must be larger than REQUIRED_IN_BUFFER_SIZE. +/// 23 bits = 2 (match select) + 10 (len) + 6 (distance) + 4 (align) +/// + 1 (rc_normalize) +/// +/// \todo Is this correct for sure? +/// +#define REQUIRED_IN_BUFFER_SIZE \ + ((23 * (BIT_MODEL_TOTAL_BITS - MOVE_BITS + 1) + 26 + 9) / 8) + + +// Length decoders are easiest to have as macros, because they use range +// decoder macros, which use local variables rc_range and rc_code. + +#define length_decode(target, len_decoder, pos_state) \ +do { \ + if_bit_0(len_decoder.choice) { \ + update_bit_0(len_decoder.choice); \ + target = MATCH_MIN_LEN; \ + bittree_decode(target, \ + len_decoder.low[pos_state], LEN_LOW_BITS); \ + } else { \ + update_bit_1(len_decoder.choice); \ + if_bit_0(len_decoder.choice2) { \ + update_bit_0(len_decoder.choice2); \ + target = MATCH_MIN_LEN + LEN_LOW_SYMBOLS; \ + bittree_decode(target, len_decoder.mid[pos_state], \ + LEN_MID_BITS); \ + } else { \ + update_bit_1(len_decoder.choice2); \ + target = MATCH_MIN_LEN + LEN_LOW_SYMBOLS \ + + LEN_MID_SYMBOLS; \ + bittree_decode(target, len_decoder.high, \ + LEN_HIGH_BITS); \ + } \ + } \ +} while (0) + + +#define length_decode_dummy(target, len_decoder, pos_state) \ +do { \ + if_bit_0(len_decoder.choice) { \ + update_bit_0_dummy(); \ + target = MATCH_MIN_LEN; \ + bittree_decode_dummy(target, \ + len_decoder.low[pos_state], LEN_LOW_BITS); \ + } else { \ + update_bit_1_dummy(); \ + if_bit_0(len_decoder.choice2) { \ + update_bit_0_dummy(); \ + target = MATCH_MIN_LEN + LEN_LOW_SYMBOLS; \ + bittree_decode_dummy(target, \ + len_decoder.mid[pos_state], \ 
+ LEN_MID_BITS); \ + } else { \ + update_bit_1_dummy(); \ + target = MATCH_MIN_LEN + LEN_LOW_SYMBOLS \ + + LEN_MID_SYMBOLS; \ + bittree_decode_dummy(target, len_decoder.high, \ + LEN_HIGH_BITS); \ + } \ + } \ +} while (0) + + +typedef struct { + probability choice; + probability choice2; + probability low[POS_STATES_MAX][LEN_LOW_SYMBOLS]; + probability mid[POS_STATES_MAX][LEN_MID_SYMBOLS]; + probability high[LEN_HIGH_SYMBOLS]; +} lzma_length_decoder; + + +struct lzma_coder_s { + /// Data of the next coder, if any. + lzma_next_coder next; + + /// Sliding output window a.k.a. dictionary a.k.a. history buffer. + lzma_lz_decoder lz; + + // Range coder + lzma_range_decoder rc; + + // State + uint32_t state; + uint32_t rep0; ///< Distance of the latest match + uint32_t rep1; ///< Distance of second latest match + uint32_t rep2; ///< Distance of third latest match + uint32_t rep3; ///< Distance of fourth latest match + uint32_t pos_bits; + uint32_t pos_mask; + uint32_t now_pos; // Lowest 32-bits are enough here. + + lzma_literal_coder *literal_coder; + + /// If 1, it's a match. Otherwise it's a single 8-bit literal. + probability is_match[STATES][POS_STATES_MAX]; + + /// If 1, it's a repeated match. The distance is one of rep0 .. rep3. + probability is_rep[STATES]; + + /// If 0, distance of a repeated match is rep0. + /// Otherwise check is_rep1. + probability is_rep0[STATES]; + + /// If 0, distance of a repeated match is rep1. + /// Otherwise check is_rep2. + probability is_rep1[STATES]; + + /// If 0, distance of a repeated match is rep2. Otherwise it is rep3. + probability is_rep2[STATES]; + + /// If 1, the repeated match has length of one byte. Otherwise + /// the length is decoded from rep_match_len_decoder. 
+ probability is_rep0_long[STATES][POS_STATES_MAX]; + + probability pos_slot_decoder[LEN_TO_POS_STATES][1 << POS_SLOT_BITS]; + probability pos_decoders[FULL_DISTANCES - END_POS_MODEL_INDEX]; + probability pos_align_decoder[1 << ALIGN_BITS]; + + /// Length of a match + lzma_length_decoder len_decoder; + + /// Length of a repeated match. + lzma_length_decoder rep_match_len_decoder; + + /// The first five bytes of LZMA compressed data are treated + /// specially. Once they are read, this stays at zero. + size_t init_bytes_left; +}; + + +/// \brief Check if the next iteration of the decoder loop can be run. +/// +/// \note There must always be REQUIRED_IN_BUFFER_SIZE bytes +/// readable space! +/// +static bool lzma_attribute((pure)) +decode_dummy(const lzma_coder *restrict coder, + const uint8_t *restrict in, size_t in_pos_local, + const size_t in_size, uint32_t rc_range, uint32_t rc_code, + uint32_t state, uint32_t rep0, const uint32_t now_pos) +{ + uint32_t rc_bound; + + do { + const uint32_t pos_state = now_pos & coder->pos_mask; + + if_bit_0(coder->is_match[state][pos_state]) { + // It's a literal i.e. a single 8-bit byte. + + update_bit_0_dummy(); + + const probability *subcoder = literal_get_subcoder( + coder->literal_coder, + now_pos, lz_get_byte(coder->lz, 0)); + uint32_t symbol = 1; + + if (!is_char_state(state)) { + // Decode literal with match byte. + + assert(rep0 != UINT32_MAX); + uint32_t match_byte + = lz_get_byte(coder->lz, rep0); + + do { + match_byte <<= 1; + const uint32_t match_bit + = match_byte & 0x100; + const uint32_t subcoder_index = 0x100 + + match_bit + symbol; + + if_bit_0(subcoder[subcoder_index]) { + update_bit_0_dummy(); + symbol <<= 1; + if (match_bit != 0) + break; + } else { + update_bit_1_dummy(); + symbol = (symbol << 1) | 1; + if (match_bit == 0) + break; + } + } while (symbol < 0x100); + } + + // Decode literal without match byte. This is also + // the tail of the with-match-byte function. 
+ while (symbol < 0x100) { + if_bit_0(subcoder[symbol]) { + update_bit_0_dummy(); + symbol <<= 1; + } else { + update_bit_1_dummy(); + symbol = (symbol << 1) | 1; + } + } + + break; + } + + update_bit_1_dummy(); + uint32_t len; + + if_bit_0(coder->is_rep[state]) { + update_bit_0_dummy(); + length_decode_dummy(len, coder->len_decoder, pos_state); + update_match(state); + + const uint32_t len_to_pos_state + = get_len_to_pos_state(len); + uint32_t pos_slot = 0; + bittree_decode_dummy(pos_slot, coder->pos_slot_decoder[ + len_to_pos_state], POS_SLOT_BITS); + assert(pos_slot <= 63); + + if (pos_slot >= START_POS_MODEL_INDEX) { + uint32_t direct_bits = (pos_slot >> 1) - 1; + assert(direct_bits >= 1 && direct_bits <= 31); + rep0 = 2 | (pos_slot & 1); + + if (pos_slot < END_POS_MODEL_INDEX) { + assert(direct_bits <= 5); + rep0 <<= direct_bits; + assert(rep0 <= 96); + // -1 is fine, because + // bittree_reverse_decode() + // starts from table index [1] + // (not [0]). + assert((int32_t)(rep0 - pos_slot - 1) + >= -1); + assert((int32_t)(rep0 - pos_slot - 1) + <= 82); + // We add the result to rep0, so rep0 + // must not be part of second argument + // of the macro. 
+ const int32_t offset + = rep0 - pos_slot - 1; + bittree_reverse_decode_dummy( + coder->pos_decoders + offset, + direct_bits); + } else { + // Decode direct bits + assert(pos_slot >= 14); + assert(direct_bits >= 6); + direct_bits -= ALIGN_BITS; + assert(direct_bits >= 2); + do { + rc_normalize(); + rc_range >>= 1; + const uint32_t t + = (rc_code - rc_range) + >> 31; + rc_code -= rc_range & (t - 1); + } while (--direct_bits > 0); + rep0 <<= ALIGN_BITS; + + bittree_reverse_decode_dummy( + coder->pos_align_decoder, + ALIGN_BITS); + } + } + + } else { + update_bit_1_dummy(); + + if_bit_0(coder->is_rep0[state]) { + update_bit_0_dummy(); + + if_bit_0(coder->is_rep0_long[state][ + pos_state]) { + update_bit_0_dummy(); + break; + } else { + update_bit_1_dummy(); + } + + } else { + update_bit_1_dummy(); + + if_bit_0(coder->is_rep1[state]) { + update_bit_0_dummy(); + } else { + update_bit_1_dummy(); + + if_bit_0(coder->is_rep2[state]) { + update_bit_0_dummy(); + } else { + update_bit_1_dummy(); + } + } + } + + length_decode_dummy(len, coder->rep_match_len_decoder, + pos_state); + } + } while (0); + + rc_normalize(); + + // Validate the buffer position. + if (in_pos_local > in_size) + return false; + + return true; +} + + +static bool +decode_real(lzma_coder *restrict coder, const uint8_t *restrict in, + size_t *restrict in_pos, size_t in_size, bool has_safe_buffer) +{ + //////////////////// + // Initialization // + //////////////////// + + while (coder->init_bytes_left > 0) { + if (*in_pos == in_size) + return false; + + coder->rc.code = (coder->rc.code << 8) | in[*in_pos]; + ++*in_pos; + --coder->init_bytes_left; + } + + + /////////////// + // Variables // + /////////////// + + // Making local copies of often-used variables improves both + // speed and readability. 
+ + // Range decoder + rc_to_local(coder->rc); + + // State + uint32_t state = coder->state; + uint32_t rep0 = coder->rep0; + uint32_t rep1 = coder->rep1; + uint32_t rep2 = coder->rep2; + uint32_t rep3 = coder->rep3; + + // Misc + uint32_t now_pos = coder->now_pos; + + // Variables derived from decoder settings + const uint32_t pos_mask = coder->pos_mask; + + size_t in_pos_local = *in_pos; // Local copy + size_t in_limit; + if (in_size <= REQUIRED_IN_BUFFER_SIZE) + in_limit = 0; + else + in_limit = in_size - REQUIRED_IN_BUFFER_SIZE; + + + while (coder->lz.pos < coder->lz.limit && (in_pos_local < in_limit + || (has_safe_buffer && decode_dummy( + coder, in, in_pos_local, in_size, + rc_range, rc_code, state, rep0, now_pos)))) { + + ///////////////////// + // Actual decoding // + ///////////////////// + + const uint32_t pos_state = now_pos & pos_mask; + + if_bit_0(coder->is_match[state][pos_state]) { + update_bit_0(coder->is_match[state][pos_state]); + + // It's a literal i.e. a single 8-bit byte. + + probability *subcoder = literal_get_subcoder( + coder->literal_coder, + now_pos, lz_get_byte(coder->lz, 0)); + uint32_t symbol = 1; + + if (!is_char_state(state)) { + // Decode literal with match byte. + + assert(rep0 != UINT32_MAX); + uint32_t match_byte + = lz_get_byte(coder->lz, rep0); + + do { + match_byte <<= 1; + const uint32_t match_bit + = match_byte & 0x100; + const uint32_t subcoder_index = 0x100 + + match_bit + symbol; + + if_bit_0(subcoder[subcoder_index]) { + update_bit_0(subcoder[ + subcoder_index]); + symbol <<= 1; + if (match_bit != 0) + break; + } else { + update_bit_1(subcoder[ + subcoder_index]); + symbol = (symbol << 1) | 1; + if (match_bit == 0) + break; + } + } while (symbol < 0x100); + } + + // Decode literal without match byte. This is also + // the tail of the with-match-byte function. 
+ while (symbol < 0x100) { + if_bit_0(subcoder[symbol]) { + update_bit_0(subcoder[symbol]); + symbol <<= 1; + } else { + update_bit_1(subcoder[symbol]); + symbol = (symbol << 1) | 1; + } + } + + // Put the decoded byte to the dictionary, update the + // decoder state, and start a new decoding loop. + coder->lz.dict[coder->lz.pos++] = (uint8_t)(symbol); + ++now_pos; + update_char(state); + continue; + } + + // Instead of a new byte we are going to get a byte range + // (distance and length) which will be repeated from our + // output history. + + update_bit_1(coder->is_match[state][pos_state]); + uint32_t len; + + if_bit_0(coder->is_rep[state]) { + update_bit_0(coder->is_rep[state]); + + // Not a repeated match + // + // We will decode a new distance and store + // the value to rep0. + + // The latest three match distances are kept in + // memory in case there are repeated matches. + rep3 = rep2; + rep2 = rep1; + rep1 = rep0; + + // Decode the length of the match. + length_decode(len, coder->len_decoder, pos_state); + + update_match(state); + + const uint32_t len_to_pos_state + = get_len_to_pos_state(len); + uint32_t pos_slot = 0; + bittree_decode(pos_slot, coder->pos_slot_decoder[ + len_to_pos_state], POS_SLOT_BITS); + assert(pos_slot <= 63); + + if (pos_slot >= START_POS_MODEL_INDEX) { + uint32_t direct_bits = (pos_slot >> 1) - 1; + assert(direct_bits >= 1 && direct_bits <= 30); + rep0 = 2 | (pos_slot & 1); + + if (pos_slot < END_POS_MODEL_INDEX) { + assert(direct_bits <= 5); + rep0 <<= direct_bits; + assert(rep0 <= 96); + // -1 is fine, because + // bittree_reverse_decode() + // starts from table index [1] + // (not [0]). + assert((int32_t)(rep0 - pos_slot - 1) + >= -1); + assert((int32_t)(rep0 - pos_slot - 1) + <= 82); + // We add the result to rep0, so rep0 + // must not be part of second argument + // of the macro. 
+ const int32_t offset + = rep0 - pos_slot - 1; + bittree_reverse_decode(rep0, + coder->pos_decoders + offset, + direct_bits); + } else { + // Decode direct bits + assert(pos_slot >= 14); + assert(direct_bits >= 6); + direct_bits -= ALIGN_BITS; + assert(direct_bits >= 2); + do { + rc_normalize(); + rc_range >>= 1; + const uint32_t t + = (rc_code - rc_range) + >> 31; + rc_code -= rc_range & (t - 1); + rep0 = (rep0 << 1) | (1 - t); + } while (--direct_bits > 0); + rep0 <<= ALIGN_BITS; + + bittree_reverse_decode(rep0, + coder->pos_align_decoder, + ALIGN_BITS); + + if (rep0 == UINT32_MAX) { + // End of Payload Marker found. + coder->lz.eopm_detected = true; + break; + } + } + } else { + rep0 = pos_slot; + } + + } else { + update_bit_1(coder->is_rep[state]); + + // Repeated match + // + // The match distance is a value that we have had + // earlier. The latest four match distances are + // available as rep0, rep1, rep2 and rep3. We will + // now decode which of them is the new distance. + + if_bit_0(coder->is_rep0[state]) { + update_bit_0(coder->is_rep0[state]); + + // The distance is rep0. + + if_bit_0(coder->is_rep0_long[state][ + pos_state]) { + update_bit_0(coder->is_rep0_long[ + state][pos_state]); + + // Repeating exactly one byte. For + // simplicity, it is done here inline + // instead of at the end of the main + // loop. + + update_short_rep(state); + + // Security/sanity checks. See the end + // of the main loop for explanation + // of these. + if ((rep0 >= coder->lz.pos + && !coder->lz.is_full) + || in_pos_local + > in_size) + goto error; + + // Repeat one byte and start a new + // decoding loop. + coder->lz.dict[coder->lz.pos] + = lz_get_byte( + coder->lz, rep0); + ++coder->lz.pos; + ++now_pos; + continue; + + } else { + update_bit_1(coder->is_rep0_long[ + state][pos_state]); + + // Repeating more than one byte at + // distance of rep0. + } + + } else { + update_bit_1(coder->is_rep0[state]); + + // The distance is rep1, rep2 or rep3. 
Once + // we find out which one of these three, it + // is stored to rep0 and rep1, rep2 and rep3 + // are updated accordingly. + + uint32_t distance; + + if_bit_0(coder->is_rep1[state]) { + update_bit_0(coder->is_rep1[state]); + distance = rep1; + } else { + update_bit_1(coder->is_rep1[state]); + + if_bit_0(coder->is_rep2[state]) { + update_bit_0(coder->is_rep2[ + state]); + distance = rep2; + } else { + update_bit_1(coder->is_rep2[ + state]); + distance = rep3; + rep3 = rep2; + } + + rep2 = rep1; + } + + rep1 = rep0; + rep0 = distance; + } + + // Decode the length of the repeated match. + length_decode(len, coder->rep_match_len_decoder, + pos_state); + + update_rep(state); + } + + + ///////////////////////////////// + // Repeat from history buffer. // + ///////////////////////////////// + + // The length is always between these limits. There is no way + // to trigger the algorithm to set len outside this range. + assert(len >= MATCH_MIN_LEN); + assert(len <= MATCH_MAX_LEN); + + now_pos += len; + + // Validate the buffer position to avoid buffer overflows + // on corrupted input data. + if (in_pos_local > in_size) + goto error; + + // Repeat len bytes from distance of rep0. + if (!lzma_lz_out_repeat(&coder->lz, rep0, len)) + goto error; + } + + rc_normalize(); + + + ///////////////////////////////// + // Update the *data structure. 
// + ///////////////////////////////// + + // Range decoder + rc_from_local(coder->rc); + + // State + coder->state = state; + coder->rep0 = rep0; + coder->rep1 = rep1; + coder->rep2 = rep2; + coder->rep3 = rep3; + + // Misc + coder->now_pos = now_pos; + *in_pos = in_pos_local; + + return false; + +error: + return true; +} + + +static void +lzma_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_coder_end(&coder->next, allocator); + lzma_lz_decoder_end(&coder->lz, allocator); + lzma_literal_end(&coder->literal_coder, allocator); + lzma_free(coder, allocator); + return; +} + + +extern lzma_ret +lzma_lzma_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + // Validate pos_bits. Other options are validated by the + // respective initialization functions. + const lzma_options_lzma *options = filters[0].options; + if (options->pos_bits > LZMA_POS_BITS_MAX) + return LZMA_HEADER_ERROR; + + // Allocate memory for the decoder if needed. + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + // Initialize variables so that we know later that we don't + // have an existing decoder initialized. + next->coder->next = LZMA_NEXT_CODER_INIT; + next->coder->lz = LZMA_LZ_DECODER_INIT; + next->coder->literal_coder = NULL; + } + + // Store the pos_bits and calculate pos_mask. + next->coder->pos_bits = options->pos_bits; + next->coder->pos_mask = (1U << next->coder->pos_bits) - 1; + + // Allocate (if needed) and initialize the literal decoder. + { + const lzma_ret ret = lzma_literal_init( + &next->coder->literal_coder, allocator, + options->literal_context_bits, + options->literal_pos_bits); + if (ret != LZMA_OK) { + lzma_free(next->coder, allocator); + next->coder = NULL; + return ret; + } + } + + // Allocate and initialize the LZ decoder. 
+ { + const lzma_ret ret = lzma_lz_decoder_reset( + &next->coder->lz, allocator, &decode_real, + filters[0].uncompressed_size, + options->dictionary_size, MATCH_MAX_LEN); + if (ret != LZMA_OK) { + lzma_literal_end(&next->coder->literal_coder, + allocator); + lzma_free(next->coder, allocator); + next->coder = NULL; + return ret; + } + } + + // State + next->coder->state = 0; + next->coder->rep0 = 0; + next->coder->rep1 = 0; + next->coder->rep2 = 0; + next->coder->rep3 = 0; + next->coder->pos_bits = options->pos_bits; + next->coder->pos_mask = (1 << next->coder->pos_bits) - 1; + next->coder->now_pos = 0; + next->coder->init_bytes_left = 5; + + // Range decoder + rc_reset(next->coder->rc); + + // Bit and bittree decoders + for (uint32_t i = 0; i < STATES; ++i) { + for (uint32_t j = 0; j <= next->coder->pos_mask; ++j) { + bit_reset(next->coder->is_match[i][j]); + bit_reset(next->coder->is_rep0_long[i][j]); + } + + bit_reset(next->coder->is_rep[i]); + bit_reset(next->coder->is_rep0[i]); + bit_reset(next->coder->is_rep1[i]); + bit_reset(next->coder->is_rep2[i]); + } + + for (uint32_t i = 0; i < LEN_TO_POS_STATES; ++i) + bittree_reset(next->coder->pos_slot_decoder[i], POS_SLOT_BITS); + + for (uint32_t i = 0; i < FULL_DISTANCES - END_POS_MODEL_INDEX; ++i) + bit_reset(next->coder->pos_decoders[i]); + + bittree_reset(next->coder->pos_align_decoder, ALIGN_BITS); + + // Len decoders (also bit/bittree) + const uint32_t num_pos_states = 1 << next->coder->pos_bits; + bit_reset(next->coder->len_decoder.choice); + bit_reset(next->coder->len_decoder.choice2); + bit_reset(next->coder->rep_match_len_decoder.choice); + bit_reset(next->coder->rep_match_len_decoder.choice2); + + for (uint32_t pos_state = 0; pos_state < num_pos_states; ++pos_state) { + bittree_reset(next->coder->len_decoder.low[pos_state], + LEN_LOW_BITS); + bittree_reset(next->coder->len_decoder.mid[pos_state], + LEN_MID_BITS); + + bittree_reset(next->coder->rep_match_len_decoder.low[ + pos_state], LEN_LOW_BITS); + 
bittree_reset(next->coder->rep_match_len_decoder.mid[ + pos_state], LEN_MID_BITS); + } + + bittree_reset(next->coder->len_decoder.high, LEN_HIGH_BITS); + bittree_reset(next->coder->rep_match_len_decoder.high, LEN_HIGH_BITS); + + // Initialize the next decoder in the chain, if any. + { + const lzma_ret ret = lzma_next_filter_init(&next->coder->next, + allocator, filters + 1); + if (ret != LZMA_OK) { + lzma_decoder_end(next->coder, allocator); + return ret; + } + } + + // Initialization successful. Set the function pointers. + next->code = &lzma_lz_decode; + next->end = &lzma_decoder_end; + + return LZMA_OK; +} + + +extern bool +lzma_lzma_decode_properties(lzma_options_lzma *options, uint8_t byte) +{ + if (byte > (4 * 5 + 4) * 9 + 8) + return true; + + // See the file format specification to understand this. + options->pos_bits = byte / (9 * 5); + byte -= options->pos_bits * 9 * 5; + options->literal_pos_bits = byte / 9; + options->literal_context_bits = byte - options->literal_pos_bits * 9; + + return false; +} diff --git a/src/liblzma/lzma/lzma_decoder.h b/src/liblzma/lzma/lzma_decoder.h new file mode 100644 index 00000000..929c2bff --- /dev/null +++ b/src/liblzma/lzma/lzma_decoder.h @@ -0,0 +1,41 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma_decoder.h +/// \brief LZMA decoder API +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZMA_DECODER_H +#define LZMA_LZMA_DECODER_H + +#include "common.h" + + +/// \brief Allocates and initializes LZMA decoder +extern lzma_ret lzma_lzma_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +/// \brief Decodes the LZMA Properties byte (lc/lp/pb) +/// +/// \return true if error occurred, false on success +/// +extern bool lzma_lzma_decode_properties( + lzma_options_lzma *options, uint8_t byte); + +// There is no public lzma_lzma_decode() because lzma_lz_decode() works +// as a wrapper for it. + +#endif diff --git a/src/liblzma/lzma/lzma_encoder.c b/src/liblzma/lzma/lzma_encoder.c new file mode 100644 index 00000000..f9c1e3fe --- /dev/null +++ b/src/liblzma/lzma/lzma_encoder.c @@ -0,0 +1,413 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma_encoder.c +/// \brief LZMA encoder +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +// NOTE: If you want to keep the line length in 80 characters, set +// tab width to 4 or less in your editor when editing this file. + + +#include "lzma_encoder_private.h" + + +//////////// +// Macros // +//////////// + +// These are macros mostly because they use local range encoder variables.

/// \brief      Encodes one literal byte, most significant bit first
///
/// Walks the eight bits of `symbol` from bit 7 down to bit 0. Each bit is
/// handed to bit_encode() together with the slot `subcoder[context]`, where
/// `context` starts at 1 and accumulates the bits encoded so far.
///
/// \param      subcoder    Array (or pointer expression) of probability
///                         slots; it is parenthesized in the expansion, so
///                         an expression such as `base + offset` is safe.
/// \param      symbol      Byte to encode. Evaluated once per bit, so it
///                         must not have side effects.
///
/// bit_encode() is defined elsewhere and is expanded in the caller's scope.
#define literal_encode(subcoder, symbol) \
do { \
	uint32_t context = 1; \
	int i = 8; \
	do { \
		--i; \
		const uint32_t bit = ((symbol) >> i) & 1; \
		bit_encode((subcoder)[context], bit); \
		context = (context << 1) | bit; \
	} while (i != 0); \
} while (0)


/// \brief      Encodes one literal byte using a match byte as extra context
///
/// As long as the bits of `symbol` and `match_byte` agree, each bit is
/// encoded with the slot `0x100 + (match_bit << 8) + context`. After the
/// first differing bit has been encoded, the remaining bits are encoded
/// exactly like literal_encode() does, using `subcoder[context]`.
///
/// \param      subcoder    Array (or pointer expression) of probability
///                         slots; indexes up to 0x2FF may be used.
/// \param      match_byte  Byte the literal is compared against. Evaluated
///                         once per bit.
/// \param      symbol      Byte to encode. Evaluated once per bit.
#define literal_encode_matched(subcoder, match_byte, symbol) \
do { \
	uint32_t context = 1; \
	int i = 8; \
	do { \
		--i; \
		uint32_t bit = ((symbol) >> i) & 1; \
		const uint32_t match_bit = ((match_byte) >> i) & 1; \
		const uint32_t subcoder_index \
				= 0x100 + (match_bit << 8) + context; \
		bit_encode((subcoder)[subcoder_index], bit); \
		context = (context << 1) | bit; \
		if (match_bit != bit) { \
			/* The match byte stopped predicting the literal: */ \
			/* finish the remaining bits without it. */ \
			while (i != 0) { \
				--i; \
				bit = ((symbol) >> i) & 1; \
				bit_encode((subcoder)[context], bit); \
				context = (context << 1) | bit; \
			} \
			break; \
		} \
	} while (i != 0); \
} while (0)


/// \brief      Encodes a match length symbol
///
/// Chooses the low, mid or high bittree depending on which range `symbol`
/// falls in, and signals the choice with the `choice` / `choice2` bits.
/// When `update_price` is nonzero, the per-pos_state counter is decremented
/// and lzma_length_encoder_update_table() is called when it reaches zero.
///
/// \param      length_encoder  Length encoder structure (an lvalue, not a
///                             pointer); its address is taken.
/// \param      symbol          Length symbol. Evaluated more than once.
/// \param      pos_state       Index into the per-position-state tables.
/// \param      update_price    Nonzero to maintain the price counters.
#define length_encode(length_encoder, symbol, pos_state, update_price) \
do { \
	if ((symbol) < LEN_LOW_SYMBOLS) { \
		bit_encode_0((length_encoder).choice); \
		bittree_encode((length_encoder).low[pos_state], \
				LEN_LOW_BITS, (symbol)); \
	} else { \
		bit_encode_1((length_encoder).choice); \
		if ((symbol) < LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS) { \
			bit_encode_0((length_encoder).choice2); \
			bittree_encode((length_encoder).mid[pos_state], \
					LEN_MID_BITS, \
					(symbol) - LEN_LOW_SYMBOLS); \
		} else { \
			bit_encode_1((length_encoder).choice2); \
			bittree_encode((length_encoder).high, LEN_HIGH_BITS, \
					(symbol) - LEN_LOW_SYMBOLS \
					- LEN_MID_SYMBOLS); \
		} \
	} \
	if (update_price) { \
		if (--(length_encoder).counters[pos_state] == 0) { \
			lzma_length_encoder_update_table( \
					&(length_encoder), pos_state); \
		} \
	} \
} while (0)


///////////////
// Functions //
///////////////

/// \brief      Updates price table of the length encoder
///
/// Like all the other prices in LZMA, these are used by lzma_get_optimum().
+/// +extern void +lzma_length_encoder_update_table(lzma_length_encoder *lencoder, + const uint32_t pos_state) +{ + const uint32_t num_symbols = lencoder->table_size; + const uint32_t a0 = bit_get_price_0(lencoder->choice); + const uint32_t a1 = bit_get_price_1(lencoder->choice); + const uint32_t b0 = a1 + bit_get_price_0(lencoder->choice2); + const uint32_t b1 = a1 + bit_get_price_1(lencoder->choice2); + + uint32_t *prices = lencoder->prices[pos_state]; + uint32_t i = 0; + + for (i = 0; i < num_symbols && i < LEN_LOW_SYMBOLS; ++i) { + prices[i] = a0; + bittree_get_price(prices[i], lencoder->low[pos_state], + LEN_LOW_BITS, i); + } + + for (; i < num_symbols && i < LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; ++i) { + prices[i] = b0; + bittree_get_price(prices[i], lencoder->mid[pos_state], + LEN_MID_BITS, i - LEN_LOW_SYMBOLS); + } + + for (; i < num_symbols; ++i) { + prices[i] = b1; + bittree_get_price(prices[i], lencoder->high, LEN_HIGH_BITS, + i - LEN_LOW_SYMBOLS - LEN_MID_SYMBOLS); + } + + lencoder->counters[pos_state] = num_symbols; + + return; +} + + +/** + * \brief LZMA encoder + * + * \return true if end of stream was reached, false otherwise. + */ +extern bool +lzma_lzma_encode(lzma_coder *coder, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size) +{ + // Flush the range encoder's temporary buffer to out[]. + // Return immediatelly if not everything could be flushed. + if (rc_flush_buffer(&coder->rc, out, out_pos, out_size)) + return false; + + // Return immediatelly if we have already finished our work. + if (coder->lz.stream_end_was_reached + && coder->is_initialized + && coder->lz.read_pos == coder->lz.write_pos + && coder->additional_offset == 0) + return true; + + // Local copies + rc_to_local(coder->rc); + size_t out_pos_local = *out_pos; + const uint32_t pos_mask = coder->pos_mask; + const bool best_compression = coder->best_compression; + + // Initialize the stream if no data has been encoded yet. 
+ if (!coder->is_initialized) { + if (coder->lz.read_pos == coder->lz.read_limit) { + // Cannot initialize, because there is no input data. + if (!coder->lz.stream_end_was_reached) + return false; + + // If we get here, we are encoding an empty file. + // Initialization is skipped completely. + assert(coder->lz.write_pos == coder->lz.read_pos); + + } else { + // Do the actual initialization. + uint32_t len; + uint32_t num_distance_pairs; + lzma_read_match_distances(coder, &len, &num_distance_pairs); + + bit_encode_0(coder->is_match[coder->state][0]); + update_char(coder->state); + + const uint8_t cur_byte = coder->lz.buffer[ + coder->lz.read_pos - coder->additional_offset]; + probability *subcoder = literal_get_subcoder(coder->literal_coder, + coder->now_pos, coder->previous_byte); + literal_encode(subcoder, cur_byte); + + coder->previous_byte = cur_byte; + --coder->additional_offset; + ++coder->now_pos; + + assert(coder->additional_offset == 0); + } + + // Initialization is done (except if empty file). + coder->is_initialized = true; + } + + // Encoding loop + while (true) { + // Check that there is free output space. + if (out_pos_local == out_size) + break; + + assert(rc_buffer_size == 0); + + // Check that there is some input to process. + if (coder->lz.read_pos >= coder->lz.read_limit) { + // If end of input has been reached, we must keep + // encoding until additional_offset becomes zero. + if (!coder->lz.stream_end_was_reached + || coder->additional_offset == 0) + break; + } + + assert(coder->lz.read_pos <= coder->lz.write_pos); + +#ifndef NDEBUG + if (coder->lz.stream_end_was_reached) { + assert(coder->lz.read_limit == coder->lz.write_pos); + } else { + assert(coder->lz.read_limit + coder->lz.keep_size_after + == coder->lz.write_pos); + } +#endif + + const uint32_t pos_state = coder->now_pos & pos_mask; + + uint32_t pos; + uint32_t len; + + // Get optimal match (repeat position and length). 
+ // Value ranges for pos: + // - [0, REP_DISTANCES): repeated match + // - [REP_DISTANCES, UINT32_MAX): match at (pos - REP_DISTANCES) + // - UINT32_MAX: not a match but a literal + // Value ranges for len: + // - [MATCH_MIN_LEN, MATCH_MAX_LEN] + if (best_compression) + lzma_get_optimum(coder, &pos, &len); + else + lzma_get_optimum_fast(coder, &pos, &len); + + if (len == 1 && pos == UINT32_MAX) { + // It's a literal. + bit_encode_0(coder->is_match[coder->state][pos_state]); + + const uint8_t cur_byte = coder->lz.buffer[ + coder->lz.read_pos - coder->additional_offset]; + probability *subcoder = literal_get_subcoder(coder->literal_coder, + coder->now_pos, coder->previous_byte); + + if (is_char_state(coder->state)) { + literal_encode(subcoder, cur_byte); + } else { + const uint8_t match_byte = coder->lz.buffer[ + coder->lz.read_pos + - coder->rep_distances[0] - 1 + - coder->additional_offset]; + literal_encode_matched(subcoder, match_byte, cur_byte); + } + + update_char(coder->state); + coder->previous_byte = cur_byte; + + } else { + // It's a match. + bit_encode_1(coder->is_match[coder->state][pos_state]); + + if (pos < REP_DISTANCES) { + // It's a repeated match i.e. the same distance + // has been used earlier. + bit_encode_1(coder->is_rep[coder->state]); + + if (pos == 0) { + bit_encode_0(coder->is_rep0[coder->state]); + const uint32_t symbol = (len == 1) ? 
0 : 1; + bit_encode(coder->is_rep0_long[coder->state][pos_state], + symbol); + } else { + const uint32_t distance = coder->rep_distances[pos]; + bit_encode_1(coder->is_rep0[coder->state]); + + if (pos == 1) { + bit_encode_0(coder->is_rep1[coder->state]); + } else { + bit_encode_1(coder->is_rep1[coder->state]); + bit_encode(coder->is_rep2[coder->state], pos - 2); + + if (pos == 3) + coder->rep_distances[3] = coder->rep_distances[2]; + + coder->rep_distances[2] = coder->rep_distances[1]; + } + + coder->rep_distances[1] = coder->rep_distances[0]; + coder->rep_distances[0] = distance; + } + + if (len == 1) { + update_short_rep(coder->state); + } else { + length_encode(coder->rep_match_len_encoder, + len - MATCH_MIN_LEN, pos_state, + best_compression); + update_rep(coder->state); + } + + } else { + bit_encode_0(coder->is_rep[coder->state]); + update_match(coder->state); + length_encode(coder->len_encoder, len - MATCH_MIN_LEN, + pos_state, best_compression); + pos -= REP_DISTANCES; + + const uint32_t pos_slot = get_pos_slot(pos); + const uint32_t len_to_pos_state = get_len_to_pos_state(len); + bittree_encode(coder->pos_slot_encoder[len_to_pos_state], + POS_SLOT_BITS, pos_slot); + + if (pos_slot >= START_POS_MODEL_INDEX) { + const uint32_t footer_bits = (pos_slot >> 1) - 1; + const uint32_t base = (2 | (pos_slot & 1)) << footer_bits; + const uint32_t pos_reduced = pos - base; + + if (pos_slot < END_POS_MODEL_INDEX) { + bittree_reverse_encode( + coder->pos_encoders + base - pos_slot - 1, + footer_bits, pos_reduced); + } else { + rc_encode_direct_bits(pos_reduced >> ALIGN_BITS, + footer_bits - ALIGN_BITS); + bittree_reverse_encode(coder->pos_align_encoder, + ALIGN_BITS, pos_reduced & ALIGN_MASK); + ++coder->align_price_count; + } + } + + coder->rep_distances[3] = coder->rep_distances[2]; + coder->rep_distances[2] = coder->rep_distances[1]; + coder->rep_distances[1] = coder->rep_distances[0]; + coder->rep_distances[0] = pos; + ++coder->match_price_count; + } + + 
coder->previous_byte = coder->lz.buffer[ + coder->lz.read_pos + len - 1 + - coder->additional_offset]; + } + + assert(coder->additional_offset >= len); + coder->additional_offset -= len; + coder->now_pos += len; + } + + // Check if everything is done. + bool all_done = false; + if (coder->lz.stream_end_was_reached + && coder->lz.read_pos == coder->lz.write_pos + && coder->additional_offset == 0) { + // Write end of stream marker. It is encoded as a match with + // distance of UINT32_MAX. Match length is needed but it is + // ignored by the decoder. + if (coder->lz.uncompressed_size == LZMA_VLI_VALUE_UNKNOWN) { + const uint32_t pos_state = coder->now_pos & pos_mask; + bit_encode_1(coder->is_match[coder->state][pos_state]); + bit_encode_0(coder->is_rep[coder->state]); + update_match(coder->state); + + const uint32_t len = MATCH_MIN_LEN; // MATCH_MAX_LEN; + length_encode(coder->len_encoder, len - MATCH_MIN_LEN, + pos_state, best_compression); + + const uint32_t pos_slot = (1 << POS_SLOT_BITS) - 1; + const uint32_t len_to_pos_state = get_len_to_pos_state(len); + bittree_encode(coder->pos_slot_encoder[len_to_pos_state], + POS_SLOT_BITS, pos_slot); + + const uint32_t footer_bits = 30; + const uint32_t pos_reduced + = (UINT32_C(1) << footer_bits) - 1; + rc_encode_direct_bits(pos_reduced >> ALIGN_BITS, + footer_bits - ALIGN_BITS); + + bittree_reverse_encode(coder->pos_align_encoder, ALIGN_BITS, + pos_reduced & ALIGN_MASK); + } + + // Flush the last bytes of compressed data from + // the range coder to the output buffer. + rc_flush(); + + // All done. Note that some output bytes might be + // pending in coder->buffer. lzma_encode() will + // take care of those bytes. + if (rc_buffer_size == 0) + all_done = true; + } + + // Store local variables back to *coder. 
+ rc_from_local(coder->rc); + *out_pos = out_pos_local; + + return all_done; +} diff --git a/src/liblzma/lzma/lzma_encoder.h b/src/liblzma/lzma/lzma_encoder.h new file mode 100644 index 00000000..1c57f80a --- /dev/null +++ b/src/liblzma/lzma/lzma_encoder.h @@ -0,0 +1,35 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma_encoder.h +/// \brief LZMA method handler API +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZMA_ENCODER_H +#define LZMA_LZMA_ENCODER_H + +#include "common.h" + +extern lzma_ret lzma_lzma_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +extern bool lzma_lzma_encode_properties( + const lzma_options_lzma *options, uint8_t *byte); + +/// Initializes the lzma_fastpos[] array. 
+extern void lzma_fastpos_init(void); + +#endif diff --git a/src/liblzma/lzma/lzma_encoder_features.c b/src/liblzma/lzma/lzma_encoder_features.c new file mode 100644 index 00000000..56e59c6a --- /dev/null +++ b/src/liblzma/lzma/lzma_encoder_features.c @@ -0,0 +1,59 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma_encoder_features.c +/// \brief Information about features enabled at compile time +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + + +static lzma_mode modes[] = { + LZMA_MODE_FAST, + LZMA_MODE_BEST, + LZMA_MODE_INVALID +}; + + +LZMA_API const lzma_mode *const lzma_available_modes = modes; + + +static lzma_match_finder match_finders[] = { +#ifdef HAVE_MF_HC3 + LZMA_MF_HC3, +#endif + +#ifdef HAVE_MF_HC4 + LZMA_MF_HC4, +#endif + +#ifdef HAVE_MF_BT2 + LZMA_MF_BT2, +#endif + +#ifdef HAVE_MF_BT3 + LZMA_MF_BT3, +#endif + +#ifdef HAVE_MF_BT4 + LZMA_MF_BT4, +#endif + + LZMA_MF_INVALID +}; + + +LZMA_API const lzma_match_finder *const lzma_available_match_finders + = match_finders; diff --git a/src/liblzma/lzma/lzma_encoder_getoptimum.c b/src/liblzma/lzma/lzma_encoder_getoptimum.c new file mode 100644 index 00000000..cdeb3145 --- /dev/null +++ b/src/liblzma/lzma/lzma_encoder_getoptimum.c @@ -0,0 +1,893 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma_encoder_getoptimum.c +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +// NOTE: If you want to keep the line length in 80 characters, set +// tab width to 4 or less in your editor when editing this file. + + +// "Would you love the monster code? +// Could you understand beauty of the beast?" +// --Adapted from Lordi's "Would you love a monster man". 
+ + +#include "lzma_encoder_private.h" + + +#define length_get_price(length_encoder, symbol, pos_state) \ + (length_encoder).prices[pos_state][symbol] + + +#define get_rep_len_1_price(state, pos_state) \ + bit_get_price_0(coder->is_rep0[state]) \ + + bit_get_price_0(coder->is_rep0_long[state][pos_state]) + + +// Adds to price_target. +#define get_pure_rep_price(price_target, rep_index, state, pos_state) \ +do { \ + if ((rep_index) == 0) { \ + price_target += bit_get_price_0(coder->is_rep0[state]); \ + price_target += bit_get_price_1( \ + coder->is_rep0_long[state][pos_state]); \ + } else { \ + price_target += bit_get_price_1(coder->is_rep0[state]); \ + if ((rep_index) == 1) { \ + price_target += bit_get_price_0(coder->is_rep1[state]); \ + } else { \ + price_target += bit_get_price_1(coder->is_rep1[state]); \ + price_target += bit_get_price( \ + coder->is_rep2[state], (rep_index) - 2); \ + } \ + } \ +} while (0) + + +// Adds to price_target. +#define get_rep_price(price_target, rep_index, len, state, pos_state) \ +do { \ + get_pure_rep_price(price_target, rep_index, state, pos_state); \ + price_target += length_get_price(coder->rep_match_len_encoder, \ + (len) - MATCH_MIN_LEN, pos_state); \ +} while (0) + + +// Adds to price_target. 
+#define get_pos_len_price(price_target, pos, len, pos_state) \ +do { \ + const uint32_t len_to_pos_state_tmp = get_len_to_pos_state(len); \ + if ((pos) < FULL_DISTANCES) { \ + price_target += distances_prices[len_to_pos_state_tmp][pos]; \ + } else { \ + price_target \ + += pos_slot_prices[len_to_pos_state_tmp][get_pos_slot_2(pos)] \ + + align_prices[(pos) & ALIGN_MASK]; \ + } \ + price_target += length_get_price( \ + coder->len_encoder, (len) - MATCH_MIN_LEN, pos_state); \ +} while (0) + + +// Three macros to manipulate lzma_optimal structures: +#define make_as_char(opt) \ +do { \ + (opt).back_prev = UINT32_MAX; \ + (opt).prev_1_is_char = false; \ +} while (0) + + +#define make_as_short_rep(opt) \ +do { \ + (opt).back_prev = 0; \ + (opt).prev_1_is_char = false; \ +} while (0) + + +#define is_short_rep(opt) \ + ((opt).back_prev == 0) + + +static void +fill_distances_prices(lzma_coder *coder) +{ + uint32_t temp_prices[FULL_DISTANCES]; + + for (uint32_t i = START_POS_MODEL_INDEX; i < FULL_DISTANCES; ++i) { + const uint32_t pos_slot = get_pos_slot(i); + const uint32_t footer_bits = ((pos_slot >> 1) - 1); + const uint32_t base = (2 | (pos_slot & 1)) << footer_bits; + temp_prices[i] = 0; + bittree_reverse_get_price(temp_prices[i], + coder->pos_encoders + base - pos_slot - 1, + footer_bits, i - base); + } + + const uint32_t dist_table_size = coder->dist_table_size; + + for (uint32_t len_to_pos_state = 0; + len_to_pos_state < LEN_TO_POS_STATES; + ++len_to_pos_state) { + + const probability *encoder = coder->pos_slot_encoder[len_to_pos_state]; + uint32_t *pos_slot_prices = coder->pos_slot_prices[len_to_pos_state]; + + for (uint32_t pos_slot = 0; + pos_slot < dist_table_size; + ++pos_slot) { + pos_slot_prices[pos_slot] = 0; + bittree_get_price(pos_slot_prices[pos_slot], encoder, + POS_SLOT_BITS, pos_slot); + } + + for (uint32_t pos_slot = END_POS_MODEL_INDEX; + pos_slot < dist_table_size; + ++pos_slot) + pos_slot_prices[pos_slot] += (((pos_slot >> 1) - 1) + - ALIGN_BITS) << 
BIT_PRICE_SHIFT_BITS; + + + uint32_t *distances_prices + = coder->distances_prices[len_to_pos_state]; + + uint32_t i; + for (i = 0; i < START_POS_MODEL_INDEX; ++i) + distances_prices[i] = pos_slot_prices[i]; + + for (; i < FULL_DISTANCES; ++i) + distances_prices[i] = pos_slot_prices[get_pos_slot(i)] + + temp_prices[i]; + } + + coder->match_price_count = 0; + + return; +} + + +static void +fill_align_prices(lzma_coder *coder) +{ + for (uint32_t i = 0; i < ALIGN_TABLE_SIZE; ++i) { + uint32_t tmp = 0; + bittree_reverse_get_price(tmp, coder->pos_align_encoder, + ALIGN_BITS, i); + coder->align_prices[i] = tmp; + } + + coder->align_price_count = 0; +} + + +// The first argument is a pointer returned by literal_get_subcoder(). +static uint32_t +literal_get_price(const probability *encoders, const bool match_mode, + const uint8_t match_byte, const uint8_t symbol) +{ + uint32_t price = 0; + uint32_t context = 1; + int i = 8; + + if (match_mode) { + do { + --i; + const uint32_t match_bit = (match_byte >> i) & 1; + const uint32_t bit = (symbol >> i) & 1; + const uint32_t subcoder_index + = 0x100 + (match_bit << 8) + context; + + price += bit_get_price(encoders[subcoder_index], bit); + context = (context << 1) | bit; + + if (match_bit != bit) + break; + + } while (i != 0); + } + + while (i != 0) { + --i; + const uint32_t bit = (symbol >> i) & 1; + price += bit_get_price(encoders[context], bit); + context = (context << 1) | bit; + } + + return price; +} + + +static void +backward(lzma_coder *restrict coder, uint32_t *restrict len_res, + uint32_t *restrict back_res, uint32_t cur) +{ + coder->optimum_end_index = cur; + + uint32_t pos_mem = coder->optimum[cur].pos_prev; + uint32_t back_mem = coder->optimum[cur].back_prev; + + do { + if (coder->optimum[cur].prev_1_is_char) { + make_as_char(coder->optimum[pos_mem]); + coder->optimum[pos_mem].pos_prev = pos_mem - 1; + + if (coder->optimum[cur].prev_2) { + coder->optimum[pos_mem - 1].prev_1_is_char = false; + coder->optimum[pos_mem - 
1].pos_prev + = coder->optimum[cur].pos_prev_2; + coder->optimum[pos_mem - 1].back_prev + = coder->optimum[cur].back_prev_2; + } + } + + uint32_t pos_prev = pos_mem; + uint32_t back_cur = back_mem; + + back_mem = coder->optimum[pos_prev].back_prev; + pos_mem = coder->optimum[pos_prev].pos_prev; + + coder->optimum[pos_prev].back_prev = back_cur; + coder->optimum[pos_prev].pos_prev = cur; + cur = pos_prev; + + } while (cur != 0); + + coder->optimum_current_index = coder->optimum[0].pos_prev; + *len_res = coder->optimum[0].pos_prev; + *back_res = coder->optimum[0].back_prev; + + return; +} + + +extern void +lzma_get_optimum(lzma_coder *restrict coder, + uint32_t *restrict back_res, uint32_t *restrict len_res) +{ + // Update the price tables. In the C++ LZMA SDK 4.42 this was done in both + // initialization function and in the main loop. In liblzma they were + // moved into this single place. + if (coder->additional_offset == 0) { + if (coder->match_price_count >= (1 << 7)) + fill_distances_prices(coder); + + if (coder->align_price_count >= ALIGN_TABLE_SIZE) + fill_align_prices(coder); + } + + + if (coder->optimum_end_index != coder->optimum_current_index) { + *len_res = coder->optimum[coder->optimum_current_index].pos_prev + - coder->optimum_current_index; + *back_res = coder->optimum[coder->optimum_current_index].back_prev; + coder->optimum_current_index = coder->optimum[ + coder->optimum_current_index].pos_prev; + return; + } + + coder->optimum_current_index = 0; + coder->optimum_end_index = 0; + + + const uint32_t fast_bytes = coder->fast_bytes; + uint32_t *match_distances = coder->match_distances; + + uint32_t len_main; + uint32_t num_distance_pairs; + + if (!coder->longest_match_was_found) { + lzma_read_match_distances(coder, &len_main, &num_distance_pairs); + } else { + len_main = coder->longest_match_length; + num_distance_pairs = coder->num_distance_pairs; + coder->longest_match_was_found = false; + } + + + const uint8_t *buf = coder->lz.buffer + 
coder->lz.read_pos - 1; + uint32_t num_available_bytes + = coder->lz.write_pos - coder->lz.read_pos + 1; + if (num_available_bytes < 2) { + *back_res = UINT32_MAX; + *len_res = 1; + return; + } + + if (num_available_bytes > MATCH_MAX_LEN) + num_available_bytes = MATCH_MAX_LEN; + + + uint32_t reps[REP_DISTANCES]; + uint32_t rep_lens[REP_DISTANCES]; + uint32_t rep_max_index = 0; + + for (uint32_t i = 0; i < REP_DISTANCES; ++i) { + reps[i] = coder->rep_distances[i]; + const uint32_t back_offset = reps[i] + 1; + + if (buf[0] != *(buf - back_offset) + || buf[1] != *(buf + 1 - back_offset)) { + rep_lens[i] = 0; + continue; + } + + uint32_t len_test; + for (len_test = 2; len_test < num_available_bytes + && buf[len_test] == *(buf + len_test - back_offset); + ++len_test) ; + + rep_lens[i] = len_test; + if (len_test > rep_lens[rep_max_index]) + rep_max_index = i; + } + + if (rep_lens[rep_max_index] >= fast_bytes) { + *back_res = rep_max_index; + *len_res = rep_lens[rep_max_index]; + move_pos(*len_res - 1); + return; + } + + + if (len_main >= fast_bytes) { + *back_res = match_distances[num_distance_pairs] + REP_DISTANCES; + *len_res = len_main; + move_pos(len_main - 1); + return; + } + + uint8_t current_byte = *buf; + uint8_t match_byte = *(buf - reps[0] - 1); + + if (len_main < 2 && current_byte != match_byte + && rep_lens[rep_max_index] < 2) { + *back_res = UINT32_MAX; + *len_res = 1; + return; + } + + const uint32_t pos_mask = coder->pos_mask; + + coder->optimum[0].state = coder->state; + + uint32_t position = coder->now_pos; + uint32_t pos_state = (position & pos_mask); + + coder->optimum[1].price = bit_get_price_0( + coder->is_match[coder->state][pos_state]) + + literal_get_price( + literal_get_subcoder(coder->literal_coder, + position, coder->previous_byte), + !is_char_state(coder->state), match_byte, current_byte); + + make_as_char(coder->optimum[1]); + + uint32_t match_price + = bit_get_price_1(coder->is_match[coder->state][pos_state]); + uint32_t rep_match_price + = 
match_price + bit_get_price_1(coder->is_rep[coder->state]); + + + if (match_byte == current_byte) { + const uint32_t short_rep_price = rep_match_price + + get_rep_len_1_price(coder->state, pos_state); + + if (short_rep_price < coder->optimum[1].price) { + coder->optimum[1].price = short_rep_price; + make_as_short_rep(coder->optimum[1]); + } + } + + uint32_t len_end = (len_main >= rep_lens[rep_max_index]) + ? len_main + : rep_lens[rep_max_index]; + + if (len_end < 2) { + *back_res = coder->optimum[1].back_prev; + *len_res = 1; + return; + } + + coder->optimum[1].pos_prev = 0; + + for (uint32_t i = 0; i < REP_DISTANCES; ++i) + coder->optimum[0].backs[i] = reps[i]; + + uint32_t len = len_end; + do { + coder->optimum[len].price = INFINITY_PRICE; + } while (--len >= 2); + + + uint32_t (*distances_prices)[FULL_DISTANCES] = coder->distances_prices; + uint32_t (*pos_slot_prices)[DIST_TABLE_SIZE_MAX] = coder->pos_slot_prices; + uint32_t *align_prices = coder->align_prices; + + for (uint32_t i = 0; i < REP_DISTANCES; ++i) { + uint32_t rep_len = rep_lens[i]; + if (rep_len < 2) + continue; + + uint32_t price = rep_match_price; + get_pure_rep_price(price, i, coder->state, pos_state); + + do { + const uint32_t cur_and_len_price = price + + length_get_price( + coder->rep_match_len_encoder, + rep_len - 2, pos_state); + + if (cur_and_len_price < coder->optimum[rep_len].price) { + coder->optimum[rep_len].price = cur_and_len_price; + coder->optimum[rep_len].pos_prev = 0; + coder->optimum[rep_len].back_prev = i; + coder->optimum[rep_len].prev_1_is_char = false; + } + } while (--rep_len >= 2); + } + + + uint32_t normal_match_price = match_price + + bit_get_price_0(coder->is_rep[coder->state]); + + len = (rep_lens[0] >= 2) ? 
rep_lens[0] + 1 : 2; + + if (len <= len_main) { + uint32_t offs = 0; + + while (len > match_distances[offs + 1]) + offs += 2; + + for(; ; ++len) { + const uint32_t distance = match_distances[offs + 2]; + uint32_t cur_and_len_price = normal_match_price; + get_pos_len_price(cur_and_len_price, distance, len, pos_state); + + if (cur_and_len_price < coder->optimum[len].price) { + coder->optimum[len].price = cur_and_len_price; + coder->optimum[len].pos_prev = 0; + coder->optimum[len].back_prev = distance + REP_DISTANCES; + coder->optimum[len].prev_1_is_char = false; + } + + if (len == match_distances[offs + 1]) { + offs += 2; + if (offs == num_distance_pairs) + break; + } + } + } + + + ////////////////// + // Big loop ;-) // + ////////////////// + + uint32_t cur = 0; + + // The rest of this function is a huge while-loop. To avoid extreme + // indentation, the indentation level is not increased here. + while (true) { + + ++cur; + + assert(cur < OPTS); + + if (cur == len_end) { + backward(coder, len_res, back_res, cur); + return; + } + + uint32_t new_len; + + lzma_read_match_distances(coder, &new_len, &num_distance_pairs); + + if (new_len >= fast_bytes) { + coder->num_distance_pairs = num_distance_pairs; + coder->longest_match_length = new_len; + coder->longest_match_was_found = true; + backward(coder, len_res, back_res, cur); + return; + } + + + ++position; + + uint32_t pos_prev = coder->optimum[cur].pos_prev; + uint32_t state; + + if (coder->optimum[cur].prev_1_is_char) { + --pos_prev; + + if (coder->optimum[cur].prev_2) { + state = coder->optimum[coder->optimum[cur].pos_prev_2].state; + + if (coder->optimum[cur].back_prev_2 < REP_DISTANCES) + update_rep(state); + else + update_match(state); + + } else { + state = coder->optimum[pos_prev].state; + } + + update_char(state); + + } else { + state = coder->optimum[pos_prev].state; + } + + if (pos_prev == cur - 1) { + if (is_short_rep(coder->optimum[cur])) + update_short_rep(state); + else + update_char(state); + } else { + 
uint32_t pos; + if (coder->optimum[cur].prev_1_is_char && coder->optimum[cur].prev_2) { + pos_prev = coder->optimum[cur].pos_prev_2; + pos = coder->optimum[cur].back_prev_2; + update_rep(state); + } else { + pos = coder->optimum[cur].back_prev; + if (pos < REP_DISTANCES) + update_rep(state); + else + update_match(state); + } + + if (pos < REP_DISTANCES) { + reps[0] = coder->optimum[pos_prev].backs[pos]; + + uint32_t i; + for (i = 1; i <= pos; ++i) + reps[i] = coder->optimum[pos_prev].backs[i - 1]; + + for (; i < REP_DISTANCES; ++i) + reps[i] = coder->optimum[pos_prev].backs[i]; + + } else { + reps[0] = pos - REP_DISTANCES; + + for (uint32_t i = 1; i < REP_DISTANCES; ++i) + reps[i] = coder->optimum[pos_prev].backs[i - 1]; + } + } + + coder->optimum[cur].state = state; + + for (uint32_t i = 0; i < REP_DISTANCES; ++i) + coder->optimum[cur].backs[i] = reps[i]; + + const uint32_t cur_price = coder->optimum[cur].price; + + buf = coder->lz.buffer + coder->lz.read_pos - 1; + current_byte = *buf; + match_byte = *(buf - reps[0] - 1); + + pos_state = position & pos_mask; + + const uint32_t cur_and_1_price = cur_price + + bit_get_price_0(coder->is_match[state][pos_state]) + + literal_get_price( + literal_get_subcoder(coder->literal_coder, + position, buf[-1]), + !is_char_state(state), match_byte, current_byte); + + bool next_is_char = false; + + if (cur_and_1_price < coder->optimum[cur + 1].price) { + coder->optimum[cur + 1].price = cur_and_1_price; + coder->optimum[cur + 1].pos_prev = cur; + make_as_char(coder->optimum[cur + 1]); + next_is_char = true; + } + + match_price = cur_price + + bit_get_price_1(coder->is_match[state][pos_state]); + rep_match_price = match_price + + bit_get_price_1(coder->is_rep[state]); + + if (match_byte == current_byte + && !(coder->optimum[cur + 1].pos_prev < cur + && coder->optimum[cur + 1].back_prev == 0)) { + + const uint32_t short_rep_price = rep_match_price + + get_rep_len_1_price(state, pos_state); + + if (short_rep_price <= 
coder->optimum[cur + 1].price) { + coder->optimum[cur + 1].price = short_rep_price; + coder->optimum[cur + 1].pos_prev = cur; + make_as_short_rep(coder->optimum[cur + 1]); + next_is_char = true; + } + } + + uint32_t num_available_bytes_full + = coder->lz.write_pos - coder->lz.read_pos + 1; + num_available_bytes_full = MIN(OPTS - 1 - cur, num_available_bytes_full); + num_available_bytes = num_available_bytes_full; + + if (num_available_bytes < 2) + continue; + + if (num_available_bytes > fast_bytes) + num_available_bytes = fast_bytes; + + if (!next_is_char && match_byte != current_byte) { // speed optimization + // try literal + rep0 + const uint32_t back_offset = reps[0] + 1; + const uint32_t limit = MIN(num_available_bytes_full, fast_bytes + 1); + + uint32_t temp; + for (temp = 1; temp < limit + && buf[temp] == *(buf + temp - back_offset); + ++temp) ; + + const uint32_t len_test_2 = temp - 1; + + if (len_test_2 >= 2) { + uint32_t state_2 = state; + update_char(state_2); + + const uint32_t pos_state_next = (position + 1) & pos_mask; + const uint32_t next_rep_match_price = cur_and_1_price + + bit_get_price_1(coder->is_match[state_2][pos_state_next]) + + bit_get_price_1(coder->is_rep[state_2]); + + // for (; len_test_2 >= 2; --len_test_2) { + const uint32_t offset = cur + 1 + len_test_2; + + while (len_end < offset) + coder->optimum[++len_end].price = INFINITY_PRICE; + + uint32_t cur_and_len_price = next_rep_match_price; + get_rep_price(cur_and_len_price, + 0, len_test_2, state_2, pos_state_next); + + if (cur_and_len_price < coder->optimum[offset].price) { + coder->optimum[offset].price = cur_and_len_price; + coder->optimum[offset].pos_prev = cur + 1; + coder->optimum[offset].back_prev = 0; + coder->optimum[offset].prev_1_is_char = true; + coder->optimum[offset].prev_2 = false; + } +// } + } + } + + + uint32_t start_len = 2; // speed optimization + + for (uint32_t rep_index = 0; rep_index < REP_DISTANCES; ++rep_index) { + const uint32_t back_offset = reps[rep_index] 
+ 1; + + if (buf[0] != *(buf - back_offset) || buf[1] != *(buf + 1 - back_offset)) + continue; + + uint32_t len_test; + for (len_test = 2; len_test < num_available_bytes + && buf[len_test] == *(buf + len_test - back_offset); + ++len_test) ; + + while (len_end < cur + len_test) + coder->optimum[++len_end].price = INFINITY_PRICE; + + const uint32_t len_test_temp = len_test; + uint32_t price = rep_match_price; + get_pure_rep_price(price, rep_index, state, pos_state); + + do { + const uint32_t cur_and_len_price = price + + length_get_price(coder->rep_match_len_encoder, + len_test - 2, pos_state); + + if (cur_and_len_price < coder->optimum[cur + len_test].price) { + coder->optimum[cur + len_test].price = cur_and_len_price; + coder->optimum[cur + len_test].pos_prev = cur; + coder->optimum[cur + len_test].back_prev = rep_index; + coder->optimum[cur + len_test].prev_1_is_char = false; + } + } while (--len_test >= 2); + + len_test = len_test_temp; + + if (rep_index == 0) + start_len = len_test + 1; + + + uint32_t len_test_2 = len_test + 1; + const uint32_t limit = MIN(num_available_bytes_full, + len_test_2 + fast_bytes); + for (; len_test_2 < limit + && buf[len_test_2] == *(buf + len_test_2 - back_offset); + ++len_test_2) ; + + len_test_2 -= len_test + 1; + + if (len_test_2 >= 2) { + uint32_t state_2 = state; + update_rep(state_2); + + uint32_t pos_state_next = (position + len_test) & pos_mask; + + const uint32_t cur_and_len_char_price = price + + length_get_price(coder->rep_match_len_encoder, + len_test - 2, pos_state) + + bit_get_price_0(coder->is_match[state_2][pos_state_next]) + + literal_get_price( + literal_get_subcoder(coder->literal_coder, + position + len_test, buf[len_test - 1]), + true, *(buf + len_test - back_offset), buf[len_test]); + + update_char(state_2); + + pos_state_next = (position + len_test + 1) & pos_mask; + + const uint32_t next_rep_match_price = cur_and_len_char_price + + bit_get_price_1(coder->is_match[state_2][pos_state_next]) + + 
bit_get_price_1(coder->is_rep[state_2]); + +// for(; len_test_2 >= 2; len_test_2--) { + const uint32_t offset = cur + len_test + 1 + len_test_2; + + while (len_end < offset) + coder->optimum[++len_end].price = INFINITY_PRICE; + + uint32_t cur_and_len_price = next_rep_match_price; + get_rep_price(cur_and_len_price, + 0, len_test_2, state_2, pos_state_next); + + if (cur_and_len_price < coder->optimum[offset].price) { + coder->optimum[offset].price = cur_and_len_price; + coder->optimum[offset].pos_prev = cur + len_test + 1; + coder->optimum[offset].back_prev = 0; + coder->optimum[offset].prev_1_is_char = true; + coder->optimum[offset].prev_2 = true; + coder->optimum[offset].pos_prev_2 = cur; + coder->optimum[offset].back_prev_2 = rep_index; + } +// } + } + } + + +// for (uint32_t len_test = 2; len_test <= new_len; ++len_test) + if (new_len > num_available_bytes) { + new_len = num_available_bytes; + + for (num_distance_pairs = 0; + new_len > match_distances[num_distance_pairs + 1]; + num_distance_pairs += 2) ; + + match_distances[num_distance_pairs + 1] = new_len; + num_distance_pairs += 2; + } + + + if (new_len >= start_len) { + normal_match_price = match_price + + bit_get_price_0(coder->is_rep[state]); + + while (len_end < cur + new_len) + coder->optimum[++len_end].price = INFINITY_PRICE; + + uint32_t offs = 0; + while (start_len > match_distances[offs + 1]) + offs += 2; + + uint32_t cur_back = match_distances[offs + 2]; + uint32_t pos_slot = get_pos_slot_2(cur_back); + + for (uint32_t len_test = start_len; ; ++len_test) { + uint32_t cur_and_len_price = normal_match_price; + const uint32_t len_to_pos_state = get_len_to_pos_state(len_test); + + if (cur_back < FULL_DISTANCES) + cur_and_len_price += distances_prices[ + len_to_pos_state][cur_back]; + else + cur_and_len_price += pos_slot_prices[ + len_to_pos_state][pos_slot] + + align_prices[cur_back & ALIGN_MASK]; + + cur_and_len_price += length_get_price(coder->len_encoder, + len_test - MATCH_MIN_LEN, pos_state); + + if 
(cur_and_len_price < coder->optimum[cur + len_test].price) { + coder->optimum[cur + len_test].price = cur_and_len_price; + coder->optimum[cur + len_test].pos_prev = cur; + coder->optimum[cur + len_test].back_prev + = cur_back + REP_DISTANCES; + coder->optimum[cur + len_test].prev_1_is_char = false; + } + + if (len_test == match_distances[offs + 1]) { + // Try Match + Literal + Rep0 + const uint32_t back_offset = cur_back + 1; + uint32_t len_test_2 = len_test + 1; + const uint32_t limit = MIN(num_available_bytes_full, + len_test_2 + fast_bytes); + + for (; len_test_2 < limit && + buf[len_test_2] == *(buf + len_test_2 - back_offset); + ++len_test_2) ; + + len_test_2 -= len_test + 1; + + if (len_test_2 >= 2) { + uint32_t state_2 = state; + update_match(state_2); + uint32_t pos_state_next + = (position + len_test) & pos_mask; + + const uint32_t cur_and_len_char_price = cur_and_len_price + + bit_get_price_0( + coder->is_match[state_2][pos_state_next]) + + literal_get_price( + literal_get_subcoder( + coder->literal_coder, + position + len_test, + buf[len_test - 1]), + true, + *(buf + len_test - back_offset), + buf[len_test]); + + update_char(state_2); + pos_state_next = (pos_state_next + 1) & pos_mask; + + const uint32_t next_rep_match_price + = cur_and_len_char_price + + bit_get_price_1( + coder->is_match[state_2][pos_state_next]) + + bit_get_price_1(coder->is_rep[state_2]); + + // for(; len_test_2 >= 2; --len_test_2) { + const uint32_t offset = cur + len_test + 1 + len_test_2; + + while (len_end < offset) + coder->optimum[++len_end].price = INFINITY_PRICE; + + cur_and_len_price = next_rep_match_price; + get_rep_price(cur_and_len_price, + 0, len_test_2, state_2, pos_state_next); + + if (cur_and_len_price < coder->optimum[offset].price) { + coder->optimum[offset].price = cur_and_len_price; + coder->optimum[offset].pos_prev = cur + len_test + 1; + coder->optimum[offset].back_prev = 0; + coder->optimum[offset].prev_1_is_char = true; + coder->optimum[offset].prev_2 = true; 
+ coder->optimum[offset].pos_prev_2 = cur; + coder->optimum[offset].back_prev_2 + = cur_back + REP_DISTANCES; + } +// } + } + + offs += 2; + if (offs == num_distance_pairs) + break; + + cur_back = match_distances[offs + 2]; + if (cur_back >= FULL_DISTANCES) + pos_slot = get_pos_slot_2(cur_back); + } + } + } + + } // Closes: while (true) +} diff --git a/src/liblzma/lzma/lzma_encoder_getoptimumfast.c b/src/liblzma/lzma/lzma_encoder_getoptimumfast.c new file mode 100644 index 00000000..e6cee19d --- /dev/null +++ b/src/liblzma/lzma/lzma_encoder_getoptimumfast.c @@ -0,0 +1,201 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma_encoder_getoptimumfast.c +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +// NOTE: If you want to keep the line length in 80 characters, set +// tab width to 4 or less in your editor when editing this file. 

#include "lzma_encoder_private.h"


/// Evaluates to true when (big_dist >> 7) exceeds small_dist, that is, when
/// big_dist is roughly 128 times larger than small_dist or more.
/// lzma_get_optimum_fast() uses it to decide whether a candidate with a much
/// smaller distance should win over one with a far larger distance.
#define change_pair(small_dist, big_dist) \
	(((big_dist) >> 7) > (small_dist))


/// \brief      Choose the next symbol using the fast heuristic
///
/// Results are stored through the two output pointers:
///   - *len_res is the length (in bytes) covered by the chosen symbol.
///   - *back_res is UINT32_MAX together with *len_res == 1 when neither
///     a match nor a repeated match is used (presumably the caller then
///     encodes a literal - confirm against the caller), an index into
///     coder->rep_distances[] when it is below REP_DISTANCES, and
///     otherwise a match finder distance plus REP_DISTANCES.
///
/// When coder->longest_match_was_found is set, the match data stored in
/// coder->longest_match_length and coder->num_distance_pairs by an earlier
/// call is consumed instead of querying the match finder again.
extern void
lzma_get_optimum_fast(lzma_coder *restrict coder,
		uint32_t *restrict back_res, uint32_t *restrict len_res)
{
	// Local copies
	const uint32_t fast_bytes = coder->fast_bytes;

	// Either run the match finder now or reuse the look-ahead result
	// that the previous call left in the coder.
	uint32_t len_main;
	uint32_t num_distance_pairs;
	if (!coder->longest_match_was_found) {
		lzma_read_match_distances(coder, &len_main, &num_distance_pairs);
	} else {
		len_main = coder->longest_match_length;
		num_distance_pairs = coder->num_distance_pairs;
		coder->longest_match_was_found = false;
	}

	// buf[0] is the byte just before lz.read_pos; the match finder call
	// above has already advanced read_pos past it.
	const uint8_t *buf = coder->lz.buffer + coder->lz.read_pos - 1;
	uint32_t num_available_bytes
			= coder->lz.write_pos - coder->lz.read_pos + 1;

	if (num_available_bytes < 2) {
		// There's not enough input left to encode a match.
		*back_res = UINT32_MAX;
		*len_res = 1;
		return;
	}

	if (num_available_bytes > MATCH_MAX_LEN)
		num_available_bytes = MATCH_MAX_LEN;


	// Look for repetitive matches; scan the previous four match distances
	uint32_t rep_lens[REP_DISTANCES];
	uint32_t rep_max_index = 0;

	for (uint32_t i = 0; i < REP_DISTANCES; ++i) {
		const uint32_t back_offset = coder->rep_distances[i] + 1;

		// If the first two bytes (2 == MATCH_MIN_LEN) do not match,
		// this rep_distance[i] is not useful. This is indicated
		// using zero as the length of the repetitive match.
		if (buf[0] != *(buf - back_offset)
				|| buf[1] != *(buf + 1 - back_offset)) {
			rep_lens[i] = 0;
			continue;
		}

		// The first two bytes matched.
		// Calculate the length of the match.
		uint32_t len;
		for (len = 2; len < num_available_bytes
				&& buf[len] == *(buf + len - back_offset);
				++len) ;

		// If we have found a repetitive match that is at least
		// as long as fast_bytes, return it immediately.
		if (len >= fast_bytes) {
			*back_res = i;
			*len_res = len;
			move_pos(len - 1);
			return;
		}

		rep_lens[i] = len;

		// After this loop, rep_lens[rep_max_index] is the biggest
		// value of all values in rep_lens[].
		if (len > rep_lens[rep_max_index])
			rep_max_index = i;
	}


	// A normal match that reaches fast_bytes is accepted right away.
	// The distance of the longest match is the last value stored in
	// match_distances[].
	if (len_main >= fast_bytes) {
		*back_res = coder->match_distances[num_distance_pairs]
				+ REP_DISTANCES;
		*len_res = len_main;
		move_pos(len_main - 1);
		return;
	}

	uint32_t back_main = 0;
	if (len_main >= 2) {
		back_main = coder->match_distances[num_distance_pairs];

		// While the previous (length, distance) pair is only one byte
		// shorter but its distance is far smaller (see change_pair()),
		// step back to that pair.
		while (num_distance_pairs > 2 && len_main ==
				coder->match_distances[num_distance_pairs - 3] + 1) {
			if (!change_pair(coder->match_distances[
					num_distance_pairs - 2], back_main))
				break;

			num_distance_pairs -= 2;
			len_main = coder->match_distances[num_distance_pairs - 1];
			back_main = coder->match_distances[num_distance_pairs];
		}

		// A two-byte match with a distance of 0x80 or more is
		// discarded by dropping its length below the minimum.
		if (len_main == 2 && back_main >= 0x80)
			len_main = 1;
	}

	// Use the best repeated match when it is at most one byte shorter
	// than the normal match, or at most two/three bytes shorter while
	// the normal match distance exceeds 2^9 / 2^15 respectively.
	if (rep_lens[rep_max_index] >= 2) {
		if (rep_lens[rep_max_index] + 1 >= len_main
				|| (rep_lens[rep_max_index] + 2 >= len_main
				&& (back_main > (1 << 9)))
				|| (rep_lens[rep_max_index] + 3 >= len_main
				&& (back_main > (1 << 15)))) {
			*back_res = rep_max_index;
			*len_res = rep_lens[rep_max_index];
			move_pos(*len_res - 1);
			return;
		}
	}

	if (len_main >= 2 && num_available_bytes > 2) {
		// Look one byte ahead. The result is stored in the coder so
		// that the next call can reuse it when this call returns with
		// longest_match_was_found set.
		lzma_read_match_distances(coder, &coder->longest_match_length,
				&coder->num_distance_pairs);

		if (coder->longest_match_length >= 2) {
			const uint32_t new_distance = coder->match_distances[
					coder->num_distance_pairs];

			// If the match starting at the next byte looks better,
			// give up the current match and return a single byte.
			if ((coder->longest_match_length >= len_main
					&& new_distance < back_main)
					|| (coder->longest_match_length == len_main + 1
					&& !change_pair(back_main, new_distance))
					|| (coder->longest_match_length > len_main + 1)
					|| (coder->longest_match_length + 1 >= len_main
					&& len_main >= 3
					&& change_pair(new_distance, back_main))) {
				coder->longest_match_was_found = true;
				*back_res = UINT32_MAX;
				*len_res = 1;
				return;
			}
		}

		++buf;
		--num_available_bytes;

		// NOTE(review): unlike the first scan, this pre-check compares
		// buf[1] and buf[2] while the extension loop starts at len = 2,
		// so buf[0] is never compared here. This may be inherited from
		// the reference encoder or may be an off-by-one; confirm before
		// changing it, since it affects the encoder's choices.
		// The rep_lens[] values written below are not read again in
		// this function.
		for (uint32_t i = 0; i < REP_DISTANCES; ++i) {
			const uint32_t back_offset = coder->rep_distances[i] + 1;

			if (buf[1] != *(buf + 1 - back_offset)
					|| buf[2] != *(buf + 2 - back_offset)) {
				rep_lens[i] = 0;
				continue;
			}

			uint32_t len;
			for (len = 2; len < num_available_bytes
					&& buf[len] == *(buf + len - back_offset);
					++len) ;

			if (len + 1 >= len_main) {
				coder->longest_match_was_found = true;
				*back_res = UINT32_MAX;
				*len_res = 1;
				return;
			}
		}

		// Keep the current match. Two positions have already been
		// consumed by the two match finder calls, hence "- 2".
		*back_res = back_main + REP_DISTANCES;
		*len_res = len_main;
		move_pos(len_main - 2);
		return;
	}

	*back_res = UINT32_MAX;
	*len_res = 1;
	return;
}
diff --git a/src/liblzma/lzma/lzma_encoder_init.c b/src/liblzma/lzma/lzma_encoder_init.c
new file mode 100644
index 00000000..d7807529
--- /dev/null
+++ b/src/liblzma/lzma/lzma_encoder_init.c
@@ -0,0 +1,245 @@
///////////////////////////////////////////////////////////////////////////////
//
/// \file lzma_encoder_init.c
/// \brief Creating, resetting and destroying the LZMA encoder
//
// Copyright (C) 1999-2006 Igor Pavlov
// Copyright (C) 2007 Lasse Collin
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "lzma_encoder_private.h" + + +uint8_t lzma_fastpos[1 << 11]; + +extern void +lzma_fastpos_init(void) +{ + static const uint8_t fast_slots = 22; + + int c = 2; + lzma_fastpos[0] = 0; + lzma_fastpos[1] = 1; + + for (uint8_t slot_fast = 2; slot_fast < fast_slots; ++slot_fast) { + const uint32_t k = (1 << ((slot_fast >> 1) - 1)); + + for (uint32_t j = 0; j < k; ++j, ++c) + lzma_fastpos[c] = slot_fast; + } + + return; +} + + +/// \brief Initializes the length encoder +static void +length_encoder_reset(lzma_length_encoder *lencoder, + const uint32_t num_pos_states, const uint32_t table_size) +{ + // NLength::CPriceTableEncoder::SetTableSize() + lencoder->table_size = table_size; + + // NLength::CEncoder::Init() + bit_reset(lencoder->choice); + bit_reset(lencoder->choice2); + + for (size_t pos_state = 0; pos_state < num_pos_states; ++pos_state) { + bittree_reset(lencoder->low[pos_state], LEN_LOW_BITS); + bittree_reset(lencoder->mid[pos_state], LEN_MID_BITS); + } + + bittree_reset(lencoder->high, LEN_HIGH_BITS); + + // NLength::CPriceTableEncoder::UpdateTables() + for (size_t pos_state = 0; pos_state < num_pos_states; ++pos_state) + lzma_length_encoder_update_table(lencoder, pos_state); + + return; +} + + +static void +lzma_lzma_encoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_lz_encoder_end(&coder->lz, allocator); + lzma_literal_end(&coder->literal_coder, allocator); + lzma_free(coder, allocator); + return; +} + + +extern lzma_ret +lzma_lzma_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->coder->next = LZMA_NEXT_CODER_INIT; + next->coder->lz = LZMA_LZ_ENCODER_INIT; + next->coder->literal_coder = NULL; + } + + // Validate options that aren't validated 
elsewhere. + const lzma_options_lzma *options = filters[0].options; + if (options->pos_bits > LZMA_POS_BITS_MAX + || options->fast_bytes < LZMA_FAST_BYTES_MIN + || options->fast_bytes > LZMA_FAST_BYTES_MAX) { + lzma_lzma_encoder_end(next->coder, allocator); + return LZMA_HEADER_ERROR; + } + + // Set compression mode. + switch (options->mode) { + case LZMA_MODE_FAST: + next->coder->best_compression = false; + break; + + case LZMA_MODE_BEST: + next->coder->best_compression = true; + break; + + default: + lzma_lzma_encoder_end(next->coder, allocator); + return LZMA_HEADER_ERROR; + } + + // Initialize literal coder. + { + const lzma_ret ret = lzma_literal_init( + &next->coder->literal_coder, allocator, + options->literal_context_bits, + options->literal_pos_bits); + if (ret != LZMA_OK) { + lzma_lzma_encoder_end(next->coder, allocator); + return ret; + } + } + + // Initialize LZ encoder. + { + const lzma_ret ret = lzma_lz_encoder_reset( + &next->coder->lz, allocator, &lzma_lzma_encode, + filters[0].uncompressed_size, + options->dictionary_size, OPTS, + options->fast_bytes, MATCH_MAX_LEN + 1 + OPTS, + options->match_finder, + options->match_finder_cycles, + options->preset_dictionary, + options->preset_dictionary_size); + if (ret != LZMA_OK) { + lzma_lzma_encoder_end(next->coder, allocator); + return ret; + } + } + + // Set dist_table_size. + { + // Round the dictionary size up to next 2^n. 
+ uint32_t log_size; + for (log_size = 0; (UINT32_C(1) << log_size) + < options->dictionary_size; ++log_size) ; + + next->coder->dist_table_size = log_size * 2; + } + + // Misc FIXME desc + next->coder->dictionary_size = options->dictionary_size; + next->coder->pos_mask = (1U << options->pos_bits) - 1; + next->coder->fast_bytes = options->fast_bytes; + + // Range coder + rc_reset(next->coder->rc); + + // State + next->coder->state = 0; + next->coder->previous_byte = 0; + for (size_t i = 0; i < REP_DISTANCES; ++i) + next->coder->rep_distances[i] = 0; + + // Bit encoders + for (size_t i = 0; i < STATES; ++i) { + for (size_t j = 0; j <= next->coder->pos_mask; ++j) { + bit_reset(next->coder->is_match[i][j]); + bit_reset(next->coder->is_rep0_long[i][j]); + } + + bit_reset(next->coder->is_rep[i]); + bit_reset(next->coder->is_rep0[i]); + bit_reset(next->coder->is_rep1[i]); + bit_reset(next->coder->is_rep2[i]); + } + + for (size_t i = 0; i < FULL_DISTANCES - END_POS_MODEL_INDEX; ++i) + bit_reset(next->coder->pos_encoders[i]); + + // Bit tree encoders + for (size_t i = 0; i < LEN_TO_POS_STATES; ++i) + bittree_reset(next->coder->pos_slot_encoder[i], POS_SLOT_BITS); + + bittree_reset(next->coder->pos_align_encoder, ALIGN_BITS); + + // Length encoders + length_encoder_reset(&next->coder->len_encoder, 1U << options->pos_bits, + options->fast_bytes + 1 - MATCH_MIN_LEN); + + length_encoder_reset(&next->coder->rep_match_len_encoder, + 1U << options->pos_bits, + next->coder->fast_bytes + 1 - MATCH_MIN_LEN); + + // Misc + next->coder->longest_match_was_found = false; + next->coder->optimum_end_index = 0; + next->coder->optimum_current_index = 0; + next->coder->additional_offset = 0; + + next->coder->now_pos = 0; + next->coder->is_initialized = false; + + // Initialize the next decoder in the chain, if any. 
+ { + const lzma_ret ret = lzma_next_filter_init(&next->coder->next, + allocator, filters + 1); + if (ret != LZMA_OK) { + lzma_lzma_encoder_end(next->coder, allocator); + return ret; + } + } + + // Initialization successful. Set the function pointers. + next->code = &lzma_lz_encode; + next->end = &lzma_lzma_encoder_end; + + return LZMA_OK; +} + + +extern bool +lzma_lzma_encode_properties(const lzma_options_lzma *options, uint8_t *byte) +{ + if (options->literal_context_bits > LZMA_LITERAL_CONTEXT_BITS_MAX + || options->literal_pos_bits + > LZMA_LITERAL_POS_BITS_MAX + || options->pos_bits > LZMA_POS_BITS_MAX) + return true; + + *byte = (options->pos_bits * 5 + options->literal_pos_bits) * 9 + + options->literal_context_bits; + assert(*byte <= (4 * 5 + 4) * 9 + 8); + + return false; +} diff --git a/src/liblzma/lzma/lzma_encoder_presets.c b/src/liblzma/lzma/lzma_encoder_presets.c new file mode 100644 index 00000000..966c7c86 --- /dev/null +++ b/src/liblzma/lzma/lzma_encoder_presets.c @@ -0,0 +1,34 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma_encoder_presets.c +/// \brief Encoder presets +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
//
///////////////////////////////////////////////////////////////////////////////

#include "common.h"


// Nine built-in option sets, positionally initialized.
//
// Column legend (lc/lp/pb = literal context bits, literal position bits,
// position bits; fb = fast bytes; mf = match finder; mfc = match finder
// cycles). The "pd" and "pds" columns are presumably preset_dictionary and
// preset_dictionary_size - TODO confirm against the field order of
// lzma_options_lzma, which is declared elsewhere.
LZMA_API const lzma_options_lzma lzma_preset_lzma[9] = {
// dictionary_size   lc lp pb pd    pds mode            fb   mf           mfc
{ UINT32_C(1) << 16, 3, 0, 2, NULL, 0, LZMA_MODE_FAST, 64, LZMA_MF_HC3, 0 },
{ UINT32_C(1) << 20, 3, 0, 2, NULL, 0, LZMA_MODE_FAST, 64, LZMA_MF_HC4, 0 },
{ UINT32_C(1) << 19, 3, 0, 2, NULL, 0, LZMA_MODE_BEST, 64, LZMA_MF_BT4, 0 },
{ UINT32_C(1) << 20, 3, 0, 2, NULL, 0, LZMA_MODE_BEST, 64, LZMA_MF_BT4, 0 },
{ UINT32_C(1) << 21, 3, 0, 2, NULL, 0, LZMA_MODE_BEST, 128, LZMA_MF_BT4, 0 },
{ UINT32_C(1) << 22, 3, 0, 2, NULL, 0, LZMA_MODE_BEST, 128, LZMA_MF_BT4, 0 },
{ UINT32_C(1) << 23, 3, 0, 2, NULL, 0, LZMA_MODE_BEST, 128, LZMA_MF_BT4, 0 },
{ UINT32_C(1) << 24, 3, 0, 2, NULL, 0, LZMA_MODE_BEST, 273, LZMA_MF_BT4, 0 },
{ UINT32_C(1) << 25, 3, 0, 2, NULL, 0, LZMA_MODE_BEST, 273, LZMA_MF_BT4, 0 },
};
diff --git a/src/liblzma/lzma/lzma_encoder_private.h b/src/liblzma/lzma/lzma_encoder_private.h
new file mode 100644
index 00000000..7fb1566a
--- /dev/null
+++ b/src/liblzma/lzma/lzma_encoder_private.h
@@ -0,0 +1,225 @@
///////////////////////////////////////////////////////////////////////////////
//
/// \file lzma_encoder_private.h
/// \brief Private definitions for LZMA encoder
//
// Copyright (C) 1999-2006 Igor Pavlov
// Copyright (C) 2007 Lasse Collin
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZMA_ENCODER_PRIVATE_H +#define LZMA_LZMA_ENCODER_PRIVATE_H + +#include "lzma_encoder.h" +#include "lzma_common.h" +#include "lz_encoder.h" + +// We need space for about two encoding loops, because there is no check +// for available buffer space before end of payload marker gets written. +// 2*26 bytes should be enough for this... but Lasse isn't very sure about +// the exact value. 64 bytes certainly is enough. :-) +#define RC_BUFFER_SIZE 64 +#include "range_encoder.h" + + +#define move_pos(num) \ +do { \ + assert((int32_t)(num) >= 0); \ + if ((num) != 0) { \ + coder->additional_offset += num; \ + coder->lz.skip(&coder->lz, num); \ + } \ +} while (0) + + +#define get_pos_slot(pos) \ + ((pos) < (1 << 11) \ + ? lzma_fastpos[pos] \ + : ((pos) < (1 << 21) \ + ? lzma_fastpos[(pos) >> 10] + 20 \ + : lzma_fastpos[(pos) >> 20] + 40)) + + +#define get_pos_slot_2(pos) \ + ((pos) < (1 << 17) \ + ? lzma_fastpos[(pos) >> 6] + 12 \ + : ((pos) < (1 << 27) \ + ? lzma_fastpos[(pos) >> 16] + 32 \ + : lzma_fastpos[(pos) >> 26] + 52)) + + +/// This isn't modified once its contents have been +/// initialized by lzma_fastpos_init(). 
+extern uint8_t lzma_fastpos[1 << 11];
+
+// Probabilities and price table of one length encoder (see lzma_coder_s).
+typedef struct {
+	probability choice;
+	probability choice2;
+	probability low[POS_STATES_MAX][LEN_LOW_SYMBOLS];
+	probability mid[POS_STATES_MAX][LEN_MID_SYMBOLS];
+	probability high[LEN_HIGH_SYMBOLS];
+
+	uint32_t prices[POS_STATES_MAX][LEN_SYMBOLS];
+	uint32_t table_size;
+	uint32_t counters[POS_STATES_MAX];
+
+} lzma_length_encoder;
+
+// Element type of lzma_coder_s.optimum[]; field semantics not shown here.
+typedef struct {
+	uint32_t state;
+
+	bool prev_1_is_char;
+	bool prev_2;
+
+	uint32_t pos_prev_2;
+	uint32_t back_prev_2;
+
+	uint32_t price;
+	uint32_t pos_prev; // pos_next;
+	uint32_t back_prev;
+
+	uint32_t backs[4];
+
+} lzma_optimal;
+
+// Complete state of one LZMA encoder instance.
+struct lzma_coder_s {
+	// Next coder in the chain
+	lzma_next_coder next;
+
+	// In window and match finder
+	lzma_lz_encoder lz;
+
+	// Range encoder
+	lzma_range_encoder rc;
+
+	// State
+	uint32_t state;
+	uint8_t previous_byte;
+	uint32_t rep_distances[REP_DISTANCES];
+
+	// Misc
+	uint32_t match_distances[MATCH_MAX_LEN * 2 + 2 + 1]; // [0] is read as the entry count
+	uint32_t num_distance_pairs;
+	uint32_t additional_offset; // Bumped by move_pos() and lzma_read_match_distances()
+	uint32_t now_pos; // Lowest 32 bits are enough here.
+	bool best_compression; ///< True when LZMA_MODE_BEST is used
+	bool is_initialized;
+
+	// Literal encoder
+	lzma_literal_coder *literal_coder;
+
+	// Bit encoders
+	probability is_match[STATES][POS_STATES_MAX];
+	probability is_rep[STATES];
+	probability is_rep0[STATES];
+	probability is_rep1[STATES];
+	probability is_rep2[STATES];
+	probability is_rep0_long[STATES][POS_STATES_MAX];
+	probability pos_encoders[FULL_DISTANCES - END_POS_MODEL_INDEX];
+
+	// Bit Tree Encoders
+	probability pos_slot_encoder[LEN_TO_POS_STATES][1 << POS_SLOT_BITS];
+	probability pos_align_encoder[1 << ALIGN_BITS];
+
+	// Length encoders
+	lzma_length_encoder len_encoder;
+	lzma_length_encoder rep_match_len_encoder;
+
+	// Optimal
+	lzma_optimal optimum[OPTS];
+	uint32_t optimum_end_index;
+	uint32_t optimum_current_index;
+	uint32_t longest_match_length;
+	bool longest_match_was_found;
+
+	// Prices
+	uint32_t pos_slot_prices[LEN_TO_POS_STATES][DIST_TABLE_SIZE_MAX];
+	uint32_t distances_prices[LEN_TO_POS_STATES][FULL_DISTANCES];
+	uint32_t align_prices[ALIGN_TABLE_SIZE];
+	uint32_t align_price_count;
+	uint32_t dist_table_size;
+	uint32_t match_price_count;
+
+	// LZMA specific settings
+	uint32_t dictionary_size; ///< Size in bytes
+	uint32_t fast_bytes; ///< Compared against the match length in lzma_read_match_distances()
+	uint32_t pos_state_bits;
+	uint32_t pos_mask; ///< (1 << pos_state_bits) - 1
+};
+
+// Functions implemented in other files of the LZMA encoder.
+extern void lzma_length_encoder_update_table(lzma_length_encoder *lencoder,
+		const uint32_t pos_state);
+
+extern bool lzma_lzma_encode(lzma_coder *coder, uint8_t *restrict out,
+		size_t *restrict out_pos, size_t out_size);
+
+extern void lzma_get_optimum(lzma_coder *restrict coder,
+		uint32_t *restrict back_res, uint32_t *restrict len_res);
+
+extern void lzma_get_optimum_fast(lzma_coder *restrict coder,
+		uint32_t *restrict back_res, uint32_t *restrict len_res);
+
+
+// NOTE: Don't add 'restrict'.
+static inline void
+lzma_read_match_distances(lzma_coder *coder,
+		uint32_t *len_res, uint32_t *num_distance_pairs)
+{
+	*len_res = 0; // Stays zero when no entries are reported.
+
+	coder->lz.get_matches(&coder->lz, coder->match_distances);
+	// Slot 0 of match_distances[] is read back as the number of entries.
+	*num_distance_pairs = coder->match_distances[0];
+
+	if (*num_distance_pairs > 0) {
+		*len_res = coder->match_distances[*num_distance_pairs - 1];
+		assert(*len_res <= MATCH_MAX_LEN);
+		// Only a match of exactly fast_bytes is extended below.
+		if (*len_res == coder->fast_bytes) {
+			uint32_t offset = *len_res - 1;
+			const uint32_t distance = coder->match_distances[
+					*num_distance_pairs] + 1; // Last slot holds distance - 1.
+			uint32_t limit = MATCH_MAX_LEN - *len_res; // Most bytes we may add.
+
+			assert(offset + limit < coder->lz.keep_size_after);
+
+			// If we are close to end of the stream, we may need
+			// to limit the length of the match.
+			if (coder->lz.stream_end_was_reached
+					&& coder->lz.write_pos
+					< coder->lz.read_pos + offset + limit)
+				limit = coder->lz.write_pos
+					- (coder->lz.read_pos + offset);
+
+			offset += coder->lz.read_pos; // Now an index into lz.buffer[].
+			uint32_t i = 0;
+			while (i < limit && coder->lz.buffer[offset + i]
+					== coder->lz.buffer[
+						offset + i - distance])
+				++i; // Count the bytes that still match `distance' bytes back.
+
+			*len_res += i;
+		}
+	}
+	// Incremented on every call, whether or not anything was reported.
+	++coder->additional_offset;
+
+	return;
+}
+
+#endif
diff --git a/src/liblzma/lzma/lzma_literal.c b/src/liblzma/lzma/lzma_literal.c
new file mode 100644
index 00000000..8f650fbf
--- /dev/null
+++ b/src/liblzma/lzma/lzma_literal.c
@@ -0,0 +1,74 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file lzma_literal.c
+/// \brief Literal Coder
+//
+// Copyright (C) 1999-2006 Igor Pavlov
+// Copyright (C) 2007 Lasse Collin
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "lzma_literal.h"
+
+/// (Re)allocates *coder when needed and resets all of its probabilities.
+extern lzma_ret
+lzma_literal_init(lzma_literal_coder **coder, lzma_allocator *allocator,
+		uint32_t literal_context_bits, uint32_t literal_pos_bits)
+{
+	// Verify that arguments are sane.
+	if (literal_context_bits > LZMA_LITERAL_CONTEXT_BITS_MAX
+			|| literal_pos_bits > LZMA_LITERAL_POS_BITS_MAX)
+		return LZMA_HEADER_ERROR;
+
+	// Number of states to store; arguments in literal_states() order.
+	const uint32_t states = literal_states(
+			literal_context_bits, literal_pos_bits);
+
+	// Allocate a new literal coder, if needed.
+	if (*coder == NULL || (**coder).literal_context_bits
+			!= literal_context_bits
+			|| (**coder).literal_pos_bits != literal_pos_bits) {
+		// Free the old coder, if any.
+		lzma_free(*coder, allocator);
+
+		// Allocate a new one; on failure *coder is left as NULL.
+		*coder = lzma_alloc(sizeof(lzma_literal_coder)
+				+ states * LIT_SIZE * sizeof(probability),
+				allocator);
+		if (*coder == NULL)
+			return LZMA_MEM_ERROR;
+
+		// Store the new settings.
+		(**coder).literal_context_bits = literal_context_bits;
+		(**coder).literal_pos_bits = literal_pos_bits;
+
+		// Calculate also the literal_pos_mask. It's not changed
+		// anywhere else than here. Shift an unsigned one.
+		(**coder).literal_pos_mask = (UINT32_C(1) << literal_pos_bits) - 1;
+	}
+
+	// Reset the literal coder.
+	for (uint32_t i = 0; i < states; ++i)
+		for (uint32_t j = 0; j < LIT_SIZE; ++j)
+			bit_reset((**coder).coders[i][j]);
+
+	return LZMA_OK;
+}
+
+/// Frees *coder with the given allocator and sets *coder to NULL.
+extern void
+lzma_literal_end(lzma_literal_coder **coder, lzma_allocator *allocator)
+{
+	lzma_free(*coder, allocator);
+	*coder = NULL;
+}
diff --git a/src/liblzma/lzma/lzma_literal.h b/src/liblzma/lzma/lzma_literal.h
new file mode 100644
index 00000000..174f5ed4
--- /dev/null
+++ b/src/liblzma/lzma/lzma_literal.h
@@ -0,0 +1,74 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file lzma_literal.h
+/// \brief Literal Coder
+///
+/// This is used as is by both LZMA encoder and decoder.
+//
+// Copyright (C) 1999-2006 Igor Pavlov
+// Copyright (C) 2007 Lasse Collin
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_LITERAL_H
+#define LZMA_LITERAL_H
+
+#include "common.h"
+
+// We need typedef of `probability'.
+#include "range_common.h"
+
+
+/// Each literal coder is divided in three sections:
+/// - 0x001-0x0FF: Without match byte
+/// - 0x101-0x1FF: With match byte; match bit is 0
+/// - 0x201-0x2FF: With match byte; match bit is 1
+#define LIT_SIZE 0x300
+
+/// Calculate how many states are needed. Each state has
+/// LIT_SIZE `probability' variables.
+#define literal_states(literal_context_bits, literal_pos_bits) \
+	(1U << ((literal_context_bits) + (literal_pos_bits)))
+
+/// Locate the literal coder for the next literal byte. The choice depends on
+/// - the lowest literal_pos_bits bits of the position of the current
+/// byte; and
+/// - the highest literal_context_bits bits of the previous byte.
+#define literal_get_subcoder(literal_coder, pos, prev_byte) \
+	(literal_coder)->coders[(((pos) & (literal_coder)->literal_pos_mask) \
+			<< (literal_coder)->literal_context_bits) \
+		+ ((prev_byte) >> (8 - (literal_coder)->literal_context_bits))]
+
+/// Settings header followed by a variable number of probability tables.
+typedef struct {
+	uint32_t literal_context_bits; ///< Shift count in literal_get_subcoder()
+	uint32_t literal_pos_bits;
+
+	/// literal_pos_mask is always (1 << literal_pos_bits) - 1.
+	uint32_t literal_pos_mask;
+
+	/// There are (1 << (literal_pos_bits + literal_context_bits))
+	/// literal coders. Flexible array member: sized at allocation time.
+	probability coders[][LIT_SIZE];
+
+} lzma_literal_coder;
+
+/// Returns LZMA_OK, LZMA_HEADER_ERROR or LZMA_MEM_ERROR (see lzma_literal.c).
+extern lzma_ret lzma_literal_init(
+		lzma_literal_coder **coder, lzma_allocator *allocator,
+		uint32_t literal_context_bits, uint32_t literal_pos_bits);
+
+extern void lzma_literal_end(
+		lzma_literal_coder **coder, lzma_allocator *allocator);
+
+#endif
diff --git a/src/liblzma/rangecoder/Makefile.am b/src/liblzma/rangecoder/Makefile.am
new file mode 100644
index 00000000..ef5d1464
--- /dev/null
+++ b/src/liblzma/rangecoder/Makefile.am
@@ -0,0 +1,28 @@
+##
+## Copyright (C) 2006 Lasse Collin
+##
+## This library is free software; you can redistribute it and/or
+## modify it under the terms of the GNU Lesser General Public
+## License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+##
+## This library is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+## Lesser General Public License for more details.
+## + +noinst_LTLIBRARIES = librangecoder.la + +librangecoder_la_SOURCES = range_common.h +librangecoder_la_CPPFLAGS = \ + -I@top_srcdir@/src/liblzma/api \ + -I@top_srcdir@/src/liblzma/common + +if COND_MAIN_ENCODER +librangecoder_la_SOURCES += range_encoder.c range_encoder.h +endif + +if COND_MAIN_DECODER +librangecoder_la_SOURCES += range_decoder.h +endif diff --git a/src/liblzma/rangecoder/range_common.h b/src/liblzma/rangecoder/range_common.h new file mode 100644 index 00000000..9e8f89a2 --- /dev/null +++ b/src/liblzma/rangecoder/range_common.h @@ -0,0 +1,68 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file range_common.h +/// \brief Common things for range encoder and decoder +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2006 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_RANGE_COMMON_H
+#define LZMA_RANGE_COMMON_H
+
+#include "common.h"
+
+
+///////////////
+// Constants //
+///////////////
+
+#define SHIFT_BITS 8
+#define TOP_BITS 24
+#define TOP_VALUE (UINT32_C(1) << TOP_BITS)
+#define BIT_MODEL_TOTAL_BITS 11
+#define BIT_MODEL_TOTAL (UINT32_C(1) << BIT_MODEL_TOTAL_BITS)
+#define MOVE_BITS 5
+
+#define MOVE_REDUCING_BITS 2
+#define BIT_PRICE_SHIFT_BITS 6
+
+
+////////////
+// Macros //
+////////////
+
+// Resets the probability so that both 0 and 1 have probability of 50 %
+#define bit_reset(prob) \
+	((prob) = BIT_MODEL_TOTAL >> 1)
+
+// This does the same for a complete bit tree (a tree represented as an
+// array). Expands to a bare for statement; the bound is kept unsigned.
+#define bittree_reset(probs, bit_levels) \
+	for (uint32_t bt_i = 0; bt_i < (UINT32_C(1) << (bit_levels)); ++bt_i) \
+		bit_reset((probs)[bt_i])
+
+
+//////////////////////
+// Type definitions //
+//////////////////////
+
+// Bit coder speed optimization
+// uint16_t is enough for probability, but usually uint32_t is faster and it
+// doesn't waste too much memory. If uint64_t is fastest on 64-bit CPU, you
+// probably want to use that instead of uint32_t. With uint64_t you will
+// waste RAM _at maximum_ of 4.5 MiB (same for both encoding and decoding).
+typedef uint32_t probability;
+
+#endif
diff --git a/src/liblzma/rangecoder/range_decoder.h b/src/liblzma/rangecoder/range_decoder.h
new file mode 100644
index 00000000..0583faaf
--- /dev/null
+++ b/src/liblzma/rangecoder/range_decoder.h
@@ -0,0 +1,189 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file range_decoder.h
+/// \brief Range Decoder
+//
+// Copyright (C) 1999-2006 Igor Pavlov
+// Copyright (C) 2007 Lasse Collin
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_RANGE_DECODER_H
+#define LZMA_RANGE_DECODER_H
+
+#include "range_common.h"
+
+/// Range decoder state that is kept between calls.
+typedef struct {
+	uint32_t range; ///< Shifted left by SHIFT_BITS in rc_normalize()
+	uint32_t code; ///< Receives one input byte per rc_normalize() shift
+} lzma_range_decoder;
+
+
+/// Makes local copies of range decoder variables and declares rc_bound.
+#define rc_to_local(rc) \
+	uint32_t rc_range = (rc).range; \
+	uint32_t rc_code = (rc).code; \
+	uint32_t rc_bound
+
+/// Stores the local copies back to the range decoder structure.
+#define rc_from_local(rc) \
+do {\
+	(rc).range = rc_range; \
+	(rc).code = rc_code; \
+} while (0)
+
+/// Resets the range decoder structure.
+#define rc_reset(rc) \
+do { \
+	(rc).range = UINT32_MAX; \
+	(rc).code = 0; \
+} while (0)
+
+
+// All of the macros in this file expect the following variables to be defined:
+// - uint32_t rc_range;
+// - uint32_t rc_code;
+// - uint32_t rc_bound; // Temporary variable
+// - uint8_t *in;
+// - size_t in_pos_local; // Local alias for *in_pos
+
+
+//////////////////
+// Buffer "I/O" //
+//////////////////
+
+// Read the next byte of compressed data from in[], if needed.
+#define rc_normalize() \
+do { \
+	if (rc_range < TOP_VALUE) { \
+		rc_range <<= SHIFT_BITS; \
+		rc_code = (rc_code << SHIFT_BITS) | in[in_pos_local++]; \
+	} \
+} while (0)
+
+
+//////////////////
+// Bit decoding //
+//////////////////
+
+// Range decoder's DecodeBit() is split into three macros:
+// if_bit_0(prob) {
+// update_bit_0(prob)
+// ...
+// } else {
+// update_bit_1(prob)
+// ...
+// }
+// NOTE: if_bit_0() expands to several statements; never use it unbraced.
+#define if_bit_0(prob) \
+	rc_normalize(); \
+	rc_bound = (rc_range >> BIT_MODEL_TOTAL_BITS) * (prob); \
+	if (rc_code < rc_bound)
+
+// Takes the 0 branch: narrows the range and moves prob towards BIT_MODEL_TOTAL.
+#define update_bit_0(prob) \
+do { \
+	rc_range = rc_bound; \
+	(prob) += (BIT_MODEL_TOTAL - (prob)) >> MOVE_BITS; \
+} while (0)
+
+// Takes the 1 branch: drops the lower part of the range and decreases prob.
+#define update_bit_1(prob) \
+do { \
+	rc_range -= rc_bound; \
+	rc_code -= rc_bound; \
+	(prob) -= (prob) >> MOVE_BITS; \
+} while (0)
+
+
+// Dummy versions don't update prob.
+#define update_bit_0_dummy() \
+	rc_range = rc_bound
+
+
+#define update_bit_1_dummy() \
+do { \
+	rc_range -= rc_bound; \
+	rc_code -= rc_bound; \
+} while (0)
+
+
+///////////////////////
+// Bit tree decoding //
+///////////////////////
+
+#define bittree_decode(target, probs, bit_levels) \
+do { /* decodes bit_levels bits, first bit highest, and ADDS them to target */ \
+	uint32_t model_index = 1; \
+	for (uint32_t bit_index = (bit_levels); bit_index != 0; --bit_index) { \
+		if_bit_0((probs)[model_index]) { \
+			update_bit_0((probs)[model_index]); \
+			model_index <<= 1; \
+		} else { \
+			update_bit_1((probs)[model_index]); \
+			model_index = (model_index << 1) | 1; \
+		} \
+	} \
+	(target) += model_index - (UINT32_C(1) << (bit_levels)); \
+} while (0)
+
+// Like bittree_decode(), but the first decoded bit is the lowest bit added.
+#define bittree_reverse_decode(target, probs, bit_levels) \
+do { \
+	uint32_t model_index = 1; \
+	for (uint32_t bit_index = 0; bit_index < (bit_levels); ++bit_index) { \
+		if_bit_0((probs)[model_index]) { \
+			update_bit_0((probs)[model_index]); \
+			model_index <<= 1; \
+		} else { \
+			update_bit_1((probs)[model_index]); \
+			model_index = (model_index << 1) | 1; \
+			(target) += UINT32_C(1) << bit_index; \
+		} \
+	} \
+} while (0)
+
+
+// Dummy versions don't update prob.
+#define bittree_decode_dummy(target, probs, bit_levels) \
+do { /* same walk as bittree_decode(), but probs[] is left untouched */ \
+	uint32_t model_index = 1; \
+	for (uint32_t bit_index = (bit_levels); bit_index != 0; --bit_index) { \
+		if_bit_0((probs)[model_index]) { \
+			update_bit_0_dummy(); \
+			model_index <<= 1; \
+		} else { \
+			update_bit_1_dummy(); \
+			model_index = (model_index << 1) | 1; \
+		} \
+	} \
+	(target) += model_index - (UINT32_C(1) << (bit_levels)); \
+} while (0)
+
+// Advances the range decoder over bit_levels bits; yields no value.
+#define bittree_reverse_decode_dummy(probs, bit_levels) \
+do { \
+	uint32_t model_index = 1; \
+	for (uint32_t bit_index = 0; bit_index < (bit_levels); ++bit_index) { \
+		if_bit_0((probs)[model_index]) { \
+			update_bit_0_dummy(); \
+			model_index <<= 1; \
+		} else { \
+			update_bit_1_dummy(); \
+			model_index = (model_index << 1) | 1; \
+		} \
+	} \
+} while (0)
+
+#endif
diff --git a/src/liblzma/rangecoder/range_encoder.c b/src/liblzma/rangecoder/range_encoder.c
new file mode 100644
index 00000000..f03bd873
--- /dev/null
+++ b/src/liblzma/rangecoder/range_encoder.c
@@ -0,0 +1,46 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file range_encoder.c
+/// \brief Static initializations for the range encoder's prices array
+//
+// Copyright (C) 1999-2006 Igor Pavlov
+// Copyright (C) 2007 Lasse Collin
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "range_encoder.h"
+
+
+#define NUM_BITS (BIT_MODEL_TOTAL_BITS - MOVE_REDUCING_BITS)
+
+
+uint32_t lzma_rc_prob_prices[BIT_MODEL_TOTAL >> MOVE_REDUCING_BITS];
+
+
+extern void
+lzma_rc_init(void)
+{
+	// Fill lzma_rc_prob_prices[] one power-of-two bucket at a time:
+	// bucket `shift' covers [1 << shift, 2 << shift). Index 0 is skipped.
+	for (uint32_t shift = 0; shift < NUM_BITS; ++shift) {
+		const int base = NUM_BITS - 1 - (int)shift;
+		const uint32_t first = 1 << shift;
+		const uint32_t limit = 1 << (shift + 1);
+
+		for (uint32_t idx = first; idx != limit; ++idx)
+			lzma_rc_prob_prices[idx]
+				= (base << BIT_PRICE_SHIFT_BITS)
+				+ (((limit - idx) << BIT_PRICE_SHIFT_BITS)
+				>> shift);
+	}
+}
diff --git a/src/liblzma/rangecoder/range_encoder.h b/src/liblzma/rangecoder/range_encoder.h
new file mode 100644
index 00000000..d513cfd1
--- /dev/null
+++ b/src/liblzma/rangecoder/range_encoder.h
@@ -0,0 +1,317 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file range_encoder.h
+/// \brief Range Encoder
+//
+// Copyright (C) 1999-2006 Igor Pavlov
+// Copyright (C) 2007 Lasse Collin
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_RANGE_ENCODER_H
+#define LZMA_RANGE_ENCODER_H
+
+#include "range_common.h"
+
+
+// Allow #including this file even if RC_BUFFER_SIZE isn't defined.
+#ifdef RC_BUFFER_SIZE
+typedef struct {
+	uint64_t low; ///< 64-bit so that rc_shift_low() can test bits 32 and up
+	uint32_t range;
+	uint32_t cache_size; ///< How many bytes rc_shift_low() emits next time
+	uint8_t cache; ///< First byte of that pending run
+	uint8_t buffer[RC_BUFFER_SIZE]; ///< Bytes that didn't fit into out[]
+	size_t buffer_size; ///< Number of bytes currently held in buffer[]
+} lzma_range_encoder;
+#endif
+
+
+/// Makes local copies of range encoder variables.
+#define rc_to_local(rc) \
+	uint64_t rc_low = (rc).low; \
+	uint32_t rc_range = (rc).range; \
+	uint32_t rc_cache_size = (rc).cache_size; \
+	uint8_t rc_cache = (rc).cache; \
+	uint8_t *rc_buffer = (rc).buffer; \
+	size_t rc_buffer_size = (rc).buffer_size
+
+/// Stores the local copies back to the range encoder structure.
+#define rc_from_local(rc) \
+do { \
+	(rc).low = rc_low; \
+	(rc).range = rc_range; \
+	(rc).cache_size = rc_cache_size; \
+	(rc).cache = rc_cache; \
+	(rc).buffer_size = rc_buffer_size; \
+} while (0)
+
+/// Resets the range encoder structure.
+#define rc_reset(rc) \
+do { \
+	(rc).low = 0; \
+	(rc).range = 0xFFFFFFFF; \
+	(rc).cache_size = 1; \
+	(rc).cache = 0; \
+	(rc).buffer_size = 0; \
+} while (0)
+
+
+//////////////////
+// Bit encoding //
+//////////////////
+
+// These macros expect that the following variables are defined:
+// - uint64_t rc_low;
+// - uint32_t rc_range;
+// - uint8_t rc_cache;
+// - uint32_t rc_cache_size;
+// - uint8_t *out;
+// - size_t out_pos_local; // Local copy of *out_pos
+// - size_t out_size;
+// - uint8_t *rc_buffer; size_t rc_buffer_size; // Used by rc_write_byte()
+
+// Combined from NRangeCoder::CEncoder::Encode()
+// and NRangeCoder::CEncoder::UpdateModel().
+#define bit_encode(prob, symbol) \
+do { \
+	probability rc_prob = (prob); \
+	const uint32_t rc_bound \
+		= (rc_range >> BIT_MODEL_TOTAL_BITS) * rc_prob; \
+	if ((symbol) == 0) { \
+		rc_range = rc_bound; \
+		rc_prob += (BIT_MODEL_TOTAL - rc_prob) >> MOVE_BITS; \
+	} else { \
+		rc_low += rc_bound; \
+		rc_range -= rc_bound; \
+		rc_prob -= rc_prob >> MOVE_BITS; \
+	} \
+	(prob) = rc_prob; \
+	rc_normalize(); \
+} while (0)
+
+
+// Optimized version of bit_encode(prob, 0)
+#define bit_encode_0(prob) \
+do { \
+	probability rc_prob = (prob); \
+	rc_range = (rc_range >> BIT_MODEL_TOTAL_BITS) * rc_prob; \
+	rc_prob += (BIT_MODEL_TOTAL - rc_prob) >> MOVE_BITS; \
+	(prob) = rc_prob; \
+	rc_normalize(); \
+} while (0)
+
+
+// Optimized version of bit_encode(prob, 1)
+#define bit_encode_1(prob) \
+do { \
+	probability rc_prob = (prob); \
+	const uint32_t rc_bound = (rc_range >> BIT_MODEL_TOTAL_BITS) \
+			* rc_prob; \
+	rc_low += rc_bound; \
+	rc_range -= rc_bound; \
+	rc_prob -= rc_prob >> MOVE_BITS; \
+	(prob) = rc_prob; \
+	rc_normalize(); \
+} while (0)
+
+
+///////////////////////
+// Bit tree encoding //
+///////////////////////
+
+#define bittree_encode(probs, bit_levels, symbol) \
+do { /* encodes the low bit_levels bits of symbol, highest bit first */ \
+	uint32_t model_index = 1; \
+	for (int32_t bit_index = (bit_levels) - 1; \
+			bit_index >= 0; --bit_index) { \
+		const uint32_t bit = ((symbol) >> bit_index) & 1; \
+		bit_encode((probs)[model_index], bit); \
+		model_index = (model_index << 1) | bit; \
+	} \
+} while (0)
+
+// Same as bittree_encode(), but the lowest bit of symbol is encoded first.
+#define bittree_reverse_encode(probs, bit_levels, symbol) \
+do { \
+	uint32_t model_index = 1; \
+	for (uint32_t bit_index = 0; bit_index < (bit_levels); ++bit_index) { \
+		const uint32_t bit = ((symbol) >> bit_index) & 1; \
+		bit_encode((probs)[model_index], bit); \
+		model_index = (model_index << 1) | bit; \
+	} \
+} while (0)
+
+
+/////////////////
+// Direct bits //
+/////////////////
+
+#define rc_encode_direct_bits(value, num_total_bits) \
+do { /* no probability model: every bit simply halves rc_range */ \
+	for (int32_t rc_i = (num_total_bits) - 1; rc_i >= 0; --rc_i) { \
+		rc_range >>= 1; \
+		if ((((value) >> rc_i) & 1) == 1) \
+			rc_low += rc_range; \
+		rc_normalize(); \
+	} \
+} while (0)
+
+
+//////////////////
+// Buffer "I/O" //
+//////////////////
+
+// Calls rc_shift_low() to write out a byte if needed.
+#define rc_normalize() \
+do { \
+	if (rc_range < TOP_VALUE) { \
+		rc_range <<= SHIFT_BITS; \
+		rc_shift_low(); \
+	} \
+} while (0)
+
+
+// Flushes all the pending output. Expands to a bare for statement.
+#define rc_flush() \
+	for (int32_t rc_i = 0; rc_i < 5; ++rc_i) \
+		rc_shift_low()
+
+
+// Writes the compressed data out through rc_write_byte().
+// TODO: Notation change?
+// (uint32_t)(0xFF000000) => ((uint32_t)(0xFF) << TOP_BITS)
+// TODO: Another notation change?
+// rc_low = (uint32_t)(rc_low) << SHIFT_BITS;
+// =>
+// rc_low &= TOP_VALUE - 1;
+// rc_low <<= SHIFT_BITS;
+#define rc_shift_low() \
+do { \
+	if ((uint32_t)(rc_low) < (uint32_t)(0xFF000000) \
+			|| (uint32_t)(rc_low >> 32) != 0) { \
+		uint8_t rc_temp = rc_cache; \
+		do { \
+			rc_write_byte(rc_temp + (uint8_t)(rc_low >> 32)); \
+			rc_temp = 0xFF; \
+		} while(--rc_cache_size != 0); \
+		rc_cache = (uint8_t)((uint32_t)(rc_low) >> 24); \
+	} \
+	++rc_cache_size; \
+	rc_low = (uint32_t)(rc_low) << SHIFT_BITS; \
+} while (0)
+
+
+// Write one byte of compressed data to *next_out. Updates out_pos_local.
+// If out_pos_local == out_size, the byte is appended to rc_buffer.
+#define rc_write_byte(b) \
+do { /* the bounds check now runs before the store into rc_buffer[] */ \
+	if (out_pos_local == out_size) { \
+		assert(rc_buffer_size < RC_BUFFER_SIZE); \
+		rc_buffer[rc_buffer_size++] = (uint8_t)(b); \
+	} else { \
+		assert(rc_buffer_size == 0); \
+		out[out_pos_local++] = (uint8_t)(b); \
+	} \
+} while (0)
+
+
+//////////////////
+// Price macros //
+//////////////////
+
+// These macros expect that the following variables are defined:
+// - uint32_t lzma_rc_prob_prices[];
+
+#define bit_get_price(prob, symbol) \
+	lzma_rc_prob_prices[((((prob) - (symbol)) ^ (-(symbol))) \
+			& (BIT_MODEL_TOTAL - 1)) >> MOVE_REDUCING_BITS]
+
+// Price of coding a 0 bit with the given probability.
+#define bit_get_price_0(prob) \
+	lzma_rc_prob_prices[(prob) >> MOVE_REDUCING_BITS]
+
+// Price of coding a 1 bit with the given probability.
+#define bit_get_price_1(prob) \
+	lzma_rc_prob_prices[(BIT_MODEL_TOTAL - (prob)) >> MOVE_REDUCING_BITS]
+
+
+// Adds price to price_target. TODO Optimize/Cleanup?
+#define bittree_get_price(price_target, probs, bit_levels, symbol) \
+do { \
+	uint32_t bittree_symbol = (symbol) | (UINT32_C(1) << (bit_levels)); \
+	while (bittree_symbol != 1) { \
+		(price_target) += bit_get_price((probs)[bittree_symbol >> 1], \
+				bittree_symbol & 1); \
+		bittree_symbol >>= 1; \
+	} \
+} while (0)
+
+
+// Adds price to price_target.
+#define bittree_reverse_get_price(price_target, probs, bit_levels, symbol) \
+do { \
+	uint32_t model_index = 1; \
+	for (uint32_t bit_index = 0; bit_index < (bit_levels); ++bit_index) { \
+		const uint32_t bit = ((symbol) >> bit_index) & 1; \
+		(price_target) += bit_get_price((probs)[model_index], bit); \
+		model_index = (model_index << 1) | bit; \
+	} \
+} while (0)
+
+
+//////////////////////
+// Global variables //
+//////////////////////
+
+// Probability prices used by *_get_price() macros. This is initialized
+// by lzma_rc_init() and is not modified later.
+extern uint32_t lzma_rc_prob_prices[BIT_MODEL_TOTAL >> MOVE_REDUCING_BITS];
+
+
+///////////////
+// Functions //
+///////////////
+
+/// Initializes lzma_rc_prob_prices[]. This needs to be called only once.
+extern void lzma_rc_init(void);
+
+
+#ifdef RC_BUFFER_SIZE
+/// Moves as much of rc->buffer[] as fits into out[], advancing *out_pos.
+/// Returns true if some bytes had to stay in rc->buffer[]; false otherwise.
+static inline bool
+rc_flush_buffer(lzma_range_encoder *rc,
+		uint8_t *out, size_t *out_pos, size_t out_size)
+{
+	// Nothing is pending, so there is nothing left to flush.
+	if (rc->buffer_size == 0)
+		return false;
+
+	// Copy as many pending bytes as out[] still has room for.
+	const size_t room = out_size - *out_pos;
+	const size_t count = rc->buffer_size > room ? room : rc->buffer_size;
+	memcpy(out + *out_pos, rc->buffer, count);
+	*out_pos += count;
+	rc->buffer_size -= count;
+	if (rc->buffer_size == 0)
+		return false;
+
+	// out[] is full: slide the unwritten tail to the start of buffer[].
+	memmove(rc->buffer, rc->buffer + count,
+			rc->buffer_size);
+	return true;
+}
+#endif
+
+#endif
diff --git a/src/liblzma/simple/Makefile.am b/src/liblzma/simple/Makefile.am
new file mode 100644
index 00000000..a37f1eb5
--- /dev/null
+++ b/src/liblzma/simple/Makefile.am
@@ -0,0 +1,46 @@
+##
+## Copyright (C) 2007 Lasse Collin
+##
+## This library is free software; you can redistribute it and/or
+## modify it under the terms of the GNU Lesser General Public
+## License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+##
+## This library is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+## Lesser General Public License for more details.
+## + +noinst_LTLIBRARIES = libsimple.la +libsimple_la_CPPFLAGS = \ + -I@top_srcdir@/src/liblzma/api \ + -I@top_srcdir@/src/liblzma/common +libsimple_la_SOURCES = \ + simple_coder.c \ + simple_coder.h \ + simple_private.h + +if COND_FILTER_X86 +libsimple_la_SOURCES += x86.c +endif + +if COND_FILTER_POWERPC +libsimple_la_SOURCES += powerpc.c +endif + +if COND_FILTER_IA64 +libsimple_la_SOURCES += ia64.c +endif + +if COND_FILTER_ARM +libsimple_la_SOURCES += arm.c +endif + +if COND_FILTER_ARMTHUMB +libsimple_la_SOURCES += armthumb.c +endif + +if COND_FILTER_SPARC +libsimple_la_SOURCES += sparc.c +endif diff --git a/src/liblzma/simple/arm.c b/src/liblzma/simple/arm.c new file mode 100644 index 00000000..284371c3 --- /dev/null +++ b/src/liblzma/simple/arm.c @@ -0,0 +1,76 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file arm.c +/// \brief Filter for ARM binaries +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "simple_private.h"
+
+// Rewrites the 24-bit field of every aligned word whose top byte is 0xEB.
+static size_t
+arm_code(lzma_simple *simple lzma_attribute((unused)),
+		uint32_t now_pos, bool is_encoder,
+		uint8_t *buffer, size_t size)
+{
+	size_t i; // size_t like `size', so i + 4 cannot wrap at 4 GiB
+	for (i = 0; i + 4 <= size; i += 4) {
+		if (buffer[i + 3] == 0xEB) {
+			uint32_t src = (buffer[i + 2] << 16)
+					| (buffer[i + 1] << 8)
+					| (buffer[i + 0]);
+			src <<= 2; // The field is stored divided by four.
+
+			uint32_t dest; // Encoder adds the position, decoder subtracts it.
+			if (is_encoder)
+				dest = now_pos + (uint32_t)(i) + 8 + src;
+			else
+				dest = src - (now_pos + (uint32_t)(i) + 8);
+
+			dest >>= 2;
+			buffer[i + 2] = (dest >> 16);
+			buffer[i + 1] = (dest >> 8);
+			buffer[i + 0] = dest;
+		}
+	}
+	// Offset of the first byte that was not processed (a multiple of 4).
+	return i;
+}
+
+// Shared initializer: registers arm_code() with the generic simple coder.
+static lzma_ret
+arm_coder_init(lzma_next_coder *next, lzma_allocator *allocator,
+		const lzma_filter_info *filters, bool is_encoder)
+{
+	return lzma_simple_coder_init(next, allocator, filters,
+			&arm_code, 0, 4, is_encoder);
+}
+
+// Encoder entry point (is_encoder = true).
+extern lzma_ret
+lzma_simple_arm_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+		const lzma_filter_info *filters)
+{
+	return arm_coder_init(next, allocator, filters, true);
+}
+
+// Decoder entry point (is_encoder = false).
+extern lzma_ret
+lzma_simple_arm_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+		const lzma_filter_info *filters)
+{
+	return arm_coder_init(next, allocator, filters, false);
+}
diff --git a/src/liblzma/simple/armthumb.c b/src/liblzma/simple/armthumb.c
new file mode 100644
index 00000000..51ed2c43
--- /dev/null
+++ b/src/liblzma/simple/armthumb.c
@@ -0,0 +1,81 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file armthumb.c
+/// \brief Filter for ARM-Thumb binaries
+//
+// Copyright (C) 1999-2006 Igor Pavlov
+// Copyright (C) 2007 Lasse Collin
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your
option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "simple_private.h"
+
+// Rewrites the 22-bit field of each halfword pair tagged 0xF0../0xF8...
+static size_t
+armthumb_code(lzma_simple *simple lzma_attribute((unused)),
+		uint32_t now_pos, bool is_encoder,
+		uint8_t *buffer, size_t size)
+{
+	size_t i; // size_t like `size', so i + 4 cannot wrap at 4 GiB
+	for (i = 0; i + 4 <= size; i += 2) {
+		if ((buffer[i + 1] & 0xF8) == 0xF0
+				&& (buffer[i + 3] & 0xF8) == 0xF8) {
+			uint32_t src = ((buffer[i + 1] & 0x7) << 19)
+					| (buffer[i + 0] << 11)
+					| ((buffer[i + 3] & 0x7) << 8)
+					| (buffer[i + 2]);
+
+			src <<= 1; // The field is stored divided by two.
+
+			uint32_t dest; // Encoder adds the position, decoder subtracts it.
+			if (is_encoder)
+				dest = now_pos + (uint32_t)(i) + 4 + src;
+			else
+				dest = src - (now_pos + (uint32_t)(i) + 4);
+
+			dest >>= 1;
+			buffer[i + 1] = 0xF0 | ((dest >> 19) & 0x7);
+			buffer[i + 0] = (dest >> 11);
+			buffer[i + 3] = 0xF8 | ((dest >> 8) & 0x7);
+			buffer[i + 2] = (dest);
+			i += 2; // Skip the second halfword of the pair as well.
+		}
+	}
+	// Offset of the first byte that was not processed.
+	return i;
+}
+
+// Shared initializer: registers armthumb_code() with the simple coder.
+static lzma_ret
+armthumb_coder_init(lzma_next_coder *next, lzma_allocator *allocator,
+		const lzma_filter_info *filters, bool is_encoder)
+{
+	return lzma_simple_coder_init(next, allocator, filters,
+			&armthumb_code, 0, 4, is_encoder);
+}
+
+// Encoder entry point (is_encoder = true).
+extern lzma_ret
+lzma_simple_armthumb_encoder_init(lzma_next_coder *next,
+		lzma_allocator *allocator, const lzma_filter_info *filters)
+{
+	return armthumb_coder_init(next, allocator, filters, true);
+}
+
+// Decoder entry point (is_encoder = false).
+extern lzma_ret
+lzma_simple_armthumb_decoder_init(lzma_next_coder *next,
+		lzma_allocator *allocator, const lzma_filter_info *filters)
+{
+	return armthumb_coder_init(next, allocator, filters, false);
+}
diff --git a/src/liblzma/simple/ia64.c b/src/liblzma/simple/ia64.c
new file mode 100644
index 00000000..59830521
--- /dev/null
+++ b/src/liblzma/simple/ia64.c
@@ -0,0 +1,117
@@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file ia64.c +/// \brief Filter for IA64 (Itanium) binaries +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "simple_private.h" + + +static size_t +ia64_code(lzma_simple *simple lzma_attribute((unused)), + uint32_t now_pos, bool is_encoder, + uint8_t *buffer, size_t size) +{ + static const uint32_t BRANCH_TABLE[32] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 4, 4, 6, 6, 0, 0, 7, 7, + 4, 4, 0, 0, 4, 4, 0, 0 + }; + + size_t i; + for (i = 0; i + 16 <= size; i += 16) { + const uint32_t instr_template = buffer[i] & 0x1F; + const uint32_t mask = BRANCH_TABLE[instr_template]; + uint32_t bit_pos = 5; + + for (size_t slot = 0; slot < 3; ++slot, bit_pos += 41) { + if (((mask >> slot) & 1) == 0) + continue; + + const size_t byte_pos = (bit_pos >> 3); + const uint32_t bit_res = bit_pos & 0x7; + uint64_t instruction = 0; + + for (size_t j = 0; j < 6; ++j) + instruction += (uint64_t)( + buffer[i + j + byte_pos]) + << (8 * j); + + uint64_t inst_norm = instruction >> bit_res; + + if (((inst_norm >> 37) & 0xF) == 0x5 + && ((inst_norm >> 9) & 0x7) == 0 + /* && (inst_norm & 0x3F)== 0 */ + ) { + uint32_t src = (uint32_t)( + (inst_norm >> 13) & 0xFFFFF); + src |= ((inst_norm >> 36) & 1) << 20; + + src <<= 4; + + uint32_t dest; + if 
(is_encoder) + dest = now_pos + (uint32_t)(i) + src; + else + dest = src - (now_pos + (uint32_t)(i)); + + dest >>= 4; + + inst_norm &= ~((uint64_t)(0x8FFFFF) << 13); + inst_norm |= (uint64_t)(dest & 0xFFFFF) << 13; + inst_norm |= (uint64_t)(dest & 0x100000) + << (36 - 20); + + instruction &= (1 << bit_res) - 1; + instruction |= (inst_norm << bit_res); + + for (size_t j = 0; j < 6; j++) + buffer[i + j + byte_pos] = (uint8_t)( + instruction + >> (8 * j)); + } + } + } + + return i; +} + + +static lzma_ret +ia64_coder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters, bool is_encoder) +{ + return lzma_simple_coder_init(next, allocator, filters, + &ia64_code, 0, 4, is_encoder); +} + + +extern lzma_ret +lzma_simple_ia64_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters) +{ + return ia64_coder_init(next, allocator, filters, true); +} + + +extern lzma_ret +lzma_simple_ia64_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters) +{ + return ia64_coder_init(next, allocator, filters, false); +} diff --git a/src/liblzma/simple/powerpc.c b/src/liblzma/simple/powerpc.c new file mode 100644 index 00000000..a3089fca --- /dev/null +++ b/src/liblzma/simple/powerpc.c @@ -0,0 +1,80 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file powerpc.c +/// \brief Filter for PowerPC (big endian) binaries +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. 
+// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "simple_private.h" + + +static size_t +powerpc_code(lzma_simple *simple lzma_attribute((unused)), + uint32_t now_pos, bool is_encoder, + uint8_t *buffer, size_t size) +{ + size_t i; + for (i = 0; i + 4 <= size; i += 4) { + // PowerPC branch 6(48) 24(Offset) 1(Abs) 1(Link) + if ((buffer[i] >> 2) == 0x12 + && ((buffer[i + 3] & 3) == 1)) { + + const uint32_t src = ((buffer[i + 0] & 3) << 24) + | (buffer[i + 1] << 16) + | (buffer[i + 2] << 8) + | (buffer[i + 3] & (~3)); + + uint32_t dest; + if (is_encoder) + dest = now_pos + (uint32_t)(i) + src; + else + dest = src - (now_pos + (uint32_t)(i)); + + buffer[i + 0] = 0x48 | ((dest >> 24) & 0x03); + buffer[i + 1] = (dest >> 16); + buffer[i + 2] = (dest >> 8); + buffer[i + 3] &= 0x03; + buffer[i + 3] |= dest; + } + } + + return i; +} + + +static lzma_ret +powerpc_coder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters, bool is_encoder) +{ + return lzma_simple_coder_init(next, allocator, filters, + &powerpc_code, 0, 4, is_encoder); +} + + +extern lzma_ret +lzma_simple_powerpc_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters) +{ + return powerpc_coder_init(next, allocator, filters, true); +} + + +extern lzma_ret +lzma_simple_powerpc_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters) +{ + return powerpc_coder_init(next, allocator, filters, false); +} diff --git a/src/liblzma/simple/simple_coder.c b/src/liblzma/simple/simple_coder.c new file mode 100644 index 00000000..f60dff34 --- /dev/null +++ b/src/liblzma/simple/simple_coder.c @@ -0,0 +1,306 
// @@
///////////////////////////////////////////////////////////////////////////////
//
/// \file simple_coder.c
/// \brief Wrapper for simple filters
///
/// Simple filters don't change the size of the data i.e. number of bytes
/// in equals the number of bytes out.
//
// Copyright (C) 2007 Lasse Collin
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
///////////////////////////////////////////////////////////////////////////////

#include "simple_private.h"


/// Copies or encodes/decodes more data to out[]. Checks and updates
/// uncompressed_size when we are the last coder in the chain.
/// If we aren't the last filter in the chain, we don't need to care about
/// uncompressed size, since we don't change it; the next filter in the
/// chain will check it anyway.
///
/// Sets coder->end_was_reached when the end of the data is detected.
///
/// \return     LZMA_OK on success (also when the end was reached),
///             LZMA_DATA_ERROR if the amount of input conflicts with
///             uncompressed_size, or the error code returned by the next
///             coder in the chain.
static lzma_ret
copy_or_code(lzma_coder *coder, lzma_allocator *allocator,
		const uint8_t *restrict in, size_t *restrict in_pos,
		size_t in_size, uint8_t *restrict out,
		size_t *restrict out_pos, size_t out_size, lzma_action action)
{
	assert(!coder->end_was_reached);

	if (coder->next.code == NULL) {
		// We are the last coder in the chain: in[] is the raw data.
		const size_t in_avail = in_size - *in_pos;

		if (coder->is_encoder) {
			if (action == LZMA_FINISH) {
				// If uncompressed size is known and the
				// amount of available input doesn't match
				// the uncompressed size, return an error.
				if (coder->uncompressed_size
						!= LZMA_VLI_VALUE_UNKNOWN
						&& coder->uncompressed_size
							!= in_avail)
					return LZMA_DATA_ERROR;

			} else if (coder->uncompressed_size
					< (lzma_vli)(in_avail)) {
				// There is too much input available.
				return LZMA_DATA_ERROR;
			}
		} else {
			// Limit in_size so that we don't copy too much.
			if ((lzma_vli)(in_avail) > coder->uncompressed_size)
				in_size = *in_pos + (size_t)(
						coder->uncompressed_size);
		}

		// Store the old position so we can update uncompressed_size.
		const size_t out_start = *out_pos;

		// Copy the data
		// NOTE(review): bufcpy() is defined elsewhere; presumably it
		// copies as much as fits and advances both positions.
		bufcpy(in, in_pos, in_size, out, out_pos, out_size);

		// Update uncompressed_size.
		if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN)
			coder->uncompressed_size -= *out_pos - out_start;

		// Check if end of stream was reached.
		if (coder->is_encoder) {
			if (action == LZMA_FINISH && *in_pos == in_size)
				coder->end_was_reached = true;
		} else {
			if (coder->uncompressed_size == 0)
				coder->end_was_reached = true;
		}

	} else {
		// Call the next coder in the chain to provide us some data.
		// We don't care about uncompressed_size here, because
		// the next filter in the chain will do it for us (since
		// we don't change the size of the data).
		const lzma_ret ret = coder->next.code(
				coder->next.coder, allocator,
				in, in_pos, in_size,
				out, out_pos, out_size, action);

		if (ret == LZMA_STREAM_END) {
			assert(!coder->is_encoder
					|| action == LZMA_FINISH);
			coder->end_was_reached = true;

		} else if (ret != LZMA_OK) {
			return ret;
		}
	}

	return LZMA_OK;
}


/// Runs the filter-specific function on buffer[0..size) and advances
/// coder->now_pos by the number of bytes it reports as filtered.
///
/// \return     Number of bytes filtered from the beginning of buffer[]
static size_t
call_filter(lzma_coder *coder, uint8_t *buffer, size_t size)
{
	const size_t filtered = coder->filter(coder->simple,
			coder->now_pos, coder->is_encoder,
			buffer, size);
	coder->now_pos += filtered;
	return filtered;
}


/// The code() callback of every simple filter (installed by
/// lzma_simple_coder_init()).
///
/// Works in three steps: (1) flush filtered data still waiting in
/// coder->buffer[]; (2) if out[] has more free space than buffer[] holds,
/// gather data directly into out[] and filter it there, saving the
/// unfiltered tail to buffer[]; (3) if buffer[] is non-empty, fill and filter
/// it and flush what fits into out[].
///
/// \return     LZMA_OK, LZMA_STREAM_END once all data has been flushed after
///             the end was reached, or an error from copy_or_code().
static lzma_ret
simple_code(lzma_coder *coder, lzma_allocator *allocator,
		const uint8_t *restrict in, size_t *restrict in_pos,
		size_t in_size, uint8_t *restrict out,
		size_t *restrict out_pos, size_t out_size, lzma_action action)
{
	// Flush already filtered data from coder->buffer[] to out[].
	if (coder->pos < coder->filtered) {
		bufcpy(coder->buffer, &coder->pos, coder->filtered,
				out, out_pos, out_size);

		// If we couldn't flush all the filtered data, return to
		// application immediately.
		if (coder->pos < coder->filtered)
			return LZMA_OK;

		if (coder->end_was_reached) {
			assert(coder->filtered == coder->size);
			return LZMA_STREAM_END;
		}
	}

	// If we get here, there is no filtered data left in the buffer.
	coder->filtered = 0;

	assert(!coder->end_was_reached);

	// If there is more output space left than there is unfiltered data
	// in coder->buffer[], flush coder->buffer[] to out[], and copy/code
	// more data to out[] hopefully filling it completely. Then filter
	// the data in out[]. This step is where most of the data gets
	// filtered if the buffer sizes used by the application are reasonable.
	const size_t out_avail = out_size - *out_pos;
	const size_t buf_avail = coder->size - coder->pos;
	if (out_avail > buf_avail) {
		// Store the old position so that we know from which byte
		// to start filtering.
		const size_t out_start = *out_pos;

		// Flush data from coder->buffer[] to out[], but don't reset
		// coder->pos and coder->size yet. This way the coder can be
		// restarted if the next filter in the chain returns e.g.
		// LZMA_MEM_ERROR.
		memcpy(out + *out_pos, coder->buffer + coder->pos, buf_avail);
		*out_pos += buf_avail;

		// Copy/Encode/Decode more data to out[].
		{
			const lzma_ret ret = copy_or_code(coder, allocator,
					in, in_pos, in_size,
					out, out_pos, out_size, action);
			assert(ret != LZMA_STREAM_END);
			if (ret != LZMA_OK)
				return ret;
		}

		// Filter out[].
		const size_t size = *out_pos - out_start;
		const size_t filtered = call_filter(
				coder, out + out_start, size);

		// The filter must not leave more than unfiltered_max bytes
		// (half of the allocated buffer) unprocessed.
		const size_t unfiltered = size - filtered;
		assert(unfiltered <= coder->allocated / 2);

		// Now we can update coder->pos and coder->size, because
		// the next coder in the chain (if any) was successful.
		coder->pos = 0;
		coder->size = unfiltered;

		if (coder->end_was_reached) {
			// The last bytes have been copied to out[] already.
			// They are left as is.
			coder->size = 0;

		} else if (unfiltered > 0) {
			// There is unfiltered data left in out[]. Copy it to
			// coder->buffer[] and rewind *out_pos appropriately.
			*out_pos -= unfiltered;
			memcpy(coder->buffer, out + *out_pos, unfiltered);
		}
	} else if (coder->pos > 0) {
		// Not enough output space: move the unflushed data to the
		// beginning of buffer[] to make room for more input.
		memmove(coder->buffer, coder->buffer + coder->pos, buf_avail);
		coder->size -= coder->pos;
		coder->pos = 0;
	}

	assert(coder->pos == 0);

	// If coder->buffer[] isn't empty, try to fill it by copying/decoding
	// more data. Then filter coder->buffer[] and copy the successfully
	// filtered data to out[]. It is probable, that some filtered and
	// unfiltered data will be left to coder->buffer[].
	if (coder->size > 0) {
		{
			const lzma_ret ret = copy_or_code(coder, allocator,
					in, in_pos, in_size,
					coder->buffer, &coder->size,
					coder->allocated, action);
			assert(ret != LZMA_STREAM_END);
			if (ret != LZMA_OK)
				return ret;
		}

		coder->filtered = call_filter(
				coder, coder->buffer, coder->size);

		// Everything is considered to be filtered if coder->buffer[]
		// contains the last bytes of the data.
		if (coder->end_was_reached)
			coder->filtered = coder->size;

		// Flush as much as possible.
		bufcpy(coder->buffer, &coder->pos, coder->filtered,
				out, out_pos, out_size);
	}

	// Check if we got everything done.
	if (coder->end_was_reached && coder->pos == coder->size)
		return LZMA_STREAM_END;

	return LZMA_OK;
}


/// The end() callback: releases the next coder in the chain, the
/// filter-specific data and the coder structure itself.
static void
simple_coder_end(lzma_coder *coder, lzma_allocator *allocator)
{
	lzma_next_coder_end(&coder->next, allocator);
	lzma_free(coder->simple, allocator);
	lzma_free(coder, allocator);
	return;
}


/// Initializes (or re-initializes) next as a simple filter.
///
/// \param      next            Coder to initialize; allocated here when
///                             next->coder is NULL, otherwise reused
/// \param      allocator       Passed to lzma_alloc()
/// \param      filters         filters[0] describes this filter (options,
///                             uncompressed_size); filters + 1 is handed to
///                             lzma_next_filter_init()
/// \param      filter          Filter-specific conversion function
/// \param      simple_size     Size of the filter-specific data, 0 for none
/// \param      unfiltered_max  The temporary buffer is allocated as twice
///                             this many bytes
/// \param      is_encoder      Stored and passed to filter() on each call
///
/// \return     LZMA_MEM_ERROR if an allocation fails, otherwise the return
///             value of lzma_next_filter_init().
extern lzma_ret
lzma_simple_coder_init(lzma_next_coder *next, lzma_allocator *allocator,
		const lzma_filter_info *filters,
		size_t (*filter)(lzma_simple *simple, uint32_t now_pos,
			bool is_encoder, uint8_t *buffer, size_t size),
		size_t simple_size, size_t unfiltered_max, bool is_encoder)
{
	// Allocate memory for the lzma_coder structure if needed.
	if (next->coder == NULL) {
		// Here we allocate space also for the temporary buffer. We
		// need twice the size of unfiltered_max, because then it
		// is always possible to filter at least unfiltered_max bytes
		// more data in coder->buffer[] if it can be filled completely.
		next->coder = lzma_alloc(sizeof(lzma_coder)
				+ 2 * unfiltered_max, allocator);
		if (next->coder == NULL)
			return LZMA_MEM_ERROR;

		next->code = &simple_code;
		next->end = &simple_coder_end;

		next->coder->next = LZMA_NEXT_CODER_INIT;
		next->coder->filter = filter;
		next->coder->allocated = 2 * unfiltered_max;

		// Allocate memory for filter-specific data structure.
		// NOTE(review): on failure next->coder stays allocated;
		// presumably the caller frees it through next->end -- confirm.
		if (simple_size > 0) {
			next->coder->simple = lzma_alloc(
					simple_size, allocator);
			if (next->coder->simple == NULL)
				return LZMA_MEM_ERROR;
		} else {
			next->coder->simple = NULL;
		}
	}

	// The start offset comes from the filter options when they exist.
	if (filters[0].options != NULL) {
		const lzma_options_simple *simple = filters[0].options;
		next->coder->now_pos = simple->start_offset;
	} else {
		next->coder->now_pos = 0;
	}

	// Reset variables.
	next->coder->is_encoder = is_encoder;
	next->coder->end_was_reached = false;
	next->coder->uncompressed_size = filters[0].uncompressed_size;
	next->coder->pos = 0;
	next->coder->filtered = 0;
	next->coder->size = 0;

	return lzma_next_filter_init(
			&next->coder->next, allocator, filters + 1);
}

// diff --git a/src/liblzma/simple/simple_coder.h b/src/liblzma/simple/simple_coder.h new file mode 100644 index 00000000..b682f84f --- /dev/null +++ b/src/liblzma/simple/simple_coder.h @@ -0,0 +1,68 @@
///////////////////////////////////////////////////////////////////////////////
//
/// \file simple_coder.h
/// \brief Wrapper for simple filters
//
//
// Copyright (C) 2007 Lasse Collin
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
///////////////////////////////////////////////////////////////////////////////

#ifndef LZMA_SIMPLE_CODER_H
#define LZMA_SIMPLE_CODER_H

#include "common.h"


// Each simple filter provides one encoder and one decoder initializer
// with the same signature. In the filters visible in this import both
// forward to a shared static initializer, differing only in the
// is_encoder flag.

// x86 filter (x86.c)
extern lzma_ret lzma_simple_x86_encoder_init(lzma_next_coder *next,
		lzma_allocator *allocator, const lzma_filter_info *filters);

extern lzma_ret lzma_simple_x86_decoder_init(lzma_next_coder *next,
		lzma_allocator *allocator, const lzma_filter_info *filters);


// PowerPC filter (powerpc.c)
extern lzma_ret lzma_simple_powerpc_encoder_init(lzma_next_coder *next,
		lzma_allocator *allocator, const lzma_filter_info *filters);

extern lzma_ret lzma_simple_powerpc_decoder_init(lzma_next_coder *next,
		lzma_allocator *allocator, const lzma_filter_info *filters);


// IA-64 filter (ia64.c)
extern lzma_ret lzma_simple_ia64_encoder_init(lzma_next_coder *next,
		lzma_allocator *allocator, const lzma_filter_info *filters);

extern lzma_ret lzma_simple_ia64_decoder_init(lzma_next_coder *next,
		lzma_allocator *allocator, const lzma_filter_info *filters);


// ARM filter (presumably arm.c -- definition not shown here)
extern lzma_ret lzma_simple_arm_encoder_init(lzma_next_coder *next,
		lzma_allocator *allocator, const lzma_filter_info *filters);

extern lzma_ret lzma_simple_arm_decoder_init(lzma_next_coder *next,
		lzma_allocator *allocator, const lzma_filter_info *filters);


// ARM-Thumb filter (armthumb.c)
extern lzma_ret lzma_simple_armthumb_encoder_init(lzma_next_coder *next,
		lzma_allocator *allocator, const lzma_filter_info *filters);

extern lzma_ret lzma_simple_armthumb_decoder_init(lzma_next_coder *next,
		lzma_allocator *allocator, const lzma_filter_info *filters);


// SPARC filter (sparc.c)
extern lzma_ret lzma_simple_sparc_encoder_init(lzma_next_coder *next,
		lzma_allocator *allocator, const lzma_filter_info *filters);

extern lzma_ret lzma_simple_sparc_decoder_init(lzma_next_coder *next,
		lzma_allocator *allocator, const lzma_filter_info *filters);

#endif

// diff --git a/src/liblzma/simple/simple_private.h b/src/liblzma/simple/simple_private.h new file mode 100644 index 00000000..a512396c --- /dev/null +++
// b/src/liblzma/simple/simple_private.h @@ -0,0 +1,86 @@
///////////////////////////////////////////////////////////////////////////////
//
/// \file simple_private.h
/// \brief Private definitions for so called simple filters
//
// Copyright (C) 2007 Lasse Collin
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
///////////////////////////////////////////////////////////////////////////////

#ifndef LZMA_SIMPLE_PRIVATE_H
#define LZMA_SIMPLE_PRIVATE_H

#include "simple_coder.h"


/// Filter-specific state. Left opaque here; a filter that needs state
/// defines struct lzma_simple_s itself (x86.c does).
typedef struct lzma_simple_s lzma_simple;

/// State shared by all simple filters. It is allocated together with the
/// trailing buffer[] by lzma_simple_coder_init().
struct lzma_coder_s {
	/// Next filter in the chain
	lzma_next_coder next;

	/// True if the next coder in the chain has returned LZMA_STREAM_END
	/// or if we have processed uncompressed_size bytes.
	bool end_was_reached;

	/// True if filter() should encode the data; false to decode.
	/// Currently all simple filters use the same function for encoding
	/// and decoding, because the difference between encoders and decoders
	/// is very small.
	bool is_encoder;

	/// Size of the data *left* to be processed, or LZMA_VLI_VALUE_UNKNOWN
	/// if unknown.
	lzma_vli uncompressed_size;

	/// Pointer to filter-specific function, which does
	/// the actual filtering. It returns the number of bytes it
	/// filtered from the beginning of buffer[].
	size_t (*filter)(lzma_simple *simple, uint32_t now_pos,
			bool is_encoder, uint8_t *buffer, size_t size);

	/// Pointer to filter-specific data, or NULL if filter doesn't need
	/// any extra data.
	lzma_simple *simple;

	/// The lowest 32 bits of the current position in the data. Most
	/// filters need this to do conversions between absolute and relative
	/// addresses.
	uint32_t now_pos;

	/// Size of the memory allocated for the buffer. This is set to
	/// 2 * unfiltered_max by lzma_simple_coder_init().
	size_t allocated;

	/// Flushing position in the temporary buffer. buffer[pos] is the
	/// next byte to be copied to out[].
	size_t pos;

	/// buffer[filtered] is the first unfiltered byte. When pos is smaller
	/// than filtered, there is unflushed filtered data in the buffer.
	size_t filtered;

	/// Total number of bytes (both filtered and unfiltered) currently
	/// in the temporary buffer.
	size_t size;

	/// Temporary buffer
	uint8_t buffer[];
};


/// Common initializer used by every simple filter.
///
/// \param      filter          Filter-specific conversion function
/// \param      simple_size     Size of the filter-specific data to allocate
///                             for coder->simple; 0 if the filter has none
/// \param      unfiltered_max  Upper bound for the number of bytes filter()
///                             may leave unprocessed; the temporary buffer
///                             is allocated as twice this size
/// \param      is_encoder      true to encode, false to decode
extern lzma_ret lzma_simple_coder_init(lzma_next_coder *next,
		lzma_allocator *allocator, const lzma_filter_info *filters,
		size_t (*filter)(lzma_simple *simple, uint32_t now_pos,
			bool is_encoder, uint8_t *buffer, size_t size),
		size_t simple_size, size_t unfiltered_max, bool is_encoder);

#endif

// diff --git a/src/liblzma/simple/sparc.c b/src/liblzma/simple/sparc.c new file mode 100644 index 00000000..09bc67f1 --- /dev/null +++ b/src/liblzma/simple/sparc.c @@ -0,0 +1,88 @@
///////////////////////////////////////////////////////////////////////////////
//
/// \file sparc.c
/// \brief Filter for SPARC binaries
//
// Copyright (C) 1999-2006 Igor Pavlov
// Copyright (C) 2007 Lasse Collin
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "simple_private.h" + + +static size_t +sparc_code(lzma_simple *simple lzma_attribute((unused)), + uint32_t now_pos, bool is_encoder, + uint8_t *buffer, size_t size) +{ + size_t i; + for (i = 0; i + 4 <= size; i += 4) { + + if ((buffer[i] == 0x40 && (buffer[i + 1] & 0xC0) == 0x00) + || (buffer[i] == 0x7F + && (buffer[i + 1] & 0xC0) == 0xC0)) { + + uint32_t src = ((uint32_t)buffer[i + 0] << 24) + | ((uint32_t)buffer[i + 1] << 16) + | ((uint32_t)buffer[i + 2] << 8) + | ((uint32_t)buffer[i + 3]); + + src <<= 2; + + uint32_t dest; + if (is_encoder) + dest = now_pos + (uint32_t)(i) + src; + else + dest = src - (now_pos + (uint32_t)(i)); + + dest >>= 2; + + dest = (((0 - ((dest >> 22) & 1)) << 22) & 0x3FFFFFFF) + | (dest & 0x3FFFFF) + | 0x40000000; + + buffer[i + 0] = (uint8_t)(dest >> 24); + buffer[i + 1] = (uint8_t)(dest >> 16); + buffer[i + 2] = (uint8_t)(dest >> 8); + buffer[i + 3] = (uint8_t)(dest); + } + } + + return i; +} + + +static lzma_ret +sparc_coder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters, bool is_encoder) +{ + return lzma_simple_coder_init(next, allocator, filters, + &sparc_code, 0, 4, is_encoder); +} + + +extern lzma_ret +lzma_simple_sparc_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters) +{ + return sparc_coder_init(next, allocator, filters, true); +} + + +extern lzma_ret +lzma_simple_sparc_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters) +{ + return sparc_coder_init(next, allocator, filters, false); +} diff --git a/src/liblzma/simple/x86.c b/src/liblzma/simple/x86.c new file mode 100644 index 00000000..a48a5750 --- /dev/null +++ b/src/liblzma/simple/x86.c @@ -0,0 +1,161 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file x86.c +/// \brief Filter for x86 binaries (BCJ 
/// filter)
//
// Copyright (C) 1999-2006 Igor Pavlov
// Copyright (C) 2007 Lasse Collin
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
///////////////////////////////////////////////////////////////////////////////

#include "simple_private.h"


/// True if b is 0x00 or 0xFF. Note that b is evaluated twice.
#define Test86MSByte(b) ((b) == 0 || (b) == 0xFF)


/// State of the x86 filter that is carried over between calls.
struct lzma_simple_s {
	/// Bit history of the most recently seen opcode candidates;
	/// see x86_code() for how it is updated.
	uint32_t prev_mask;

	/// Stream position (low 32 bits) of the previous 0xE8/0xE9 byte
	uint32_t prev_pos;
};


/// \brief      Converts 32-bit operands following 0xE8/0xE9 bytes
///
/// Searches buffer[] for bytes 0xE8 and 0xE9 (presumably the x86 CALL and
/// JMP rel32 opcodes -- confirm against the instruction set reference). If
/// the fourth byte after the opcode is 0x00 or 0xFF and the history in
/// prev_mask allows it, the four bytes after the opcode are converted
/// between relative and absolute form using now_pos + buffer_pos + 5.
///
/// \param      simple      Holds prev_mask and prev_pos between calls
/// \param      now_pos     Lowest 32 bits of the stream offset of buffer[0]
/// \param      is_encoder  true: add the position; false: subtract it
/// \param      buffer      Data to convert in place
/// \param      size        Number of bytes available in buffer[]
///
/// \return     Number of bytes processed; 0 if size is less than five.
static size_t
x86_code(lzma_simple *simple, uint32_t now_pos, bool is_encoder,
		uint8_t *buffer, size_t size)
{
	// Indexed by bits 1-3 of prev_mask: whether a conversion is allowed
	// with that recent history.
	static const bool MASK_TO_ALLOWED_STATUS[8]
		= { true, true, true, false, true, false, false, false };

	// Indexed by prev_mask >> 1: selects which byte of the converted
	// value is re-checked in the inner loop below.
	static const uint32_t MASK_TO_BIT_NUMBER[8]
			= { 0, 1, 2, 2, 3, 3, 3, 3 };

	uint32_t prev_mask = simple->prev_mask;
	uint32_t prev_pos = simple->prev_pos;

	// An opcode plus its operand needs five bytes. Note that the state
	// in *simple is left untouched in this case.
	if (size < 5)
		return 0;

	// Distances above five are all handled the same way below, so clamp.
	if (now_pos - prev_pos > 5)
		prev_pos = now_pos - 5;

	const size_t limit = size - 5;
	size_t buffer_pos = 0;

	while (buffer_pos <= limit) {
		uint8_t b = buffer[buffer_pos];
		if (b != 0xE8 && b != 0xE9) {
			++buffer_pos;
			continue;
		}

		// Distance from the previous candidate opcode.
		const uint32_t offset = now_pos + (uint32_t)(buffer_pos)
				- prev_pos;
		prev_pos = now_pos + (uint32_t)(buffer_pos);

		if (offset > 5) {
			// The previous candidate is too far away to matter.
			prev_mask = 0;
		} else {
			// Age the history by one position per byte advanced.
			for (uint32_t i = 0; i < offset; ++i) {
				prev_mask &= 0x77;
				prev_mask <<= 1;
			}
		}

		// Most significant byte of the little endian operand.
		b = buffer[buffer_pos + 4];

		if (Test86MSByte(b)
			&& MASK_TO_ALLOWED_STATUS[(prev_mask >> 1) & 0x7]
			&& (prev_mask >> 1) < 0x10) {

			uint32_t src = ((uint32_t)(b) << 24)
				| ((uint32_t)(buffer[buffer_pos + 3]) << 16)
				| ((uint32_t)(buffer[buffer_pos + 2]) << 8)
				| (buffer[buffer_pos + 1]);

			uint32_t dest;
			while (true) {
				if (is_encoder)
					dest = src + (now_pos + (uint32_t)(
							buffer_pos) + 5);
				else
					dest = src - (now_pos + (uint32_t)(
							buffer_pos) + 5);

				if (prev_mask == 0)
					break;

				const uint32_t i = MASK_TO_BIT_NUMBER[
						prev_mask >> 1];

				b = (uint8_t)(dest >> (24 - i * 8));

				if (!Test86MSByte(b))
					break;

				// The selected byte is still 0x00 or 0xFF:
				// flip the low (32 - i * 8) bits and convert
				// once more.
				src = dest ^ ((1 << (32 - i * 8)) - 1);
			}

			// The top byte becomes 0x00 or 0xFF depending on
			// bit 24 of the result.
			buffer[buffer_pos + 4]
					= (uint8_t)(~(((dest >> 24) & 1) - 1));
			buffer[buffer_pos + 3] = (uint8_t)(dest >> 16);
			buffer[buffer_pos + 2] = (uint8_t)(dest >> 8);
			buffer[buffer_pos + 1] = (uint8_t)(dest);
			buffer_pos += 5;
			prev_mask = 0;

		} else {
			// Not converted: record the candidate in the history.
			++buffer_pos;
			prev_mask |= 1;
			if (Test86MSByte(b))
				prev_mask |= 0x10;
		}
	}

	simple->prev_mask = prev_mask;
	simple->prev_pos = prev_pos;

	return buffer_pos;
}


/// Shared initializer of the x86 encoder and decoder. Allocates the
/// filter-specific state via lzma_simple_coder_init() (unfiltered_max = 5)
/// and resets it when the initialization succeeded.
static lzma_ret
x86_coder_init(lzma_next_coder *next, lzma_allocator *allocator,
		const lzma_filter_info *filters, bool is_encoder)
{
	const lzma_ret ret = lzma_simple_coder_init(next, allocator, filters,
			&x86_code, sizeof(lzma_simple), 5, is_encoder);

	if (ret == LZMA_OK) {
		next->coder->simple->prev_mask = 0;
		next->coder->simple->prev_pos = (uint32_t)(-5);
	}

	return ret;
}


/// Initializes next as an x86 filter encoder.
extern lzma_ret
lzma_simple_x86_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
		const lzma_filter_info *filters)
{
	return x86_coder_init(next, allocator, filters, true);
}


/// Initializes next as an x86 filter decoder.
extern lzma_ret
lzma_simple_x86_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
		const lzma_filter_info *filters)
{
	return x86_coder_init(next, allocator, filters, false);
}

// diff --git a/src/liblzma/subblock/Makefile.am b/src/liblzma/subblock/Makefile.am new file mode 100644 index 00000000..8f2daf59 --- /dev/null +++ b/src/liblzma/subblock/Makefile.am @@ -0,0 +1,33 @@
// ##
// ## Copyright (C) 2007 Lasse Collin
// ##
// ##
This library is free software; you can redistribute it and/or +## modify it under the terms of the GNU Lesser General Public +## License as published by the Free Software Foundation; either +## version 2.1 of the License, or (at your option) any later version. +## +## This library is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## Lesser General Public License for more details. +## + +noinst_LTLIBRARIES = libsubblock.la +libsubblock_la_SOURCES = +libsubblock_la_CPPFLAGS = \ + -I@top_srcdir@/src/liblzma/api \ + -I@top_srcdir@/src/liblzma/common + +if COND_MAIN_ENCODER +libsubblock_la_SOURCES += \ + subblock_encoder.c \ + subblock_encoder.h +endif + +if COND_MAIN_DECODER +libsubblock_la_SOURCES += \ + subblock_decoder.c \ + subblock_decoder.h \ + subblock_decoder_helper.c \ + subblock_decoder_helper.h +endif diff --git a/src/liblzma/subblock/subblock_decoder.c b/src/liblzma/subblock/subblock_decoder.c new file mode 100644 index 00000000..ef3c289f --- /dev/null +++ b/src/liblzma/subblock/subblock_decoder.c @@ -0,0 +1,681 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file subblock_decoder.c +/// \brief Decoder of the Subblock filter +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "subblock_decoder.h"
+#include "subblock_decoder_helper.h"
+#include "raw_decoder.h"
+
+
+/// Maximum number of consecutive Subblocks with Subblock Type Padding
+// NOTE(review): the only use of this limit in this file is inside the
+// commented-out check in decode_buffer(), so it is currently not enforced.
+#define PADDING_MAX 31
+
+
+/// State of one Subblock decoder instance. Decoding is resumable: every
+/// field below survives between calls to subblock_decode().
+struct lzma_coder_s {
+	/// Next coder in the filter chain. subblock_decode() treats
+	/// next.code == NULL as "Subblock is the last filter" and then reads
+	/// its input directly from the caller's buffer.
+	lzma_next_coder next;
+
+	/// Current state of the decode_buffer() state machine.
+	/// The order matters: the SEQ_SIZE_* and SEQ_REPEAT_COUNT_* states
+	/// advance with ++sequence, so SEQ_SIZE_3 must be followed by
+	/// SEQ_DATA and SEQ_REPEAT_COUNT_3 by SEQ_REPEAT_SIZE.
+	enum {
+		SEQ_FLAGS,
+		SEQ_SIZE_1,
+		SEQ_SIZE_2,
+		SEQ_SIZE_3,
+		SEQ_DATA,
+		SEQ_REPEAT_COUNT_1,
+		SEQ_REPEAT_COUNT_2,
+		SEQ_REPEAT_COUNT_3,
+		SEQ_REPEAT_SIZE,
+		SEQ_REPEAT_READ_DATA,
+		SEQ_REPEAT_FAST,
+		SEQ_REPEAT_NORMAL,
+		SEQ_FILTER_FLAGS,
+		SEQ_FILTER_END,
+	} sequence;
+
+	/// Number of bytes left in the current Subblock Data field.
+	/// While a Repeating Data Subblock header is being parsed, this is
+	/// also used as temporary storage for the Repeat Count.
+	size_t size;
+
+	/// Uncompressed Size, or LZMA_VLI_VALUE_UNKNOWN if unknown.
+	/// When known, it is decremented as output is produced.
+	lzma_vli uncompressed_size;
+
+	/// Number of consecutive Subblocks with Subblock Type Padding
+	// NOTE(review): in this file the counter is only ever reset to zero;
+	// the increment lives in the commented-out check in decode_buffer().
+	uint32_t padding;
+
+	/// True when .next.code() has returned LZMA_STREAM_END.
+	bool next_finished;
+
+	/// True when the Subblock decoder has detected End of Payload Marker.
+	/// This may become true before next_finished becomes true.
+	bool this_finished;
+
+	/// True if Subfilters are allowed.
+	bool allow_subfilters;
+
+	/// Indicates if at least one byte of decoded output has been
+	/// produced after enabling Subfilter.
+	// (It is set when a Data or Repeating Data Subblock header is seen
+	// and cleared when a Set Subfilter Subblock is seen.)
+	bool got_output_with_subfilter;
+
+	/// Possible subfilter
+	lzma_next_coder subfilter;
+
+	/// Filter Flags decoder is needed to parse the ID and Properties
+	/// of the subfilter.
+	lzma_next_coder filter_flags_decoder;
+
+	/// The filter_flags_decoder stores its results here.
+	lzma_options_filter filter_flags;
+
+	/// Options for the Subblock decoder helper. This is used to tell
+	/// the helper when it should return LZMA_STREAM_END to the subfilter.
+	lzma_options_subblock_helper helper;
+
+	/// State of a Repeating Data Subblock.
+	struct {
+		/// How many times buffer should be repeated
+		size_t count;
+
+		/// Size of the buffer
+		size_t size;
+
+		/// Position in the buffer
+		size_t pos;
+
+		/// Buffer to hold the data to be repeated
+		uint8_t buffer[LZMA_SUBBLOCK_RLE_MAX];
+	} repeat;
+
+	/// Temporary buffer needed when the Subblock filter is not the last
+	/// filter in the chain. The output of the next filter is first
+	/// decoded into buffer[], which is then used as input for the actual
+	/// Subblock decoder.
+	struct {
+		size_t pos;
+		size_t size;
+		uint8_t buffer[LZMA_BUFFER_SIZE];
+	} temp;
+};
+
+
+/// Values of valid Subblock Flags
+///
+/// decode_buffer() reads the value from the high four bits of the first
+/// byte of a Subblock (in[*in_pos] >> 4).
+enum {
+	FLAG_PADDING,
+	FLAG_EOPM,
+	FLAG_DATA,
+	FLAG_REPEAT,
+	FLAG_SET_SUBFILTER,
+	FLAG_END_SUBFILTER,
+};
+
+
+/// Subtracts size from coder->uncompressed_size if the uncompressed size is
+/// known and size isn't bigger than coder->uncompressed_size.
+///
+/// \return     true if size is bigger than the remaining known uncompressed
+///             size (nothing is subtracted; the callers in this file turn
+///             this into LZMA_DATA_ERROR), false otherwise.
+static inline bool
+update_uncompressed_size(lzma_coder *coder, size_t size)
+{
+	if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) {
+		if ((lzma_vli)(size) > coder->uncompressed_size)
+			return true;
+
+		coder->uncompressed_size -= size;
+	}
+
+	return false;
+}
+
+
+/// Calls the subfilter and updates coder->uncompressed_size.
+///
+/// \return     LZMA_DATA_ERROR if the subfilter produced more output than
+///             the remaining known uncompressed size; otherwise whatever
+///             the subfilter's code() function returned.
+static lzma_ret
+subfilter_decode(lzma_coder *coder, lzma_allocator *allocator,
+		const uint8_t *in, size_t *in_pos,
+		size_t in_size, uint8_t *restrict out,
+		size_t *restrict out_pos, size_t out_size, lzma_action action)
+{
+	assert(coder->subfilter.code != NULL);
+
+	// Remember where the output started so that we can count how many
+	// bytes the subfilter produced.
+	const size_t out_start = *out_pos;
+
+	// Call the subfilter.
+	const lzma_ret ret = coder->subfilter.code(
+			coder->subfilter.coder, allocator,
+			in, in_pos, in_size, out, out_pos, out_size, action);
+
+	// Update uncompressed_size.
+	if (update_uncompressed_size(coder, *out_pos - out_start))
+		return LZMA_DATA_ERROR;
+
+	return ret;
+}
+
+
+/// Parses Subblocks from in[] and writes the decoded data to out[].
+///
+/// This is the state machine driven by coder->sequence. It runs while there
+/// is space in out[] and either unread input is available or the coder is in
+/// SEQ_DATA.
+///
+/// \return     LZMA_STREAM_END when End of Payload Marker is found or the
+///             known uncompressed size has reached zero; LZMA_DATA_ERROR on
+///             invalid input; LZMA_PROG_ERROR on an unknown sequence value;
+///             an error code from a subfilter or from the Filter Flags
+///             decoder; LZMA_OK otherwise (more input or output space is
+///             needed).
+//
+// NOTE(review): SEQ_REPEAT_FAST and SEQ_REPEAT_NORMAL read from
+// coder->repeat.buffer rather than from in[], but the loop condition lets
+// only SEQ_DATA proceed when *in_pos == in_size. Confirm that decoding cannot
+// stall when a Repeating Data Subblock is the last thing in the input.
+static lzma_ret
+decode_buffer(lzma_coder *coder, lzma_allocator *allocator,
+		const uint8_t *in, size_t *in_pos,
+		size_t in_size, uint8_t *restrict out,
+		size_t *restrict out_pos, size_t out_size, lzma_action action)
+{
+	while (*out_pos < out_size && (*in_pos < in_size
+			|| coder->sequence == SEQ_DATA))
+	switch (coder->sequence) {
+	case SEQ_FLAGS: {
+		// Any Subblock Type other than Padding ends a run of
+		// consecutive Padding Subblocks.
+		if ((in[*in_pos] >> 4) != FLAG_PADDING)
+			coder->padding = 0;
+
+		// Do the correct action depending on the Subblock Type.
+		switch (in[*in_pos] >> 4) {
+		case FLAG_PADDING:
+			// Only check that reserved bits are zero.
+			// NOTE(review): the check is disabled, so Padding
+			// bytes are currently skipped without validating the
+			// reserved bits or the PADDING_MAX limit. Confirm
+			// whether this is intentional.
+//			if (++coder->padding > PADDING_MAX
+//					|| in[*in_pos] & 0x0F)
+//				return LZMA_DATA_ERROR;
+			++*in_pos;
+			break;
+
+		case FLAG_EOPM:
+			// Check that reserved bits are zero.
+			if (in[*in_pos] & 0x0F)
+				return LZMA_DATA_ERROR;
+
+			// There must be no Subfilter enabled.
+			if (coder->subfilter.code != NULL)
+				return LZMA_DATA_ERROR;
+
+			// End of Payload Marker must not be used if
+			// uncompressed size is known.
+			if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN)
+				return LZMA_DATA_ERROR;
+
+			++*in_pos;
+			return LZMA_STREAM_END;
+
+		case FLAG_DATA:
+			// First four bits of the Subblock Data size.
+			coder->size = in[*in_pos] & 0x0F;
+			++*in_pos;
+			// FLAG_END_SUBFILTER is accepted only after this
+			// flag has been set.
+			coder->got_output_with_subfilter = true;
+			coder->sequence = SEQ_SIZE_1;
+			break;
+
+		case FLAG_REPEAT:
+			// First four bits of the Repeat Count. We use
+			// coder->size as a temporary place for it.
+			coder->size = in[*in_pos] & 0x0F;
+			++*in_pos;
+			coder->got_output_with_subfilter = true;
+			coder->sequence = SEQ_REPEAT_COUNT_1;
+			break;
+
+		case FLAG_SET_SUBFILTER: {
+			// Reserved bits must be zero, no Subfilter may be
+			// active already, and Subfilters must be allowed.
+			if ((in[*in_pos] & 0x0F)
+					|| coder->subfilter.code != NULL
+					|| !coder->allow_subfilters)
+				return LZMA_DATA_ERROR;
+
+			assert(coder->filter_flags.options == NULL);
+			const lzma_ret ret = lzma_filter_flags_decoder_init(
+					&coder->filter_flags_decoder,
+					allocator, &coder->filter_flags);
+			if (ret != LZMA_OK)
+				return ret;
+
+			coder->got_output_with_subfilter = false;
+
+			++*in_pos;
+			coder->sequence = SEQ_FILTER_FLAGS;
+			break;
+		}
+
+		case FLAG_END_SUBFILTER:
+			// A Subfilter must be active and at least one Data or
+			// Repeating Data Subblock must have been seen since
+			// it was set.
+			if (coder->subfilter.code == NULL
+					|| !coder->got_output_with_subfilter)
+				return LZMA_DATA_ERROR;
+
+			// Tell the helper filter to indicate End of Input
+			// to our subfilter.
+			coder->helper.end_was_reached = true;
+
+			// Run the subfilter without any new input so that it
+			// can write out whatever it still has pending.
+			size_t dummy = 0;
+			const lzma_ret ret = subfilter_decode(coder, allocator,
+					NULL, &dummy, 0, out, out_pos,out_size,
+					action);
+
+			// If we didn't reach the end of the subfilter's output
+			// yet, return to the application. On the next call we
+			// will get to this same switch-case again, because we
+			// haven't updated *in_pos yet.
+			if (ret != LZMA_STREAM_END)
+				return ret;
+
+			// Free Subfilter's memory. This is a bit debatable,
+			// since we could avoid some malloc()/free() calls
+			// if the same Subfilter gets used soon again. But
+			// if Subfilter isn't used again, we could leave
+			// a memory-hogging filter dangling until someone
+			// frees Subblock filter itself.
+			lzma_next_coder_end(&coder->subfilter, allocator);
+
+			// Free memory used for subfilter options. This is
+			// safe, because we don't support any Subfilter that
+			// would allow pointers in the options structure.
+			lzma_free(coder->filter_flags.options, allocator);
+			coder->filter_flags.options = NULL;
+
+			++*in_pos;
+
+			if (coder->uncompressed_size == 0)
+				return LZMA_STREAM_END;
+
+			break;
+
+		default:
+			return LZMA_DATA_ERROR;
+		}
+
+		break;
+	}
+
+	case SEQ_SIZE_1:
+	case SEQ_REPEAT_COUNT_1:
+		// We use the same code to parse
+		//  - the Size (28 bits) in Subblocks of type Data; and
+		//  - the Repeat count (28 bits) in Subblocks of type
+		//    Repeating Data.
+		// The low four bits were already stored in SEQ_FLAGS.
+		coder->size |= (size_t)(in[*in_pos]) << 4;
+		++*in_pos;
+		++coder->sequence;
+		break;
+
+	case SEQ_SIZE_2:
+	case SEQ_REPEAT_COUNT_2:
+		coder->size |= (size_t)(in[*in_pos]) << 12;
+		++*in_pos;
+		++coder->sequence;
+		break;
+
+	case SEQ_SIZE_3:
+	case SEQ_REPEAT_COUNT_3:
+		coder->size |= (size_t)(in[*in_pos]) << 20;
+
+		// The real value is the stored value plus one.
+		++coder->size;
+
+		// The increment below moves SEQ_SIZE_3 to SEQ_DATA and
+		// SEQ_REPEAT_COUNT_3 to SEQ_REPEAT_SIZE.
+		++*in_pos;
+		++coder->sequence;
+		break;
+
+	case SEQ_REPEAT_SIZE:
+		// Move the Repeat Count to the correct variable and parse
+		// the Size of the Data to be repeated.
+		// NOTE(review): repeat.size can be as big as 256 here; this
+		// assumes LZMA_SUBBLOCK_RLE_MAX (the capacity of
+		// repeat.buffer) is at least 256 -- confirm.
+		coder->repeat.count = coder->size;
+		coder->repeat.size = (size_t)(in[*in_pos]) + 1;
+		coder->repeat.pos = 0;
+		++*in_pos;
+		coder->sequence = SEQ_REPEAT_READ_DATA;
+		break;
+
+	case SEQ_REPEAT_READ_DATA: {
+		// Fill coder->repeat.buffer[].
+		const size_t in_avail = in_size - *in_pos;
+		const size_t out_avail
+				= coder->repeat.size - coder->repeat.pos;
+		const size_t copy_size = MIN(in_avail, out_avail);
+
+		memcpy(coder->repeat.buffer + coder->repeat.pos,
+				in + *in_pos, copy_size);
+		*in_pos += copy_size;
+		coder->repeat.pos += copy_size;
+
+		// Once the whole chunk has been read, rewind it and pick
+		// the way to repeat it.
+		if (coder->repeat.pos == coder->repeat.size) {
+			coder->repeat.pos = 0;
+
+			if (coder->repeat.size == 1
+					&& coder->subfilter.code == NULL)
+				coder->sequence = SEQ_REPEAT_FAST;
+			else
+				coder->sequence = SEQ_REPEAT_NORMAL;
+		}
+
+		break;
+	}
+
+	case SEQ_REPEAT_FAST: {
+		// Optimization for cases when there is only one byte to
+		// repeat and no Subfilter.
+		const size_t out_avail = out_size - *out_pos;
+		const size_t copy_size = MIN(coder->repeat.count, out_avail);
+
+		memset(out + *out_pos, coder->repeat.buffer[0], copy_size);
+
+		*out_pos += copy_size;
+		coder->repeat.count -= copy_size;
+
+		if (update_uncompressed_size(coder, copy_size))
+			return LZMA_DATA_ERROR;
+
+		if (coder->repeat.count == 0) {
+			if (coder->uncompressed_size == 0)
+				return LZMA_STREAM_END;
+		} else {
+			// The output buffer became full before all the
+			// repeats were written.
+			return LZMA_OK;
+		}
+
+		coder->sequence = SEQ_FLAGS;
+		break;
+	}
+
+	case SEQ_REPEAT_NORMAL:
+		do {
+			// Cycle the repeat buffer if needed.
+			if (coder->repeat.pos == coder->repeat.size) {
+				if (--coder->repeat.count == 0) {
+					coder->sequence = SEQ_FLAGS;
+					break;
+				}
+
+				coder->repeat.pos = 0;
+			}
+
+			if (coder->subfilter.code == NULL) {
+				// No Subfilter: copy the chunk as is.
+				const size_t copy_size = bufcpy(
+						coder->repeat.buffer,
+						&coder->repeat.pos,
+						coder->repeat.size,
+						out, out_pos, out_size);
+
+				if (update_uncompressed_size(coder, copy_size))
+					return LZMA_DATA_ERROR;
+
+			} else {
+				// Feed the chunk to the Subfilter.
+				const lzma_ret ret = subfilter_decode(
+						coder, allocator,
+						coder->repeat.buffer,
+						&coder->repeat.pos,
+						coder->repeat.size,
+						out, out_pos, out_size,
+						action);
+
+				if (ret == LZMA_STREAM_END) {
+					// End of Subfilter can occur only at
+					// a Subblock boundary.
+					if (coder->repeat.pos
+							!= coder->repeat.size
+							|| --coder->repeat
+								.count != 0)
+						return LZMA_DATA_ERROR;
+
+					// We need a Subblock with Unset
+					// Subfilter before more data.
+					coder->sequence = SEQ_FILTER_END;
+					break;
+
+				} else if (ret != LZMA_OK) {
+					return ret;
+				}
+			}
+		} while (*out_pos < out_size);
+
+		break;
+
+	case SEQ_DATA: {
+		// Limit the amount of input to match the available
+		// Subblock Data size.
+		size_t in_limit;
+		if (in_size - *in_pos > coder->size)
+			in_limit = *in_pos + coder->size;
+		else
+			in_limit = in_size;
+
+		if (coder->subfilter.code == NULL) {
+			// No Subfilter: copy the data as is.
+			const size_t copy_size = bufcpy(
+					in, in_pos, in_limit,
+					out, out_pos, out_size);
+
+			coder->size -= copy_size;
+
+			if (update_uncompressed_size(coder, copy_size))
+				return LZMA_DATA_ERROR;
+
+		} else {
+			const size_t in_start = *in_pos;
+			const lzma_ret ret = subfilter_decode(
+					coder, allocator,
+					in, in_pos, in_limit,
+					out, out_pos, out_size,
+					action);
+
+			// Update the number of unprocessed bytes left in
+			// this Subblock. This assert() is true because
+			// in_limit prevents *in_pos getting too big.
+			assert(*in_pos - in_start <= coder->size);
+			coder->size -= *in_pos - in_start;
+
+			if (ret == LZMA_STREAM_END) {
+				// End of Subfilter can occur only at
+				// a Subblock boundary.
+				if (coder->size != 0)
+					return LZMA_DATA_ERROR;
+
+				// We need a Subblock with Unset
+				// Subfilter before more data.
+				coder->sequence = SEQ_FILTER_END;
+				break;
+			}
+
+			if (ret != LZMA_OK)
+				return ret;
+		}
+
+		// If we couldn't process the whole Subblock Data yet, return.
+		if (coder->size > 0)
+			return LZMA_OK;
+
+		// Check if we have decoded all the data.
+		if (coder->uncompressed_size == 0
+				&& coder->subfilter.code == NULL)
+			return LZMA_STREAM_END;
+
+		coder->sequence = SEQ_FLAGS;
+		break;
+	}
+
+	case SEQ_FILTER_FLAGS: {
+		// Decode the Filter Flags of the Subfilter. Anything else
+		// than LZMA_STREAM_END means that decoding is not finished
+		// or that it failed; LZMA_HEADER_ERROR is reported to the
+		// caller as LZMA_DATA_ERROR.
+		lzma_ret ret = coder->filter_flags_decoder.code(
+				coder->filter_flags_decoder.coder, allocator,
+				in, in_pos, in_size, NULL, NULL, 0, LZMA_RUN);
+		if (ret != LZMA_STREAM_END)
+			return ret == LZMA_HEADER_ERROR
+					? LZMA_DATA_ERROR : ret;
+
+		// Don't free the filter_flags_decoder. It doesn't take much
+		// memory and we may need it again.
+
+		// Initialize the Subfilter. Subblock and Copy filters are
+		// not allowed.
+		if (coder->filter_flags.id == LZMA_FILTER_COPY
+				|| coder->filter_flags.id
+					== LZMA_FILTER_SUBBLOCK)
+			return LZMA_DATA_ERROR;
+
+		coder->helper.end_was_reached = false;
+
+		// Chain: the Subfilter itself, then the helper filter, then
+		// the terminator.
+		lzma_options_filter filters[3] = {
+			{
+				.id = coder->filter_flags.id,
+				.options = coder->filter_flags.options,
+			}, {
+				.id = LZMA_FILTER_SUBBLOCK_HELPER,
+				.options = &coder->helper,
+			}, {
+				.id = LZMA_VLI_VALUE_UNKNOWN,
+				.options = NULL,
+			}
+		};
+
+		// Optimization: We know that LZMA uses End of Payload Marker
+		// (not End of Input), so we can omit the helper filter.
+		if (filters[0].id == LZMA_FILTER_LZMA)
+			filters[1].id = LZMA_VLI_VALUE_UNKNOWN;
+
+		ret = lzma_raw_decoder_init(&coder->subfilter, allocator,
+				filters, LZMA_VLI_VALUE_UNKNOWN, false);
+		if (ret != LZMA_OK)
+			return ret;
+
+		coder->sequence = SEQ_FLAGS;
+		break;
+	}
+
+	case SEQ_FILTER_END:
+		// We are in the beginning of a Subblock. The next Subblock
+		// whose type is not Padding, must indicate end of Subfilter.
+		if (in[*in_pos] == (FLAG_PADDING << 4)) {
+			++*in_pos;
+			break;
+		}
+
+		if (in[*in_pos] != (FLAG_END_SUBFILTER << 4))
+			return LZMA_DATA_ERROR;
+
+		// The byte is intentionally not consumed here; SEQ_FLAGS
+		// handles the End Subfilter Subblock itself.
+		coder->sequence = SEQ_FLAGS;
+		break;
+
+	default:
+		return LZMA_PROG_ERROR;
+	}
+
+	return LZMA_OK;
+}
+
+
+/// The code() function of the Subblock decoder.
+///
+/// If there is no next coder in the chain, the input is decoded directly
+/// with decode_buffer(). Otherwise the next coder first decodes into
+/// coder->temp.buffer, which is then used as the input of decode_buffer().
+static lzma_ret
+subblock_decode(lzma_coder *coder, lzma_allocator *allocator,
+		const uint8_t *restrict in, size_t *restrict in_pos,
+		size_t in_size, uint8_t *restrict out,
+		size_t *restrict out_pos, size_t out_size, lzma_action action)
+{
+	if (coder->next.code == NULL)
+		return decode_buffer(coder, allocator, in, in_pos, in_size,
+				out, out_pos, out_size, action);
+
+	while (*out_pos < out_size) {
+		// Refill the temporary buffer once it has been fully used,
+		// unless the next coder has already finished.
+		if (!coder->next_finished
+				&& coder->temp.pos == coder->temp.size) {
+			coder->temp.pos = 0;
+			coder->temp.size = 0;
+
+			const lzma_ret ret = coder->next.code(
+					coder->next.coder,
+					allocator, in, in_pos, in_size,
+					coder->temp.buffer, &coder->temp.size,
+					LZMA_BUFFER_SIZE, action);
+
+			if (ret == LZMA_STREAM_END)
+				coder->next_finished = true;
+			else if (coder->temp.size == 0 || ret != LZMA_OK)
+				return ret;
+		}
+
+		if (coder->this_finished) {
+			// We have already seen the end of our own data, so
+			// the next coder must not give us anything more.
+			if (coder->temp.pos != coder->temp.size)
+				return LZMA_DATA_ERROR;
+
+			if (coder->next_finished)
+				return LZMA_STREAM_END;
+
+			return LZMA_OK;
+		}
+
+		const lzma_ret ret = decode_buffer(coder, allocator,
+				coder->temp.buffer, &coder->temp.pos,
+				coder->temp.size,
+				out, out_pos, out_size, action);
+
+		if (ret == LZMA_STREAM_END)
+			// The next coder in the chain hasn't finished
+			// yet. If the input data is valid, there
+			// must be no more output coming, but the
+			// next coder may still need a little more
+			// input to detect End of Payload Marker.
+			coder->this_finished = true;
+		else if (ret != LZMA_OK)
+			return ret;
+		else if (coder->next_finished && *out_pos < out_size)
+			// The next coder has finished, but we haven't, and
+			// we still have space in the output buffer.
+			return LZMA_DATA_ERROR;
+	}
+
+	return LZMA_OK;
+}
+
+
+/// Frees the next coder, the Subfilter, the Filter Flags decoder, the
+/// Subfilter options, and finally the coder structure itself.
+static void
+subblock_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
+{
+	lzma_next_coder_end(&coder->next, allocator);
+	lzma_next_coder_end(&coder->subfilter, allocator);
+	lzma_next_coder_end(&coder->filter_flags_decoder, allocator);
+	lzma_free(coder->filter_flags.options, allocator);
+	lzma_free(coder, allocator);
+	return;
+}
+
+
+/// Initializes the Subblock decoder.
+///
+/// Allocates next->coder if it doesn't exist yet; otherwise the existing
+/// structure is reused after freeing the old Subfilter and its options.
+///
+/// \return     LZMA_OK on success, LZMA_MEM_ERROR if allocating the coder
+///             fails, or the error code from initializing the next filter
+///             in the chain.
+extern lzma_ret
+lzma_subblock_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+		const lzma_filter_info *filters)
+{
+	if (next->coder == NULL) {
+		next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
+		if (next->coder == NULL)
+			return LZMA_MEM_ERROR;
+
+		next->coder->next = LZMA_NEXT_CODER_INIT;
+		next->coder->subfilter = LZMA_NEXT_CODER_INIT;
+		next->coder->filter_flags_decoder = LZMA_NEXT_CODER_INIT;
+
+	} else {
+		lzma_next_coder_end(&next->coder->subfilter, allocator);
+		lzma_free(next->coder->filter_flags.options, allocator);
+	}
+
+	next->coder->filter_flags.options = NULL;
+
+	next->coder->sequence = SEQ_FLAGS;
+	next->coder->uncompressed_size = filters[0].uncompressed_size;
+	next->coder->padding = 0;
+	next->coder->next_finished = false;
+	next->coder->this_finished = false;
+	next->coder->temp.pos = 0;
+	next->coder->temp.size = 0;
+
+	// Subfilters are allowed only if the options say so; with no
+	// options at all they are disabled.
+	if (filters[0].options != NULL)
+		next->coder->allow_subfilters = ((lzma_options_subblock *)(
+				filters[0].options))->allow_subfilters;
+	else
+		next->coder->allow_subfilters = false;
+
+	{
+		const lzma_ret ret = lzma_next_filter_init(&next->coder->next,
+				allocator, filters + 1);
+		if (ret != LZMA_OK) {
+			// NOTE(review): next->coder still points to the
+			// freed structure after this call; confirm that the
+			// caller resets *next when initialization fails.
+			subblock_decoder_end(next->coder, allocator);
+			return ret;
+		}
+	}
+
+	next->code = &subblock_decode;
+	next->end = &subblock_decoder_end;
+
+	return LZMA_OK;
+}
diff --git a/src/liblzma/subblock/subblock_decoder.h b/src/liblzma/subblock/subblock_decoder.h
new file mode 100644
index 00000000..724ef357
--- /dev/null
+++ b/src/liblzma/subblock/subblock_decoder.h
@@ -0,0 +1,29 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file subblock_decoder.h
+/// \brief Decoder of the Subblock filter
+//
+// Copyright (C) 2007 Lasse Collin
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_SUBBLOCK_DECODER_H +#define LZMA_SUBBLOCK_DECODER_H + +#include "common.h" + + +extern lzma_ret lzma_subblock_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +#endif diff --git a/src/liblzma/subblock/subblock_decoder_helper.c b/src/liblzma/subblock/subblock_decoder_helper.c new file mode 100644 index 00000000..5ff24431 --- /dev/null +++ b/src/liblzma/subblock/subblock_decoder_helper.c @@ -0,0 +1,80 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file subblock_decoder_helper.c +/// \brief Helper filter for the Subblock decoder +/// +/// This filter is used to indicate End of Input for subfilters needing it. +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "subblock_decoder_helper.h" + + +struct lzma_coder_s { + const lzma_options_subblock_helper *options; +}; + + +static lzma_ret +helper_decode(lzma_coder *coder, + lzma_allocator *allocator lzma_attribute((unused)), + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, + lzma_action action lzma_attribute((unused))) +{ + // If end_was_reached is true, we cannot have any input. 
+ assert(!coder->options->end_was_reached || *in_pos == in_size); + + // We can safely copy as much as possible, because we are never + // given more data than a single Subblock Data field. + bufcpy(in, in_pos, in_size, out, out_pos, out_size); + + // Return LZMA_STREAM_END when instructed so by the Subblock decoder. + return coder->options->end_was_reached ? LZMA_STREAM_END : LZMA_OK; +} + + +static void +helper_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_free(coder, allocator); + return; +} + + +extern lzma_ret +lzma_subblock_decoder_helper_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters) +{ + // This is always the last filter in the chain. + assert(filters[1].init == NULL); + + // We never know uncompressed size. + assert(filters[0].uncompressed_size == LZMA_VLI_VALUE_UNKNOWN); + + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &helper_decode; + next->end = helper_end; + } + + next->coder->options = filters[0].options; + + return LZMA_OK; +} diff --git a/src/liblzma/subblock/subblock_decoder_helper.h b/src/liblzma/subblock/subblock_decoder_helper.h new file mode 100644 index 00000000..2a3e999e --- /dev/null +++ b/src/liblzma/subblock/subblock_decoder_helper.h @@ -0,0 +1,36 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file subblock_decoder_helper.h +/// \brief Helper filter for the Subblock decoder +/// +/// This filter is used to indicate End of Input for subfilters needing it. +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. 
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_SUBBLOCK_DECODER_HELPER_H
+#define LZMA_SUBBLOCK_DECODER_HELPER_H
+
+#include "common.h"
+
+
+/// Options of the helper filter. The helper reads this structure through
+/// the pointer it was given at initialization.
+typedef struct {
+	/// When true, the helper returns LZMA_STREAM_END instead of LZMA_OK
+	/// from its code() function.
+	bool end_was_reached;
+} lzma_options_subblock_helper;
+
+
+/// Initializes the helper filter; filters[0].options is stored as the
+/// pointer to the lzma_options_subblock_helper to watch.
+extern lzma_ret lzma_subblock_decoder_helper_init(lzma_next_coder *next,
+		lzma_allocator *allocator, const lzma_filter_info *filters);
+
+#endif
diff --git a/src/liblzma/subblock/subblock_encoder.c b/src/liblzma/subblock/subblock_encoder.c
new file mode 100644
index 00000000..9fa95b24
--- /dev/null
+++ b/src/liblzma/subblock/subblock_encoder.c
@@ -0,0 +1,841 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file subblock_encoder.c
+/// \brief Encoder of the Subblock filter
+//
+// Copyright (C) 2007 Lasse Collin
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "subblock_encoder.h"
+#include "raw_encoder.h"
+
+
+/// Biggest Repeat Count that fits into a single Subblock. The count is
+/// written as (count - 1) using four bits of the flags byte plus three
+/// full bytes, i.e. 28 bits.
+#define REPEAT_COUNT_MAX (1U << 28)
+
+/// Number of bytes the data chunk being repeated must be before we care
+/// about alignment. This is somewhat arbitrary.
It just doesn't make sense +/// to waste bytes for alignment when the data chunk is very small. +/// +/// TODO Rename and use this also for Subblock Data? +#define RLE_MIN_SIZE_FOR_ALIGN 3 + +#define write_byte(b) \ +do { \ + out[*out_pos] = b; \ + ++*out_pos; \ + ++coder->alignment.out_pos; \ +} while (0) + + +struct lzma_coder_s { + lzma_next_coder next; + bool next_finished; + + enum { + SEQ_FILL, + SEQ_FLUSH, + SEQ_RLE_COUNT_0, + SEQ_RLE_COUNT_1, + SEQ_RLE_COUNT_2, + SEQ_RLE_COUNT_3, + SEQ_RLE_SIZE, + SEQ_RLE_DATA, + SEQ_DATA_SIZE_0, + SEQ_DATA_SIZE_1, + SEQ_DATA_SIZE_2, + SEQ_DATA_SIZE_3, + SEQ_DATA, + SEQ_SUBFILTER_INIT, + SEQ_SUBFILTER_FLAGS, + } sequence; + + lzma_options_subblock *options; + + lzma_vli uncompressed_size; + + size_t pos; + uint32_t tmp; + + struct { + uint32_t multiple; + uint32_t in_pending; + uint32_t in_pos; + uint32_t out_pos; + } alignment; + + struct { + uint8_t *data; + size_t size; + size_t limit; + } subblock; + + struct { + uint8_t buffer[LZMA_SUBBLOCK_RLE_MAX]; + size_t size; + lzma_vli count; + } rle; + + struct { + enum { + SUB_NONE, + SUB_SET, + SUB_RUN, + SUB_FINISH, + SUB_END_MARKER, + } mode; + + bool got_input; + + uint8_t *flags; + size_t flags_size; + + lzma_next_coder subcoder; + + } subfilter; + + struct { + size_t pos; + size_t size; + uint8_t buffer[LZMA_BUFFER_SIZE]; + } temp; +}; + + +/// \brief Aligns the output buffer +/// +/// Aligns the output buffer so that after skew bytes the output position is +/// a multiple of coder->alignment.multiple. +static bool +subblock_align(lzma_coder *coder, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, uint32_t skew) +{ + assert(*out_pos < out_size); + + const uint32_t target = coder->alignment.in_pos + % coder->alignment.multiple; + + while ((coder->alignment.out_pos + skew) + % coder->alignment.multiple != target) { + // Zero indicates padding. + write_byte(0x00); + + // Check if output buffer got full and indicate it to + // the caller. 
+ if (*out_pos == out_size) + return true; + } + + coder->alignment.in_pos += coder->alignment.in_pending; + coder->alignment.in_pending = 0; + + // Output buffer is not full. + return false; +} + + +/// \brief Checks if buffer contains repeated data +/// +/// \param needle Buffer containing a single repeat chunk +/// \param needle_size Size of needle in bytes +/// \param buf Buffer to search for repeated needles +/// \param buf_chunks Buffer size is buf_chunks * needle_size. +/// +/// \return True if the whole buf is filled with repeated needles. +/// +static bool +is_repeating(const uint8_t *restrict needle, size_t needle_size, + const uint8_t *restrict buf, size_t buf_chunks) +{ + while (buf_chunks-- != 0) { + if (memcmp(buf, needle, needle_size) != 0) + return false; + + buf += needle_size; + } + + return true; +} + + +/// \brief Optimizes the repeating style and updates coder->sequence +static void +subblock_rle_flush(lzma_coder *coder) +{ + // The Subblock decoder can use memset() when the size of the data + // being repeated is one byte, so we check if the RLE buffer is + // filled with a single repeating byte. + if (coder->rle.size > 1) { + const uint8_t b = coder->rle.buffer[0]; + size_t i = 0; + while (true) { + if (coder->rle.buffer[i] != b) + break; + + if (++i == coder->rle.size) { + // TODO Integer overflow check maybe, + // although this needs at least 2**63 bytes + // of input until it gets triggered... + coder->rle.count *= coder->rle.size; + coder->rle.size = 1; + break; + } + } + } + + if (coder->rle.count > REPEAT_COUNT_MAX) + coder->tmp = REPEAT_COUNT_MAX - 1; + else + coder->tmp = coder->rle.count - 1; + + coder->sequence = SEQ_RLE_COUNT_0; + + return; +} + + +/// \brief Resizes coder->subblock.data for a new size limit +static lzma_ret +subblock_data_size(lzma_coder *coder, lzma_allocator *allocator, + size_t new_limit) +{ + // Verify that the new limit is valid. 
+ if (new_limit < LZMA_SUBBLOCK_DATA_SIZE_MIN + || new_limit > LZMA_SUBBLOCK_DATA_SIZE_MAX) + return LZMA_HEADER_ERROR; + + // Ff the new limit is different than the previous one, we need + // to reallocate the data buffer. + if (new_limit != coder->subblock.limit) { + lzma_free(coder->subblock.data, allocator); + coder->subblock.data = lzma_alloc(new_limit, allocator); + if (coder->subblock.data == NULL) + return LZMA_MEM_ERROR; + } + + coder->subblock.limit = new_limit; + + return LZMA_OK; +} + + +static lzma_ret +subblock_buffer(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + // Verify that there is a sane amount of input. + if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) { + const lzma_vli in_avail = in_size - *in_pos; + if (action == LZMA_FINISH) { + if (in_avail != coder->uncompressed_size) + return LZMA_DATA_ERROR; + } else { + if (in_avail > coder->uncompressed_size) + return LZMA_DATA_ERROR; + } + } + + // Check if we need to do something special with the Subfilter. 
+ if (coder->options != NULL && coder->options->allow_subfilters) { + switch (coder->options->subfilter_mode) { + case LZMA_SUBFILTER_NONE: + if (coder->subfilter.mode != SUB_NONE) + return LZMA_PROG_ERROR; + break; + + case LZMA_SUBFILTER_SET: + if (coder->subfilter.mode != SUB_NONE) + return LZMA_HEADER_ERROR; + + coder->subfilter.mode = SUB_SET; + coder->subfilter.got_input = false; + + if (coder->sequence == SEQ_FILL) + coder->sequence = SEQ_FLUSH; + + break; + + case LZMA_SUBFILTER_RUN: + if (coder->subfilter.mode != SUB_RUN) + return LZMA_PROG_ERROR; + break; + + case LZMA_SUBFILTER_FINISH: + if (coder->subfilter.mode == SUB_RUN) + coder->subfilter.mode = SUB_FINISH; + else if (coder->subfilter.mode != SUB_FINISH) + return LZMA_PROG_ERROR; + + if (!coder->subfilter.got_input) + return LZMA_PROG_ERROR; + + break; + + default: + return LZMA_HEADER_ERROR; + } + } + + // Main loop + while (*out_pos < out_size) + switch (coder->sequence) { + case SEQ_FILL: { + // Grab the new Subblock Data Size and reallocate the buffer. + if (coder->subblock.size == 0 && coder->options != NULL + && coder->options->subblock_data_size + != coder->subblock.limit) { + const lzma_ret ret = subblock_data_size(coder, + allocator, coder->options + ->subblock_data_size); + if (ret != LZMA_OK) + return ret; + } + + if (coder->subfilter.mode == SUB_NONE) { + assert(coder->subfilter.subcoder.code == NULL); + + // No Subfilter is enabled, just copy the data as is. + // NOTE: uncompressed_size cannot overflow because we + // have checked/ it in the beginning of this function. 
+ const size_t in_used = bufcpy(in, in_pos, in_size, + coder->subblock.data, + &coder->subblock.size, + coder->subblock.limit); + + if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) + coder->uncompressed_size -= in_used; + + coder->alignment.in_pending += in_used; + + } else { + const size_t in_start = *in_pos; + lzma_ret ret; + + if (coder->subfilter.mode == SUB_FINISH) { + // Let the Subfilter write out pending data, + // but don't give it any new input anymore. + size_t dummy = 0; + ret = coder->subfilter.subcoder.code(coder + ->subfilter.subcoder.coder, + allocator, NULL, &dummy, 0, + coder->subblock.data, + &coder->subblock.size, + coder->subblock.limit, + LZMA_FINISH); + } else { + // Give our input data to the Subfilter. Note + // that action can be LZMA_FINISH. In that + // case, we filter everything until the end + // of the input. The application isn't required + // to separately set LZMA_SUBBLOCK_FINISH. + ret = coder->subfilter.subcoder.code(coder + ->subfilter.subcoder.coder, + allocator, in, in_pos, in_size, + coder->subblock.data, + &coder->subblock.size, + coder->subblock.limit, + action); + } + + const size_t in_used = *in_pos - in_start; + + if (in_used > 0) + coder->subfilter.got_input = true; + + // NOTE: uncompressed_size cannot overflow because we + // have checked it in the beginning of this function. + if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) + coder->uncompressed_size -= *in_pos - in_start; + + coder->alignment.in_pending += in_used; + + if (ret == LZMA_STREAM_END) { + // We don't strictly need to do this, but + // doing it sounds like a good idea, because + // otherwise the Subfilter's memory could be + // left allocated for long time, and would + // just waste memory. 
+ lzma_next_coder_end(&coder->subfilter.subcoder, + allocator); + + assert(coder->options != NULL); + coder->options->subfilter_mode + = LZMA_SUBFILTER_NONE; + + assert(coder->subfilter.mode == SUB_FINISH + || action == LZMA_FINISH); + coder->subfilter.mode = SUB_END_MARKER; + + // Flush now. Even if coder->subblock.size + // happens to be zero, we still need to go + // to SEQ_FLUSH to write the Subfilter Unset + // indicator. + coder->sequence = SEQ_FLUSH; + break; + } + + // Return if an error occurred. + if (ret != LZMA_OK) + return ret; + } + + // If we ran out of input before the whole buffer + // was filled, return to application. + if (coder->subblock.size < coder->subblock.limit + && action != LZMA_FINISH) + return LZMA_OK; + + coder->sequence = SEQ_FLUSH; + } + + // Fall through + + case SEQ_FLUSH: + if (coder->options != NULL) { + // Update the alignment variable. + coder->alignment.multiple = coder->options->alignment; + if (coder->alignment.multiple + < LZMA_SUBBLOCK_ALIGNMENT_MIN + || coder->alignment.multiple + > LZMA_SUBBLOCK_ALIGNMENT_MAX) + return LZMA_HEADER_ERROR; + + // Run-length encoder + // + // First check if there is some data pending and we + // have an obvious need to flush it immediatelly. + if (coder->rle.count > 0 + && (coder->rle.size + != coder->options->rle + || coder->subblock.size + % coder->rle.size)) { + subblock_rle_flush(coder); + break; + } + + // Grab the (possibly new) RLE chunk size and + // validate it. + coder->rle.size = coder->options->rle; + if (coder->rle.size > LZMA_SUBBLOCK_RLE_MAX) + return LZMA_HEADER_ERROR; + + if (coder->subblock.size != 0 + && coder->rle.size + != LZMA_SUBBLOCK_RLE_OFF + && coder->subblock.size + % coder->rle.size == 0) { + + // Initialize coder->rle.buffer if we don't + // have RLE already running. + if (coder->rle.count == 0) + memcpy(coder->rle.buffer, + coder->subblock.data, + coder->rle.size); + + // Test if coder->subblock.data is repeating. 
+ const size_t count = coder->subblock.size + / coder->rle.size; + if (is_repeating(coder->rle.buffer, + coder->rle.size, + coder->subblock.data, count)) { + if (LZMA_VLI_VALUE_MAX - count + < coder->rle.count) + return LZMA_PROG_ERROR; + + coder->rle.count += count; + coder->subblock.size = 0; + + } else if (coder->rle.count > 0) { + // It's not repeating or at least not + // with the same byte sequence as the + // earlier Subblock Data buffers. We + // have some data pending in the RLE + // buffer already, so do a flush. + // Once flushed, we will check again + // if the Subblock Data happens to + // contain a different repeating + // sequence. + subblock_rle_flush(coder); + break; + } + } + } + + // If we now have some data left in coder->subblock, the RLE + // buffer is empty and we must write a regular Subblock Data. + if (coder->subblock.size > 0) { + assert(coder->rle.count == 0); + coder->tmp = coder->subblock.size - 1; + coder->sequence = SEQ_DATA_SIZE_0; + break; + } + + // Check if we should enable Subfilter. + if (coder->subfilter.mode == SUB_SET) { + if (coder->rle.count > 0) + subblock_rle_flush(coder); + else + coder->sequence = SEQ_SUBFILTER_INIT; + break; + } + + // Check if we have just finished Subfiltering. + if (coder->subfilter.mode == SUB_END_MARKER) { + if (coder->rle.count > 0) { + subblock_rle_flush(coder); + break; + } + + write_byte(0x50); + coder->subfilter.mode = SUB_NONE; + if (*out_pos == out_size) + return LZMA_OK; + } + + // Check if we have already written everything. + if (action == LZMA_FINISH && *in_pos == in_size + && coder->subfilter.mode == SUB_NONE) { + if (coder->rle.count > 0) { + subblock_rle_flush(coder); + break; + } + + if (coder->uncompressed_size + == LZMA_VLI_VALUE_UNKNOWN) { + // NOTE: No need to use write_byte() here + // since we are finishing. 
+ out[*out_pos] = 0x10; + ++*out_pos; + } else if (coder->uncompressed_size != 0) { + return LZMA_DATA_ERROR; + } + + return LZMA_STREAM_END; + } + + // Otherwise we have more work to do. + coder->sequence = SEQ_FILL; + break; + + case SEQ_RLE_COUNT_0: + // Make the Data field properly aligned, but only if the data + // chunk to be repeated isn't extremely small. We have four + // bytes for Count and one byte for Size, thus the number five. + if (coder->rle.size >= RLE_MIN_SIZE_FOR_ALIGN + && subblock_align( + coder, out, out_pos, out_size, 5)) + return LZMA_OK; + + assert(coder->rle.count > 0); + + write_byte(0x30 | (coder->tmp & 0x0F)); + + coder->sequence = SEQ_RLE_COUNT_1; + break; + + case SEQ_RLE_COUNT_1: + write_byte(coder->tmp >> 4); + coder->sequence = SEQ_RLE_COUNT_2; + break; + + case SEQ_RLE_COUNT_2: + write_byte(coder->tmp >> 12); + coder->sequence = SEQ_RLE_COUNT_3; + break; + + case SEQ_RLE_COUNT_3: + write_byte(coder->tmp >> 20); + + if (coder->rle.count > REPEAT_COUNT_MAX) + coder->rle.count -= REPEAT_COUNT_MAX; + else + coder->rle.count = 0; + + coder->sequence = SEQ_RLE_SIZE; + break; + + case SEQ_RLE_SIZE: + assert(coder->rle.size >= LZMA_SUBBLOCK_RLE_MIN); + assert(coder->rle.size <= LZMA_SUBBLOCK_RLE_MAX); + write_byte(coder->rle.size - 1); + coder->sequence = SEQ_RLE_DATA; + break; + + case SEQ_RLE_DATA: + bufcpy(coder->rle.buffer, &coder->pos, coder->rle.size, + out, out_pos, out_size); + if (coder->pos < coder->rle.size) + return LZMA_OK; + + coder->alignment.out_pos += coder->rle.size; + + coder->pos = 0; + coder->sequence = SEQ_FLUSH; + break; + + case SEQ_DATA_SIZE_0: + // We need four bytes for the Size field. 
+ if (subblock_align(coder, out, out_pos, out_size, 4)) + return LZMA_OK; + + write_byte(0x20 | (coder->tmp & 0x0F)); + coder->sequence = SEQ_DATA_SIZE_1; + break; + + case SEQ_DATA_SIZE_1: + write_byte(coder->tmp >> 4); + coder->sequence = SEQ_DATA_SIZE_2; + break; + + case SEQ_DATA_SIZE_2: + write_byte(coder->tmp >> 12); + coder->sequence = SEQ_DATA_SIZE_3; + break; + + case SEQ_DATA_SIZE_3: + write_byte(coder->tmp >> 20); + coder->sequence = SEQ_DATA; + break; + + case SEQ_DATA: + bufcpy(coder->subblock.data, &coder->pos, + coder->subblock.size, out, out_pos, out_size); + if (coder->pos < coder->subblock.size) + return LZMA_OK; + + coder->alignment.out_pos += coder->subblock.size; + + coder->subblock.size = 0; + coder->pos = 0; + coder->sequence = SEQ_FLUSH; + break; + + case SEQ_SUBFILTER_INIT: { + assert(coder->subblock.size == 0); + assert(coder->rle.count == 0); + assert(coder->subfilter.mode == SUB_SET); + assert(coder->options != NULL); + + // There must be a filter specified. + if (coder->options->subfilter_options.id + == LZMA_VLI_VALUE_UNKNOWN) + return LZMA_HEADER_ERROR; + + // Initialize a raw encoder to work as a Subfilter. + lzma_options_filter options[2]; + options[0] = coder->options->subfilter_options; + options[1].id = LZMA_VLI_VALUE_UNKNOWN; + + lzma_ret ret = lzma_raw_encoder_init( + &coder->subfilter.subcoder, allocator, + options, LZMA_VLI_VALUE_UNKNOWN, false); + if (ret != LZMA_OK) + return ret; + + // Encode the Filter Flags field into a buffer. This should + // never fail since we have already successfully initialized + // the Subfilter itself. Check it still, and return + // LZMA_PROG_ERROR instead of whatever the ret would say. 
+ ret = lzma_filter_flags_size( + &coder->subfilter.flags_size, options); + assert(ret == LZMA_OK); + if (ret != LZMA_OK) + return LZMA_PROG_ERROR; + + coder->subfilter.flags = lzma_alloc( + coder->subfilter.flags_size, allocator); + if (coder->subfilter.flags == NULL) + return LZMA_MEM_ERROR; + + // Now we have a big-enough buffer. Encode the Filter Flags. + // Like above, this should never fail. + size_t dummy = 0; + ret = lzma_filter_flags_encode(coder->subfilter.flags, + &dummy, coder->subfilter.flags_size, options); + assert(ret == LZMA_OK); + assert(dummy == coder->subfilter.flags_size); + if (ret != LZMA_OK || dummy != coder->subfilter.flags_size) + return LZMA_PROG_ERROR; + + // Write a Subblock indicating a new Subfilter. + write_byte(0x40); + + coder->options->subfilter_mode = LZMA_SUBFILTER_RUN; + coder->subfilter.mode = SUB_RUN; + coder->sequence = SEQ_SUBFILTER_FLAGS; + } + + // Fall through + + case SEQ_SUBFILTER_FLAGS: + // Copy the Filter Flags to the output stream. + bufcpy(coder->subfilter.flags, &coder->pos, + coder->subfilter.flags_size, + out, out_pos, out_size); + if (coder->pos < coder->subfilter.flags_size) + return LZMA_OK; + + lzma_free(coder->subfilter.flags, allocator); + coder->subfilter.flags = NULL; + + coder->pos = 0; + coder->sequence = SEQ_FILL; + break; + + default: + return LZMA_PROG_ERROR; + } + + return LZMA_OK; +} + + +static lzma_ret +subblock_encode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + if (coder->next.code == NULL) + return subblock_buffer(coder, allocator, in, in_pos, in_size, + out, out_pos, out_size, action); + + while (*out_pos < out_size + && (*in_pos < in_size || action == LZMA_FINISH)) { + if (!coder->next_finished + && coder->temp.pos == coder->temp.size) { + coder->temp.pos = 0; + coder->temp.size = 0; + + const lzma_ret ret = 
coder->next.code(coder->next.coder, + allocator, in, in_pos, in_size, + coder->temp.buffer, &coder->temp.size, + LZMA_BUFFER_SIZE, action); + if (ret == LZMA_STREAM_END) { + assert(action == LZMA_FINISH); + coder->next_finished = true; + } else if (coder->temp.size == 0 || ret != LZMA_OK) { + return ret; + } + } + + const lzma_ret ret = subblock_buffer(coder, allocator, + coder->temp.buffer, &coder->temp.pos, + coder->temp.size, out, out_pos, out_size, + coder->next_finished ? LZMA_FINISH : LZMA_RUN); + if (ret == LZMA_STREAM_END) { + assert(action == LZMA_FINISH); + assert(coder->next_finished); + return LZMA_STREAM_END; + } + + if (ret != LZMA_OK) + return ret; + } + + return LZMA_OK; +} + + +/** Releases what the Subblock encoder owns: the next coder in the chain, the + * Subfilter coder, the Subblock data buffer and the Filter Flags buffer. */ +/* NOTE(review): the lzma_coder structure itself is not freed here - confirm + * that whoever invokes this callback releases it, otherwise it leaks. */ +static void +subblock_encoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_coder_end(&coder->next, allocator); + lzma_next_coder_end(&coder->subfilter.subcoder, allocator); + lzma_free(coder->subblock.data, allocator); + lzma_free(coder->subfilter.flags, allocator); + return; +} + + +/** Initializes the Subblock encoder in *next. A new coder is allocated when + * next->coder is NULL; otherwise the existing coder is reused after its + * Subfilter coder and Filter Flags buffer have been released. In both cases + * the per-stream state is reset and the options are taken from filters[0]. */ +extern lzma_ret +lzma_subblock_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->coder->next = LZMA_NEXT_CODER_INIT; + next->coder->subblock.data = NULL; + next->coder->subblock.limit = 0; + next->coder->subfilter.subcoder = LZMA_NEXT_CODER_INIT; + } else { + lzma_next_coder_end(&next->coder->subfilter.subcoder, + allocator); + lzma_free(next->coder->subfilter.flags, allocator); + } + + next->coder->subfilter.flags = NULL; + + next->coder->next_finished = false; + next->coder->sequence = SEQ_FILL; + next->coder->options = filters[0].options; + next->coder->uncompressed_size = filters[0].uncompressed_size; + next->coder->pos = 0; + + next->coder->alignment.in_pending = 0; + next->coder->alignment.in_pos = 0; + next->coder->alignment.out_pos = 0; + next->coder->subblock.size = 0; 
+ next->coder->rle.count = 0; + next->coder->subfilter.mode = SUB_NONE; + + next->coder->temp.pos = 0; + next->coder->temp.size = 0; + + // Grab some values from the options structure if it is available. + size_t subblock_size_limit; + if (next->coder->options != NULL) { + if (next->coder->options->alignment + < LZMA_SUBBLOCK_ALIGNMENT_MIN + || next->coder->options->alignment + > LZMA_SUBBLOCK_ALIGNMENT_MAX) { + subblock_encoder_end(next->coder, allocator); + return LZMA_HEADER_ERROR; + } + next->coder->alignment.multiple + = next->coder->options->alignment; + subblock_size_limit = next->coder->options->subblock_data_size; + } else { + next->coder->alignment.multiple + = LZMA_SUBBLOCK_ALIGNMENT_DEFAULT; + subblock_size_limit = LZMA_SUBBLOCK_DATA_SIZE_DEFAULT; + } + + { + const lzma_ret ret = subblock_data_size(next->coder, allocator, + subblock_size_limit); + if (ret != LZMA_OK) { + subblock_encoder_end(next->coder, allocator); + return ret; + } + } + + { + const lzma_ret ret = lzma_next_filter_init(&next->coder->next, + allocator, filters + 1); + if (ret != LZMA_OK) { + subblock_encoder_end(next->coder, allocator); + return ret; + } + } + + next->code = &subblock_encode; + next->end = &subblock_encoder_end; + + return LZMA_OK; +} diff --git a/src/liblzma/subblock/subblock_encoder.h b/src/liblzma/subblock/subblock_encoder.h new file mode 100644 index 00000000..3ef9e2e4 --- /dev/null +++ b/src/liblzma/subblock/subblock_encoder.h @@ -0,0 +1,28 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file subblock_encoder.h +/// \brief Encoder of the Subblock filter +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. 
+// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_SUBBLOCK_ENCODER_H +#define LZMA_SUBBLOCK_ENCODER_H + +#include "common.h" + +extern lzma_ret lzma_subblock_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +#endif diff --git a/src/lzma/Makefile.am b/src/lzma/Makefile.am new file mode 100644 index 00000000..5fbd3358 --- /dev/null +++ b/src/lzma/Makefile.am @@ -0,0 +1,63 @@ +## +## Copyright (C) 2007 Lasse Collin +## +## This program is free software; you can redistribute it and/or +## modify it under the terms of the GNU Lesser General Public +## License as published by the Free Software Foundation; either +## version 2.1 of the License, or (at your option) any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## Lesser General Public License for more details. +## + +bin_PROGRAMS = lzma + +lzma_SOURCES = \ + alloc.c \ + alloc.h \ + args.c \ + args.h \ + error.c \ + error.h \ + hardware.c \ + hardware.h \ + help.c \ + help.h \ + io.c \ + io.h \ + main.c \ + options.c \ + options.h \ + private.h \ + process.c \ + process.h \ + suffix.c \ + suffix.h \ + util.c \ + util.h + +## It must be able to find sysdefs.h, lzma_adv.h, and possible +## replacement headers. +lzma_CPPFLAGS = \ + -DLOCALEDIR=\"$(localedir)\" \ + -I@top_srcdir@/src/common \ + -I@top_srcdir@/src/liblzma/api \ + -I@top_builddir@/lib \ + -I@top_srcdir@/lib + +lzma_CFLAGS = @PTHREAD_CFLAGS@ + +## Always link the command line tool statically against liblzma. 
It is +## faster on x86, because there is no need for PIC. We also have one dependency less, +## which allows users to more freely copy the lzma binary to other boxes. +lzma_LDFLAGS = -static +lzma_LDADD = \ + @top_builddir@/src/liblzma/liblzma.la \ + @LTLIBINTL@ \ + @PTHREAD_LIBS@ + +if COND_GNULIB +lzma_LDADD += @top_builddir@/lib/libgnu_nls.a +endif diff --git a/src/lzma/alloc.c b/src/lzma/alloc.c new file mode 100644 index 00000000..d0fee68b --- /dev/null +++ b/src/lzma/alloc.c @@ -0,0 +1,106 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file alloc.c +/// \brief Memory allocation functions +// +// Copyright (C) 2007 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + + +/// Called when memory allocation fails. Prints an error message and +/// quits the application. 
+static void lzma_attribute((noreturn)) +xerror(void) +{ + errmsg(V_ERROR, "%s", strerror(errno)); + my_exit(ERROR); +} + + +extern void * +xmalloc(size_t size) +{ + if (size < 1) { + errno = EINVAL; + xerror(); + } + + void *p = malloc(size); + if (p == NULL) + xerror(); + + return p; +} + + +/* +extern void * +xrealloc(void *ptr, size_t size) +{ + if (size < 1) { + errno = EINVAL; + xerror(); + } + + ptr = realloc(ptr, size); + if (ptr == NULL) + xerror(); + + return ptr; +} +*/ + + +extern char * +xstrdup(const char *src) +{ + if (src == NULL) { + errno = EINVAL; + xerror(); + } + + const size_t size = strlen(src) + 1; + char *dest = malloc(size); + if (dest == NULL) + xerror(); + + memcpy(dest, src, size); + + return dest; +} + + +extern void +xstrcpy(char **dest, const char *src) +{ + size_t len = strlen(src) + 1; + + *dest = realloc(*dest, len); + if (*dest == NULL) + xerror(); + + memcpy(*dest, src, len + 1); + + return; +} + + +extern void * +allocator(void *opaque lzma_attribute((unused)), + size_t nmemb lzma_attribute((unused)), size_t size) +{ + return xmalloc(size); +} diff --git a/src/lzma/alloc.h b/src/lzma/alloc.h new file mode 100644 index 00000000..80317269 --- /dev/null +++ b/src/lzma/alloc.h @@ -0,0 +1,42 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file alloc.h +/// \brief Memory allocation functions +// +// Copyright (C) 2007 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef ALLOC_H +#define ALLOC_H + +#include "private.h" + + +/// Safe malloc() that never returns NULL. +extern void *xmalloc(size_t size); + +/// Safe realloc() that never returns NULL. +extern void *xrealloc(void *ptr, size_t size); + +/// Safe strdup() that never returns NULL. +extern char *xstrdup(const char *src); + +/// xrealloc()s *dest to the size needed by src, and copies src to *dest. +extern void xstrcpy(char **dest, const char *src); + +/// Function for lzma_allocator.alloc. This uses xmalloc(). +extern void *allocator(void *opaque lzma_attribute((unused)), + size_t nmemb lzma_attribute((unused)), size_t size); + +#endif diff --git a/src/lzma/args.c b/src/lzma/args.c new file mode 100644 index 00000000..d6163ae7 --- /dev/null +++ b/src/lzma/args.c @@ -0,0 +1,566 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file args.c +/// \brief Argument parsing +/// +/// \note Filter-specific options parsing is in options.c. +// +// Copyright (C) 2007 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + +#include "getopt.h" +#include <ctype.h> + + +enum tool_mode opt_mode = MODE_COMPRESS; +enum header_type opt_header = HEADER_AUTO; + +char *opt_suffix = NULL; + +char *opt_files_name = NULL; +char opt_files_split = '\0'; +FILE *opt_files_file = NULL; + +bool opt_stdout = false; +bool opt_force = false; +bool opt_keep_original = false; +bool opt_preserve_name = false; + +lzma_check_type opt_check = LZMA_CHECK_CRC64; +lzma_options_filter opt_filters[8]; + +// We don't modify or free() this, but we need to assign it in some +// non-const pointers. +const char *stdin_filename = "(stdin)"; + +static size_t preset_number = 7 - 1; +static bool preset_default = true; +static size_t filter_count = 0; + + +enum { + OPT_COPY = INT_MIN, + OPT_SUBBLOCK, + OPT_X86, + OPT_POWERPC, + OPT_IA64, + OPT_ARM, + OPT_ARMTHUMB, + OPT_SPARC, + OPT_DELTA, + OPT_LZMA, + + OPT_FILES, + OPT_FILES0, +}; + + +/* Short options accepted by getopt_long(). A letter followed by ':' takes an + * argument. 'F' (--format) and 'S' (--suffix) need the ':' because their + * handlers in parse_real() read optarg; without it optarg is NULL for the + * short forms. */ +static const char short_opts[] = "cC:dfF:hlLkqrS:tT:vVz123456789"; + + +static const struct option long_opts[] = { + // gzip-like options + { "fast", no_argument, NULL, '1' }, + { "best", no_argument, NULL, '9' }, + { "memory", required_argument, NULL, 'M' }, + { "name", no_argument, NULL, 'N' }, + { "suffix", required_argument, NULL, 'S' }, + { "threads", required_argument, NULL, 'T' }, + { "version", no_argument, NULL, 'V' }, + { "stdout", no_argument, NULL, 'c' }, + { "to-stdout", no_argument, NULL, 'c' }, + { "decompress", no_argument, NULL, 'd' }, + { "uncompress", no_argument, NULL, 'd' }, + { "force", no_argument, NULL, 'f' }, + { "help", no_argument, NULL, 'h' }, + { "list", no_argument, NULL, 'l' }, + { "info", no_argument, NULL, 'l' }, + { "keep", no_argument, NULL, 'k' }, + { "no-name", no_argument, NULL, 'n' }, + { "quiet", no_argument, NULL, 'q' }, +// { "recursive", no_argument, NULL, 'r' }, // TODO + { "test", no_argument, NULL, 't' }, + { "verbose", no_argument, NULL, 
'v' }, + { "compress", no_argument, NULL, 'z' }, + + // Filters + { "copy", no_argument, NULL, OPT_COPY }, + { "subblock", optional_argument, NULL, OPT_SUBBLOCK }, + { "x86", no_argument, NULL, OPT_X86 }, + { "bcj", no_argument, NULL, OPT_X86 }, + { "powerpc", no_argument, NULL, OPT_POWERPC }, + { "ppc", no_argument, NULL, OPT_POWERPC }, + { "ia64", no_argument, NULL, OPT_IA64 }, + { "itanium", no_argument, NULL, OPT_IA64 }, + { "arm", no_argument, NULL, OPT_ARM }, + { "armthumb", no_argument, NULL, OPT_ARMTHUMB }, + { "sparc", no_argument, NULL, OPT_SPARC }, + { "delta", optional_argument, NULL, OPT_DELTA }, + { "lzma", optional_argument, NULL, OPT_LZMA }, + + // Other + { "format", required_argument, NULL, 'F' }, + { "check", required_argument, NULL, 'C' }, + { "files", optional_argument, NULL, OPT_FILES }, + { "files0", optional_argument, NULL, OPT_FILES0 }, + + { NULL, 0, NULL, 0 } +}; + + +static void +add_filter(lzma_vli id, const char *opt_str) +{ + if (filter_count == 7) { + errmsg(V_ERROR, _("Maximum number of filters is seven")); + my_exit(ERROR); + } + + opt_filters[filter_count].id = id; + + switch (id) { + case LZMA_FILTER_SUBBLOCK: + opt_filters[filter_count].options + = parse_options_subblock(opt_str); + break; + + case LZMA_FILTER_DELTA: + opt_filters[filter_count].options + = parse_options_delta(opt_str); + break; + + case LZMA_FILTER_LZMA: + opt_filters[filter_count].options + = parse_options_lzma(opt_str); + break; + + default: + assert(opt_str == NULL); + opt_filters[filter_count].options = NULL; + break; + } + + ++filter_count; + preset_default = false; + return; +} + + +static void +parse_real(int argc, char **argv) +{ + int c; + + while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL)) + != -1) { + switch (c) { + // gzip-like options + + case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + preset_number = c - '1'; + preset_default = false; + break; + + // --memory + case 'M': + opt_memory = 
str_to_uint64("memory", optarg, + 1, SIZE_MAX); + break; + + case 'N': + opt_preserve_name = true; + break; + + // --suffix + case 'S': + // Empty suffix and suffixes having a slash are + // rejected. Such suffixes would break things later. + if (optarg[0] == '\0' || strchr(optarg, '/') != NULL) { + errmsg(V_ERROR, _("%s: Invalid filename " + "suffix"), optarg); + my_exit(ERROR); + } + + free(opt_suffix); + opt_suffix = xstrdup(optarg); + break; + + case 'T': + opt_threads = str_to_uint64("threads", optarg, + 1, SIZE_MAX); + break; + + // --version + case 'V': + // This doesn't return. + show_version(); + + // --stdout + case 'c': + opt_stdout = true; + break; + + // --decompress + case 'd': + opt_mode = MODE_DECOMPRESS; + break; + + // --force + case 'f': + opt_force = true; + break; + + // --help + case 'h': + // This doesn't return. + show_help(); + + // --list + case 'l': + opt_mode = MODE_LIST; + break; + + // --keep + case 'k': + opt_keep_original = true; + break; + + case 'n': + opt_preserve_name = false; + break; + + // --quiet + case 'q': + if (verbosity > V_SILENT) + --verbosity; + + break; + + case 't': + opt_mode = MODE_TEST; + break; + + // --verbose + case 'v': + if (verbosity < V_DEBUG) + ++verbosity; + + break; + + case 'z': + opt_mode = MODE_COMPRESS; + break; + + // Filter setup + + case OPT_COPY: + add_filter(LZMA_FILTER_COPY, NULL); + break; + + case OPT_SUBBLOCK: + add_filter(LZMA_FILTER_SUBBLOCK, optarg); + break; + + case OPT_X86: + add_filter(LZMA_FILTER_X86, NULL); + break; + + case OPT_POWERPC: + add_filter(LZMA_FILTER_POWERPC, NULL); + break; + + case OPT_IA64: + add_filter(LZMA_FILTER_IA64, NULL); + break; + + case OPT_ARM: + add_filter(LZMA_FILTER_ARM, NULL); + break; + + case OPT_ARMTHUMB: + add_filter(LZMA_FILTER_ARMTHUMB, NULL); + break; + + case OPT_SPARC: + add_filter(LZMA_FILTER_SPARC, NULL); + break; + + case OPT_DELTA: + add_filter(LZMA_FILTER_DELTA, optarg); + break; + + case OPT_LZMA: + add_filter(LZMA_FILTER_LZMA, optarg); + 
break; + + // Other + + // --format + case 'F': { + static const char *types[] = { + "auto", + "native", + "single", + "multi", + "alone", +// "gzip", + NULL + }; + + opt_header = 0; + while (strcmp(types[opt_header], optarg) != 0) { + if (types[++opt_header] == NULL) { + errmsg(V_ERROR, _("%s: Unknown file " + "format type"), + optarg); + my_exit(ERROR); + } + } + + break; + } + + // --check + case 'C': { + static const struct { + const char *str; + unsigned int value; + } types[] = { + { "none", LZMA_CHECK_NONE }, + { "crc32", LZMA_CHECK_CRC32 }, + { "crc64", LZMA_CHECK_CRC64 }, + { "sha256", LZMA_CHECK_SHA256 }, + { NULL, 0 } + }; + + size_t i = 0; + while (strcmp(types[i].str, optarg) != 0) { + if (types[++i].str == NULL) { + errmsg(V_ERROR, _("%s: Unknown " + "integrity check " + "type"), optarg); + my_exit(ERROR); + } + } + + opt_check = types[i].value; + break; + } + + case OPT_FILES: + opt_files_split = '\n'; + + // Fall through + + case OPT_FILES0: + if (opt_files_name != NULL) { + errmsg(V_ERROR, _("Only one file can be " + "specified with `--files'" + "or `--files0'.")); + my_exit(ERROR); + } + + if (optarg == NULL) { + opt_files_name = (char *)stdin_filename; + opt_files_file = stdin; + } else { + opt_files_name = optarg; + opt_files_file = fopen(optarg, + c == OPT_FILES ? "r" : "rb"); + if (opt_files_file == NULL) { + errmsg(V_ERROR, "%s: %s", optarg, + strerror(errno)); + my_exit(ERROR); + } + } + + break; + + default: + show_try_help(); + my_exit(ERROR); + } + } + + return; +} + + +static void +parse_environment(void) +{ + char *env = getenv("LZMA_OPT"); + if (env == NULL) + return; + + env = xstrdup(env); + + // Calculate the number of arguments in env. 
+ unsigned int argc = 1; + bool prev_was_space = true; + for (size_t i = 0; env[i] != '\0'; ++i) { + if (isspace(env[i])) { + prev_was_space = true; + } else if (prev_was_space) { + prev_was_space = false; + if (++argc > (unsigned int)(INT_MAX)) { + errmsg(V_ERROR, _("The environment variable " + "LZMA_OPT contains too many " + "arguments")); + my_exit(ERROR); + } + } + } + + char **argv = xmalloc((argc + 1) * sizeof(char*)); + argv[0] = argv0; + argv[argc] = NULL; + + argc = 1; + prev_was_space = true; + for (size_t i = 0; env[i] != '\0'; ++i) { + if (isspace(env[i])) { + prev_was_space = true; + } else if (prev_was_space) { + prev_was_space = false; + argv[argc++] = env + i; + } + } + + parse_real((int)(argc), argv); + + free(env); + + return; +} + + +static void +set_compression_settings(void) +{ + if (filter_count == 0) { + opt_filters[0].id = LZMA_FILTER_LZMA; + opt_filters[0].options = (lzma_options_lzma *)( + lzma_preset_lzma + preset_number); + filter_count = 1; + } + + // Terminate the filter options array. + opt_filters[filter_count].id = LZMA_VLI_VALUE_UNKNOWN; + + // Optimize the filter chain a little by removing all + // Copy filters. + for (size_t i = 0; opt_filters[i].id != LZMA_VLI_VALUE_UNKNOWN; ++i) { + while (opt_filters[i].id == LZMA_FILTER_COPY) { + size_t j = i; + do { + opt_filters[j] = opt_filters[j + 1]; + } while (opt_filters[++j].id + != LZMA_VLI_VALUE_UNKNOWN); + } + } + + const uint32_t memory_limit = opt_memory / (1024 * 1024) + 1; + uint32_t memory_usage = lzma_memory_usage(opt_filters, true); + + // Don't go over the memory limits when the default + // setting is used. 
+ if (preset_default) { + while (memory_usage > memory_limit) { + if (preset_number == 0) { + errmsg(V_ERROR, _("Memory usage limit is too " + "small for any internal " + "filter preset")); + my_exit(ERROR); + } + + --preset_number; + opt_filters[0].options = (lzma_options_lzma *)( + lzma_preset_lzma + + preset_number); + memory_usage = lzma_memory_usage(opt_filters, + true); + } + } else { + if (memory_usage > memory_limit) { + errmsg(V_ERROR, _("Memory usage limit is too small " + "for the given filter setup")); + my_exit(ERROR); + } + } + + // Limit the number of worker threads so that memory usage + // limit isn't exceeded. + // FIXME: Probably should use bytes instead of mebibytes for + // memory_usage and memory_limit. + if (memory_usage == 0) + memory_usage = 1; + + size_t thread_limit = memory_limit / memory_usage; + if (thread_limit == 0) + thread_limit = 1; + + if (opt_threads > thread_limit) + opt_threads = thread_limit; + + return; +} + + +/** Parses the options, first from the LZMA_OPT environment variable and then + * from the command line, and finishes the compression settings when the + * mode is MODE_COMPRESS. Returns argv + optind, or a static { "-", NULL } + * list when no filenames and no --files/--files0 option were given. */ +extern char ** +parse_args(int argc, char **argv) +{ + // Check how we were called. + { + const char *name = str_filename(argv[0]); + if (name != NULL) { + if (strstr(name, "cat") != NULL) { + opt_mode = MODE_DECOMPRESS; + opt_stdout = true; + } else if (strstr(name, "un") != NULL) { + opt_mode = MODE_DECOMPRESS; + } + } + } + + // First the flags from environment + parse_environment(); + + // Then from the command line + optind = 1; + parse_real(argc, argv); + + // Never remove the source file when the destination is not on disk. + // In test mode the data is written nowhere, but setting opt_stdout + // will make the rest of the code behave well. + if (opt_stdout || opt_mode == MODE_TEST) { + opt_keep_original = true; + opt_stdout = true; + } + + if (opt_mode == MODE_COMPRESS) + set_compression_settings(); + + // If no filenames are given, use stdin. + if (argv[optind] == NULL && opt_files_name == NULL) { + // We don't modify or free() the "-" constant. 
+ static char *argv_stdin[2] = { (char *)"-", NULL }; + return argv_stdin; + } + + return argv + optind; +} diff --git a/src/lzma/args.h b/src/lzma/args.h new file mode 100644 index 00000000..4f19a01e --- /dev/null +++ b/src/lzma/args.h @@ -0,0 +1,64 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file args.h +/// \brief Argument parsing +// +// Copyright (C) 2007 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef ARGS_H +#define ARGS_H + +#include "private.h" + + +enum tool_mode { + MODE_COMPRESS, + MODE_DECOMPRESS, + MODE_TEST, + MODE_LIST, +}; + +enum header_type { + HEADER_AUTO, + HEADER_NATIVE, + HEADER_SINGLE, + HEADER_MULTI, + HEADER_ALONE, + // HEADER_GZIP, +}; + + +extern char *opt_suffix; + +extern char *opt_files_name; +extern char opt_files_split; +extern FILE *opt_files_file; + +extern bool opt_stdout; +extern bool opt_force; +extern bool opt_keep_original; +extern bool opt_preserve_name; +// extern bool opt_recursive; +extern enum tool_mode opt_mode; +extern enum header_type opt_header; + +extern lzma_check_type opt_check; +extern lzma_options_filter opt_filters[8]; + +extern const char *stdin_filename; + +extern char **parse_args(int argc, char **argv); + +#endif diff --git a/src/lzma/error.c b/src/lzma/error.c new file mode 100644 index 00000000..a83de27a --- /dev/null +++ b/src/lzma/error.c @@ -0,0 +1,156 @@ 
+/////////////////////////////////////////////////////////////////////////////// +// +/// \file error.c +/// \brief Error message printing +// +// Copyright (C) 2007 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" +#include <stdarg.h> + + +exit_status_type exit_status = SUCCESS; +verbosity_type verbosity = V_WARNING; +char *argv0 = NULL; +volatile sig_atomic_t user_abort = 0; + + +extern const char * +str_strm_error(lzma_ret code) +{ + switch (code) { + case LZMA_OK: + return _("Operation successful"); + + case LZMA_STREAM_END: + return _("Operation finished successfully"); + + case LZMA_PROG_ERROR: + return _("Internal error (bug)"); + + case LZMA_DATA_ERROR: + return _("Compressed data is corrupt"); + + case LZMA_MEM_ERROR: + return strerror(ENOMEM); + + case LZMA_BUF_ERROR: + return _("Unexpected end of input"); + + case LZMA_HEADER_ERROR: + return _("Unsupported options"); + + case LZMA_UNSUPPORTED_CHECK: + return _("Unsupported integrity check type"); + + default: + return NULL; + } +} + + +extern void +set_exit_status(exit_status_type new_status) +{ + static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; + pthread_mutex_lock(&mutex); + + if (new_status != WARNING || exit_status == SUCCESS) + exit_status = new_status; + + pthread_mutex_unlock(&mutex); + return; +} + + +extern void lzma_attribute((noreturn)) +my_exit(int status) +{ + // Close stdout. 
If something goes wrong, print an error message + // to stderr. + { + const int ferror_err = ferror(stdout); + const int fclose_err = fclose(stdout); + if (fclose_err) { + errmsg(V_ERROR, _("Writing to standard output " + "failed: %s"), strerror(errno)); + status = ERROR; + } else if (ferror_err) { + // Some error has occurred but we have no clue about + // the reason since fclose() succeeded. + errmsg(V_ERROR, _("Writing to standard output " + "failed: %s"), "Unknown error"); + status = ERROR; + } + } + + // Close stderr. If something goes wrong, there's nothing where we + // could print an error message. Just set the exit status. + { + const int ferror_err = ferror(stderr); + const int fclose_err = fclose(stderr); + if (fclose_err || ferror_err) + status = ERROR; + } + + exit(status); +} + + +/** Prints "argv0: message" followed by a newline to stderr when v <= verbosity; + * the three writes are serialized with a mutex. Independently of whether the + * message was printed, V_ERROR and V_WARNING messages record ERROR or WARNING + * through set_exit_status(). */ +extern void lzma_attribute((format(printf, 2, 3))) +errmsg(verbosity_type v, const char *fmt, ...) +{ + va_list ap; + + if (v <= verbosity) { + va_start(ap, fmt); + + static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; + pthread_mutex_lock(&mutex); + + fprintf(stderr, "%s: ", argv0); + vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n"); + + pthread_mutex_unlock(&mutex); + + va_end(ap); + } + + if (v == V_ERROR) + set_exit_status(ERROR); + else if (v == V_WARNING) + set_exit_status(WARNING); + + return; +} + + +/** Reports strerror(ENOMEM) as an error message and sets user_abort. */ +extern void +out_of_memory(void) +{ + errmsg(V_ERROR, "%s", strerror(ENOMEM)); + user_abort = 1; + return; +} + + +/** Reports an internal error (bug) as an error message and sets user_abort. */ +extern void +internal_error(void) +{ + errmsg(V_ERROR, _("Internal error (bug)")); + user_abort = 1; + return; +} diff --git a/src/lzma/error.h b/src/lzma/error.h new file mode 100644 index 00000000..34ec30e1 --- /dev/null +++ b/src/lzma/error.h @@ -0,0 +1,67 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file error.h +/// \brief Error message printing +// +// Copyright (C) 2007 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU 
// Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
///////////////////////////////////////////////////////////////////////////////

#ifndef ERROR_H
#define ERROR_H

#include "private.h"


/// Values passed to set_exit_status() and my_exit().
typedef enum {
	SUCCESS = 0,
	ERROR = 1,
	WARNING = 2,
} exit_status_type;


/// Message levels. errmsg() prints a message only when its level is
/// less than or equal to the current `verbosity', so the order of the
/// enumerators matters.
typedef enum {
	V_SILENT,
	V_ERROR,
	V_WARNING,
	V_VERBOSE,
	V_DEBUG,
} verbosity_type;


/// Exit status recorded so far (see set_exit_status()).
extern exit_status_type exit_status;

/// Current message level threshold used by errmsg().
extern verbosity_type verbosity;

/// Like GNU's program_invocation_name but portable
extern char *argv0;

/// Once this is non-zero, all threads must shutdown and clean up incomplete
/// output files from the disk.
extern volatile sig_atomic_t user_abort;


/// Returns a message for a liblzma return code, or NULL when the code has
/// no message.
extern const char * str_strm_error(lzma_ret code);

/// printf-like message printing to stderr, prefixed with argv0. V_ERROR and
/// V_WARNING messages also update the exit status.
extern void errmsg(verbosity_type v, const char *fmt, ...)
		lzma_attribute((format(printf, 2, 3)));

/// Records a new exit status; WARNING replaces only SUCCESS.
extern void set_exit_status(exit_status_type new_status);

/// Closes stdout and stderr and exits; close failures force ERROR.
extern void my_exit(int status) lzma_attribute((noreturn));

/// Prints the ENOMEM message as an error and sets user_abort.
extern void out_of_memory(void);

/// Prints an internal error message and sets user_abort.
extern void internal_error(void);

#endif
diff --git a/src/lzma/hardware.c b/src/lzma/hardware.c new file mode 100644 index 00000000..6cb3cdfc --- /dev/null +++ b/src/lzma/hardware.c @@ -0,0 +1,99 @@
///////////////////////////////////////////////////////////////////////////////
//
/// \file hardware.c
/// \brief Detection of available hardware resources
//
// Copyright (C) 2007 Lasse Collin
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
///////////////////////////////////////////////////////////////////////////////

#include "private.h"
#include "physmem.h"


/// Maximum number of free *coder* threads. This can be set with
/// the --threads=NUM command line option.
size_t opt_threads = 1;


/// Number of bytes of memory to use at maximum (only a rough limit).
/// This can be set with the --memory=NUM command line option.
/// If no better value can be determined, the default is 14 MiB, which
/// should be quite safe even for older systems while still allowing
/// reasonable compression ratio.
size_t opt_memory = 14 * 1024 * 1024;


/// Get the amount of physical memory, and set opt_memory to 1/3 of it.
/// User can then override this with --memory command line option.
+static void +hardware_memory(void) +{ + uint64_t mem = physmem(); + if (mem != 0) { + mem /= 3; + +#if UINT64_MAX > SIZE_MAX + if (mem > SIZE_MAX) + mem = SIZE_MAX; +#endif + + opt_memory = mem; + } + + return; +} + + +/// Get the number of CPU cores, and set opt_threads to default to that value. +/// User can then override this with --threads command line option. +static void +hardware_cores(void) +{ +#if defined(HAVE_NUM_PROCESSORS_SYSCONF) + const long cpus = sysconf(_SC_NPROCESSORS_ONLN); + if (cpus > 0) + opt_threads = (size_t)(cpus); + +#elif defined(HAVE_NUM_PROCESSORS_SYSCTL) + int name[2] = { CTL_HW, HW_NCPU }; + int cpus; + size_t cpus_size = sizeof(cpus); + if (!sysctl(name, &cpus, &cpus_size, NULL, NULL) + && cpus_size == sizeof(cpus) && cpus > 0) + opt_threads = (size_t)(cpus); +#endif + + // Limit opt_threads so that maximum number of threads doesn't exceed. + +#if defined(_SC_THREAD_THREADS_MAX) + const long threads_max = sysconf(_SC_THREAD_THREADS_MAX); + if (threads_max > 0 && (size_t)(threads_max) < opt_threads) + opt_threads = (size_t)(threads_max); + +#elif defined(PTHREAD_THREADS_MAX) + if (opt_threads > PTHREAD_THREADS_MAX) + opt_threads = PTHREAD_THREADS_MAX; +#endif + + return; +} + + +extern void +hardware_init(void) +{ + hardware_memory(); + hardware_cores(); + return; +} diff --git a/src/lzma/hardware.h b/src/lzma/hardware.h new file mode 100644 index 00000000..d47bd29f --- /dev/null +++ b/src/lzma/hardware.h @@ -0,0 +1,31 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file hardware.c +/// \brief Detection of available hardware resources +// +// Copyright (C) 2007 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. 
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
///////////////////////////////////////////////////////////////////////////////

#ifndef HARDWARE_H
#define HARDWARE_H

#include "private.h"


/// Thread and memory limits; defined in hardware.c.
extern size_t opt_threads;
extern size_t opt_memory;

/// Sets opt_memory and opt_threads from the detected hardware.
extern void hardware_init(void);

#endif
diff --git a/src/lzma/help.c b/src/lzma/help.c new file mode 100644 index 00000000..ad7dd861 --- /dev/null +++ b/src/lzma/help.c @@ -0,0 +1,178 @@
///////////////////////////////////////////////////////////////////////////////
//
/// \file help.c
/// \brief Help messages
//
// Copyright (C) 2007 Lasse Collin
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
///////////////////////////////////////////////////////////////////////////////

#include "private.h"


/// Prints a one-line hint that points the user to --help.
extern void
show_try_help(void)
{
	// Print this with V_WARNING instead of V_ERROR to prevent it from
	// showing up when --quiet has been specified.
	errmsg(V_WARNING, _("Try `%s --help' for more information."), argv0);
	return;
}


/// Prints the full usage text to stdout, followed by the memory and thread
/// limits currently in effect, and exits with SUCCESS via my_exit().
extern void lzma_attribute((noreturn))
show_help(void)
{
	printf(_("Usage: %s [OPTION]... [FILE]...\n"
			"Compress or decompress FILEs in the .lzma format.\n"
			"\n"), argv0);

	puts(_("Mandatory arguments to long options are mandatory for "
			"short options too.\n"));

	puts(_(
" Operation mode:\n"
"\n"
" -z, --compress force compression\n"
" -d, --decompress force decompression\n"
" -t, --test test compressed file integrity\n"
" -l, --list list block sizes, total sizes, and possible metadata\n"
));

	puts(_(
" Operation modifiers:\n"
"\n"
" -k, --keep keep (don't delete) input files\n"
" -f, --force force overwrite of output file and (de)compress links\n"
" -c, --stdout write to standard output and don't delete input files\n"
" -S, --suffix=.SUF use suffix `.SUF' on compressed files instead of `.lzma'\n"
" -F, --format=FMT file format to encode or decode; possible values are\n"
" `auto', `native', `single', `multi', and `alone'\n"
" --files=[FILE] read filenames to process from FILE; if FILE is\n"
" omitted, filenames are read from the standard input;\n"
" filenames must be terminated with the newline character\n"
" --files0=[FILE] like --files but use the nul byte as terminator\n"
));

	puts(_(
" Compression presets and basic compression options:\n"
"\n"
" -1 .. -2 fast compression\n"
" -3 .. -6 good compression\n"
" -7 .. -9 excellent compression, but needs a lot of memory;\n"
" default is -7 if memory limit allows\n"
"\n"
" -C, --check=CHECK integrity check type: `crc32', `crc64' (default),\n"
" or `sha256'\n"
));

	puts(_(
" Custom filter chain for compression (alternative for using presets):\n"
"\n"
" --lzma=[OPTS] LZMA filter; OPTS is a comma-separated list of zero or\n"
" more of the following options (valid values; default):\n"
" dict=NUM dictionary size in bytes (1 - 1Gi; 8Mi)\n"
" lc=NUM number of literal context bits (0-8; 3)\n"
" lp=NUM number of literal position bits (0-4; 0)\n"
" pb=NUM number of position bits (0-4; 2)\n"
" mode=MODE compression mode (`fast' or `best'; `best')\n"
" fb=NUM number of fast bytes (5-273; 128)\n"
" mf=NAME match finder (hc3, hc4, bt2, bt3, bt4; bt4)\n"
" mfc=NUM match finder cycles; 0=automatic (default)\n"
"\n"
" --x86 x86 filter (sometimes called BCJ filter)\n"
" --powerpc PowerPC (big endian) filter\n"
" --ia64 IA64 (Itanium) filter\n"
" --arm ARM filter\n"
" --armthumb ARM-Thumb filter\n"
" --sparc SPARC filter\n"
"\n"
" --copy No filtering (useful only when specified alone)\n"
" --subblock=[OPTS] Subblock filter; valid OPTS (valid values; default):\n"
" size=NUM number of bytes of data per subblock\n"
" (1 - 256Mi; 4Ki)\n"
" rle=NUM run-length encoder chunk size (0-256; 0)\n"
));

/*
These aren't implemented yet.

	puts(_(
" Metadata options:\n"
"\n"
" -N, --name save or restore the original filename and time stamp\n"
" -n, --no-name do not save or restore filename and time stamp (default)\n"
" -S, --sign=KEY sign the data with GnuPG when compressing, or verify\n"
" the signature when decompressing\n"));
*/

	puts(_(
" Resource usage options:\n"
"\n"
" -M, --memory=NUM use roughly NUM bytes of memory at maximum\n"
" -T, --threads=NUM use at maximum of NUM (de)compression threads\n"
// " --threading=STR threading style; possible values are `auto' (default),\n"
// " `files', and `stream'
));

	puts(_(
" Other options:\n"
"\n"
" -q, --quiet suppress warnings; specify twice to suppress errors too\n"
" -v, --verbose be verbose; specify twice for even more verbose\n"
"\n"
" -h, --help display this help and exit\n"
" -V, --version display version and license information and exit\n"));

	puts(_("With no FILE, or when FILE is -, read standard input.\n"));

	// Show the limit in whole MiB, but never as zero.
	size_t mem_limit = opt_memory / (1024 * 1024);
	if (mem_limit == 0)
		mem_limit = 1;

	puts(_("On this system and configuration, the tool will use"));
	printf(_(" * roughly %zu MiB of memory at maximum; and\n"),
			mem_limit);
	// N_() receives a singular form, a plural form, and the count.
	printf(N_(
		" * at maximum of one thread for (de)compression.\n\n",
		" * at maximum of %zu threads for (de)compression.\n\n",
		opt_threads), opt_threads);

	printf(_("Report bugs to <%s> (in English or Finnish).\n"),
			PACKAGE_BUGREPORT);

	my_exit(SUCCESS);
}


/// Prints the version and license notice to stdout and exits with SUCCESS
/// via my_exit().
///
/// NOTE(review): the notice below names the GNU General Public License
/// (version 2 or later), while the header of this file names the GNU Lesser
/// General Public License 2.1 or later -- confirm which one is intended.
extern void lzma_attribute((noreturn))
show_version(void)
{
	printf(
"lzma (LZMA Utils) " PACKAGE_VERSION "\n"
"\n"
"Copyright (C) 1999-2006 Igor Pavlov\n"
"Copyright (C) 2007 Lasse Collin\n"
"\n"
"This program is free software; you can redistribute it and/or modify\n"
"it under the terms of the GNU General Public License as published by\n"
"the Free Software Foundation; either version 2 of the License, or\n"
"(at your option) any later version.\n"
"\n"
"This program is distributed in the hope that it will be useful,\n"
"but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
"MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n"
"GNU General Public License for more details.\n"
"\n");
	my_exit(SUCCESS);
}
diff --git a/src/lzma/help.h b/src/lzma/help.h new file mode 100644 index 00000000..659c66a0 --- /dev/null +++ b/src/lzma/help.h @@ -0,0 +1,32 @@
///////////////////////////////////////////////////////////////////////////////
//
/// \file help.h
/// \brief Help messages
//
// Copyright (C) 2007 Lasse Collin
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
///////////////////////////////////////////////////////////////////////////////

#ifndef HELP_H
#define HELP_H

#include "private.h"


/// Prints a hint to try --help (at V_WARNING level).
extern void show_try_help(void);

/// Prints the usage text and exits with SUCCESS.
extern void show_help(void) lzma_attribute((noreturn));

/// Prints the version and license notice and exits with SUCCESS.
extern void show_version(void) lzma_attribute((noreturn));

#endif
diff --git a/src/lzma/io.c b/src/lzma/io.c new file mode 100644 index 00000000..a7683fcc --- /dev/null +++ b/src/lzma/io.c @@ -0,0 +1,664 @@
///////////////////////////////////////////////////////////////////////////////
//
/// \file io.c
/// \brief File opening, unlinking, and closing
//
// Copyright (C) 2007 Lasse Collin
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
///////////////////////////////////////////////////////////////////////////////

#include "private.h"

// struct timeval is needed only when timestamps can be copied.
#if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT)
# include <sys/time.h>
#endif

// Where O_SEARCH is not defined, directories are opened with O_RDONLY.
#ifndef O_SEARCH
# define O_SEARCH O_RDONLY
#endif


/// \brief Number of open file_pairs
///
/// Once the main() function has requested processing of all files,
/// we wait that open_pairs drops back to zero. Then it is safe to
/// exit from the program.
static size_t open_pairs = 0;


/// \brief mutex for file system operations
///
/// All file system operations are done via the functions in this file.
/// They use fchdir() to avoid some race conditions (more portable than
/// openat() & co.).
+/// +/// Synchronizing all file system operations shouldn't affect speed notably, +/// since the actual reading from and writing to files is done in parallel. +static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; + + +/// This condition is invoked when a file is closed and the value of +/// the open_files variable has dropped to zero. The only listener for +/// this condition is io_finish() which is called from main(). +static pthread_cond_t io_cond = PTHREAD_COND_INITIALIZER; + + +/// True when stdout is being used by some thread +static bool stdout_in_use = false; + + +/// This condition is signalled when a thread releases stdout (no longer +/// writes data to it). +static pthread_cond_t stdout_cond = PTHREAD_COND_INITIALIZER; + + +/// \brief Directory where we were started +/// +/// This is needed when a new file, whose name was given on command line, +/// is opened. +static int start_dir; + + +static uid_t uid; +static gid_t gid; + + +extern void +io_init(void) +{ + start_dir = open(".", O_SEARCH | O_NOCTTY); + if (start_dir == -1) { + errmsg(V_ERROR, _("Cannot get file descriptor of the current " + "directory: %s"), strerror(errno)); + my_exit(ERROR); + } + + uid = getuid(); + gid = getgid(); + + return; +} + + +/// Waits until the number of open file_pairs has dropped to zero. +extern void +io_finish(void) +{ + pthread_mutex_lock(&mutex); + + while (open_pairs != 0) + pthread_cond_wait(&io_cond, &mutex); + + (void)close(start_dir); + + pthread_mutex_unlock(&mutex); + + return; +} + + +/// \brief Unlinks a file +/// +/// \param dir_fd File descriptor of the directory containing the file +/// \param name Name of the file with or without path +/// +/// \return Zero on success. On error, -1 is returned and errno set. +/// +static void +io_unlink(int dir_fd, const char *name, ino_t ino) +{ + const char *base = str_filename(name); + if (base == NULL) { + // This shouldn't happen. 
+ errmsg(V_ERROR, _("%s: Invalid filename"), name); + return; + } + + pthread_mutex_lock(&mutex); + + if (fchdir(dir_fd)) { + errmsg(V_ERROR, _("Cannot change directory: %s"), + strerror(errno)); + } else { + struct stat st; + if (lstat(base, &st) || st.st_ino != ino) + errmsg(V_ERROR, _("%s: File seems to be moved, " + "not removing"), name); + + // There's a race condition between lstat() and unlink() + // but at least we have tried to avoid removing wrong file. + else if (unlink(base)) + errmsg(V_ERROR, _("%s: Cannot remove: %s"), + name, strerror(errno)); + } + + pthread_mutex_unlock(&mutex); + + return; +} + + +/// \brief Copies owner/group and permissions +/// +/// \todo ACL and EA support +/// +static void +io_copy_attrs(const file_pair *pair) +{ + // This function is more tricky than you may think at first. + // Blindly copying permissions may permit users to access the + // destination file who didn't have permission to access the + // source file. + + if (uid == 0 && fchown(pair->dest_fd, pair->src_st.st_uid, -1)) + errmsg(V_WARNING, _("%s: Cannot set the file owner: %s"), + pair->dest_name, strerror(errno)); + + mode_t mode; + + if (fchown(pair->dest_fd, -1, pair->src_st.st_gid)) { + errmsg(V_WARNING, _("%s: Cannot set the file group: %s"), + pair->dest_name, strerror(errno)); + // We can still safely copy some additional permissions: + // `group' must be at least as strict as `other' and + // also vice versa. + // + // NOTE: After this, the owner of the source file may + // get additional permissions. This shouldn't be too bad, + // because the owner would have had permission to chmod + // the original file anyway. + mode = ((pair->src_st.st_mode & 0070) >> 3) + & (pair->src_st.st_mode & 0007); + mode = (pair->src_st.st_mode & 0700) | (mode << 3) | mode; + } else { + // Drop the setuid, setgid, and sticky bits. 
+ mode = pair->src_st.st_mode & 0777; + } + + if (fchmod(pair->dest_fd, mode)) + errmsg(V_WARNING, _("%s: Cannot set the file permissions: %s"), + pair->dest_name, strerror(errno)); + + // Copy the timestamps only if we have a secure function to do it. +#if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) + struct timeval tv[2]; + tv[0].tv_sec = pair->src_st.st_atime; + tv[1].tv_sec = pair->src_st.st_mtime; + +# if defined(HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC) + tv[0].tv_usec = pair->src_st.st_atim.tv_nsec / 1000; +# elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC) + tv[0].tv_usec = pair->src_st.st_atimespec.tv_nsec / 1000; +# else + tv[0].tv_usec = 0; +# endif + +# if defined(HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC) + tv[1].tv_usec = pair->src_st.st_mtim.tv_nsec / 1000; +# elif defined(HAVE_STRUCT_STAT_ST_MTIMESPEC_TV_NSEC) + tv[1].tv_usec = pair->src_st.st_mtimespec.tv_nsec / 1000; +# else + tv[1].tv_usec = 0; +# endif + +# ifdef HAVE_FUTIMES + (void)futimes(pair->dest_fd, tv); +# else + (void)futimesat(pair->dest_fd, NULL, tv); +# endif +#endif + + return; +} + + +/// Opens and changes into the directory containing the source file. +static int +io_open_dir(file_pair *pair) +{ + if (pair->src_name == stdin_filename) + return 0; + + if (fchdir(start_dir)) { + errmsg(V_ERROR, _("Cannot change directory: %s"), + strerror(errno)); + return -1; + } + + const char *split = strrchr(pair->src_name, '/'); + if (split == NULL) { + pair->dir_fd = start_dir; + } else { + // Copy also the slash. It's needed to support filenames + // like "/foo" (dirname being "/"), and it never hurts anyway. + const size_t dirname_len = split - pair->src_name + 1; + char dirname[dirname_len + 1]; + memcpy(dirname, pair->src_name, dirname_len); + dirname[dirname_len] = '\0'; + + // Open the directory and change into it. 
+ pair->dir_fd = open(dirname, O_SEARCH | O_NOCTTY); + if (pair->dir_fd == -1 || fchdir(pair->dir_fd)) { + errmsg(V_ERROR, _("%s: Cannot open the directory " + "containing the file: %s"), + pair->src_name, strerror(errno)); + (void)close(pair->dir_fd); + return -1; + } + } + + return 0; +} + + +static void +io_close_dir(file_pair *pair) +{ + if (pair->dir_fd != start_dir) + (void)close(pair->dir_fd); + + return; +} + + +/// Opens the source file. The file is opened using the plain filename without +/// path, thus the file must be in the current working directory. This is +/// ensured because io_open_dir() is always called before this function. +static int +io_open_src(file_pair *pair) +{ + if (pair->src_name == stdin_filename) { + pair->src_fd = STDIN_FILENO; + } else { + // Strip the pathname. Thanks to io_open_dir(), the file + // is now in the current working directory. + const char *filename = str_filename(pair->src_name); + if (filename == NULL) + return -1; + + // Symlinks are followed if --stdout or --force has been + // specified. + const bool follow_symlinks = opt_stdout || opt_force; + pair->src_fd = open(filename, O_RDONLY | O_NOCTTY + | (follow_symlinks ? 0 : O_NOFOLLOW)); + if (pair->src_fd == -1) { + // Give an understandable error message in if reason + // for failing was that the file was a symbolic link. + // - Linux, OpenBSD, Solaris: ELOOP + // - FreeBSD: EMLINK + // - Tru64: ENOTSUP + // It seems to be safe to check for all these, since + // those errno values aren't used for other purporses + // on any of the listed operating system *when* the + // above flags are used with open(). 
+ if (!follow_symlinks + && (errno == ELOOP +#ifdef EMLINK + || errno == EMLINK +#endif +#ifdef ENOTSUP + || errno == ENOTSUP +#endif + )) { + errmsg(V_WARNING, _("%s: Is a symbolic link, " + "skipping"), pair->src_name); + } else { + errmsg(V_ERROR, "%s: %s", pair->src_name, + strerror(errno)); + } + + return -1; + } + + if (fstat(pair->src_fd, &pair->src_st)) { + errmsg(V_ERROR, "%s: %s", pair->src_name, + strerror(errno)); + goto error; + } + + if (S_ISDIR(pair->src_st.st_mode)) { + errmsg(V_WARNING, _("%s: Is a directory, skipping"), + pair->src_name); + goto error; + } + + if (!opt_stdout) { + if (!opt_force && !S_ISREG(pair->src_st.st_mode)) { + errmsg(V_WARNING, _("%s: Not a regular file, " + "skipping"), pair->src_name); + goto error; + } + + if (pair->src_st.st_mode & (S_ISUID | S_ISGID)) { + // Setuid and setgid files are rejected even + // with --force. This is good for security + // (hopefully) but it's a bit weird to reject + // file when --force was given. At least this + // matches gzip's behavior. + errmsg(V_WARNING, _("%s: File has setuid or " + "setgid bit set, skipping"), + pair->src_name); + goto error; + } + + if (!opt_force && (pair->src_st.st_mode & S_ISVTX)) { + errmsg(V_WARNING, _("%s: File has sticky bit " + "set, skipping"), + pair->src_name); + goto error; + } + + if (pair->src_st.st_nlink > 1) { + errmsg(V_WARNING, _("%s: Input file has more " + "than one hard link, " + "skipping"), pair->src_name); + goto error; + } + } + } + + return 0; + +error: + (void)close(pair->src_fd); + return -1; +} + + +/// \brief Closes source file of the file_pair structure +/// +/// \param pair File whose src_fd should be closed +/// \param success If true, the file will be removed from the disk if +/// closing succeeds and --keep hasn't been used. 
+static void +io_close_src(file_pair *pair, bool success) +{ + if (pair->src_fd == STDIN_FILENO || pair->src_fd == -1) + return; + + if (close(pair->src_fd)) { + errmsg(V_ERROR, _("%s: Closing the file failed: %s"), + pair->src_name, strerror(errno)); + } else if (success && !opt_keep_original) { + io_unlink(pair->dir_fd, pair->src_name, pair->src_st.st_ino); + } + + return; +} + + +static int +io_open_dest(file_pair *pair) +{ + if (opt_stdout || pair->src_fd == STDIN_FILENO) { + // We don't modify or free() this. + pair->dest_name = (char *)"(stdout)"; + pair->dest_fd = STDOUT_FILENO; + + // Synchronize the order in which files get written to stdout. + // Unlocking the mutex is safe, because opening the file_pair + // can no longer fail. + while (stdout_in_use) + pthread_cond_wait(&stdout_cond, &mutex); + + stdout_in_use = true; + + } else { + pair->dest_name = get_dest_name(pair->src_name); + if (pair->dest_name == NULL) + return -1; + + // This cannot fail, because get_dest_name() doesn't return + // invalid names. + const char *filename = str_filename(pair->dest_name); + assert(filename != NULL); + + pair->dest_fd = open(filename, O_WRONLY | O_NOCTTY | O_CREAT + | (opt_force ? O_TRUNC : O_EXCL), + S_IRUSR | S_IWUSR); + if (pair->dest_fd == -1) { + errmsg(V_ERROR, "%s: %s", pair->dest_name, + strerror(errno)); + free(pair->dest_name); + return -1; + } + + // If this really fails... well, we have a safe fallback. + struct stat st; + if (fstat(pair->dest_fd, &st)) + pair->dest_ino = 0; + else + pair->dest_ino = st.st_ino; + } + + return 0; +} + + +/// \brief Closes destination file of the file_pair structure +/// +/// \param pair File whose dest_fd should be closed +/// \param success If false, the file will be removed from the disk. +/// +/// \return Zero if closing succeeds. On error, -1 is returned and +/// error message printed. 
+static int +io_close_dest(file_pair *pair, bool success) +{ + if (pair->dest_fd == -1) + return 0; + + if (pair->dest_fd == STDOUT_FILENO) { + stdout_in_use = false; + pthread_cond_signal(&stdout_cond); + return 0; + } + + if (close(pair->dest_fd)) { + errmsg(V_ERROR, _("%s: Closing the file failed: %s"), + pair->dest_name, strerror(errno)); + + // Closing destination file failed, so we cannot trust its + // contents. Get rid of junk: + io_unlink(pair->dir_fd, pair->dest_name, pair->dest_ino); + free(pair->dest_name); + return -1; + } + + // If the operation using this file wasn't successful, we git rid + // of the junk file. + if (!success) + io_unlink(pair->dir_fd, pair->dest_name, pair->dest_ino); + + free(pair->dest_name); + + return 0; +} + + +extern file_pair * +io_open(const char *src_name) +{ + if (is_empty_filename(src_name)) + return NULL; + + file_pair *pair = malloc(sizeof(file_pair)); + if (pair == NULL) { + out_of_memory(); + return NULL; + } + + *pair = (file_pair){ + .src_name = src_name, + .dest_name = NULL, + .dir_fd = -1, + .src_fd = -1, + .dest_fd = -1, + .src_eof = false, + }; + + pthread_mutex_lock(&mutex); + + ++open_pairs; + + if (io_open_dir(pair)) + goto error_dir; + + if (io_open_src(pair)) + goto error_src; + + if (user_abort || io_open_dest(pair)) + goto error_dest; + + pthread_mutex_unlock(&mutex); + + return pair; + +error_dest: + io_close_src(pair, false); +error_src: + io_close_dir(pair); +error_dir: + --open_pairs; + pthread_mutex_unlock(&mutex); + free(pair); + return NULL; +} + + +/// \brief Closes the file descriptors and frees the structure +extern void +io_close(file_pair *pair, bool success) +{ + if (success && pair->dest_fd != STDOUT_FILENO) + io_copy_attrs(pair); + + // Close the destination first. If it fails, we must not remove + // the source file! + if (!io_close_dest(pair, success)) { + // Closing destination file succeeded. 
Remove the source file + // if the operation using this file pair was successful + // and we haven't been requested to keep the source file. + io_close_src(pair, success); + } else { + // We don't care if operation using this file pair was + // successful or not, since closing the destination file + // failed. Don't remove the original file. + io_close_src(pair, false); + } + + io_close_dir(pair); + + free(pair); + + pthread_mutex_lock(&mutex); + + if (--open_pairs == 0) + pthread_cond_signal(&io_cond); + + pthread_mutex_unlock(&mutex); + + return; +} + + +/// \brief Reads from a file to a buffer +/// +/// \param pair File pair having the sourcefile open for reading +/// \param buf Destination buffer to hold the read data +/// \param size Size of the buffer; assumed be smaller than SSIZE_MAX +/// +/// \return On success, number of bytes read is returned. On end of +/// file zero is returned and pair->src_eof set to true. +/// On error, SIZE_MAX is returned and error message printed. +/// +/// \note This does no locking, thus two threads must not read from +/// the same file. This no problem in this program. +extern size_t +io_read(file_pair *pair, uint8_t *buf, size_t size) +{ + // We use small buffers here. + assert(size < SSIZE_MAX); + + size_t left = size; + + while (left > 0) { + const ssize_t amount = read(pair->src_fd, buf, left); + + if (amount == 0) { + pair->src_eof = true; + break; + } + + if (amount == -1) { + if (errno == EINTR) { + if (user_abort) + return SIZE_MAX; + + continue; + } + + errmsg(V_ERROR, _("%s: Read error: %s"), + pair->src_name, strerror(errno)); + + // FIXME Is this needed? 
+ pair->src_eof = true; + + return SIZE_MAX; + } + + buf += (size_t)(amount); + left -= (size_t)(amount); + } + + return size - left; +} + + +/// \brief Writes a buffer to a file +/// +/// \param pair File pair having the destination file open for writing +/// \param buf Buffer containing the data to be written +/// \param size Size of the buffer; assumed be smaller than SSIZE_MAX +/// +/// \return On success, zero is returned. On error, -1 is returned +/// and error message printed. +/// +/// \note This does no locking, thus two threads must not write to +/// the same file. This no problem in this program. +extern int +io_write(const file_pair *pair, const uint8_t *buf, size_t size) +{ + assert(size < SSIZE_MAX); + + while (size > 0) { + const ssize_t amount = write(pair->dest_fd, buf, size); + if (amount == -1) { + if (errno == EINTR) { + if (user_abort) + return -1; + + continue; + } + + errmsg(V_ERROR, _("%s: Write error: %s"), + pair->dest_name, strerror(errno)); + return -1; + } + + buf += (size_t)(amount); + size -= (size_t)(amount); + } + + return 0; +} diff --git a/src/lzma/io.h b/src/lzma/io.h new file mode 100644 index 00000000..d1aa17f4 --- /dev/null +++ b/src/lzma/io.h @@ -0,0 +1,60 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file io.h +/// \brief I/O types and functions +// +// Copyright (C) 2007 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
//
///////////////////////////////////////////////////////////////////////////////

#ifndef IO_H
#define IO_H

#include "private.h"

// Use the stdio buffer size for I/O buffers, unless it is 1024 bytes
// or less, in which case 8192 bytes is used.
#if BUFSIZ <= 1024
# define IO_BUFFER_SIZE 8192
#else
# define IO_BUFFER_SIZE BUFSIZ
#endif


/// A source file together with the destination it is being converted to.
/// Created by io_open() and destroyed by io_close().
typedef struct {
	// Name given to io_open(); the pointer is stored as is.
	const char *src_name;

	// Destination name from get_dest_name(), or the constant
	// "(stdout)" when writing to standard output.
	char *dest_name;

	// Directory containing the source file; -1 until opened.
	int dir_fd;

	// Source and destination descriptors; -1 until opened.
	int src_fd;
	int dest_fd;

	// fstat() result of the source file (not filled in for stdin).
	struct stat src_st;

	// Inode of the destination file, or zero if fstat() failed on it.
	ino_t dest_ino;

	// Set by io_read() at end of input and on a read error.
	bool src_eof;
} file_pair;


/// Opens the starting directory; must be called before io_open().
extern void io_init(void);

/// Blocks until every file_pair has been closed.
extern void io_finish(void);

/// Opens the source and destination; returns NULL on failure.
extern file_pair *io_open(const char *src_name);

/// Closes both files, removes the source or the junk destination as
/// appropriate, and frees the pair.
extern void io_close(file_pair *pair, bool success);

/// Returns the number of bytes read, or SIZE_MAX on error.
extern size_t io_read(file_pair *pair, uint8_t *buf, size_t size);

/// Returns zero on success and -1 on error.
extern int io_write(const file_pair *pair, const uint8_t *buf, size_t size);


#endif
diff --git a/src/lzma/list.c b/src/lzma/list.c new file mode 100644 index 00000000..61eb5702 --- /dev/null +++ b/src/lzma/list.c @@ -0,0 +1,477 @@
///////////////////////////////////////////////////////////////////////////////
//
/// \file list.c
/// \brief Listing information about .lzma files
//
// Copyright (C) 2007 Lasse Collin
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
///////////////////////////////////////////////////////////////////////////////

#include "private.h"


/*

1. Check the file type: native, alone, unknown

Alone:
1. Show info about header. Don't look for concatenated parts.

Native:
1. Check that Stream Header is valid.
2. Seek to the end of the file.
3. Skip padding.
4. Reverse decode Stream Footer.
+5. Seek Backward Size bytes. +6. + +*/ + + +static void +unsupported_file(file_handle *handle) +{ + errmsg(V_ERROR, "%s: Unsupported file type", handle->name); + set_exit_status(ERROR); + (void)io_close(handle); + return; +} + + +/// Primitive escaping function, that escapes only ASCII control characters. +static void +print_escaped(const uint8_t *str) +{ + while (*str != '\0') { + if (*str <= 0x1F || *str == 0x7F) + printf("\\x%02X", *str); + else + putchar(*str); + + ++str; + } + + return; +} + + +static void +list_native(file_handle *handle) +{ + lzma_stream strm = LZMA_STREAM_INIT; + lzma_stream_flags flags; + lzma_ret ret = lzma_stream_header_decoder(&strm, &flags); + +} + + +static void +list_alone(const listing_handle *handle) +{ + if (handle->buffer[0] > (4 * 5 + 4) * 9 + 8) { + unsupported_file(handle); + return; + } + + const unsigned int pb = handle->buffer[0] / (9 * 5); + handle->buffer[0] -= pb * 9 * 5; + const unsigned int lp = handle->buffer[0] / 9; + const unsigned int lc = handle->buffer[0] - lp * 9; + + uint32_t dict = 0; + for (size_t i = 1; i < 5; ++i) { + dict <<= 8; + dict |= header[i]; + } + + if (dict > LZMA_DICTIONARY_SIZE_MAX) { + unsupported_file(handle); + return; + } + + uint64_t uncompressed_size = 0; + for (size_t i = 5; i < 13; ++i) { + uncompressed_size <<= 8; + uncompressed_size |= header[i]; + } + + // Reject files with uncompressed size of 256 GiB or more. It's + // an arbitrary limit trying to avoid at least some false positives. 
+ if (uncompressed_size != UINT64_MAX + && uncompressed_size >= (UINT64_C(1) << 38)) { + unsupported_file(handle); + return; + } + + if (verbosity < V_WARNING) { + printf("name="); + print_escaped(handle->name); + printf("\nformat=alone\n"); + + if (uncompressed_size == UINT64_MAX) + printf("uncompressed_size=unknown\n"); + else + printf("uncompressed_size=%" PRIu64 "\n", + uncompressed_size); + + printf("dict=%" PRIu32 "\n", dict); + + printf("lc=%u\nlp=%u\npb=%u\n\n", lc, lp, pb); + + } else { + printf("File name: "); + print_escaped(handle->name); + printf("\nFile format: LZMA_Alone\n") + + printf("Uncompressed size: "); + if (uncompressed_size == UINT64_MAX) + printf("unknown\n"); + else + printf("%," PRIu64 " bytes (%" PRIu64 " MiB)\n", + uncompressed_size, + (uncompressed_size + 1024 * 512) + / (1024 * 1024)); + + printf("Dictionary size: %," PRIu32 " bytes " + "(%" PRIu32 " MiB)\n", + dict, (dict + 1024 * 512) / (1024 * 1024)); + + printf("Literal context bits (lc): %u\n", lc); + printf("Literal position bits (lc): %u\n", lp); + printf("Position bits (pb): %u\n", pb); + } + + return; +} + + + + +typedef struct { + const char *filename; + struct stat st; + int fd; + + lzma_stream strm; + lzma_stream_flags stream_flags; + lzma_info *info; + + lzma_vli backward_size; + lzma_vli uncompressed_size; + + size_t buffer_size; + uint8_t buffer[IO_BUFFER_SIZE]; +} listing_handle; + + +static bool +listing_pread(listing_handle *handle, uint64_t offset) +{ + if (offset >= (uint64_t)(handle->st.st_size)) { + errmsg(V_ERROR, "%s: Trying to read past the end of " + "the file.", handle->filename); + return true; + } + +#ifdef HAVE_PREAD + const ssize_t ret = pread(handle->fd, handle->buffer, IO_BUFFER_SIZE, + (off_t)(offset)); +#else + // Use lseek() + read() since we don't have pread(). We don't care + // to which offset the reading position is left. 
+ if (lseek(handle->fd, (off_t)(offset), SEEK_SET) == -1) { + errmsg(V_ERROR, "%s: %s", handle->filename, strerror(errno)); + return true; + } + + const ssize_t ret = read(handle->fd, handle->buffer, IO_BUFFER_SIZE); +#endif + + if (ret == -1) { + errmsg(V_ERROR, "%s: %s", handle->filename, strerror(errno)); + return true; + } + + if (ret == 0) { + errmsg(V_ERROR, "%s: Trying to read past the end of " + "the file.", handle->filename); + return true; + } + + handle->buffer_size = (size_t)(ret); + return false; +} + + + +static bool +parse_stream_header(listing_handle *handle) +{ + if (listing_pread(handle, 0)) + return true; + + // TODO Got enough input? + + lzma_ret ret = lzma_stream_header_decoder( + &handle->strm, &handle->stream_flags); + if (ret != LZMA_OK) { + errmsg(V_ERROR, "%s: %s", handle->name, str_strm_error(ret)); + return true; + } + + handle->strm.next_in = handle->buffer; + handle->strm.avail_in = handle->buffer_size; + ret = lzma_code(&handle->strm, LZMA_RUN); + if (ret != LZMA_STREAM_END) { + assert(ret != LZMA_OK); + errmsg(V_ERROR, "%s: %s", handle->name, str_strm_error(ret)); + return true; + } + + return false; +} + + +static bool +parse_stream_tail(listing_handle *handle) +{ + uint64_t offset = (uint64_t)(handle->st.st_size); + + // Skip padding + do { + if (offset == 0) { + errmsg(V_ERROR, "%s: %s", handle->name, + str_strm_error(LZMA_DATA_ERROR)); + return true; + } + + if (offset < IO_BUFFER_SIZE) + offset = 0; + else + offset -= IO_BUFFER_SIZE; + + if (listing_pread(handle, offset)) + return true; + + while (handle->buffer_size > 0 + && handle->buffer[handle->buffer_size - 1] + == '\0') + --handle->buffer_size; + + } while (handle->buffer_size == 0); + + if (handle->buffer_size < LZMA_STREAM_TAIL_SIZE) { + // TODO + } + + lzma_stream_flags stream_flags; + lzma_ret ret = lzma_stream_tail_decoder(&handle->strm, &stream_flags); + if (ret != LZMA_OK) { + errmsg(V_ERROR, "%s: %s", handle->name, str_strm_error(ret)); + return true; + } + + 
handle->strm.next_in = handle->buffer + handle->buffer_size + - LZMA_STREAM_TAIL_SIZE; + handle->strm.avail_in = LZMA_STREAM_TAIL_SIZE; + handle->buffer_size -= LZMA_STREAM_TAIL_SIZE; + ret = lzma_code(&handle->strm, LZMA_RUN); + if (ret != LZMA_OK) { + assert(ret != LZMA_OK); + errmsg(V_ERROR, "%s: %s", handle->name, str_strm_error(ret)); + return true; + } + + if (!lzma_stream_flags_is_equal(handle->stream_flags, stream_flags)) { + // TODO + // Possibly corrupt, possibly concatenated file. + } + + handle->backward_size = 0; + ret = lzma_vli_reverse_decode(&handle->backward_size, handle->buffer, + &handle->buffer_size); + if (ret != LZMA_OK) { + // It may be LZMA_BUF_ERROR too, but it doesn't make sense + // as an error message displayed to the user. + errmsg(V_ERROR, "%s: %s", handle->name, + str_strm_error(LZMA_DATA_ERROR)); + return true; + } + + if (!stream_flags.is_multi) { + handle->uncompressed_size = 0; + size_t tmp = handle->buffer_size; + ret = lzma_vli_reverse_decode(&handle->uncompressed_size, + handle->buffer, &tmp); + if (ret != LZMA_OK) + handle->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; + } + + // Calculate the Header Metadata Block start offset. + + + return false; +} + + + +static void +list_native(listing_handle *handle) +{ + lzma_memory_limitter *limitter + = lzma_memory_limitter_create(opt_memory); + if (limitter == NULL) { + errmsg(V_ERROR, + } + lzma_info *info = + + + // Parse Stream Header + // + // Single-Block Stream: + // - Parse Block Header + // - Parse Stream Footer + // - If Backward Size doesn't match, error out + // + // Multi-Block Stream: + // - Parse Header Metadata Block, if any + // - Parse Footer Metadata Block + // - Parse Stream Footer + // - If Footer Metadata Block doesn't match the Stream, error out + // + // In other words, we don't support concatened files. 
+ if (parse_stream_header(handle)) + return; + + if (parse_block_header(handle)) + return; + + if (handle->stream_flags.is_multi) { + if (handle->block_options.is_metadata) { + if (parse_metadata(handle) + return; + } + + if (my_seek(handle, + + } else { + if (handle->block_options.is_metadata) { + FILE_IS_CORRUPT(); + return; + } + + if (parse_stream_footer(handle)) + return; + + // If Uncompressed Size isn't present in Block Header, + // it must be present in Stream Footer. + if (handle->block_options.uncompressed_size + == LZMA_VLI_VALUE_UNKNOWN + && handle->stream_flags.uncompressed_size + == LZMA_VLI_VALUE_UNKNOWN) { + FILE_IS_CORRUPT(); + return; + } + + // Construct a single-Record Index. + lzma_index *index = malloc(sizeof(lzma_index)); + if (index == NULL) { + out_of_memory(); + return; + } + + // Pohdintaa: + // Jos Block coder hoitaisi Uncompressed ja Backward Sizet, + // voisi index->total_sizeksi laittaa suoraan Backward Sizen. + index->total_size = + + if () { + + } + } + + + if (handle->block_options.is_metadata) { + if (!handle->stream_flags.is_multi) { + FILE_IS_CORRUPT(); + return; + } + + if (parse_metadata(handle)) + return; + + } +} + + + +extern void +list(const char *filename) +{ + if (strcmp(filename, "-") == 0) { + errmsg(V_ERROR, "%s: --list does not support reading from " + "standard input", filename); + return; + } + + if (is_empty_filename(filename)) + return; + + listing_handle handle; + handle.filename = filename; + + handle.fd = open(filename, O_RDONLY | O_NOCTTY); + if (handle.fd == -1) { + errmsg(V_ERROR, "%s: %s", filename, strerror(errno)); + return; + } + + if (fstat(handle.fd, &handle.st)) { + errmsg(V_ERROR, "%s: %s", filename, strerror(errno)); + goto out; + } + + if (!S_ISREG(handle.st.st_mode)) { + errmsg(V_WARNING, _("%s: Not a regular file, skipping"), + filename); + goto out; + } + + if (handle.st.st_size <= 0) { + errmsg(V_ERROR, _("%s: File is empty"), filename); + goto out; + } + + if (listing_pread(&handle, 0)) + 
goto out; + + if (handle.buffer[0] == 0xFF) { + if (opt_header == HEADER_ALONE) { + errmsg(V_ERROR, "%s: FIXME", filename); // FIXME + goto out; + } + + list_native(&handle); + } else { + if (opt_header != HEADER_AUTO && opt_header != HEADER_ALONE) { + errmsg(V_ERROR, "%s: FIXME", filename); // FIXME + goto out; + } + + list_alone(&handle); + } + +out: + (void)close(fd); + return; +} diff --git a/src/lzma/main.c b/src/lzma/main.c new file mode 100644 index 00000000..26edc47e --- /dev/null +++ b/src/lzma/main.c @@ -0,0 +1,254 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file main.c +/// \brief main() +// +// Copyright (C) 2007 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" +#include "open_stdxxx.h" +#include <ctype.h> + +static sig_atomic_t exit_signal = 0; + + +static void +signal_handler(int sig) +{ + // FIXME Is this thread-safe together with main()? 
+ exit_signal = sig; + + user_abort = 1; + return; +} + + +static void +establish_signal_handlers(void) +{ + struct sigaction sa; + sa.sa_handler = &signal_handler; + sigfillset(&sa.sa_mask); + sa.sa_flags = 0; + + static const int sigs[] = { + SIGHUP, + SIGINT, + SIGPIPE, + SIGTERM, + SIGXCPU, + SIGXFSZ, + }; + + for (size_t i = 0; i < sizeof(sigs) / sizeof(sigs[0]); ++i) { + if (sigaction(sigs[i], &sa, NULL)) { + errmsg(V_ERROR, _("Cannot establish signal handlers")); + my_exit(ERROR); + } + } + + /* + SIGINFO/SIGUSR1 for status reporting? + */ +} + + +static bool +is_tty_stdin(void) +{ + const bool ret = isatty(STDIN_FILENO); + if (ret) { + // FIXME: Other threads may print between these lines. + // Maybe that should be fixed. Not a big issue in practice. + errmsg(V_ERROR, _("Compressed data not read from " + "a terminal.")); + errmsg(V_ERROR, _("Use `--force' to force decompression.")); + show_try_help(); + } + + return ret; +} + + +static bool +is_tty_stdout(void) +{ + const bool ret = isatty(STDOUT_FILENO); + if (ret) { + errmsg(V_ERROR, _("Compressed data not written to " + "a terminal.")); + errmsg(V_ERROR, _("Use `--force' to force decompression.")); + show_try_help(); + } + + return ret; +} + + +static char * +read_name(void) +{ + size_t size = 256; + size_t pos = 0; + char *name = malloc(size); + if (name == NULL) { + out_of_memory(); + return NULL; + } + + while (true) { + const int c = fgetc(opt_files_file); + if (c == EOF) { + free(name); + + if (ferror(opt_files_file)) + errmsg(V_ERROR, _("%s: Error reading " + "filenames: %s"), + opt_files_name, + strerror(errno)); + else if (pos != 0) + errmsg(V_ERROR, _("%s: Unexpected end of " + "input when reading " + "filenames"), opt_files_name); + + return NULL; + } + + if (c == '\0' || c == opt_files_split) + break; + + name[pos++] = c; + + if (pos == size) { + size *= 2; + char *tmp = realloc(name, size); + if (tmp == NULL) { + free(name); + out_of_memory(); + return NULL; + } + + name = tmp; + } + } + + if 
(name != NULL) + name[pos] = '\0'; + + return name; +} + + +int +main(int argc, char **argv) +{ + // Make sure that stdin, stdout, and and stderr are connected to + // a valid file descriptor. Exit immediatelly with exit code ERROR + // if we cannot make the file descriptors valid. Maybe we should + // print an error message, but our stderr could be screwed anyway. + open_stdxxx(ERROR); + + // Set the program invocation name used in various messages. + argv0 = argv[0]; + + setlocale(LC_ALL, "en_US.UTF-8"); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + + // Set hardware-dependent default values. These can be overriden + // on the command line, thus this must be done before parse_args(). + hardware_init(); + + char **files = parse_args(argc, argv); + + if (opt_mode == MODE_COMPRESS && opt_stdout && is_tty_stdout()) + return ERROR; + + if (opt_mode == MODE_COMPRESS) + lzma_init_encoder(); + else + lzma_init_decoder(); + + io_init(); + process_init(); + + if (opt_mode == MODE_LIST) { + errmsg(V_ERROR, "--list is not implemented yet."); + my_exit(ERROR); + } + + // Hook the signal handlers. We don't need these before we start + // the actual action, so this is done after parsing the command + // line arguments. 
+ establish_signal_handlers(); + + while (*files != NULL && !user_abort) { + if (strcmp("-", *files) == 0) { + if (!opt_force) { + if (opt_mode == MODE_COMPRESS) { + if (is_tty_stdout()) { + ++files; + continue; + } + } else if (is_tty_stdin()) { + ++files; + continue; + } + } + + if (opt_files_name == stdin_filename) { + errmsg(V_ERROR, _("Cannot read data from " + "standard input when " + "reading filenames " + "from standard input")); + ++files; + continue; + } + + *files = (char *)stdin_filename; + } + + process_file(*files++); + } + + if (opt_files_name != NULL) { + while (true) { + char *name = read_name(); + if (name == NULL) + break; + + if (name[0] != '\0') + process_file(name); + + free(name); + } + + if (opt_files_name != stdin_filename) + (void)fclose(opt_files_file); + } + + io_finish(); + + if (exit_signal != 0) { + struct sigaction sa; + sa.sa_handler = SIG_DFL; + sigfillset(&sa.sa_mask); + sa.sa_flags = 0; + sigaction(exit_signal, &sa, NULL); + raise(exit_signal); + } + + my_exit(exit_status); +} diff --git a/src/lzma/options.c b/src/lzma/options.c new file mode 100644 index 00000000..2928aafc --- /dev/null +++ b/src/lzma/options.c @@ -0,0 +1,346 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file options.c +/// \brief Parser for filter-specific options +// +// Copyright (C) 2007 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + + +/////////////////// +// Generic stuff // +/////////////////// + +typedef struct { + const char *name; + uint64_t id; +} name_id_map; + + +typedef struct { + const char *name; + const name_id_map *map; + uint64_t min; + uint64_t max; +} option_map; + + +/// Parses option=value pairs that are separated with colons, semicolons, +/// or commas: opt=val:opt=val;opt=val,opt=val +/// +/// Each option is a string, that is converted to an integer using the +/// index where the option string is in the array. +/// +/// Value can be either a number with minimum and maximum value limit, or +/// a string-id map mapping a list of possible string values to integers. +/// +/// When parsing both option and value succeed, a filter-specific function +/// is called, which should update the given value to filter-specific +/// options structure. +/// +/// \param str String containing the options from the command line +/// \param opts Filter-specific option map +/// \param set Filter-specific function to update filter_options +/// \param filter_options Pointer to filter-specific options structure +/// +/// \return Returns only if no errors occur. +/// +static void +parse_options(const char *str, const option_map *opts, + void (*set)(void *filter_options, + uint32_t key, uint64_t value), + void *filter_options) +{ + if (str == NULL || str[0] == '\0') + return; + + char *s = xstrdup(str); + char *name = s; + + while (true) { + char *split = strchr(name, ','); + if (split != NULL) + *split = '\0'; + + char *value = strchr(name, '='); + if (value != NULL) + *value++ = '\0'; + + if (value == NULL || value[0] == '\0') { + errmsg(V_ERROR, _("%s: Options must be `name=value' " + "pairs separated with commas"), + str); + my_exit(ERROR); + } + + // Look for the option name from the option map. 
+ bool found = false; + for (size_t i = 0; opts[i].name != NULL; ++i) { + if (strcmp(name, opts[i].name) != 0) + continue; + + if (opts[i].map == NULL) { + // value is an integer. + const uint64_t v = str_to_uint64(name, value, + opts[i].min, opts[i].max); + set(filter_options, i, v); + } else { + // value is a string which we should map + // to an integer. + size_t j; + for (j = 0; opts[i].map[j].name != NULL; ++j) { + if (strcmp(opts[i].map[j].name, value) + == 0) + break; + } + + if (opts[i].map[j].name == NULL) { + errmsg(V_ERROR, _("%s: Invalid option " + "value"), value); + my_exit(ERROR); + } + + set(filter_options, i, j); + } + + found = true; + break; + } + + if (!found) { + errmsg(V_ERROR, _("%s: Invalid option name"), name); + my_exit(ERROR); + } + + if (split == NULL) + break; + + name = split + 1; + } + + free(s); + return; +} + + +////////////// +// Subblock // +////////////// + +enum { + OPT_SIZE, + OPT_RLE, + OPT_ALIGN, +}; + + +static void +set_subblock(void *options, uint32_t key, uint64_t value) +{ + lzma_options_subblock *opt = options; + + switch (key) { + case OPT_SIZE: + opt->subblock_data_size = value; + break; + + case OPT_RLE: + opt->rle = value; + break; + + case OPT_ALIGN: + opt->alignment = value; + break; + } +} + + +extern lzma_options_subblock * +parse_options_subblock(const char *str) +{ + static const option_map opts[] = { + { "size", NULL, LZMA_SUBBLOCK_DATA_SIZE_MIN, + LZMA_SUBBLOCK_DATA_SIZE_MAX }, + { "rle", NULL, LZMA_SUBBLOCK_RLE_OFF, + LZMA_SUBBLOCK_RLE_MAX }, + { "align",NULL, LZMA_SUBBLOCK_ALIGNMENT_MIN, + LZMA_SUBBLOCK_ALIGNMENT_MAX }, + { NULL, NULL, 0, 0 } + }; + + lzma_options_subblock *options + = xmalloc(sizeof(lzma_options_subblock)); + *options = (lzma_options_subblock){ + .allow_subfilters = false, + .alignment = LZMA_SUBBLOCK_ALIGNMENT_DEFAULT, + .subblock_data_size = LZMA_SUBBLOCK_DATA_SIZE_DEFAULT, + .rle = LZMA_SUBBLOCK_RLE_OFF, + }; + + parse_options(str, opts, &set_subblock, options); + + return options; +} 
+ + +/////////// +// Delta // +/////////// + +enum { + OPT_DISTANCE, +}; + + +static void +set_delta(void *options, uint32_t key, uint64_t value) +{ + lzma_options_delta *opt = options; + switch (key) { + case OPT_DISTANCE: + opt->distance = value; + break; + } +} + + +extern lzma_options_delta * +parse_options_delta(const char *str) +{ + static const option_map opts[] = { + { "distance", NULL, LZMA_DELTA_DISTANCE_MIN, + LZMA_DELTA_DISTANCE_MAX }, + { NULL, NULL, 0, 0 } + }; + + lzma_options_delta *options = xmalloc(sizeof(lzma_options_subblock)); + *options = (lzma_options_delta){ + // It's hard to give a useful default for this. + .distance = LZMA_DELTA_DISTANCE_MIN, + }; + + parse_options(str, opts, &set_delta, options); + + return options; +} + + +////////// +// LZMA // +////////// + +enum { + OPT_DICT, + OPT_LC, + OPT_LP, + OPT_PB, + OPT_MODE, + OPT_FB, + OPT_MF, + OPT_MC +}; + + +static void +set_lzma(void *options, uint32_t key, uint64_t value) +{ + lzma_options_lzma *opt = options; + + switch (key) { + case OPT_DICT: + opt->dictionary_size = value; + break; + + case OPT_LC: + opt->literal_context_bits = value; + break; + + case OPT_LP: + opt->literal_pos_bits = value; + break; + + case OPT_PB: + opt->pos_bits = value; + break; + + case OPT_MODE: + opt->mode = value; + break; + + case OPT_FB: + opt->fast_bytes = value; + break; + + case OPT_MF: + opt->match_finder = value; + break; + + case OPT_MC: + opt->match_finder_cycles = value; + break; + } +} + + +extern lzma_options_lzma * +parse_options_lzma(const char *str) +{ + static const name_id_map modes[] = { + { "fast", LZMA_MODE_FAST }, + { "best", LZMA_MODE_BEST }, + { NULL, 0 } + }; + + static const name_id_map mfs[] = { + { "hc3", LZMA_MF_HC3 }, + { "hc4", LZMA_MF_HC4 }, + { "bt2", LZMA_MF_BT2 }, + { "bt3", LZMA_MF_BT3 }, + { "bt4", LZMA_MF_BT4 }, + { NULL, 0 } + }; + + static const option_map opts[] = { + { "dict", NULL, LZMA_DICTIONARY_SIZE_MIN, + LZMA_DICTIONARY_SIZE_MAX }, + { "lc", NULL, 
LZMA_LITERAL_CONTEXT_BITS_MIN, + LZMA_LITERAL_CONTEXT_BITS_MAX }, + { "lp", NULL, LZMA_LITERAL_POS_BITS_MIN, + LZMA_LITERAL_POS_BITS_MAX }, + { "pb", NULL, LZMA_POS_BITS_MIN, LZMA_POS_BITS_MAX }, + { "mode", modes, 0, 0 }, + { "fb", NULL, LZMA_FAST_BYTES_MIN, LZMA_FAST_BYTES_MAX }, + { "mf", mfs, 0, 0 }, + { "mc", NULL, 0, UINT32_MAX }, + { NULL, NULL, 0, 0 } + }; + + lzma_options_lzma *options = xmalloc(sizeof(lzma_options_lzma)); + *options = (lzma_options_lzma){ + .dictionary_size = LZMA_DICTIONARY_SIZE_DEFAULT, + .literal_context_bits = LZMA_LITERAL_CONTEXT_BITS_DEFAULT, + .literal_pos_bits = LZMA_LITERAL_POS_BITS_DEFAULT, + .pos_bits = LZMA_POS_BITS_DEFAULT, + .mode = LZMA_MODE_BEST, + .fast_bytes = LZMA_FAST_BYTES_DEFAULT, + .match_finder = LZMA_MF_BT4, + .match_finder_cycles = 0, + }; + + parse_options(str, opts, &set_lzma, options); + + return options; +} diff --git a/src/lzma/options.h b/src/lzma/options.h new file mode 100644 index 00000000..885c5969 --- /dev/null +++ b/src/lzma/options.h @@ -0,0 +1,46 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file options.h +/// \brief Parser for filter-specific options +// +// Copyright (C) 2007 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef OPTIONS_H +#define OPTIONS_H + +#include "private.h" + + +/// \brief Parser for Subblock options +/// +/// \return Pointer to allocated options structure. +/// Doesn't return on error. +extern lzma_options_subblock *parse_options_subblock(const char *str); + + +/// \brief Parser for Delta options +/// +/// \return Pointer to allocated options structure. +/// Doesn't return on error. +extern lzma_options_delta *parse_options_delta(const char *str); + + +/// \brief Parser for LZMA options +/// +/// \return Pointer to allocated options structure. +/// Doesn't return on error. +extern lzma_options_lzma *parse_options_lzma(const char *str); + +#endif diff --git a/src/lzma/private.h b/src/lzma/private.h new file mode 100644 index 00000000..89afac9b --- /dev/null +++ b/src/lzma/private.h @@ -0,0 +1,55 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file private.h +/// \brief Common includes, definitions, and prototypes +// +// Copyright (C) 2007 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef PRIVATE_H +#define PRIVATE_H + +#include "sysdefs.h" + +#ifdef HAVE_ERRNO_H +# include <errno.h> +#else +extern int errno; +#endif + +#include <sys/stat.h> +#include <limits.h> +#include <signal.h> +#include <pthread.h> +#include <locale.h> +#include <stdio.h> +#include <fcntl.h> +#include <unistd.h> + +#include "gettext.h" +#define _(msgid) gettext(msgid) +#define N_(msgid1, msgid2, n) ngettext(msgid1, msgid2, n) + +#include "alloc.h" +#include "args.h" +#include "error.h" +#include "hardware.h" +#include "help.h" +#include "io.h" +#include "options.h" +#include "process.h" +#include "suffix.h" +#include "util.h" + +#endif diff --git a/src/lzma/process.c b/src/lzma/process.c new file mode 100644 index 00000000..10a76b74 --- /dev/null +++ b/src/lzma/process.c @@ -0,0 +1,458 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file process.c +/// \brief Compresses or uncompresses a file +// +// Copyright (C) 2007 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + + +typedef struct { + lzma_stream strm; + void *options; + + file_pair *pair; + + /// We don't need this for *anything* but seems that at least with + /// glibc pthread_create() doesn't allow NULL. 
+ pthread_t thread; + + bool in_use; + +} thread_data; + + +/// Number of available threads +static size_t free_threads; + +/// Thread-specific data +static thread_data *threads; + +static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t cond = PTHREAD_COND_INITIALIZER; + +/// Attributes of new coder threads. They are created in detached state. +/// Coder threads signal to the service thread themselves when they are done. +static pthread_attr_t thread_attr; + + +////////// +// Init // +////////// + +extern void +process_init(void) +{ + threads = malloc(sizeof(thread_data) * opt_threads); + if (threads == NULL) { + out_of_memory(); + my_exit(ERROR); + } + + for (size_t i = 0; i < opt_threads; ++i) + threads[i] = (thread_data){ + .strm = LZMA_STREAM_INIT_VAR, + .options = NULL, + .pair = NULL, + .in_use = false, + }; + + if (pthread_attr_init(&thread_attr) + || pthread_attr_setdetachstate( + &thread_attr, PTHREAD_CREATE_DETACHED)) { + out_of_memory(); + my_exit(ERROR); + } + + free_threads = opt_threads; + + return; +} + + +////////////////////////// +// Thread-specific data // +////////////////////////// + +static thread_data * +get_thread_data(void) +{ + pthread_mutex_lock(&mutex); + + while (free_threads == 0) { + pthread_cond_wait(&cond, &mutex); + + if (user_abort) { + pthread_cond_signal(&cond); + pthread_mutex_unlock(&mutex); + return NULL; + } + } + + thread_data *t = threads; + while (t->in_use) + ++t; + + t->in_use = true; + --free_threads; + + pthread_mutex_unlock(&mutex); + + return t; +} + + +static void +release_thread_data(thread_data *t) +{ + pthread_mutex_lock(&mutex); + + t->in_use = false; + ++free_threads; + + pthread_cond_signal(&cond); + pthread_mutex_unlock(&mutex); + + return; +} + + +static int +create_thread(void *(*func)(thread_data *t), thread_data *t) +{ + if (opt_threads == 1) { + func(t); + } else { + const int err = pthread_create(&t->thread, &thread_attr, + (void *(*)(void *))(func), t); + if (err) { + 
errmsg(V_ERROR, _("Cannot create a thread: %s"), + strerror(err)); + user_abort = 1; + return -1; + } + } + + return 0; +} + + +///////////////////////// +// One thread per file // +///////////////////////// + +static int +single_init(thread_data *t) +{ + lzma_ret ret; + + if (opt_mode == MODE_COMPRESS) { + const lzma_vli uncompressed_size + = t->pair->src_fd != STDIN_FILENO + ? (lzma_vli)(t->pair->src_st.st_size) + : LZMA_VLI_VALUE_UNKNOWN; + + // TODO Support Multi-Block Streams to store Extra. + if (opt_header == HEADER_ALONE) { + lzma_options_alone alone; + alone.uncompressed_size = uncompressed_size; + memcpy(&alone.lzma, opt_filters[0].options, + sizeof(alone.lzma)); + ret = lzma_alone_encoder(&t->strm, &alone); + } else { + lzma_options_stream stream = { + .check = opt_check, + .has_crc32 = true, + .uncompressed_size = uncompressed_size, + .alignment = 0, + }; + memcpy(stream.filters, opt_filters, + sizeof(stream.filters)); + ret = lzma_stream_encoder_single(&t->strm, &stream); + } + } else { + // TODO Restrict file format if requested on the command line. + ret = lzma_auto_decoder(&t->strm, NULL, NULL); + } + + if (ret != LZMA_OK) { + if (ret == LZMA_MEM_ERROR) + out_of_memory(); + else + internal_error(); + + return -1; + } + + return 0; +} + + +static lzma_ret +single_skip_padding(thread_data *t, uint8_t *in_buf) +{ + // Handle decoding of concatenated Streams. There can be arbitrary + // number of nul-byte padding between the Streams, which must be + // ignored. + // + // NOTE: Concatenating LZMA_Alone files works only if at least + // one of lc, lp, and pb is non-zero. Using the concatenation + // on LZMA_Alone files is strongly discouraged. 
+ while (true) { + while (t->strm.avail_in > 0) { + if (*t->strm.next_in != '\0') + return LZMA_OK; + + ++t->strm.next_in; + --t->strm.avail_in; + } + + if (t->pair->src_eof) + return LZMA_STREAM_END; + + t->strm.next_in = in_buf; + t->strm.avail_in = io_read(t->pair, in_buf, BUFSIZ); + if (t->strm.avail_in == SIZE_MAX) + return LZMA_DATA_ERROR; + } +} + + +static void * +single(thread_data *t) +{ + if (single_init(t)) { + io_close(t->pair, false); + release_thread_data(t); + return NULL; + } + + uint8_t in_buf[BUFSIZ]; + uint8_t out_buf[BUFSIZ]; + lzma_action action = LZMA_RUN; + lzma_ret ret; + bool success = false; + + t->strm.avail_in = 0; + + while (!user_abort) { + if (t->strm.avail_in == 0 && !t->pair->src_eof) { + t->strm.next_in = in_buf; + t->strm.avail_in = io_read(t->pair, in_buf, BUFSIZ); + + if (t->strm.avail_in == SIZE_MAX) + break; + else if (t->pair->src_eof + && opt_mode == MODE_COMPRESS) + action = LZMA_FINISH; + } + + t->strm.next_out = out_buf; + t->strm.avail_out = BUFSIZ; + + ret = lzma_code(&t->strm, action); + + if (opt_mode != MODE_TEST) + if (io_write(t->pair, out_buf, + BUFSIZ - t->strm.avail_out)) + break; + + if (ret != LZMA_OK) { + if (ret == LZMA_STREAM_END) { + if (opt_mode == MODE_COMPRESS) { + success = true; + break; + } + + // Support decoding concatenated .lzma files. + ret = single_skip_padding(t, in_buf); + + if (ret == LZMA_STREAM_END) { + assert(t->pair->src_eof); + success = true; + break; + } + + if (ret == LZMA_OK && !single_init(t)) + continue; + + break; + + } else { + errmsg(V_ERROR, "%s: %s", t->pair->src_name, + str_strm_error(ret)); + break; + } + } + } + + io_close(t->pair, success); + release_thread_data(t); + + return NULL; +} + + +/////////////////////////////// +// Multiple threads per file // +/////////////////////////////// + +// TODO + +// I'm not sure what would the best way to implement this. Here's one +// possible way: +// - Reader thread would read the input data and control the coders threads. 
+// - Every coder thread is associated with input and output buffer pools. +// The input buffer pool is filled by reader thread, and the output buffer +// pool is emptied by the writer thread. +// - Writer thread writes the output data of the oldest living coder thread. +// +// The per-file thread started by the application's main thread is used as +// the reader thread. In the beginning, it starts the writer thread and the +// first coder thread. The coder thread would be left waiting for input from +// the reader thread, and the writer thread would be waiting for input from +// the coder thread. +// +// The reader thread reads the input data into a ring buffer, whose size +// depends on the value returned by lzma_chunk_size(). If the ring buffer +// gets full, the buffer is marked "to be finished", which indicates to +// the coder thread that no more input is coming. Then a new coder thread +// would be started. +// +// TODO + +/* +typedef struct { + /// Buffers + uint8_t (*buffers)[BUFSIZ]; + + /// Number of buffers + size_t buffer_count; + + /// buffers[read_pos] is the buffer currently being read. Once finish + /// is true and read_pos == write_pos, end of input has been reached. + size_t read_pos; + + /// buffers[write_pos] is the buffer into which data is currently + /// being written. + size_t write_pos; + + /// This variable matters only when read_pos == write_pos && finish. + /// In that case, this variable will contain the size of the + /// buffers[read_pos]. + size_t last_size; + + /// True once no more data is being written to the buffer. When this + /// is set, the last_size variable must have been set too. 
+ bool finish; + + /// Mutex to protect access to the variables in this structure + pthread_mutex_t mutex; + + /// Condition to indicate when another thread can continue + pthread_cond_t cond; +} mem_pool; + + +static foo +multi_reader(thread_data *t) +{ + bool done = false; + + do { + const size_t size = io_read(t->pair, + m->buffers + m->write_pos, BUFSIZ); + if (size == SIZE_MAX) { + // TODO + } else if (t->pair->src_eof) { + m->last_size = size; + } + + pthread_mutex_lock(&m->mutex); + + if (++m->write_pos == m->buffer_count) + m->write_pos = 0; + + if (m->write_pos == m->read_pos || t->pair->src_eof) + m->finish = true; + + pthread_cond_signal(&m->cond); + pthread_mutex_unlock(&m->mutex); + + } while (!m->finish); + + return done ? 0 : -1; +} + + +static foo +multi_code() +{ + lzma_action = LZMA_RUN; + + while (true) { + pthread_mutex_lock(&m->mutex); + + while (m->read_pos == m->write_pos && !m->finish) + pthread_cond_wait(&m->cond, &m->mutex); + + pthread_mutex_unlock(&m->mutex); + + if (m->finish) { + t->strm.avail_in = m->last_size; + if (opt_mode == MODE_COMPRESS) + action = LZMA_FINISH; + } else { + t->strm.avail_in = BUFSIZ; + } + + t->strm.next_in = m->buffers + m->read_pos; + + const lzma_ret ret = lzma_code(&t->strm, action); + + } +} + +*/ + + +/////////////////////// +// Starting new file // +/////////////////////// + +extern void +process_file(const char *filename) +{ + thread_data *t = get_thread_data(); + if (t == NULL) + return; // User abort + + // If this fails, it shows appropriate error messages too. + t->pair = io_open(filename); + if (t->pair == NULL) { + release_thread_data(t); + return; + } + + // TODO Currently only one-thread-per-file mode is implemented. 
+ + if (create_thread(&single, t)) { + io_close(t->pair, false); + release_thread_data(t); + } + + return; +} diff --git a/src/lzma/process.h b/src/lzma/process.h new file mode 100644 index 00000000..7fdfbce6 --- /dev/null +++ b/src/lzma/process.h @@ -0,0 +1,30 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file process.h +/// \brief Compresses or uncompresses a file +// +// Copyright (C) 2007 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef PROCESS_H +#define PROCESS_H + +#include "private.h" + + +extern void process_init(void); + +extern void process_file(const char *filename); + +#endif diff --git a/src/lzma/suffix.c b/src/lzma/suffix.c new file mode 100644 index 00000000..57afce82 --- /dev/null +++ b/src/lzma/suffix.c @@ -0,0 +1,145 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file suffix.c +/// \brief Checks filename suffix and creates the destination filename +// +// Copyright (C) 2007 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version.
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + + +static const struct { + const char *compressed; + const char *uncompressed; +} suffixes[] = { + { ".lzma", "" }, + { ".tlz", ".tar" }, + { ".ylz", ".yar" }, + { NULL, NULL } +}; + + +/// \brief Checks if src_name has given compressed_suffix +/// +/// \param suffix Filename suffix to look for +/// \param src_name Input filename +/// \param src_len strlen(src_name) +/// +/// \return If src_name has the suffix, src_len - strlen(suffix) is +/// returned. It's always a positive integer. Otherwise zero +/// is returned. +static size_t +test_suffix(const char *suffix, const char *src_name, size_t src_len) +{ + const size_t suffix_len = strlen(suffix); + + // The filename must have at least one character in addition to + // the suffix. src_name may contain path to the filename, so we + // need to check for directory separator too. + if (src_len <= suffix_len || src_name[src_len - suffix_len - 1] == '/') + return 0; + + if (strcmp(suffix, src_name + src_len - suffix_len) == 0) + return src_len - suffix_len; + + return 0; +} + + +/// \brief Removes the filename suffix of the compressed file +/// +/// \return Name of the uncompressed file, or NULL if file has unknown +/// suffix. 
+static char * +uncompressed_name(const char *src_name) +{ + const char *new_suffix = ""; + const size_t src_len = strlen(src_name); + size_t new_len = 0; + + for (size_t i = 0; suffixes[i].compressed != NULL; ++i) { + new_len = test_suffix(suffixes[i].compressed, + src_name, src_len); + if (new_len != 0) { + new_suffix = suffixes[i].uncompressed; + break; + } + } + + if (new_len == 0 && opt_suffix != NULL) + new_len = test_suffix(opt_suffix, src_name, src_len); + + if (new_len == 0) { + errmsg(V_WARNING, _("%s: Filename has an unknown suffix, " + "skipping"), src_name); + return NULL; + } + + const size_t new_suffix_len = strlen(new_suffix); + char *dest_name = malloc(new_len + new_suffix_len + 1); + if (dest_name == NULL) { + out_of_memory(); + return NULL; + } + + memcpy(dest_name, src_name, new_len); + memcpy(dest_name + new_len, new_suffix, new_suffix_len); + dest_name[new_len + new_suffix_len] = '\0'; + + return dest_name; +} + + +/// \brief Appends suffix to src_name +static char * +compressed_name(const char *src_name) +{ + const size_t src_len = strlen(src_name); + + for (size_t i = 0; suffixes[i].compressed != NULL; ++i) { + if (test_suffix(suffixes[i].compressed, src_name, src_len) + != 0) { + errmsg(V_WARNING, _("%s: File already has `%s' " + "suffix, skipping"), src_name, + suffixes[i].compressed); + return NULL; + } + } + + const char *suffix = opt_suffix != NULL + ? opt_suffix : suffixes[0].compressed; + const size_t suffix_len = strlen(suffix); + + char *dest_name = malloc(src_len + suffix_len + 1); + if (dest_name == NULL) { + out_of_memory(); + return NULL; + } + + memcpy(dest_name, src_name, src_len); + memcpy(dest_name + src_len, suffix, suffix_len); + dest_name[src_len + suffix_len] = '\0'; + + return dest_name; +} + + +extern char * +get_dest_name(const char *src_name) +{ + return opt_mode == MODE_COMPRESS + ? 
compressed_name(src_name) + : uncompressed_name(src_name); +} diff --git a/src/lzma/suffix.h b/src/lzma/suffix.h new file mode 100644 index 00000000..08315659 --- /dev/null +++ b/src/lzma/suffix.h @@ -0,0 +1,25 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file suffix.h +/// \brief Checks filename suffix and creates the destination filename +// +// Copyright (C) 2007 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef SUFFIX_H +#define SUFFIX_H + +extern char *get_dest_name(const char *src_name); + +#endif diff --git a/src/lzma/util.c b/src/lzma/util.c new file mode 100644 index 00000000..6ef6eb0d --- /dev/null +++ b/src/lzma/util.c @@ -0,0 +1,182 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file util.c +/// \brief Miscellaneous utility functions +// +// Copyright (C) 2007 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + + +/// \brief Fancy version of strtoull() +/// +/// \param name Name of the option to show in case of an error +/// \param value String containing the number to be parsed; may +/// contain suffixes "k", "M", "G", "Ki", "Mi", or "Gi" +/// \param min Minimum valid value +/// \param max Maximum valid value +/// +/// \return Parsed value that is in the range [min, max]. Does not return +/// if an error occurs. +/// +extern uint64_t +str_to_uint64(const char *name, const char *value, uint64_t min, uint64_t max) +{ + uint64_t result = 0; + + // Skip blanks. + while (*value == ' ' || *value == '\t') + ++value; + + if (*value < '0' || *value > '9') { + errmsg(V_ERROR, _("%s: Value is not a non-negative " + "decimal integer"), + value); + my_exit(ERROR); + } + + do { + // Don't overflow. + if (result > (UINT64_MAX - 9) / 10) + goto error; + + result *= 10; + result += *value - '0'; + ++value; + } while (*value >= '0' && *value <= '9'); + + if (*value != '\0') { + // Look for suffix. + static const struct { + const char *name; + uint64_t multiplier; + } suffixes[] = { + { "k", UINT64_C(1000) }, + { "M", UINT64_C(1000000) }, + { "G", UINT64_C(1000000000) }, + { "Ki", UINT64_C(1024) }, + { "Mi", UINT64_C(1048576) }, + { "Gi", UINT64_C(1073741824) }, + { NULL, 0 } + }; + + uint64_t multiplier = 0; + for (size_t i = 0; suffixes[i].name != NULL; ++i) { + if (strcmp(value, suffixes[i].name) == 0) { + multiplier = suffixes[i].multiplier; + break; + } + } + + if (multiplier == 0) { + errmsg(V_ERROR, _("%s: Invalid multiplier suffix. " + "Valid suffixes:"), value); + errmsg(V_ERROR, "`k' (10^3), `M' (10^6), `G' (10^9) " + "`Ki' (2^10), `Mi' (2^20), " + "`Gi' (2^30)"); + my_exit(ERROR); + } + + // Don't overflow here either. 
+ if (result > UINT64_MAX / multiplier) + goto error; + + result *= multiplier; + } + + if (result < min || result > max) + goto error; + + return result; + +error: + errmsg(V_ERROR, _("Value of the option `%s' must be in the range " + "[%llu, %llu]"), name, + (unsigned long long)(min), + (unsigned long long)(max)); + my_exit(ERROR); +} + + +/// \brief Gets filename part from pathname+filename +/// +/// \return Pointer in the filename where the actual filename starts. +/// If the last character is a slash, NULL is returned. +/// +extern const char * +str_filename(const char *name) +{ + const char *base = strrchr(name, '/'); + + if (base == NULL) { + base = name; + } else if (*++base == '\0') { + base = NULL; + errmsg(V_ERROR, _("%s: Invalid filename"), name); + } + + return base; +} + + +/* +/// \brief Simple quoting to get rid of ASCII control characters +/// +/// This is not so cool and locale-dependent, but should be good enough +/// At least we don't print any control characters on the terminal. +/// +extern char * +str_quote(const char *str) +{ + size_t dest_len = 0; + bool has_ctrl = false; + + while (str[dest_len] != '\0') + if (*(unsigned char *)(str + dest_len++) < 0x20) + has_ctrl = true; + + char *dest = malloc(dest_len + 1); + if (dest != NULL) { + if (has_ctrl) { + for (size_t i = 0; i < dest_len; ++i) + if (*(unsigned char *)(str + i) < 0x20) + dest[i] = '?'; + else + dest[i] = str[i]; + + dest[dest_len] = '\0'; + + } else { + // Usually there are no control characters, + // so we can optimize. 
+ memcpy(dest, str, dest_len + 1); + } + } + + return dest; +} +*/ + + +extern bool +is_empty_filename(const char *filename) +{ + if (filename[0] == '\0') { + errmsg(V_WARNING, _("Empty filename, skipping")); + return true; + } + + return false; +} diff --git a/src/lzma/util.h b/src/lzma/util.h new file mode 100644 index 00000000..91bd9ba3 --- /dev/null +++ b/src/lzma/util.h @@ -0,0 +1,32 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file util.h +/// \brief Miscellaneous utility functions +// +// Copyright (C) 2007 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef UTIL_H +#define UTIL_H + +#include "private.h" + +extern uint64_t str_to_uint64(const char *name, const char *value, + uint64_t min, uint64_t max); + +extern const char *str_filename(const char *filename); + +extern bool is_empty_filename(const char *filename); + +#endif diff --git a/src/lzmadec/Makefile.am b/src/lzmadec/Makefile.am new file mode 100644 index 00000000..63e391d5 --- /dev/null +++ b/src/lzmadec/Makefile.am @@ -0,0 +1,27 @@ +## +## Copyright (C) 2007 Lasse Collin +## +## This program is free software; you can redistribute it and/or +## modify it under the terms of the GNU Lesser General Public +## License as published by the Free Software Foundation; either +## version 2.1 of the License, or (at your option) any later version. 
+## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## Lesser General Public License for more details. +## + +bin_PROGRAMS = lzmadec + +lzmadec_SOURCES = lzmadec.c +lzmadec_CPPFLAGS = \ + -I@top_srcdir@/src/common \ + -I@top_srcdir@/src/liblzma/api \ + -I@top_builddir@/lib +lzmadec_LDFLAGS = -static +lzmadec_LDADD = @top_builddir@/src/liblzma/liblzma.la + +if COND_GNULIB +lzmadec_LDADD += @top_builddir@/lib/libgnu.a +endif diff --git a/src/lzmadec/lzmadec.c b/src/lzmadec/lzmadec.c new file mode 100644 index 00000000..93eed090 --- /dev/null +++ b/src/lzmadec/lzmadec.c @@ -0,0 +1,515 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzmadec.c +/// \brief Simple single-threaded tool to uncompress .lzma files +// +// Copyright (C) 2007 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "sysdefs.h" + +#ifdef HAVE_ERRNO_H +# include <errno.h> +#else +extern int errno; +#endif + +#include <stdio.h> +#include <unistd.h> + +#include "getopt.h" +#include "physmem.h" + + +enum return_code { + SUCCESS, + ERROR, + WARNING, +}; + + +enum format_type { + FORMAT_AUTO, + FORMAT_NATIVE, + FORMAT_ALONE, +}; + + +enum { + OPTION_FORMAT = INT_MIN, +}; + + +/// Input buffer +static uint8_t in_buf[BUFSIZ]; + +/// Output buffer +static uint8_t out_buf[BUFSIZ]; + +/// Decoder +static lzma_stream strm = LZMA_STREAM_INIT; + +/// Number of bytes to use memory at maximum +static size_t mem_limit; + +/// Memory allocation hooks +static lzma_allocator allocator = { + .alloc = (void *(*)(void *, size_t, size_t))(&lzma_memlimit_alloc), + .free = (void (*)(void *, void *))(&lzma_memlimit_free), + .opaque = NULL, +}; + +/// Program name to be shown in error messages +static const char *argv0; + +/// File currently being processed +static FILE *file; + +/// Name of the file currently being processed +static const char *filename; + +static enum return_code exit_status = SUCCESS; + +static enum format_type format_type = FORMAT_AUTO; + +static bool force = false; + + +static void lzma_attribute((noreturn)) +help(void) +{ + printf( +"Usage: %s [OPTION]... 
[FILE]...\n" +"Uncompress files in the .lzma format to the standard output.\n" +"\n" +" -c, --stdout (ignored)\n" +" -d, --decompress (ignored)\n" +" -k, --keep (ignored)\n" +" -f, --force allow reading compressed data from a terminal\n" +" -M, --memory=NUM use NUM bytes of memory at maximum; the suffixes\n" +" k, M, G, Ki, Mi, and Gi are supported.\n" +" --format=FMT accept only files in the given file format;\n" +" possible FMTs are `auto', `native', `single',\n" +" `multi', and `alone', of which `single' and `multi'\n" +" are aliases for `native'\n" +" -h, --help display this help and exit\n" +" -V, --version display version and license information and exit\n" +"\n" +"With no FILE, or when FILE is -, read standard input.\n" +"\n" +"On this configuration, the tool will use about %zu MiB of memory at maximum.\n" +"\n" +"Report bugs to <" PACKAGE_BUGREPORT "> (in English or Finnish).\n", + argv0, (mem_limit + 512 * 1024) / (1024 * 1024)); + exit(0); +} + + +static void lzma_attribute((noreturn)) +version(void) +{ + printf( +"lzmadec (LZMA Utils) " PACKAGE_VERSION "\n" +"\n" +"Copyright (C) 1999-2006 Igor Pavlov\n" +"Copyright (C) 2007 Lasse Collin\n" +"\n" +"This program is free software; you can redistribute it and/or\n" +"modify it under the terms of the GNU Lesser General Public\n" +"License as published by the Free Software Foundation; either\n" +"version 2.1 of the License, or (at your option) any later version.\n" +"\n" +"This program is distributed in the hope that it will be useful,\n" +"but WITHOUT ANY WARRANTY; without even the implied warranty of\n" +"MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n" +"Lesser General Public License for more details.\n" +"\n"); + exit(0); +} + + +/// Finds out the amount of physical memory in the system, and sets +/// a default memory usage limit. 
+static void
+set_default_mem_limit(void)
+{
+	uint64_t mem = physmem();
+	if (mem != 0) {
+		mem /= 3; // Default limit is one third of the physical RAM.
+
+#if UINT64_MAX > SIZE_MAX
+		if (mem > SIZE_MAX)
+			mem = SIZE_MAX; // Clamp so the value fits in size_t.
+#endif
+
+		mem_limit = mem; // Already divided above; don't divide twice.
+	} else {
+		// Cannot autodetect, use 10 MiB as the default limit.
+		mem_limit = (1U << 23) + (1U << 21);
+	}
+
+	return;
+}
+
+
+/// \brief      Converts a decimal string with optional suffix to size_t
+///
+/// Adapted from src/lzma/util.c. Exits with an error on a malformed number
+/// or unknown suffix; returns SIZE_MAX if the value would overflow size_t.
+static size_t
+str_to_size(const char *value)
+{
+	size_t result = 0;
+
+	if (*value < '0' || *value > '9') {
+		fprintf(stderr, "%s: %s: Not a number\n", argv0, value);
+		exit(ERROR);
+	}
+
+	do {
+		// Don't overflow.
+		if (result > (SIZE_MAX - 9) / 10)
+			return SIZE_MAX;
+
+		result *= 10;
+		result += *value - '0';
+		++value;
+	} while (*value >= '0' && *value <= '9');
+
+	if (*value != '\0') {
+		// Look for suffix.
+		static const struct {
+			const char *name;
+			size_t multiplier;
+		} suffixes[] = {
+			{ "k", 1000 },
+			{ "M", 1000000 },
+			{ "G", 1000000000 },
+			{ "Ki", 1024 },
+			{ "Mi", 1048576 },
+			{ "Gi", 1073741824 },
+			{ NULL, 0 }
+		};
+
+		size_t multiplier = 0; // Stays 0 if no suffix matches.
+		for (size_t i = 0; suffixes[i].name != NULL; ++i) {
+			if (strcmp(value, suffixes[i].name) == 0) {
+				multiplier = suffixes[i].multiplier;
+				break;
+			}
+		}
+
+		if (multiplier == 0) {
+			fprintf(stderr, "%s: %s: Invalid suffix\n",
+					argv0, value);
+			exit(ERROR);
+		}
+
+		// Don't overflow here either.
+		if (result > SIZE_MAX / multiplier)
+			return SIZE_MAX;
+
+		result *= multiplier;
+	}
+
+	return result;
+}
+
+
+/// Parses command line options.
+static void +parse_options(int argc, char **argv) +{ + static const char short_opts[] = "cdkfM:hV"; + static const struct option long_opts[] = { + { "stdout", no_argument, NULL, 'c' }, + { "to-stdout", no_argument, NULL, 'c' }, + { "decompress", no_argument, NULL, 'd' }, + { "uncompress", no_argument, NULL, 'd' }, + { "force", no_argument, NULL, 'f' }, + { "keep", no_argument, NULL, 'k' }, + { "memory", required_argument, NULL, 'M' }, + { "format", required_argument, NULL, OPTION_FORMAT }, + { "help", no_argument, NULL, 'h' }, + { "version", no_argument, NULL, 'V' }, + { NULL, 0, NULL, 0 } + }; + + int c; + + while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL)) + != -1) { + switch (c) { + case 'c': + case 'd': + case 'k': + break; + + case 'f': + force = true; + break; + + case 'M': + mem_limit = str_to_size(optarg); + break; + + case 'h': + help(); + + case 'V': + version(); + + case OPTION_FORMAT: { + if (strcmp("auto", optarg) == 0) { + format_type = FORMAT_AUTO; + } else if (strcmp("native", optarg) == 0 + || strcmp("single", optarg) == 0 + || strcmp("multi", optarg) == 0) { + format_type = FORMAT_NATIVE; + } else if (strcmp("alone", optarg) == 0) { + format_type = FORMAT_ALONE; + } else { + fprintf(stderr, "%s: %s: Unknown file format " + "name\n", argv0, optarg); + exit(ERROR); + } + break; + } + + default: + exit(ERROR); + } + } + + return; +} + + +/// Initializes lzma_stream structure for decoding of a new Stream. 
+static void +init(void) +{ + lzma_ret ret; + + switch (format_type) { + case FORMAT_AUTO: + ret = lzma_auto_decoder(&strm, NULL, NULL); + break; + + case FORMAT_NATIVE: + ret = lzma_stream_decoder(&strm, NULL, NULL); + break; + + case FORMAT_ALONE: + ret = lzma_alone_decoder(&strm); + break; + + default: + assert(0); + ret = LZMA_PROG_ERROR; + } + + if (ret != LZMA_OK) { + fprintf(stderr, "%s: ", argv0); + + if (ret == LZMA_MEM_ERROR) + fprintf(stderr, "%s\n", strerror(ENOMEM)); + else + fprintf(stderr, "Internal program error (bug)\n"); + + exit(ERROR); + } + + return; +} + + +static void +read_input(void) +{ + strm.next_in = in_buf; + strm.avail_in = fread(in_buf, 1, BUFSIZ, file); + + if (ferror(file)) { + // POSIX says that fread() sets errno if an error occurred. + // ferror() doesn't touch errno. + fprintf(stderr, "%s: %s: Error reading input file: %s\n", + argv0, filename, strerror(errno)); + exit(ERROR); + } + + return; +} + + +static bool +skip_padding(void) +{ + // Handle concatenated Streams. There can be arbitrary number of + // nul-byte padding between the Streams, which must be ignored. + // + // NOTE: Concatenating LZMA_Alone files works only if at least + // one of lc, lp, and pb is non-zero. Using the concatenation + // on LZMA_Alone files is strongly discouraged. 
+ while (true) { + while (strm.avail_in > 0) { + if (*strm.next_in != '\0') + return true; + + ++strm.next_in; + --strm.avail_in; + } + + if (feof(file)) + return false; + + read_input(); + } +} + + +static void +uncompress(void) +{ + if (file == stdin && !force && isatty(STDIN_FILENO)) { + fprintf(stderr, "%s: Compressed data not read from " + "a terminal.\n%s: Use `-f' to force reading " + "from a terminal, or `-h' for help.\n", + argv0, argv0); + exit(ERROR); + } + + init(); + strm.avail_in = 0; + + while (true) { + if (strm.avail_in == 0) + read_input(); + + strm.next_out = out_buf; + strm.avail_out = BUFSIZ; + + const lzma_ret ret = lzma_code(&strm, LZMA_RUN); + + // Write and check write error before checking decoder error. + // This way as much data as possible gets written to output + // even if decoder detected an error. Checking write error + // needs to be done before checking decoder error due to + // how concatenated Streams are handled a few lines later. + const size_t write_size = BUFSIZ - strm.avail_out; + if (fwrite(out_buf, 1, write_size, stdout) != write_size) { + // Wouldn't be a surprise if writing to stderr would + // fail too but at least try to show an error message. 
+ fprintf(stderr, "%s: Cannot write to " + "standard output: %s\n", argv0, + strerror(errno)); + exit(ERROR); + } + + if (ret != LZMA_OK) { + if (ret == LZMA_STREAM_END) { + if (skip_padding()) { + init(); + continue; + } + + return; + } + + fprintf(stderr, "%s: %s: ", argv0, filename); + + switch (ret) { + case LZMA_DATA_ERROR: + fprintf(stderr, "File is corrupt\n"); + exit(ERROR); + + case LZMA_HEADER_ERROR: + fprintf(stderr, "Unsupported file " + "format or filters\n"); + exit(ERROR); + + case LZMA_MEM_ERROR: + fprintf(stderr, "%s\n", strerror(ENOMEM)); + exit(ERROR); + + case LZMA_BUF_ERROR: + fprintf(stderr, "Unexpected end of input\n"); + exit(ERROR); + + case LZMA_UNSUPPORTED_CHECK: + fprintf(stderr, "Unsupported type of " + "integrity check; not " + "verifying file integrity\n"); + exit_status = WARNING; + break; + + case LZMA_PROG_ERROR: + default: + fprintf(stderr, "Internal program " + "error (bug)\n"); + exit(ERROR); + } + } + } +} + + +int +main(int argc, char **argv) +{ + argv0 = argv[0]; + + set_default_mem_limit(); + + parse_options(argc, argv); + + lzma_init_decoder(); + + lzma_memlimit *mem_limitter = lzma_memlimit_create(mem_limit); + if (mem_limitter == NULL) { + fprintf(stderr, "%s: %s\n", argv0, strerror(ENOMEM)); + exit(ERROR); + } + + allocator.opaque = mem_limitter; + strm.allocator = &allocator; + + if (optind == argc) { + file = stdin; + filename = "(stdin)"; + uncompress(); + } else { + do { + if (strcmp(argv[optind], "-") == 0) { + file = stdin; + filename = "(stdin)"; + uncompress(); + } else { + filename = argv[optind]; + file = fopen(filename, "rb"); + if (file == NULL) { + fprintf(stderr, "%s: %s: %s\n", + argv0, filename, + strerror(errno)); + exit(ERROR); + } + + uncompress(); + fclose(file); + } + } while (++optind < argc); + } + + return exit_status; +} diff --git a/src/scripts/Makefile.am b/src/scripts/Makefile.am new file mode 100644 index 00000000..be696f3d --- /dev/null +++ b/src/scripts/Makefile.am @@ -0,0 +1,24 @@ 
+dist_bin_SCRIPTS = lzdiff lzgrep lzmore +dist_man_MANS = lzdiff.1 lzgrep.1 lzmore.1 + +install-exec-hook: + cd $(DESTDIR)$(bindir) && \ + rm -f lzcmp lzegrep lzfgrep lzless && \ + $(LN_S) lzdiff lzcmp && \ + $(LN_S) lzgrep lzegrep && \ + $(LN_S) lzgrep lzfgrep && \ + $(LN_S) lzmore lzless + +install-data-hook: + cd $(DESTDIR)$(mandir)/man1 && \ + rm -f lzcmp.1 lzegrep.1 lzfgrep.1 lzless.1 && \ + $(LN_S) lzdiff.1 lzcmp.1 && \ + $(LN_S) lzgrep.1 lzegrep.1 && \ + $(LN_S) lzgrep.1 lzfgrep.1 && \ + $(LN_S) lzmore.1 lzless.1 + +uninstall-hook: + cd $(DESTDIR)$(bindir) && \ + rm -f lzcmp lzegrep lzfgrep lzless + cd $(DESTDIR)$(mandir)/man1 && \ + rm -f lzcmp.1 lzegrep.1 lzfgrep.1 lzless.1 diff --git a/src/scripts/lzdiff b/src/scripts/lzdiff new file mode 100755 index 00000000..c867cbea --- /dev/null +++ b/src/scripts/lzdiff @@ -0,0 +1,67 @@ +#!/bin/sh +# sh is buggy on RS/6000 AIX 3.2. Replace above line with #!/bin/ksh + +# lzcmp and lzdiff are used to invoke the cmp or the diff pro- +# gram on compressed files. All options specified are passed +# directly to cmp or diff. If only 1 file is specified, then +# the files compared are file1 and an uncompressed file1.lzma. +# If two files are specified, then they are uncompressed and +# fed to cmp or diff. The exit status from cmp or diff is +# preserved. 
+
+prog=`echo $0 | sed 's|.*/||'`
+case "$prog" in
+  *cmp) comp=${CMP-cmp}   ;;
+  *)    comp=${DIFF-diff} ;;
+esac
+
+OPTIONS=
+FILES=
+for ARG
+do
+  case "$ARG" in
+  -*) OPTIONS="$OPTIONS $ARG";;
+  *) if test -f "$ARG"; then
+       FILES="$FILES $ARG"
+     else
+       echo "${prog}: $ARG not found or not a regular file"
+       exit 2
+     fi ;;
+  esac
+done
+if test -z "$FILES"; then
+  echo "Usage: $prog [${comp}_options] file [file]"
+  exit 2
+fi
+set $FILES  # unquoted on purpose: splits the list back into $1, $2
+if test $# -eq 1; then
+  FILE=`echo "$1" | sed 's/[-.][tlaz]*$//'`
+  lzma -dc "$1" | $comp $OPTIONS - "$FILE"
+
+elif test $# -eq 2; then
+  case "$1" in
+  *[-.]lzma | *.t[la]z)
+    case "$2" in
+    *[-.]lzma | *.t[la]z)
+      F=`echo "$2" | sed 's|.*/||;s|[-.][tlaz]*||'`
+      TF=`/usr/bin/mktemp ${TMPDIR:-/tmp}/"$F".XXXXXXXXXX` || exit 1
+      trap 'rm -f "$TF"; exit 2' EXIT HUP INT PIPE TERM  # remove temp file on abort
+      lzma -dc "$2" > "$TF" || exit
+      lzma -dc "$1" | $comp $OPTIONS - "$TF"
+      STAT="$?"
+      rm -f "$TF" || STAT=2
+      trap - EXIT HUP INT PIPE TERM  # "-" resets; else the EXIT trap forces status 2
+      exit $STAT;;
+
+    *) lzma -dc "$1" | $comp $OPTIONS - "$2";;
+    esac;;
+  *) case "$2" in
+     *[-.]lzma | *.t[la]z)
+       lzma -dc "$2" | $comp $OPTIONS "$1" -;;
+     *) $comp $OPTIONS "$1" "$2";;
+     esac;;
+  esac
+else
+  echo "Usage: $prog [${comp}_options] file [file]"
+  exit 2
+fi
diff --git a/src/scripts/lzdiff.1 b/src/scripts/lzdiff.1
new file mode 100644
index 00000000..7d15f8b8
--- /dev/null
+++ b/src/scripts/lzdiff.1
@@ -0,0 +1,51 @@
+.TH LZDIFF 1 "24 Jun 2005" "LZMA utils"
+.SH NAME
+lzcmp, lzdiff \- compare LZMA compressed files
+.SH SYNOPSIS
+.B lzcmp
+.RI [ cmp_options "] " file1 " [" file2 ]
+.br
+.B lzdiff
+.RI [ diff_options "] " file1 " [" file2 ]
+.SH DESCRIPTION
+.B lzcmp
+and
+.B lzdiff
+are used to invoke the
+.BR cmp (1)
+or the
+.BR diff (1)
+program on compressed files. All options specified are passed directly to
+.B cmp
+or
+.BR diff "."
+If only 1 file is specified, then the files compared are
+.I file1
+and an uncompressed
+.IB file1 ".lzma\fR."
+If two files are specified, then they are uncompressed if necessary and fed to +.B cmp +or +.BR diff "." +The exit status from +.B cmp +or +.B diff +is preserved. +.SH AUTHORS +This manual page was ripped from +.BR zdiff (1) +shipped in the gzip package. +.SH "SEE ALSO" +.BR cmp (1), +.BR diff (1), +.BR lzmore (1), +.BR lzgrep (1), +.BR lzma (1), +.BR lzmash (1) +.SH BUGS +Messages from the +.B cmp +or +.B diff +programs refer to temporary filenames instead of those specified. diff --git a/src/scripts/lzgrep b/src/scripts/lzgrep new file mode 100755 index 00000000..c872df4e --- /dev/null +++ b/src/scripts/lzgrep @@ -0,0 +1,123 @@ +#!/bin/sh + +# lzgrep -- a wrapper around a grep program that decompresses files as needed +# Adapted to LZMA utils from gzip-1.3.3 + Red Hat's security patches +# Adapted from a version sent by Charles Levert <charles@comm.polymtl.ca> +# Copyright (C) 1998, 2001 Free Software Foundation +# Copyright (C) 1993 Jean-loup Gailly + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +# 02111-1307, USA.
+ +# Improve error handling, this is supported by bash but not all the other +# shells so we hide the possible error: +set -o pipefail > /dev/null 2> /dev/null + +prog=`echo "$0" | sed 's|.*/||'` +case "$prog" in + *egrep) grep=${EGREP-egrep} ;; + *fgrep) grep=${FGREP-fgrep} ;; + *) grep=${GREP-grep} ;; +esac + +pat="" +after_dash_dash="" +files_with_matches=0 +files_without_matches=0 +no_filename=0 +with_filename=0 + +while test $# -ne 0; do + case "$after_dash_dash$1" in + --d* | --rec*) echo >&2 "$0: $1: option not supported"; exit 1;; + --files-with-*) files_with_matches=1;; + --files-witho*) files_without_matches=1;; + --no-f*) no_filename=1;; + --wi*) with_filename=1;; + --*) ;; + -*) + case "$1" in + -*[dr]*) echo >&2 "$0: $1: option not supported"; exit 1;; + esac + case "$1" in + -*H*) with_filename=1;; + esac + case "$1" in + -*h*) no_filename=1;; + esac + case "$1" in + -*L*) files_without_matches=1;; + esac + case "$1" in + -*l*) files_with_matches=1;; + esac;; + esac + case "$after_dash_dash$1" in + -[ef]) opt="$opt $1"; shift; pat="$1" + if test "$grep" = grep; then # grep is buggy with -e on SVR4 + grep=egrep + fi;; + -[ABCdm])opt="$opt $1 $2"; shift;; + --) opt="$opt $1"; after_dash_dash=1;; + -*) opt="$opt $1";; + *) if test -z "$pat"; then + pat="$1" + else + break; + fi;; + esac + shift +done + +if test -z "$pat"; then + echo "grep through lzma files" + echo "usage: $prog [grep_options] pattern [files]" + exit 1 +fi + +if test $# -eq 0; then + lzma -dc | $grep $opt "$pat" + exit $? 
+fi + +res=0 +trap break SIGPIPE +for i do + lzma -dc "$i" | + if test $files_with_matches -eq 1; then + $grep $opt "$pat" > /dev/null && printf "%s\n" "$i" + elif test $files_without_matches -eq 1; then + $grep $opt "$pat" > /dev/null || printf "%s\n" "$i" + elif test $with_filename -eq 0 && { test $# -eq 1 || test $no_filename -eq 1; }; then + $grep $opt "$pat" + else + i=${i//\\/\\\\} + i=${i//|/\\|} + i=${i//&/\\&} + i=`printf "%s" "$i" | tr '\n' ' '` + if test $with_filename -eq 1; then + sed_script="s|^[^:]*:|${i}:|" + else + sed_script="s|^|${i}:|" + fi + $grep $opt "$pat" | sed "$sed_script" + fi + r=$? + test $res -lt $r && res=$r + # SIGPIPE + 128 + test "$r" -eq 141 && exit $res +done +trap - SIGPIPE +exit $res diff --git a/src/scripts/lzgrep.1 b/src/scripts/lzgrep.1 new file mode 100644 index 00000000..65e344dd --- /dev/null +++ b/src/scripts/lzgrep.1 @@ -0,0 +1,61 @@ +.TH LZGREP 1 "24 Jun 2005" "LZMA utils" +.SH NAME +lzgrep \- search LZMA compressed files for a regular expression +.SH SYNOPSIS +.B lzgrep +.RI [ grep_options ] +.RB [ \-e ] +.I pattern +.IR filename ".\|.\|." +.br +.B lzegrep +.RB ... +.br +.B lzfgrep +.RB ... +.SH DESCRIPTION +.B lzgrep +is used to invoke the +.BR grep (1) +on LZMA compressed files. All options specified are passed directly to +.BR grep . +If no file is specified, then the standard input is decompressed +if necessary and fed to grep. +Otherwise the given files are uncompressed if necessary and fed to +.BR grep . +.PP +If +.B lzgrep +is invoked as +.B lzegrep +or +.B lzfgrep +then +.B egrep +or +.B fgrep +is used instead of +.B grep. +If the GREP environment variable is set, +.B lzgrep +uses it as the +.B grep +program to be invoked. For example: + + for sh: GREP=fgrep lzgrep string files + for csh: (setenv GREP fgrep; lzgrep string files) +.SH AUTHORS +Original +.BR zgrep (1) +manual page by Charles Levert <charles@comm.polymtl.ca>. +.PP +Minor modifications for LZMA utils by Lasse Collin +<lasse.collin@tukaani.org>. 
+.SH "SEE ALSO" +.BR grep (1), +.BR egrep (1), +.BR fgrep (1), +.BR lzdiff (1), +.BR lzmore (1), +.BR lzma (1), +.BR lzmash (1) diff --git a/src/scripts/lzmore b/src/scripts/lzmore new file mode 100755 index 00000000..d694774d --- /dev/null +++ b/src/scripts/lzmore @@ -0,0 +1,74 @@ +#!/bin/sh + +# Copyright (C) 2001 Free Software Foundation +# Copyright (C) 1992, 1993 Jean-loup Gailly +# Adapted to LZMA utils from gzip-1.3.3 + Red Hat's security patches + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +# 02111-1307, USA. + +if test "`echo "$0" | sed 's|.*/||'`" = "lzless"; then + PAGER=less +fi + +if test "`echo -n a`" = "-n a"; then + # looks like a SysV system: + n1=''; n2='\c' +else + n1='-n'; n2='' +fi +oldtty=`stty -g 2>/dev/null` +if stty -cbreak 2>/dev/null; then + cb='cbreak'; ncb='-cbreak' +else + # 'stty min 1' resets eof to ^a on both SunOS and SysV! + cb='min 1 -icanon'; ncb='icanon eof ^d' +fi +if test $? -eq 0 -a -n "$oldtty"; then + trap 'stty $oldtty 2>/dev/null; exit' 0 2 3 5 10 13 15 +else + trap 'stty $ncb echo 2>/dev/null; exit' 0 2 3 5 10 13 15 +fi + +if test $# = 0; then + if test -t 0; then + echo usage: lzmore files... 
+ else + lzma -dc | eval ${PAGER-more} + fi +else + FIRST=1 + for FILE + do + < "$FILE" || continue + if test $FIRST -eq 0; then + echo $n1 "--More--(Next file: $FILE)$n2" + stty $cb -echo 2>/dev/null + ANS=`dd bs=1 count=1 2>/dev/null` + stty $ncb echo 2>/dev/null + echo " " + if test "$ANS" = 'e' -o "$ANS" = 'q'; then + exit + fi + fi + if test "$ANS" != 's'; then + echo "------> $FILE <------" + lzma -dc "$FILE" | eval ${PAGER-more} + fi + if test -t 1; then # prompt between files only when stdout is a terminal + FIRST=0 + fi + done +fi diff --git a/src/scripts/lzmore.1 b/src/scripts/lzmore.1 new file mode 100644 index 00000000..77207261 --- /dev/null +++ b/src/scripts/lzmore.1 @@ -0,0 +1,55 @@ +.TH LZMORE 1 "30 Jun 2005" "LZMA utils" +.SH NAME +lzmore, lzless \- view LZMA compressed (text) files +.SH SYNOPSIS +.B lzmore +.RI [ "filename ..." ] +.br +.B lzless +.RI [ "filename ..." ] +.SH DESCRIPTION +.B lzmore +is a filter which allows examination of LZMA compressed text files +one screenful at a time on a soft-copy terminal. +.PP +To use a pager other than the default +.B more, +set environment variable +.B PAGER +to the name of the desired program, such as +.BR less . +If +.B lzmore +is called as +.B lzless +then +.B less +is used as the +.BR PAGER . +.TP +.BR e " or " q +When the prompt --More--(Next file: +.IR file ) +is printed, this command causes lzmore to exit. +.TP +.B s +When the prompt --More--(Next file: +.IR file ) +is printed, this command causes lzmore to skip the next file and continue. +.PP +For list of keyboard commands supported while actually viewing the +content of a file, refer to manual of the pager you use, usually +.BR more (1) +or +.BR less (1). +.SH AUTHORS +This manual page was ripped from +.BR zmore (1) +shipped in gzip package. 
+.SH "SEE ALSO" +.BR more (1), +.BR zmore (1), +.BR lzdiff (1), +.BR lzgrep (1), +.BR lzma (1), +.BR lzmash (1) diff --git a/tests/Makefile.am b/tests/Makefile.am new file mode 100644 index 00000000..30392390 --- /dev/null +++ b/tests/Makefile.am @@ -0,0 +1,43 @@ +## +## Copyright (C) 2007 Lasse Collin +## +## This library is free software; you can redistribute it and/or +## modify it under the terms of the GNU Lesser General Public +## License as published by the Free Software Foundation; either +## version 2.1 of the License, or (at your option) any later version. +## +## This library is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## Lesser General Public License for more details. +## + +EXTRA_DIST = files tests.h + +AM_CPPFLAGS = \ + -I@top_srcdir@/src/common \ + -I@top_srcdir@/src/liblzma/api \ + -I@top_builddir@/lib + +LDADD = @top_builddir@/src/liblzma/liblzma.la + +if COND_GNULIB +LDADD += @top_builddir@/lib/libgnu.a +endif + +check_PROGRAMS = \ + test_check \ + test_stream_flags \ + test_filter_flags \ + test_block_header \ + test_index \ + test_info + +test_check_SOURCES = test_check.c +test_stream_flags_SOURCES = test_stream_flags.c +test_filter_flags_SOURCES = test_filter_flags.c +test_block_header_SOURCES = test_block_header.c +test_index_SOURCES = test_index.c +test_info_SOURCES = test_info.c + +TESTS = $(check_PROGRAMS) diff --git a/tests/test_block.c b/tests/test_block.c new file mode 100644 index 00000000..89063b93 --- /dev/null +++ b/tests/test_block.c @@ -0,0 +1,59 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file test_block.c +/// \brief Tests Block coders +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the 
Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "tests.h" + + +static uint8_t text[] = "Hello world!"; +static uint8_t buffer[4096]; +static lzma_options_block block_options; +static lzma_stream strm = LZMA_STREAM_INIT; + + +static void +test1(void) +{ + +} + + +int +main() +{ + lzma_init(); + + block_options = (lzma_options_block){ + .check_type = LZMA_CHECK_NONE, + .has_eopm = true, + .has_uncompressed_size_in_footer = false, + .has_backward_size = false, + .handle_padding = false, + .total_size = LZMA_VLI_VALUE_UNKNOWN, + .compressed_size = LZMA_VLI_VALUE_UNKNOWN, + .uncompressed_size = LZMA_VLI_VALUE_UNKNOWN, + .header_size = 5, + }; + block_options.filters[0].id = LZMA_VLI_VALUE_UNKNOWN; + block_options.filters[0].options = NULL; + + + lzma_end(&strm); + + return 0; +} diff --git a/tests/test_block_header.c b/tests/test_block_header.c new file mode 100644 index 00000000..c6767e38 --- /dev/null +++ b/tests/test_block_header.c @@ -0,0 +1,352 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file test_block_header.c +/// \brief Tests Block Header coders +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. 
+// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "tests.h" + + +static uint8_t buffer[4096]; +static lzma_stream strm = LZMA_STREAM_INIT; +static lzma_options_block known_options; +static lzma_options_block decoded_options; + +// We want to test zero, one, and two filters in the chain. + +static const lzma_options_filter filters_none[1] = { + { + .id = LZMA_VLI_VALUE_UNKNOWN, + .options = NULL, + }, +}; + +static const lzma_options_filter filters_powerpc[2] = { + { + .id = LZMA_FILTER_POWERPC, + .options = NULL, + }, { + .id = LZMA_VLI_VALUE_UNKNOWN, + .options = NULL, + }, +}; + +static const lzma_options_delta options_delta = { + .distance = 4, +}; + +static const lzma_options_filter filters_delta[3] = { + { + .id = LZMA_FILTER_DELTA, + .options = (void *)(&options_delta), + }, { + .id = LZMA_FILTER_COPY, + .options = NULL, + }, { + .id = LZMA_VLI_VALUE_UNKNOWN, + .options = NULL, + }, +}; + + +static bool +encode(uint32_t header_size) +{ + memcrap(buffer, sizeof(buffer)); + + if (lzma_block_header_size(&known_options) != LZMA_OK) + return true; + + if (known_options.header_size != header_size) + return true; + + if (lzma_block_header_encode(buffer, &known_options) != LZMA_OK) + return true; + + return false; +} + + +static bool +decode_ret(uint32_t header_size, lzma_ret ret_ok) +{ + memcrap(&decoded_options, sizeof(decoded_options)); + decoded_options.has_crc32 = known_options.has_crc32; + + expect(lzma_block_header_decoder(&strm, &decoded_options) == LZMA_OK); + + return decoder_loop_ret(&strm, buffer, header_size, ret_ok); +} + + +static bool +decode(uint32_t header_size) +{ + memcrap(&decoded_options, sizeof(decoded_options)); + decoded_options.has_crc32 = 
known_options.has_crc32; + + expect(lzma_block_header_decoder(&strm, &decoded_options) == LZMA_OK); + + if (decoder_loop(&strm, buffer, header_size)) + return true; + + if (known_options.has_eopm != decoded_options.has_eopm) + return true; + + if (known_options.is_metadata != decoded_options.is_metadata) + return true; + + if (known_options.compressed_size == LZMA_VLI_VALUE_UNKNOWN + && known_options.compressed_reserve != 0) { + if (decoded_options.compressed_size != 0) + return true; + } else if (known_options.compressed_size + != decoded_options.compressed_size) { + return true; + } + + if (known_options.uncompressed_size == LZMA_VLI_VALUE_UNKNOWN + && known_options.uncompressed_reserve != 0) { + if (decoded_options.uncompressed_size != 0) + return true; + } else if (known_options.uncompressed_size + != decoded_options.uncompressed_size) { + return true; + } + + if (known_options.compressed_reserve != 0 + && known_options.compressed_reserve + != decoded_options.compressed_reserve) + return true; + + if (known_options.uncompressed_reserve != 0 + && known_options.uncompressed_reserve + != decoded_options.uncompressed_reserve) + return true; + + if (known_options.padding != decoded_options.padding) + return true; + + return false; +} + + +static bool +code(uint32_t header_size) +{ + return encode(header_size) || decode(header_size); +} + + +static bool +helper_loop(uint32_t unpadded_size, uint32_t multiple) +{ + for (int i = 0; i <= LZMA_BLOCK_HEADER_PADDING_MAX; ++i) { + known_options.padding = i; + if (code(unpadded_size + i)) + return true; + } + + for (int i = 0 - LZMA_BLOCK_HEADER_PADDING_MAX - 1; + i <= LZMA_BLOCK_HEADER_PADDING_MAX + 1; ++i) { + known_options.alignment = i; + + uint32_t size = unpadded_size; + while ((size + known_options.alignment) % multiple) + ++size; + + known_options.padding = LZMA_BLOCK_HEADER_PADDING_AUTO; + if (code(size)) + return true; + + } while (++known_options.alignment + <= LZMA_BLOCK_HEADER_PADDING_MAX + 1); + + return false; 
+} + + +static bool +helper(uint32_t unpadded_size, uint32_t multiple) +{ + known_options.has_crc32 = false; + known_options.is_metadata = false; + if (helper_loop(unpadded_size, multiple)) + return true; + + known_options.has_crc32 = false; + known_options.is_metadata = true; + if (helper_loop(unpadded_size, multiple)) + return true; + + known_options.has_crc32 = true; + known_options.is_metadata = false; + if (helper_loop(unpadded_size + 4, multiple)) + return true; + + known_options.has_crc32 = true; + known_options.is_metadata = true; + if (helper_loop(unpadded_size + 4, multiple)) + return true; + + return false; +} + + +static void +test1(void) +{ + known_options = (lzma_options_block){ + .has_eopm = true, + .compressed_size = LZMA_VLI_VALUE_UNKNOWN, + .uncompressed_size = LZMA_VLI_VALUE_UNKNOWN, + .compressed_reserve = 0, + .uncompressed_reserve = 0, + }; + memcpy(known_options.filters, filters_none, sizeof(filters_none)); + expect(!helper(2, 1)); + + memcpy(known_options.filters, filters_powerpc, + sizeof(filters_powerpc)); + expect(!helper(3, 4)); + + memcpy(known_options.filters, filters_delta, sizeof(filters_delta)); + expect(!helper(5, 1)); + + known_options.padding = LZMA_BLOCK_HEADER_PADDING_MAX + 1; + expect(lzma_block_header_size(&known_options) == LZMA_PROG_ERROR); +} + + +static void +test2_helper(uint32_t unpadded_size, uint32_t multiple) +{ + known_options.has_eopm = true; + known_options.compressed_size = LZMA_VLI_VALUE_UNKNOWN; + known_options.uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; + known_options.compressed_reserve = 1; + known_options.uncompressed_reserve = 1; + expect(!helper(unpadded_size + 2, multiple)); + + known_options.compressed_reserve = LZMA_VLI_BYTES_MAX; + known_options.uncompressed_reserve = LZMA_VLI_BYTES_MAX; + expect(!helper(unpadded_size + 18, multiple)); + + known_options.compressed_size = 1234; + known_options.uncompressed_size = 2345; + expect(!helper(unpadded_size + 18, multiple)); + + 
known_options.compressed_reserve = 1; + known_options.uncompressed_reserve = 1; + expect(lzma_block_header_size(&known_options) == LZMA_PROG_ERROR); +} + + +static void +test2(void) +{ + memcpy(known_options.filters, filters_none, sizeof(filters_none)); + test2_helper(2, 1); + + memcpy(known_options.filters, filters_powerpc, + sizeof(filters_powerpc)); + test2_helper(3, 4); + + memcpy(known_options.filters, filters_delta, + sizeof(filters_delta)); + test2_helper(5, 1); +} + + +static void +test3(void) +{ + known_options = (lzma_options_block){ + .has_crc32 = false, + .has_eopm = true, + .is_metadata = false, + .is_metadata = false, + .compressed_size = LZMA_VLI_VALUE_UNKNOWN, + .uncompressed_size = LZMA_VLI_VALUE_UNKNOWN, + .compressed_reserve = 1, + .uncompressed_reserve = 1, + }; + memcpy(known_options.filters, filters_none, sizeof(filters_none)); + + known_options.header_size = 3; + expect(lzma_block_header_encode(buffer, &known_options) + == LZMA_PROG_ERROR); + + known_options.header_size = 4; + expect(lzma_block_header_encode(buffer, &known_options) == LZMA_OK); + + known_options.header_size = 5; + expect(lzma_block_header_encode(buffer, &known_options) + == LZMA_PROG_ERROR); + + // NOTE: This assumes that Filter ID 0x1F is not supported. Update + // this test to use some other ID if 0x1F becomes supported. 
+ known_options.filters[0].id = 0x1F; + known_options.header_size = 5; + expect(lzma_block_header_encode(buffer, &known_options) + == LZMA_HEADER_ERROR); +} + + +static void +test4(void) +{ + known_options = (lzma_options_block){ + .has_crc32 = false, + .has_eopm = true, + .is_metadata = false, + .compressed_size = 0, + .uncompressed_size = 0, + .compressed_reserve = LZMA_VLI_BYTES_MAX, + .uncompressed_reserve = LZMA_VLI_BYTES_MAX, + .padding = 0, + }; + memcpy(known_options.filters, filters_powerpc, + sizeof(filters_powerpc)); + expect(!code(21)); + + // Reserved bits + buffer[0] ^= 0x40; + expect(!decode_ret(1, LZMA_HEADER_ERROR)); + buffer[0] ^= 0x40; + + buffer[1] ^= 0x40; + expect(decode_ret(21, LZMA_HEADER_ERROR)); + buffer[1] ^= 0x40; + + +} + + +int +main() +{ + lzma_init(); + + test1(); + test2(); + test3(); + test4(); + + lzma_end(&strm); + + return 0; +} diff --git a/tests/test_check.c b/tests/test_check.c new file mode 100644 index 00000000..14df375a --- /dev/null +++ b/tests/test_check.c @@ -0,0 +1,90 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file test_check.c +/// \brief Tests integrity checks +/// +/// \todo Add SHA256 +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "tests.h" + + +static const uint8_t test_string[9] = "123456789"; +static const uint8_t test_unaligned[12] = "xxx123456789"; + + +static bool +test_crc32(void) +{ + static const uint32_t test_vector = 0xCBF43926; + + // Test 1 + uint32_t crc = lzma_crc32(test_string, sizeof(test_string), 0); + if (crc != test_vector) + return true; + + // Test 2 + crc = lzma_crc32(test_unaligned + 3, sizeof(test_string), 0); + if (crc != test_vector) + return true; + + // Test 3 + crc = 0; + for (size_t i = 0; i < sizeof(test_string); ++i) + crc = lzma_crc32(test_string + i, 1, crc); + if (crc != test_vector) + return true; + + return false; +} + + +static bool +test_crc64(void) +{ + static const uint64_t test_vector = 0x995DC9BBDF1939FA; + + // Test 1 + uint64_t crc = lzma_crc64(test_string, sizeof(test_string), 0); + if (crc != test_vector) + return true; + + // Test 2 + crc = lzma_crc64(test_unaligned + 3, sizeof(test_string), 0); + if (crc != test_vector) + return true; + + // Test 3 + crc = 0; + for (size_t i = 0; i < sizeof(test_string); ++i) + crc = lzma_crc64(test_string + i, 1, crc); + if (crc != test_vector) + return true; + + return false; +} + + +int +main() +{ + bool error = false; + + error |= test_crc32(); + error |= test_crc64(); + + return error ? 
1 : 0; +} diff --git a/tests/test_filter_flags.c b/tests/test_filter_flags.c new file mode 100644 index 00000000..0a16f21a --- /dev/null +++ b/tests/test_filter_flags.c @@ -0,0 +1,326 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file test_filter_flags.c +/// \brief Tests Filter Flags coders +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "tests.h" + + +static uint8_t buffer[4096]; +static lzma_options_filter known_flags; +static lzma_options_filter decoded_flags; +static lzma_stream strm = LZMA_STREAM_INIT; + + +static bool +encode(uint32_t known_size) +{ + memcrap(buffer, sizeof(buffer)); + + uint32_t tmp; + if (lzma_filter_flags_size(&tmp, &known_flags) != LZMA_OK) + return true; + + if (tmp != known_size) + return true; + + size_t out_pos = 0; + if (lzma_filter_flags_encode(buffer, &out_pos, known_size, + &known_flags) != LZMA_OK) + return true; + + if (out_pos != known_size) + return true; + + return false; +} + + +static bool +decode_ret(uint32_t known_size, lzma_ret ret_ok) +{ + memcrap(&decoded_flags, sizeof(decoded_flags)); + + if (lzma_filter_flags_decoder(&strm, &decoded_flags) != LZMA_OK) + return true; + + if (decoder_loop_ret(&strm, buffer, known_size, ret_ok)) + return true; + + return false; +} + + +static bool +decode(uint32_t known_size) +{ + if (decode_ret(known_size, LZMA_STREAM_END)) + 
return true; + + if (known_flags.id != decoded_flags.id) + return true; + + return false; +} + + +static void +test_copy(void) +{ + // Test 1 (good) + known_flags.id = LZMA_FILTER_COPY; + known_flags.options = NULL; + + expect(!encode(1)); + expect(!decode(1)); + expect(decoded_flags.options == NULL); + + // Test 2 (invalid encoder options) + known_flags.options = &known_flags; + expect(encode(99)); + + // Test 3 (good but unusual Filter Flags field) + buffer[0] = 0xE0; + buffer[1] = LZMA_FILTER_COPY; + expect(!decode(2)); + expect(decoded_flags.options == NULL); + + // Test 4 (invalid Filter Flags field) + buffer[0] = 0xE1; + buffer[1] = LZMA_FILTER_COPY; + buffer[2] = 0; + expect(!decode_ret(3, LZMA_HEADER_ERROR)); + + // Test 5 (good but weird Filter Flags field) + buffer[0] = 0xFF; + buffer[1] = LZMA_FILTER_COPY; + buffer[2] = 0; + expect(!decode(3)); + expect(decoded_flags.options == NULL); + + // Test 6 (invalid Filter Flags field) + buffer[0] = 0xFF; + buffer[1] = LZMA_FILTER_COPY; + buffer[2] = 1; + buffer[3] = 0; + expect(!decode_ret(4, LZMA_HEADER_ERROR)); +} + + +static void +test_subblock(void) +{ + // Test 1 + known_flags.id = LZMA_FILTER_SUBBLOCK; + known_flags.options = NULL; + + expect(!encode(1)); + expect(!decode(1)); + expect(decoded_flags.options != NULL); + expect(((lzma_options_subblock *)(decoded_flags.options)) + ->allow_subfilters); + + // Test 2 + known_flags.options = decoded_flags.options; + expect(!encode(1)); + expect(!decode(1)); + expect(decoded_flags.options != NULL); + expect(((lzma_options_subblock *)(decoded_flags.options)) + ->allow_subfilters); + + free(decoded_flags.options); + free(known_flags.options); + + // Test 3 + buffer[0] = 0xFF; + buffer[1] = LZMA_FILTER_SUBBLOCK; + buffer[2] = 1; + buffer[3] = 0; + expect(!decode_ret(4, LZMA_HEADER_ERROR)); +} + + +static void +test_simple(void) +{ + // Test 1 + known_flags.id = LZMA_FILTER_X86; + known_flags.options = NULL; + + expect(!encode(1)); + expect(!decode(1)); + 
expect(decoded_flags.options == NULL); + + // Test 2 + lzma_options_simple options; + options.start_offset = 0; + known_flags.options = &options; + expect(!encode(1)); + expect(!decode(1)); + expect(decoded_flags.options == NULL); + + // Test 3 + options.start_offset = 123456; + known_flags.options = &options; + expect(!encode(6)); + expect(!decode(6)); + expect(decoded_flags.options != NULL); + + lzma_options_simple *decoded = decoded_flags.options; + expect(decoded->start_offset == options.start_offset); + + free(decoded); +} + + +static void +test_delta(void) +{ + // Test 1 + known_flags.id = LZMA_FILTER_DELTA; + known_flags.options = NULL; + expect(encode(99)); + + // Test 2 + lzma_options_delta options = { 0 }; + known_flags.options = &options; + expect(encode(99)); + + // Test 3 + options.distance = LZMA_DELTA_DISTANCE_MIN; + expect(!encode(2)); + expect(!decode(2)); + expect(((lzma_options_delta *)(decoded_flags.options)) + ->distance == options.distance); + + free(decoded_flags.options); + + // Test 4 + options.distance = LZMA_DELTA_DISTANCE_MAX; + expect(!encode(2)); + expect(!decode(2)); + expect(((lzma_options_delta *)(decoded_flags.options)) + ->distance == options.distance); + + free(decoded_flags.options); + + // Test 5 + options.distance = LZMA_DELTA_DISTANCE_MAX + 1; + expect(encode(99)); +} + + +static void +validate_lzma(void) +{ + const lzma_options_lzma *known = known_flags.options; + const lzma_options_lzma *decoded = decoded_flags.options; + + expect(known->dictionary_size <= decoded->dictionary_size); + + if (known->dictionary_size == 1) + expect(decoded->dictionary_size == 1); + else + expect(known->dictionary_size + known->dictionary_size / 2 + > decoded->dictionary_size); + + expect(known->literal_context_bits == decoded->literal_context_bits); + expect(known->literal_pos_bits == decoded->literal_pos_bits); + expect(known->pos_bits == decoded->pos_bits); +} + + +static void +test_lzma(void) +{ + // Test 1 + known_flags.id = 
LZMA_FILTER_LZMA; + known_flags.options = NULL; + expect(encode(99)); + + // Test 2 + lzma_options_lzma options = { + .dictionary_size = 0, + .literal_context_bits = 0, + .literal_pos_bits = 0, + .pos_bits = 0, + .preset_dictionary = NULL, + .preset_dictionary_size = 0, + .mode = LZMA_MODE_INVALID, + .fast_bytes = 0, + .match_finder = LZMA_MF_INVALID, + .match_finder_cycles = 0, + }; + + // Test 3 (empty dictionary not allowed) + known_flags.options = &options; + expect(encode(99)); + + // Test 4 (brute-force test some valid dictionary sizes) + while (options.dictionary_size != LZMA_DICTIONARY_SIZE_MAX) { + if (++options.dictionary_size == 5000) + options.dictionary_size = LZMA_DICTIONARY_SIZE_MAX - 5; + + expect(!encode(3)); + expect(!decode(3)); + validate_lzma(); + + free(decoded_flags.options); + } + + // Test 5 (too big dictionary size) + options.dictionary_size = LZMA_DICTIONARY_SIZE_MAX + 1; + expect(encode(99)); + + // Test 6 (brute-force test lc/lp/pb) + options.dictionary_size = 1; + for (uint32_t lc = LZMA_LITERAL_CONTEXT_BITS_MIN; + lc <= LZMA_LITERAL_CONTEXT_BITS_MAX; ++lc) { + for (uint32_t lp = LZMA_LITERAL_POS_BITS_MIN; + lp <= LZMA_LITERAL_POS_BITS_MAX; ++lp) { + for (uint32_t pb = LZMA_POS_BITS_MIN; + pb <= LZMA_POS_BITS_MAX; ++pb) { + options.literal_context_bits = lc; + options.literal_pos_bits = lp; + options.pos_bits = pb; + + expect(!encode(3)); + expect(!decode(3)); + validate_lzma(); + + free(decoded_flags.options); + } + } + } +} + + +int +main() +{ + lzma_init(); + + test_copy(); + test_subblock(); + test_simple(); + test_delta(); + test_lzma(); + + lzma_end(&strm); + + return 0; +} diff --git a/tests/test_index.c b/tests/test_index.c new file mode 100644 index 00000000..399963d3 --- /dev/null +++ b/tests/test_index.c @@ -0,0 +1,43 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file test_index.c +/// \brief Tests functions handling the lzma_index structure +// +// Copyright (C) 2007 Lasse 
Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "tests.h" + + +int +main() +{ + lzma_index index[3] = { + { 22, 33, index + 1 }, + { 44, 55, index + 2 }, + { 66, 77, NULL }, + }; + + lzma_index *i = lzma_index_dup(index, NULL); + expect(i != NULL); + + expect(lzma_index_is_equal(index, i)); + + i->next->next->uncompressed_size = 99; + expect(!lzma_index_is_equal(index, i)); + + lzma_index_free(i, NULL); + + return 0; +} diff --git a/tests/test_info.c b/tests/test_info.c new file mode 100644 index 00000000..e7899ef3 --- /dev/null +++ b/tests/test_info.c @@ -0,0 +1,717 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file test_info.c +/// \brief Tests functions handling the lzma_info structure +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
//
///////////////////////////////////////////////////////////////////////////////

#include "tests.h"


// The lzma_info object under test and the iterator shared by the tests.
static lzma_info *info = NULL;
static lzma_info_iter iter;

// Expected values of the five sizes stored in info. Tests assign to these
// at the same time as they call lzma_info_size_set(), and validate()
// compares them with what lzma_info_size_get() returns.
static lzma_vli stream_start = 0;
static lzma_vli header_metadata_size = LZMA_VLI_VALUE_UNKNOWN;
static lzma_vli total_size = LZMA_VLI_VALUE_UNKNOWN;
static lzma_vli uncompressed_size = LZMA_VLI_VALUE_UNKNOWN;
static lzma_vli footer_metadata_size = LZMA_VLI_VALUE_UNKNOWN;

// Reference Index with three records chained through their last member.
// NOTE(review): the first two members look like Total Size and
// Uncompressed Size (my_metadata below sums 22 + 44 + 66 and
// 33 + 55 + 77) -- confirm against the lzma_index declaration.
static lzma_index my_index[3] = {
	{ 22, 33, my_index + 1 },
	{ 44, 55, my_index + 2 },
	{ 66, 77, NULL },
};

// Reference Metadata whose sizes are the sums over my_index.
static lzma_metadata my_metadata = {
	.header_metadata_size = 11,
	.total_size = 22 + 44 + 66,
	.uncompressed_size = 33 + 55 + 77,
	.index = my_index,
	.extra = NULL,
};


// Re-initializes the existing info object (lzma_info_init() must return
// the same pointer) and restores the expected values to their initial
// state: Stream start 0, everything else unknown.
static void
reset(void)
{
	expect(lzma_info_init(info, NULL) == info);
	stream_start = 0;
	header_metadata_size = LZMA_VLI_VALUE_UNKNOWN;
	total_size = LZMA_VLI_VALUE_UNKNOWN;
	uncompressed_size = LZMA_VLI_VALUE_UNKNOWN;
	footer_metadata_size = LZMA_VLI_VALUE_UNKNOWN;
}


// Verifies that each of the five sizes reported by lzma_info_size_get()
// matches the corresponding expected-value variable.
static void
validate(void)
{
	expect(lzma_info_size_get(info, LZMA_INFO_STREAM_START)
			== stream_start);
	expect(lzma_info_size_get(info, LZMA_INFO_HEADER_METADATA)
			== header_metadata_size);
	expect(lzma_info_size_get(info, LZMA_INFO_TOTAL) == total_size);
	expect(lzma_info_size_get(info, LZMA_INFO_UNCOMPRESSED)
			== uncompressed_size);
	expect(lzma_info_size_get(info, LZMA_INFO_FOOTER_METADATA)
			== footer_metadata_size);
}


// lzma_info_size_set(): plain set/get round trips, which size types
// accept zero, and rejection of values above LZMA_VLI_VALUE_MAX.
static void
test1(void)
{
	// Basics: set each size in turn. The expected-value variable is
	// assigned inside the call so that validate() sees the same value.
	expect(lzma_info_size_set(info, LZMA_INFO_STREAM_START,
			stream_start = 1234) == LZMA_OK);
	validate();
	expect(lzma_info_size_set(info, LZMA_INFO_HEADER_METADATA,
			header_metadata_size = 2345) == LZMA_OK);
	validate();
	expect(lzma_info_size_set(info, LZMA_INFO_TOTAL, total_size = 3456)
			== LZMA_OK);
	validate();
	expect(lzma_info_size_set(info, LZMA_INFO_UNCOMPRESSED,
			uncompressed_size = 4567) == LZMA_OK);
	validate();
	expect(lzma_info_size_set(info, LZMA_INFO_FOOTER_METADATA,
			footer_metadata_size = 5432) == LZMA_OK);
	validate();

	// Not every size type allows a zero size: Stream start, Header
	// Metadata and Uncompressed accept it, Total and Footer Metadata
	// are expected to return LZMA_PROG_ERROR.
	reset();
	expect(lzma_info_size_set(info, LZMA_INFO_STREAM_START,
			stream_start = 0) == LZMA_OK);
	expect(lzma_info_size_set(info, LZMA_INFO_HEADER_METADATA,
			header_metadata_size = 0) == LZMA_OK);
	expect(lzma_info_size_set(info, LZMA_INFO_UNCOMPRESSED,
			uncompressed_size = 0) == LZMA_OK);
	validate();

	reset();
	expect(lzma_info_size_set(info, LZMA_INFO_TOTAL, 0)
			== LZMA_PROG_ERROR);

	reset();
	expect(lzma_info_size_set(info, LZMA_INFO_FOOTER_METADATA, 0)
			== LZMA_PROG_ERROR);

	// Invalid sizes: one past the maximum VLI value is rejected for
	// every size type.
	reset();
	expect(lzma_info_size_set(info, LZMA_INFO_STREAM_START,
			LZMA_VLI_VALUE_MAX + 1) == LZMA_PROG_ERROR);
	reset();
	expect(lzma_info_size_set(info, LZMA_INFO_HEADER_METADATA,
			LZMA_VLI_VALUE_MAX + 1) == LZMA_PROG_ERROR);
	reset();
	expect(lzma_info_size_set(info, LZMA_INFO_TOTAL,
			LZMA_VLI_VALUE_MAX + 1) == LZMA_PROG_ERROR);
	reset();
	expect(lzma_info_size_set(info, LZMA_INFO_UNCOMPRESSED,
			LZMA_VLI_VALUE_MAX + 1) == LZMA_PROG_ERROR);
	reset();
	expect(lzma_info_size_set(info, LZMA_INFO_FOOTER_METADATA,
			LZMA_VLI_VALUE_MAX + 1) == LZMA_PROG_ERROR);

	reset();
}


// Sets the size of the given type to 1234 (recording it in *num, which
// must be the matching expected-value variable), validates, and then
// tries to overwrite it with 4321.
//
// Returns true if the second, conflicting set was rejected (did not
// return LZMA_OK). The info object is reset before returning.
static bool
test2_helper(lzma_vli *num, lzma_info_size type)
{
	expect(lzma_info_size_set(info, type, *num = 1234) == LZMA_OK);
	validate();
	const bool ret = lzma_info_size_set(info, type, 4321) != LZMA_OK;
	reset();
	return ret;
}


// Overwriting an already known size with a different value.
static void
test2(void)
{
	// Excluding start offset of Stream, once a size has been set,
	// trying to set some other known value fails.
	expect(!test2_helper(&stream_start, LZMA_INFO_STREAM_START));
	expect(test2_helper(&header_metadata_size, LZMA_INFO_HEADER_METADATA));
	expect(test2_helper(&total_size, LZMA_INFO_TOTAL));
	expect(test2_helper(&uncompressed_size, LZMA_INFO_UNCOMPRESSED));
	expect(test2_helper(&footer_metadata_size, LZMA_INFO_FOOTER_METADATA));
}


// Common start for the test3() cases: a freshly reset info object with
// the iterator positioned on the first Index Record (one
// lzma_info_iter_next() call after lzma_info_iter_begin()).
static void
test3_init(void)
{
	reset();
	lzma_info_iter_begin(info, &iter);
	expect(lzma_info_iter_next(&iter, NULL) == LZMA_OK);
}


// Consistency checks between per-record sizes set through the iterator,
// lzma_info_index_finish(), and the Stream-level Total/Uncompressed sizes.
static void
test3(void)
{
	// Setting the same sizes multiple times for the same Index Record
	// is OK, but the values must always be the same.
	test3_init();
	expect(lzma_info_index_count_get(info) == 1);
	expect(lzma_info_iter_set(&iter, 1234, 2345) == LZMA_OK);
	expect(lzma_info_index_count_get(info) == 1);
	expect(lzma_info_iter_set(&iter, 1234, 2345) == LZMA_OK);
	expect(lzma_info_index_count_get(info) == 1);
	expect(lzma_info_iter_set(&iter, 1111, 2345) == LZMA_DATA_ERROR);

	// Cannot finish an empty Index.
	test3_init();
	expect(lzma_info_index_finish(info) == LZMA_DATA_ERROR);

	// Two Records exist but sizes were set only once (after the second
	// lzma_info_iter_next()); finishing is expected to fail.
	test3_init();
	expect(lzma_info_iter_next(&iter, NULL) == LZMA_OK);
	expect(lzma_info_index_count_get(info) == 2);
	expect(lzma_info_iter_set(&iter, 1234, 2345) == LZMA_OK);
	expect(lzma_info_index_count_get(info) == 2);
	expect(lzma_info_index_finish(info) == LZMA_DATA_ERROR);

	// One complete Record: after finishing, the Stream-level sizes may
	// be set to the Record's sizes but not to a different value.
	test3_init();
	expect(lzma_info_iter_set(&iter, 1234, 2345) == LZMA_OK);
	expect(lzma_info_index_count_get(info) == 1);
	expect(lzma_info_index_finish(info) == LZMA_OK);
	expect(lzma_info_size_set(info, LZMA_INFO_TOTAL, 1234) == LZMA_OK);
	expect(lzma_info_size_set(info, LZMA_INFO_UNCOMPRESSED, 2345)
			== LZMA_OK);
	expect(lzma_info_size_set(info, LZMA_INFO_TOTAL, 1111)
			== LZMA_DATA_ERROR);

	// Two complete Records: the Stream-level sizes must equal the sums.
	test3_init();
	expect(lzma_info_iter_set(&iter, 1234, 2345) == LZMA_OK);
	expect(lzma_info_index_count_get(info) == 1);
	expect(lzma_info_iter_next(&iter, NULL) == LZMA_OK);
	expect(lzma_info_index_count_get(info) == 2);
	expect(lzma_info_iter_set(&iter, 4321, 5432) == LZMA_OK);
	expect(lzma_info_index_count_get(info) == 2);
	expect(lzma_info_index_finish(info) == LZMA_OK);
	expect(lzma_info_size_set(info, LZMA_INFO_TOTAL, 1234 + 4321)
			== LZMA_OK);
	expect(lzma_info_size_set(info, LZMA_INFO_UNCOMPRESSED, 2345 + 5432)
			== LZMA_OK);
	expect(lzma_info_size_set(info, LZMA_INFO_UNCOMPRESSED, 1111)
			== LZMA_DATA_ERROR);

	// Same as above but the Stream-level sizes are set first and the
	// Records afterwards.
	test3_init();
	expect(lzma_info_size_set(info, LZMA_INFO_TOTAL, 1234 + 4321)
			== LZMA_OK);
	expect(lzma_info_size_set(info, LZMA_INFO_UNCOMPRESSED, 2345 + 5432)
			== LZMA_OK);
	expect(lzma_info_size_set(info, LZMA_INFO_UNCOMPRESSED, 1111)
			== LZMA_DATA_ERROR);
	expect(lzma_info_iter_set(&iter, 1234, 2345) == LZMA_OK);
	expect(lzma_info_index_count_get(info) == 1);
	expect(lzma_info_iter_next(&iter, NULL) == LZMA_OK);
	expect(lzma_info_index_count_get(info) == 2);
	expect(lzma_info_iter_set(&iter, 4321, 5432) == LZMA_OK);
	expect(lzma_info_index_count_get(info) == 2);
	expect(lzma_info_index_finish(info) == LZMA_OK);

	// A Record whose Total Size exceeds the known Stream-level Total
	// Size is rejected.
	test3_init();
	expect(lzma_info_size_set(info, LZMA_INFO_TOTAL, 1000) == LZMA_OK);
	expect(lzma_info_iter_set(&iter, 1001, 2001) == LZMA_DATA_ERROR);

	// Likewise for Uncompressed Size.
	test3_init();
	expect(lzma_info_size_set(info, LZMA_INFO_UNCOMPRESSED, 2000)
			== LZMA_OK);
	expect(lzma_info_iter_set(&iter, 1001, 2001) == LZMA_DATA_ERROR);

	reset();
}


// Walks the iterator through Records and checks, after every step, the
// sizes and offsets exposed in the iterator and the Record count.
// Parts 4b-4e deliberately build on the state left by the previous part.
static void
test4(void)
{
	// 4a: Stream start and Size of Header Metadata have not been set
	// here, and stream_offset is expected to stay unknown.
	lzma_info_iter_begin(info, &iter);
	expect(lzma_info_index_count_get(info) == 0);

	expect(lzma_info_iter_next(&iter, NULL) == LZMA_OK);
	expect(iter.total_size == LZMA_VLI_VALUE_UNKNOWN);
	expect(iter.uncompressed_size == LZMA_VLI_VALUE_UNKNOWN);
	expect(iter.stream_offset == LZMA_VLI_VALUE_UNKNOWN);
	expect(iter.uncompressed_offset == 0);
	expect(lzma_info_index_count_get(info) == 1);

	expect(lzma_info_iter_set(&iter, 22, 33) == LZMA_OK);
	expect(iter.total_size == 22);
	expect(iter.uncompressed_size == 33);
	expect(iter.stream_offset == LZMA_VLI_VALUE_UNKNOWN);
	expect(iter.uncompressed_offset == 0);
	expect(lzma_info_index_count_get(info) == 1);

	expect(lzma_info_iter_next(&iter, NULL) == LZMA_OK);
	expect(iter.total_size == LZMA_VLI_VALUE_UNKNOWN);
	expect(iter.uncompressed_size == LZMA_VLI_VALUE_UNKNOWN);
	expect(iter.stream_offset == LZMA_VLI_VALUE_UNKNOWN);
	expect(iter.uncompressed_offset == 33);

	// 4b: with Stream start (5) and Size of Header Metadata (11) known,
	// stream_offset becomes known too. Four Records are appended.
	reset();
	lzma_info_iter_begin(info, &iter);
	expect(lzma_info_index_count_get(info) == 0);
	expect(lzma_info_size_set(info, LZMA_INFO_STREAM_START, 5) == LZMA_OK);
	expect(lzma_info_size_set(info, LZMA_INFO_HEADER_METADATA, 11)
			== LZMA_OK);

	expect(lzma_info_iter_next(&iter, NULL) == LZMA_OK);
	expect(iter.total_size == LZMA_VLI_VALUE_UNKNOWN);
	expect(iter.uncompressed_size == LZMA_VLI_VALUE_UNKNOWN);
	expect(iter.stream_offset == 5 + LZMA_STREAM_HEADER_SIZE + 11);
	expect(iter.uncompressed_offset == 0);
	expect(lzma_info_index_count_get(info) == 1);

	expect(lzma_info_iter_set(&iter, 22, 33) == LZMA_OK);
	expect(iter.total_size == 22);
	expect(iter.uncompressed_size == 33);
	expect(iter.stream_offset == 5 + LZMA_STREAM_HEADER_SIZE + 11);
	expect(iter.uncompressed_offset == 0);
	expect(lzma_info_index_count_get(info) == 1);

	expect(lzma_info_iter_next(&iter, NULL) == LZMA_OK);
	expect(iter.total_size == LZMA_VLI_VALUE_UNKNOWN);
	expect(iter.uncompressed_size == LZMA_VLI_VALUE_UNKNOWN);
	expect(iter.stream_offset == 5 + LZMA_STREAM_HEADER_SIZE + 11 + 22);
	expect(iter.uncompressed_offset == 33);
	expect(lzma_info_index_count_get(info) == 2);

	expect(lzma_info_iter_set(&iter, 44, 55) == LZMA_OK);
	expect(iter.total_size == 44);
	expect(iter.uncompressed_size == 55);
	expect(iter.stream_offset == 5 + LZMA_STREAM_HEADER_SIZE + 11 + 22);
	expect(iter.uncompressed_offset == 33);
	expect(lzma_info_index_count_get(info) == 2);

	expect(lzma_info_iter_next(&iter, NULL) == LZMA_OK);
	expect(iter.total_size == LZMA_VLI_VALUE_UNKNOWN);
	expect(iter.uncompressed_size == LZMA_VLI_VALUE_UNKNOWN);
	expect(iter.stream_offset == 5 + LZMA_STREAM_HEADER_SIZE
			+ 11 + 22 + 44);
	expect(iter.uncompressed_offset == 33 + 55);
	expect(lzma_info_index_count_get(info) == 3);

	expect(lzma_info_iter_set(&iter, 66, 77) == LZMA_OK);
	expect(iter.total_size == 66);
	expect(iter.uncompressed_size == 77);
	expect(iter.stream_offset == 5 + LZMA_STREAM_HEADER_SIZE
			+ 11 + 22 + 44);
	expect(iter.uncompressed_offset == 33 + 55);
	expect(lzma_info_index_count_get(info) == 3);

	expect(lzma_info_iter_next(&iter, NULL) == LZMA_OK);
	expect(iter.total_size == LZMA_VLI_VALUE_UNKNOWN);
	expect(iter.uncompressed_size == LZMA_VLI_VALUE_UNKNOWN);
	expect(iter.stream_offset == 5 + LZMA_STREAM_HEADER_SIZE
			+ 11 + 22 + 44 + 66);
	expect(iter.uncompressed_offset == 33 + 55 + 77);
	expect(lzma_info_index_count_get(info) == 4);

	expect(lzma_info_iter_set(&iter, 88, 99) == LZMA_OK);
	expect(iter.total_size == 88);
	expect(iter.uncompressed_size == 99);
	expect(iter.stream_offset == 5 + LZMA_STREAM_HEADER_SIZE
			+ 11 + 22 + 44 + 66);
	expect(iter.uncompressed_offset == 33 + 55 + 77);
	expect(lzma_info_index_count_get(info) == 4);

	// 4c (continues from 4b): restart the iterator and walk over the
	// four existing Records. Setting an already known size to the same
	// value or to LZMA_VLI_VALUE_UNKNOWN must leave the Record as it is.
	lzma_info_iter_begin(info, &iter);
	expect(lzma_info_index_count_get(info) == 4);

	expect(lzma_info_iter_next(&iter, NULL) == LZMA_OK);
	expect(iter.total_size == 22);
	expect(iter.uncompressed_size == 33);
	expect(iter.stream_offset == 5 + LZMA_STREAM_HEADER_SIZE + 11);
	expect(iter.uncompressed_offset == 0);
	expect(lzma_info_index_count_get(info) == 4);

	expect(lzma_info_iter_set(&iter, 22, LZMA_VLI_VALUE_UNKNOWN)
			== LZMA_OK);
	expect(iter.total_size == 22);
	expect(iter.uncompressed_size == 33);
	expect(iter.stream_offset == 5 + LZMA_STREAM_HEADER_SIZE + 11);
	expect(iter.uncompressed_offset == 0);
	expect(lzma_info_index_count_get(info) == 4);

	expect(lzma_info_iter_next(&iter, NULL) == LZMA_OK);
	expect(iter.total_size == 44);
	expect(iter.uncompressed_size == 55);
	expect(iter.stream_offset == 5 + LZMA_STREAM_HEADER_SIZE + 11 + 22);
	expect(iter.uncompressed_offset == 33);
	expect(lzma_info_index_count_get(info) == 4);

	expect(lzma_info_iter_set(&iter, LZMA_VLI_VALUE_UNKNOWN, 55)
			== LZMA_OK);
	expect(iter.total_size == 44);
	expect(iter.uncompressed_size == 55);
	expect(iter.stream_offset == 5 + LZMA_STREAM_HEADER_SIZE + 11 + 22);
	expect(iter.uncompressed_offset == 33);
	expect(lzma_info_index_count_get(info) == 4);

	expect(lzma_info_iter_next(&iter, NULL) == LZMA_OK);
	expect(iter.total_size == 66);
	expect(iter.uncompressed_size == 77);
	expect(iter.stream_offset == 5 + LZMA_STREAM_HEADER_SIZE
			+ 11 + 22 + 44);
	expect(iter.uncompressed_offset == 33 + 55);
	expect(lzma_info_index_count_get(info) == 4);

	expect(lzma_info_iter_set(&iter, LZMA_VLI_VALUE_UNKNOWN,
			LZMA_VLI_VALUE_UNKNOWN) == LZMA_OK);
	expect(iter.total_size == 66);
	expect(iter.uncompressed_size == 77);
	expect(iter.stream_offset == 5 + LZMA_STREAM_HEADER_SIZE
			+ 11 + 22 + 44);
	expect(iter.uncompressed_offset == 33 + 55);
	expect(lzma_info_index_count_get(info) == 4);

	expect(lzma_info_iter_next(&iter, NULL) == LZMA_OK);
	expect(iter.total_size == 88);
	expect(iter.uncompressed_size == 99);
	expect(iter.stream_offset == 5 + LZMA_STREAM_HEADER_SIZE
			+ 11 + 22 + 44 + 66);
	expect(iter.uncompressed_offset == 33 + 55 + 77);
	expect(lzma_info_index_count_get(info) == 4);

	// Stepping past the last existing Record appends a fifth one.
	expect(lzma_info_iter_next(&iter, NULL) == LZMA_OK);
	expect(iter.total_size == LZMA_VLI_VALUE_UNKNOWN);
	expect(iter.uncompressed_size == LZMA_VLI_VALUE_UNKNOWN);
	expect(iter.stream_offset == 5 + LZMA_STREAM_HEADER_SIZE
			+ 11 + 22 + 44 + 66 + 88);
	expect(iter.uncompressed_offset == 33 + 55 + 77 + 99);
	expect(lzma_info_index_count_get(info) == 5);

	// Only Total Size is set for the fifth Record.
	expect(lzma_info_iter_set(&iter, 1234, LZMA_VLI_VALUE_UNKNOWN)
			== LZMA_OK);
	expect(iter.total_size == 1234);
	expect(iter.uncompressed_size == LZMA_VLI_VALUE_UNKNOWN);
	expect(iter.stream_offset == 5 + LZMA_STREAM_HEADER_SIZE
			+ 11 + 22 + 44 + 66 + 88);
	expect(iter.uncompressed_offset == 33 + 55 + 77 + 99);
	expect(lzma_info_index_count_get(info) == 5);

	// Test 4d (continues from 4c): go to the fourth Record, re-set its
	// (unchanged) sizes, then complete the half-filled fifth Record and
	// finish the Index.
	lzma_info_iter_begin(info, &iter);
	for (size_t i = 0; i < 4; ++i)
		expect(lzma_info_iter_next(&iter, NULL) == LZMA_OK);
	expect(lzma_info_iter_set(&iter, 88, 99) == LZMA_OK);
	expect(lzma_info_iter_next(&iter, NULL) == LZMA_OK);
	expect(iter.total_size == 1234);
	expect(iter.uncompressed_size == LZMA_VLI_VALUE_UNKNOWN);
	expect(iter.stream_offset == 5 + LZMA_STREAM_HEADER_SIZE
			+ 11 + 22 + 44 + 66 + 88);
	expect(iter.uncompressed_offset == 33 + 55 + 77 + 99);
	expect(lzma_info_index_count_get(info) == 5);

	expect(lzma_info_iter_set(&iter, LZMA_VLI_VALUE_UNKNOWN, 4321)
			== LZMA_OK);
	expect(iter.total_size == 1234);
	expect(iter.uncompressed_size == 4321);
	expect(iter.stream_offset == 5 + LZMA_STREAM_HEADER_SIZE
			+ 11 + 22 + 44 + 66 + 88);
	expect(iter.uncompressed_offset == 33 + 55 + 77 + 99);
	expect(lzma_info_index_count_get(info) == 5);

	expect(lzma_info_index_finish(info) == LZMA_OK);
	expect(lzma_info_index_count_get(info) == 5);

	// Test 4e (continues from 4d): once the Index has been finished,
	// stepping past the last Record must fail instead of appending.
	lzma_info_iter_begin(info, &iter);
	for (size_t i = 0; i < 5; ++i)
		expect(lzma_info_iter_next(&iter, NULL) == LZMA_OK);
	expect(lzma_info_iter_set(&iter, 1234, 4321) == LZMA_OK);
	expect(lzma_info_iter_next(&iter, NULL) == LZMA_DATA_ERROR);

	reset();
}


// lzma_info_index_set(): setting a complete Index, then setting a second
// Index that differs from it, and interaction with Stream-level sizes.
static void
test5(void)
{
	lzma_index *i;

	// A NULL Index is a programming error.
	expect(lzma_info_index_set(info, NULL, NULL, true)
			== LZMA_PROG_ERROR);

	// Second Index has a different Uncompressed Size in its second
	// Record.
	// NOTE(review): the duplicate is passed with the last argument
	// true and is never freed here; presumably that flag hands
	// ownership to lzma_info_index_set() -- confirm.
	reset();
	expect(lzma_info_index_set(info, NULL, my_index, false) == LZMA_OK);
	i = lzma_index_dup(my_index, NULL);
	expect(i != NULL);
	i->next->uncompressed_size = 99;
	expect(lzma_info_index_set(info, NULL, i, true) == LZMA_DATA_ERROR);

	// Second Index is one Record shorter.
	reset();
	expect(lzma_info_index_set(info, NULL, my_index, false) == LZMA_OK);
	i = lzma_index_dup(my_index, NULL);
	expect(i != NULL);
	lzma_index_free(i->next->next, NULL);
	i->next->next = NULL;
	expect(lzma_info_index_set(info, NULL, i, true) == LZMA_DATA_ERROR);

	// Second Index is longer: its third Record is replaced with a whole
	// copy of my_index.
	reset();
	expect(lzma_info_index_set(info, NULL, my_index, false) == LZMA_OK);
	i = lzma_index_dup(my_index, NULL);
	expect(i != NULL);
	lzma_index_free(i->next->next, NULL);
	i->next->next = lzma_index_dup(my_index, NULL);
	expect(i->next->next != NULL);
	expect(lzma_info_index_set(info, NULL, i, true) == LZMA_DATA_ERROR);

	// Stream-level sizes that match the sums over my_index are accepted
	// and stay unchanged.
	reset();
	expect(lzma_info_size_set(info, LZMA_INFO_TOTAL,
			total_size = 22 + 44 + 66) == LZMA_OK);
	expect(lzma_info_size_set(info, LZMA_INFO_UNCOMPRESSED,
			uncompressed_size = 33 + 55 + 77) == LZMA_OK);
	validate();
	expect(lzma_info_index_set(info, NULL, my_index, false) == LZMA_OK);
	validate();

	// Total Size that does not match the Index.
	reset();
	expect(lzma_info_size_set(info, LZMA_INFO_TOTAL, total_size = 77)
			== LZMA_OK);
	expect(lzma_info_size_set(info, LZMA_INFO_UNCOMPRESSED,
			uncompressed_size = 33 + 55 + 77) == LZMA_OK);
	validate();
	expect(lzma_info_index_set(info, NULL, my_index, false)
			== LZMA_DATA_ERROR);

	// Uncompressed Size that does not match the Index.
	reset();
	expect(lzma_info_size_set(info, LZMA_INFO_TOTAL,
			total_size = 22 + 44 + 66) == LZMA_OK);
	expect(lzma_info_size_set(info, LZMA_INFO_UNCOMPRESSED,
			uncompressed_size = 777777) == LZMA_OK);
	validate();
	expect(lzma_info_index_set(info, NULL, my_index, false)
			== LZMA_DATA_ERROR);

	reset();
}


// lzma_info_metadata_set(): combinations of Header and Footer Metadata.
// In every call below the fourth argument is true for Header Metadata and
// false for Footer Metadata, as the per-case comments indicate.
static void
test6(void)
{
	lzma_metadata metadata;

	// Same complete Metadata in both Header and Footer
	expect(lzma_info_size_set(info, LZMA_INFO_HEADER_METADATA,
			my_metadata.header_metadata_size) == LZMA_OK);
	expect(lzma_info_metadata_set(info, NULL, &my_metadata, true, false)
			== LZMA_OK);
	expect(lzma_info_metadata_set(info, NULL, &my_metadata, false, false)
			== LZMA_OK);

	// Header Metadata is not present but Size of Header Metadata is
	// still present in Footer.
	reset();
	metadata = my_metadata;
	metadata.header_metadata_size = LZMA_VLI_VALUE_UNKNOWN;
	expect(lzma_info_size_set(info, LZMA_INFO_HEADER_METADATA, 0)
			== LZMA_OK);
	expect(lzma_info_metadata_set(info, NULL, &metadata, true, false)
			== LZMA_OK);
	expect(lzma_info_metadata_set(info, NULL, &my_metadata, false, false)
			== LZMA_DATA_ERROR);

	// Header Metadata is present but Size of Header Metadata is missing
	// from Footer.
	reset();
	metadata = my_metadata;
	metadata.header_metadata_size = LZMA_VLI_VALUE_UNKNOWN;
	expect(lzma_info_metadata_set(info, NULL, &my_metadata, true, false)
			== LZMA_OK);
	expect(lzma_info_size_set(info, LZMA_INFO_HEADER_METADATA,
			my_metadata.header_metadata_size) == LZMA_OK);
	expect(lzma_info_metadata_set(info, NULL, &metadata, false, false)
			== LZMA_DATA_ERROR);

	// Index missing
	reset();
	metadata = my_metadata;
	metadata.index = NULL;
	expect(lzma_info_metadata_set(info, NULL, &metadata, true, false)
			== LZMA_OK);
	expect(lzma_info_metadata_set(info, NULL, &metadata, false, false)
			== LZMA_DATA_ERROR);

	// Index in Header Metadata but not in Footer Metadata.
	// "metadata" is intentionally reused from the previous case: it is
	// still my_metadata with index == NULL.
	reset();
	expect(lzma_info_metadata_set(info, NULL, &my_metadata, true, false)
			== LZMA_OK);
	expect(lzma_info_metadata_set(info, NULL, &metadata, false, false)
			== LZMA_OK);

	// Index in Header Metadata but not in Footer Metadata but
	// Total Size is missing from Footer.
	// "metadata" still has index == NULL from the cases above.
	reset();
	metadata.total_size = LZMA_VLI_VALUE_UNKNOWN;
	expect(lzma_info_metadata_set(info, NULL, &my_metadata, true, false)
			== LZMA_OK);
	expect(lzma_info_metadata_set(info, NULL, &metadata, false, false)
			== LZMA_DATA_ERROR);

	// Total Size doesn't match the Index
	reset();
	metadata = my_metadata;
	metadata.total_size = 9999;
	expect(lzma_info_metadata_set(info, NULL, &metadata, true, false)
			== LZMA_DATA_ERROR);

	// Uncompressed Size doesn't match the Index
	reset();
	metadata = my_metadata;
	metadata.uncompressed_size = 9999;
	expect(lzma_info_metadata_set(info, NULL, &metadata, true, false)
			== LZMA_DATA_ERROR);

	reset();
}


// lzma_info_metadata_locate(): which pieces of information must be known
// before the Header (second argument true) and Footer (second argument
// false) Metadata Blocks can be located.
static void
test7(void)
{
	// No info yet, so we cannot locate anything.
	expect(lzma_info_metadata_locate(info, true)
			== LZMA_VLI_VALUE_UNKNOWN);
	expect(lzma_info_metadata_locate(info, false)
			== LZMA_VLI_VALUE_UNKNOWN);

	// Setting the Stream start offset doesn't change this situation.
	expect(lzma_info_size_set(info, LZMA_INFO_STREAM_START, 5) == LZMA_OK);
	expect(lzma_info_metadata_locate(info, true)
			== LZMA_VLI_VALUE_UNKNOWN);
	expect(lzma_info_metadata_locate(info, false)
			== LZMA_VLI_VALUE_UNKNOWN);

	// Setting the Size of Header Metadata known allows us to locate
	// the Header Metadata Block.
	expect(lzma_info_size_set(info, LZMA_INFO_HEADER_METADATA, 11)
			== LZMA_OK);
	expect(lzma_info_metadata_locate(info, true)
			== 5 + LZMA_STREAM_HEADER_SIZE);
	expect(lzma_info_metadata_locate(info, false)
			== LZMA_VLI_VALUE_UNKNOWN);

	// Adding a Data Block. As long as Index is not Finished, we cannot
	// locate Footer Metadata Block.
	lzma_info_iter_begin(info, &iter);
	expect(lzma_info_iter_next(&iter, NULL) == LZMA_OK);
	expect(iter.stream_offset == 5 + LZMA_STREAM_HEADER_SIZE + 11);
	expect(iter.uncompressed_offset == 0);
	expect(lzma_info_iter_set(&iter, 22, 33) == LZMA_OK);
	expect(lzma_info_metadata_locate(info, true)
			== 5 + LZMA_STREAM_HEADER_SIZE);
	expect(lzma_info_metadata_locate(info, false)
			== LZMA_VLI_VALUE_UNKNOWN);

	// Once the Index is finished, we can locate Footer Metadata Block too.
	expect(lzma_info_index_finish(info) == LZMA_OK);
	expect(lzma_info_metadata_locate(info, true)
			== 5 + LZMA_STREAM_HEADER_SIZE);
	expect(lzma_info_metadata_locate(info, false)
			== 5 + LZMA_STREAM_HEADER_SIZE + 11 + 22);

	// A retry of most of the above but now with unknown Size of Header
	// Metadata Block, which makes locating Footer Metadata Block
	// impossible until that size is finally set at the end.
	reset();
	expect(lzma_info_size_set(info, LZMA_INFO_STREAM_START, 5) == LZMA_OK);
	expect(lzma_info_metadata_locate(info, true)
			== LZMA_VLI_VALUE_UNKNOWN);
	expect(lzma_info_metadata_locate(info, false)
			== LZMA_VLI_VALUE_UNKNOWN);

	expect(lzma_info_index_set(info, NULL, my_index, false) == LZMA_OK);
	expect(lzma_info_metadata_locate(info, true)
			== LZMA_VLI_VALUE_UNKNOWN);
	expect(lzma_info_metadata_locate(info, false)
			== LZMA_VLI_VALUE_UNKNOWN);

	expect(lzma_info_size_set(info, LZMA_INFO_HEADER_METADATA, 11)
			== LZMA_OK);
	expect(lzma_info_metadata_locate(info, true)
			== 5 + LZMA_STREAM_HEADER_SIZE);
	expect(lzma_info_metadata_locate(info, false)
			== LZMA_STREAM_HEADER_SIZE + 5 + 11 + 22 + 44 + 66);

	reset();
}


// lzma_info_iter_locate(): only a first smoke test so far (see the TODO).
// Unlike the other tests this one does not call reset() at the end.
static void
test8(void)
{
	expect(lzma_info_size_set(info, LZMA_INFO_STREAM_START, 5) == LZMA_OK);
	expect(lzma_info_size_set(info, LZMA_INFO_HEADER_METADATA, 11)
			== LZMA_OK);

	// With the last argument false, locating offset 0 in an empty Index
	// fails and no Record is created.
	lzma_info_iter_begin(info, &iter);
	expect(lzma_info_iter_locate(&iter, NULL, 0, false)
			== LZMA_DATA_ERROR);
	expect(lzma_info_index_count_get(info) == 0);

	// With the last argument true the same call succeeds and the Index
	// has one Record afterwards; a repeated lookup with false then also
	// succeeds without adding another Record.
	lzma_info_iter_begin(info, &iter);
	expect(lzma_info_iter_locate(&iter, NULL, 0, true) == LZMA_OK);
	expect(lzma_info_index_count_get(info) == 1);
	expect(lzma_info_iter_locate(&iter, NULL, 0, false) == LZMA_OK);
	expect(lzma_info_index_count_get(info) == 1);

	// TODO
}


/*
static void
test9(void)
{
	// TODO Various integer overflow checks
}
*/


// Allocates the shared lzma_info object, checks its initial state and runs
// test1()..test8() in order. Returns 1 if the allocation fails; a failed
// expect() terminates the program with exit status 1.
int
main()
{
	lzma_init();

	info = lzma_info_init(NULL, NULL);
	if (info == NULL)
		return 1;

	// A fresh object must report the initial expected values.
	validate();

	test1();
	test2();
	test3();
	test4();
	test5();
	test6();
	test7();
	test8();

	lzma_info_free(info, NULL);
	return 0;
}
diff --git a/tests/test_stream_flags.c b/tests/test_stream_flags.c new file mode 100644 index 00000000..4cd22576 --- /dev/null +++ b/tests/test_stream_flags.c @@ -0,0 +1,191 @@
///////////////////////////////////////////////////////////////////////////////
//
///
\file test_stream_flags.c +/// \brief Tests Stream Header and Stream tail coders +// +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "tests.h" + + +static lzma_stream_flags known_flags; +static lzma_stream_flags decoded_flags; +static uint8_t buffer[LZMA_STREAM_HEADER_SIZE + LZMA_STREAM_TAIL_SIZE]; +static lzma_stream strm = LZMA_STREAM_INIT; + + +static bool +validate(void) +{ + if (known_flags.check != decoded_flags.check + || known_flags.has_crc32 != decoded_flags.has_crc32 + || known_flags.is_multi != decoded_flags.is_multi) + return true; + + return false; +} + + +static bool +test_header_decoder(size_t expected_size, lzma_ret expected_ret) +{ + memcrap(&decoded_flags, sizeof(decoded_flags)); + + if (lzma_stream_header_decoder(&strm, &decoded_flags) != LZMA_OK) + return true; + + if (coder_loop(&strm, buffer, expected_size, NULL, 0, + expected_ret, LZMA_RUN)) + return true; + + if (expected_ret != LZMA_STREAM_END) + return false; + + return validate(); +} + + +static void +test_header(void) +{ + memcrap(buffer, sizeof(buffer)); + expect(lzma_stream_header_encode(buffer, &known_flags) == LZMA_OK); + succeed(test_header_decoder(LZMA_STREAM_HEADER_SIZE, LZMA_STREAM_END)); +} + + +static bool +test_tail_decoder(size_t expected_size, lzma_ret expected_ret) +{ + memcrap(&decoded_flags, sizeof(decoded_flags)); + + if (lzma_stream_tail_decoder(&strm, &decoded_flags) 
!= LZMA_OK) + return true; + + if (coder_loop(&strm, buffer, expected_size, NULL, 0, + expected_ret, LZMA_RUN)) + return true; + + if (expected_ret == LZMA_STREAM_END && validate()) + return true; + + return false; +} + + +static void +test_tail(void) +{ + memcrap(buffer, sizeof(buffer)); + expect(lzma_stream_tail_encode(buffer, &known_flags) == LZMA_OK); + succeed(test_tail_decoder(LZMA_STREAM_TAIL_SIZE, LZMA_STREAM_END)); +} + + +static void +test_encode_invalid(void) +{ + known_flags.check = LZMA_CHECK_ID_MAX + 1; + known_flags.has_crc32 = false; + known_flags.is_multi = false; + + expect(lzma_stream_header_encode(buffer, &known_flags) + == LZMA_PROG_ERROR); + + expect(lzma_stream_tail_encode(buffer, &known_flags) + == LZMA_PROG_ERROR); + + known_flags.check = (lzma_check_type)(-1); + + expect(lzma_stream_header_encode(buffer, &known_flags) + == LZMA_PROG_ERROR); + + expect(lzma_stream_tail_encode(buffer, &known_flags) + == LZMA_PROG_ERROR); +} + + +static void +test_decode_invalid(void) +{ + known_flags.check = LZMA_CHECK_NONE; + known_flags.has_crc32 = false; + known_flags.is_multi = false; + + expect(lzma_stream_header_encode(buffer, &known_flags) == LZMA_OK); + + // Test 1 (invalid Magic Bytes) + buffer[5] ^= 1; + succeed(test_header_decoder(6, LZMA_DATA_ERROR)); + buffer[5] ^= 1; + + // Test 2a (valid CRC32) + uint32_t crc = lzma_crc32(buffer + 6, 1, 0); + for (size_t i = 0; i < 4; ++i) + buffer[7 + i] = crc >> (i * 8); + succeed(test_header_decoder(LZMA_STREAM_HEADER_SIZE, LZMA_STREAM_END)); + + // Test 2b (invalid Stream Flags with valid CRC32) + buffer[6] ^= 0x20; + crc = lzma_crc32(buffer + 6, 1, 0); + for (size_t i = 0; i < 4; ++i) + buffer[7 + i] = crc >> (i * 8); + succeed(test_header_decoder(7, LZMA_HEADER_ERROR)); + + // Test 3 (invalid CRC32) + expect(lzma_stream_header_encode(buffer, &known_flags) == LZMA_OK); + buffer[LZMA_STREAM_HEADER_SIZE - 1] ^= 1; + succeed(test_header_decoder(LZMA_STREAM_HEADER_SIZE, LZMA_DATA_ERROR)); + + // Test 4 
(invalid Stream Flags) + expect(lzma_stream_tail_encode(buffer, &known_flags) == LZMA_OK); + buffer[0] ^= 0x40; + succeed(test_tail_decoder(1, LZMA_HEADER_ERROR)); + buffer[0] ^= 0x40; + + // Test 5 (invalid Magic Bytes) + buffer[2] ^= 1; + succeed(test_tail_decoder(3, LZMA_DATA_ERROR)); +} + + +int +main() +{ + lzma_init(); + + // Valid headers + for (lzma_check_type check = LZMA_CHECK_NONE; + check <= LZMA_CHECK_ID_MAX; ++check) { + for (int has_crc32 = 0; has_crc32 <= 1; ++has_crc32) { + for (int is_multi = 0; is_multi <= 1; ++is_multi) { + known_flags.check = check; + known_flags.has_crc32 = has_crc32; + known_flags.is_multi = is_multi; + + test_header(); + test_tail(); + } + } + } + + // Invalid headers + test_encode_invalid(); + test_decode_invalid(); + + lzma_end(&strm); + + return 0; +} diff --git a/tests/tests.h b/tests/tests.h new file mode 100644 index 00000000..08a3991c --- /dev/null +++ b/tests/tests.h @@ -0,0 +1,148 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tests.h +/// \brief Common definitions for test applications +// +// Copyright (C) 2006 Lasse Collin +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_TESTS_H +#define LZMA_TESTS_H + +#include "sysdefs.h" + +#include <stdio.h> + +#define memcrap(buf, size) memset(buf, 0xFD, size) + +#define expect(test) ((test) ? 
0 : (fprintf(stderr, "%s:%u: %s\n", \ + __FILE__, __LINE__, #test), exit(1), 0)) + +#define succeed(test) expect(!test) + +#define fail(test) expect(test) + + +static inline const char * +lzma_ret_sym(lzma_ret ret) +{ + const char *str = ""; + + switch (ret) { + case LZMA_OK: + str = "LZMA_OK"; + break; + + case LZMA_STREAM_END: + str = "LZMA_STREAM_END"; + break; + + case LZMA_PROG_ERROR: + str = "LZMA_PROG_ERROR"; + break; + + case LZMA_DATA_ERROR: + str = "LZMA_DATA_ERROR"; + break; + + case LZMA_MEM_ERROR: + str = "LZMA_MEM_ERROR"; + break; + + case LZMA_BUF_ERROR: + str = "LZMA_BUF_ERROR"; + break; + + case LZMA_HEADER_ERROR: + str = "LZMA_HEADER_ERROR"; + break; + + case LZMA_UNSUPPORTED_CHECK: + str = "LZMA_UNSUPPORTED_CHECK"; + break; + } + + return str; +} + + +static inline bool +coder_loop(lzma_stream *strm, uint8_t *in, size_t in_size, + uint8_t *out, size_t out_size, + lzma_ret expected_ret, lzma_action finishing_action) +{ + size_t in_left = in_size; + size_t out_left = out_size > 0 ? 
out_size + 1 : 0; + lzma_action action = LZMA_RUN; + lzma_ret ret; + + strm->next_in = NULL; + strm->avail_in = 0; + strm->next_out = NULL; + strm->avail_out = 0; + + while (true) { + if (in_left > 0) { + if (--in_left == 0) + action = finishing_action; + + strm->next_in = in++; + strm->avail_in = 1; + } + + if (out_left > 0) { + --out_left; + strm->next_out = out++; + strm->avail_out = 1; + } + + ret = lzma_code(strm, action); + if (ret != LZMA_OK) + break; + } + + bool error = false; + + if (ret != expected_ret) + error = true; + + if (expected_ret == LZMA_STREAM_END) { + if (strm->total_in != in_size || strm->total_out != out_size) + error = true; + } else { + if (strm->total_in + 1 != in_size + || strm->total_out != out_size) + error = true; + } + + return error; +} + + +static inline bool +decoder_loop_ret(lzma_stream *strm, uint8_t *in, size_t in_size, + lzma_ret expected_ret) +{ + return coder_loop(strm, in, in_size, NULL, 0, expected_ret, LZMA_RUN); +} + + +static inline bool +decoder_loop(lzma_stream *strm, uint8_t *in, size_t in_size) +{ + return coder_loop(strm, in, in_size, NULL, 0, + LZMA_STREAM_END, LZMA_RUN); +} + +#endif |