commit 745ae2fff2a33db73fc1a5dc0de40b2891cd7a60 Author: yourfriendoss Date: Sun Nov 16 15:41:37 2025 +0200 first commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f14086e --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +make +.clangd diff --git a/.zed/settings.json b/.zed/settings.json new file mode 100644 index 0000000..0e0dcd2 --- /dev/null +++ b/.zed/settings.json @@ -0,0 +1,3 @@ +{ + +} \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..abfaac1 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,69 @@ +cmake_minimum_required(VERSION 3.16) + +if(NOT DEFINED CMAKE_TOOLCHAIN_FILE) + if(DEFINED ENV{VITASDK}) + set(CMAKE_TOOLCHAIN_FILE "$ENV{VITASDK}/share/vita.toolchain.cmake" CACHE PATH "toolchain file") + else() + message(FATAL_ERROR "Please define VITASDK to point to your SDK path!") + endif() +endif() + +project(subsonic) + +include("${VITASDK}/share/vita.cmake" REQUIRED) + +set(VITA_APP_NAME "subsonic") +set(VITA_TITLEID "VSDK00023") +set(VITA_VERSION "01.00") +set(VITA_MKSFOEX_FLAGS "${VITA_MKSFOEX_FLAGS} -d PARENTAL_LEVEL=1") +set(VITA_MAKE_FSELF_FLAGS "${VITA_MAKE_FSELF_FLAGS} -a 0x2F00000000000001") + +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wl,-q -Wall -O3 -fno-inline") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -fno-exceptions") + +find_package(CURL REQUIRED) +find_package(OpenSSL REQUIRED) +find_package(SDL2 REQUIRED) + +add_executable(${PROJECT_NAME} + src/main.cpp + src/lib/debug/debugScreen.c + src/lib/subsonic.hpp + src/lib/dr_mp3.h + src/lib/dr_flac.h + src/lib/audio.cpp + +) + +target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_17) + +target_link_libraries(${PROJECT_NAME} + SceLibKernel_stub + SceDisplay_stub + SceAudio_stub + SceTouch_stub + ${CURL_LIBRARIES} + ${OPENSSL_LIBRARIES} + c + m + z + + jsoncpp + + SDL2::SDL2 SDL2_ttf SDL2_image webpdemux webp + freetype bz2 png pthread +) + +vita_create_self(subsonic.self ${PROJECT_NAME}) + +vita_create_vpk(${PROJECT_NAME}.vpk ${VITA_TITLEID} subsonic.self + VERSION ${VITA_VERSION} + NAME ${VITA_APP_NAME} + FILE vpk/Roboto-Regular.ttf Roboto-Regular.ttf + FILE vpk/album.jpg album.jpg + + FILE sce_sys/icon0.png sce_sys/icon0.png + FILE sce_sys/livearea/contents/bg.png sce_sys/livearea/contents/bg.png + FILE sce_sys/livearea/contents/startup.png sce_sys/livearea/contents/startup.png + FILE sce_sys/livearea/contents/template.xml sce_sys/livearea/contents/template.xml +) diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..03d0b08 --- /dev/null +++ b/LICENSE @@ -0,0 +1,188 @@ + +GNU GENERAL PUBLIC LICENSE + +Version 3, 29 June 2007 + +Copyright © 2007 Free Software Foundation, Inc. + +Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. +Preamble + +The GNU General Public License is a free, copyleft license for software and other kinds of works. + +The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies also to any other work released this way by its authors. You can apply it to your programs, too. + +When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. + +To protect your rights, we need to prevent others from denying you these rights or asking you to surrender the rights. Therefore, you have certain responsibilities if you distribute copies of the software, or if you modify it: responsibilities to respect the freedom of others. + +For example, if you distribute copies of such a program, whether gratis or for a fee, you must pass on to the recipients the same freedoms that you received. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. + +Developers that use the GNU GPL protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License giving you legal permission to copy, distribute and/or modify it. + +For the developers' and authors' protection, the GPL clearly explains that there is no warranty for this free software. For both users' and authors' sake, the GPL requires that modified versions be marked as changed, so that their problems will not be attributed erroneously to authors of previous versions. + +Some devices are designed to deny users access to install or run modified versions of the software inside them, although the manufacturer can do so. This is fundamentally incompatible with the aim of protecting users' freedom to change the software. The systematic pattern of such abuse occurs in the area of products for individuals to use, which is precisely where it is most unacceptable. Therefore, we have designed this version of the GPL to prohibit the practice for those products. If such problems arise substantially in other domains, we stand ready to extend this provision to those domains in future versions of the GPL, as needed to protect the freedom of users. + +Finally, every program is threatened constantly by software patents. States should not allow patents to restrict development and use of software on general-purpose computers, but in those that do, we wish to avoid the special danger that patents applied to a free program could make it effectively proprietary. To prevent this, the GPL assures that patents cannot be used to render the program non-free. + +The precise terms and conditions for copying, distribution and modification follow. +TERMS AND CONDITIONS +0. Definitions. + +“This License” refers to version 3 of the GNU General Public License. + +“Copyright” also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. + +“The Program” refers to any copyrightable work licensed under this License. Each licensee is addressed as “you”. “Licensees” and “recipients” may be individuals or organizations. + +To “modify” a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a “modified version” of the earlier work or a work “based on” the earlier work. + +A “covered work” means either the unmodified Program or a work based on the Program. + +To “propagate” a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. + +To “convey” a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. + +An interactive user interface displays “Appropriate Legal Notices” to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. +1. Source Code. + +The “source code” for a work means the preferred form of the work for making modifications to it. “Object code” means any non-source form of a work. + +A “Standard Interface” means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. + +The “System Libraries” of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A “Major Component”, in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. + +The “Corresponding Source” for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. + +The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. + +The Corresponding Source for a work in source code form is that same work. +2. Basic Permissions. + +All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. + +You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. + +Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. +3. Protecting Users' Legal Rights From Anti-Circumvention Law. + +No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. + +When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. +4. Conveying Verbatim Copies. + +You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. + +You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. +5. Conveying Modified Source Versions. + +You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified it, and giving a relevant date. + b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to “keep intact all notices”. + c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. + d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. + +A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an “aggregate” if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. +6. Conveying Non-Source Forms. + +You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: + + a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. + b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. + c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. + d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. + e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. + +A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. + +A “User Product” is either (1) a “consumer product”, which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, “normally used” refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. + +“Installation Information” for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. + +If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). + +The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. + +Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. +7. Additional Terms. + +“Additional permissions” are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. + +When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. + +Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or + b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or + c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or + d) Limiting the use for publicity purposes of names of licensors or authors of the material; or + e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or + f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. + +All other non-permissive additional terms are considered “further restrictions” within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. + +If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. + +Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. +8. Termination. + +You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). + +However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. + +Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. + +Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. +9. Acceptance Not Required for Having Copies. + +You are not required to accept this License in order to receive or run a copy of the Program. Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. +10. Automatic Licensing of Downstream Recipients. + +Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. + +An “entity transaction” is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. + +You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. +11. Patents. + +A “contributor” is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. The work thus licensed is called the contributor's “contributor version”. + +A contributor's “essential patent claims” are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, “control” includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. + +Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. + +In the following three paragraphs, a “patent license” is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To “grant” such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. + +If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. “Knowingly relying” means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. + +If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. + +A patent license is “discriminatory” if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. + +Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. +12. No Surrender of Others' Freedom. + +If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. +13. Use with the GNU Affero General Public License. + +Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the special requirements of the GNU Affero General Public License, section 13, concerning interaction through a network will apply to the combination as such. +14. Revised Versions of this License. + +The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License “or any later version” applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation. + +If the Program specifies that a proxy can decide which future versions of the GNU General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. + +Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. +15. Disclaimer of Warranty. + +THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM “AS IS” WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. +16. Limitation of Liability. + +IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. +17. Interpretation of Sections 15 and 16. + +If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. diff --git a/README.md b/README.md new file mode 100644 index 0000000..e69de29 diff --git a/sce_sys/icon0.png b/sce_sys/icon0.png new file mode 100644 index 0000000..cd5bd6a Binary files /dev/null and b/sce_sys/icon0.png differ diff --git a/sce_sys/livearea/contents/bg.png b/sce_sys/livearea/contents/bg.png new file mode 100644 index 0000000..0589e4a Binary files /dev/null and b/sce_sys/livearea/contents/bg.png differ diff --git a/sce_sys/livearea/contents/startup.png b/sce_sys/livearea/contents/startup.png new file mode 100644 index 0000000..7b427ad Binary files /dev/null and b/sce_sys/livearea/contents/startup.png differ diff --git a/sce_sys/livearea/contents/template.xml b/sce_sys/livearea/contents/template.xml new file mode 100644 index 0000000..fa61463 --- /dev/null +++ b/sce_sys/livearea/contents/template.xml @@ -0,0 +1,11 @@ + + + + + bg.png + + + + startup.png + + diff --git a/src/lib/audio.cpp b/src/lib/audio.cpp new file mode 100644 index 0000000..c02c17b --- /dev/null +++ b/src/lib/audio.cpp @@ -0,0 +1,526 @@ + +#include +#include +#include + +#include +#include + +#define DR_FLAC_IMPLEMENTATION +#include "dr_flac.h" + +#define DR_MP3_IMPLEMENTATION +#include "dr_mp3.h" + +#include "audio.hpp" + +// ============================== +// Internal State +// ============================== +static volatile bool gRunning = false; +static volatile bool gPaused = false; +static volatile bool gStopRequest = false; + +static volatile bool stream_eof = false; + +static SceUID gCurlThread = -1; +static SceUID gDecodeThread = -1; +static SceUID gAudioThread = -1; + +static int audio_port = -1; + +// ============================== +// Ringbuffer +// ============================== +#define STREAMBUF_SIZE (1024 * 1024 * 2) +#define FRAMES_PER_CHUNK 1024 + +static uint8_t streambuf[STREAMBUF_SIZE]; +static volatile size_t wb = 0, rb = 0; + +// ============================== +// Double-buffer PCM +// ============================== +static int16_t gAudioBuf[2][FRAMES_PER_CHUNK * 2]; +static volatile int gBufReady[2] = {0,0}; +static volatile int gDecodeIndex = 0; +static volatile int gPlayIndex = 0; +static volatile bool gDecodeDone = false; + +static volatile uint64_t gDecodedFrames = 0; +static volatile uint64_t gTotalFrames = 0; +static volatile uint32_t gSampleRate = 44100; // default +static volatile bool gHasLength = false; // FLAC streaming may not have this + +// ======================================================================= +// CURL callback +// ======================================================================= +static size_t curl_cb(void *ptr, size_t size, size_t nmemb, void *userdata) +{ + if (gStopRequest) return 0; + + size_t total = size * nmemb; + const uint8_t *in = (const uint8_t*)ptr; + size_t written = 0; + + while (written < total && !gStopRequest) + { + size_t used = (wb + STREAMBUF_SIZE - rb) % STREAMBUF_SIZE; + size_t free = STREAMBUF_SIZE - used - 1; + + if (free == 0) { + sceKernelDelayThread(1000); + continue; + } + + size_t chunk = total - written; + if (chunk > free) chunk = free; + + size_t tillEnd = STREAMBUF_SIZE - wb; + size_t first = (chunk < tillEnd) ? chunk : tillEnd; + size_t second = chunk - first; + + memcpy(&streambuf[wb], in + written, first); + wb = (wb + first) % STREAMBUF_SIZE; + written += first; + + if (second > 0) { + memcpy(&streambuf[wb], in + written, second); + wb = (wb + second) % STREAMBUF_SIZE; + written += second; + } + } + + return total; +} + + + +// ======================================================================= +// Streaming read callback for both FLAC and MP3 +// ======================================================================= +static unsigned int stream_read(void *user, void *out, unsigned int want) +{ + (void)user; + + if (gStopRequest) return 0; + + uint8_t *o = (uint8_t*)out; + size_t read = 0; + + while (read < want && !gStopRequest) + { + size_t available = (wb + STREAMBUF_SIZE - rb) % STREAMBUF_SIZE; + + if (available == 0) { + if (stream_eof) break; + sceKernelDelayThread(1000); + continue; + } + + size_t chunk = want - read; + if (chunk > available) chunk = available; + + size_t tillEnd = STREAMBUF_SIZE - rb; + size_t first = (chunk < tillEnd) ? chunk : tillEnd; + size_t second = chunk - first; + + memcpy(o + read, &streambuf[rb], first); + rb = (rb + first) % STREAMBUF_SIZE; + read += first; + + if (second > 0) { + memcpy(o + read, &streambuf[rb], second); + rb = (rb + second) % STREAMBUF_SIZE; + read += second; + } + } + + return read; +} +static drflac_bool32 flac_seek(void*, int, drflac_seek_origin) +{ + return DRFLAC_FALSE; +} +static drmp3_bool32 mp3_seek(void* user, int offset, drmp3_seek_origin origin) +{ + (void)user; + (void)offset; + (void)origin; + // Streaming over HTTP – no real seeking. + return DRMP3_FALSE; +} + +static drflac_bool32 flac_tell(void*, drflac_int64 *pos) +{ + *pos = 0; + return DRFLAC_TRUE; +} + + + +// ======================================================================= +// Threads +// ======================================================================= + +static int curl_thread(unsigned int, void *url_) +{ + CURL *c = curl_easy_init(); + curl_easy_setopt(c, CURLOPT_URL, (char*)url_); + curl_easy_setopt(c, CURLOPT_WRITEFUNCTION, curl_cb); + curl_easy_setopt(c, CURLOPT_FOLLOWLOCATION, 1L); + curl_easy_setopt(c, CURLOPT_BUFFERSIZE, 64 * 1024); + + curl_easy_perform(c); + + stream_eof = true; + curl_easy_cleanup(c); + return 0; +} + + +// ----------------------------------------------------------- +// AUDIO THREAD +// ----------------------------------------------------------- +static int audio_thread(unsigned int, void*) +{ + for (;;) + { + if (gStopRequest) break; + + if (gPaused) { + sceKernelDelayThread(10000); + continue; + } + + if (!gBufReady[gPlayIndex]) { + if (gDecodeDone && !gBufReady[gPlayIndex ^ 1]) + break; + + sceKernelDelayThread(1000); + continue; + } + + sceAudioOutOutput(audio_port, gAudioBuf[gPlayIndex]); + gBufReady[gPlayIndex] = 0; + gPlayIndex ^= 1; + } + + return 0; +} +static void mp3_meta(void* user, const drmp3_metadata* meta) +{ + // do nothing +} +// MP3 DECODE THREAD +// ----------------------------------------------------------- +static int decode_mp3_thread(unsigned int, void*) +{ + // Wait until some header/data is buffered + while (((wb + STREAMBUF_SIZE - rb) % STREAMBUF_SIZE) < 32 && !gStopRequest) + sceKernelDelayThread(10000); + + if (gStopRequest) return 0; + + drmp3 mp3; + if (!drmp3_init(&mp3, stream_read, mp3_seek, NULL, mp3_meta, NULL, NULL)) + return 0; + + gSampleRate = mp3.sampleRate; + gTotalFrames = 0; // unknown for streaming MP3 + gHasLength = false; + + audio_port = sceAudioOutOpenPort( + SCE_AUDIO_OUT_PORT_TYPE_BGM, + FRAMES_PER_CHUNK, + mp3.sampleRate, + SCE_AUDIO_OUT_MODE_STEREO + ); + + if (audio_port < 0) { + drmp3_uninit(&mp3); + return 0; + } + + int vol[2] = {0x8000, 0x8000}; + sceAudioOutSetVolume( + audio_port, + (SceAudioOutChannelFlag)(SCE_AUDIO_VOLUME_FLAG_L_CH | SCE_AUDIO_VOLUME_FLAG_R_CH), + vol + ); + + gDecodeIndex = 0; + gPlayIndex = 0; + gDecodeDone = false; + gBufReady[0] = 0; + gBufReady[1] = 0; + + // Start audio thread (same as FLAC) + gAudioThread = sceKernelCreateThread( + "audio_out_thread", + audio_thread, + 0x40, + 0x10000, + 0, 0, NULL + ); + sceKernelStartThread(gAudioThread, 0, NULL); + + int16_t inBuf[FRAMES_PER_CHUNK * 8]; // extra space, channels <= 2 anyway + + while (!gStopRequest) + { + int idx = gDecodeIndex; + + // Wait for playback to consume this buffer + while (gBufReady[idx] && !gStopRequest) + sceKernelDelayThread(1000); + + if (gStopRequest) break; + + drmp3_uint64 frames64 = drmp3_read_pcm_frames_s16(&mp3, FRAMES_PER_CHUNK, inBuf); + size_t frames = (size_t)frames64; + + gDecodedFrames += frames; + + if (frames == 0) { + gDecodeDone = true; + break; + } + + int16_t *outBuf = gAudioBuf[idx]; + + if (mp3.channels == 1) { + // Mono -> Stereo + for (size_t i = 0; i < frames; i++) { + outBuf[i*2 + 0] = inBuf[i]; + outBuf[i*2 + 1] = inBuf[i]; + } + } else { + // Interleaved stereo (or more, but we only keep first 2 channels) + for (size_t i = 0; i < frames; i++) { + outBuf[i*2 + 0] = inBuf[i*mp3.channels + 0]; + outBuf[i*2 + 1] = inBuf[i*mp3.channels + 1]; + } + } + + // Zero-pad last chunk if short + if (frames < FRAMES_PER_CHUNK) { + memset(&outBuf[frames * 2], 0, (FRAMES_PER_CHUNK - frames) * 4); + } + + gBufReady[idx] = 1; + gDecodeIndex ^= 1; + } + + drmp3_uninit(&mp3); + return 0; +} +// ----------------------------------------------------------- +// DECODE THREAD +// ----------------------------------------------------------- +static int decode_flac_thread(unsigned int, void*) +{ + // Wait FLAC header + while (((wb + STREAMBUF_SIZE - rb) % STREAMBUF_SIZE) < 32 && !gStopRequest) + sceKernelDelayThread(10000); + + if (gStopRequest) return 0; + + drflac *flac = drflac_open(stream_read, flac_seek, flac_tell, NULL, NULL); + if (!flac) return 0; + + gSampleRate = flac->sampleRate; + gTotalFrames = flac->totalPCMFrameCount; + gHasLength = (gTotalFrames > 0); + + + audio_port = sceAudioOutOpenPort( + SCE_AUDIO_OUT_PORT_TYPE_BGM, + FRAMES_PER_CHUNK, + flac->sampleRate, + SCE_AUDIO_OUT_MODE_STEREO + ); + + int vol[2] = {0x8000,0x8000}; + sceAudioOutSetVolume(audio_port, (SceAudioOutChannelFlag)(SCE_AUDIO_VOLUME_FLAG_L_CH | SCE_AUDIO_VOLUME_FLAG_R_CH), vol); + + gDecodeIndex = 0; + gPlayIndex = 0; + gDecodeDone = false; + gBufReady[0] = gBufReady[1] = 0; + + int16_t inBuf[FRAMES_PER_CHUNK * 8]; + + // Start audio thread + gAudioThread = sceKernelCreateThread("audio_out_thread", audio_thread, 0x40, 0x10000, 0, 0, NULL); + sceKernelStartThread(gAudioThread, 0, NULL); + + // Decode loop + while (!gStopRequest) + { + int idx = gDecodeIndex; + + while (gBufReady[idx] && !gStopRequest) + sceKernelDelayThread(1000); + + if (gStopRequest) break; + + size_t frames = drflac_read_pcm_frames_s16(flac, FRAMES_PER_CHUNK, inBuf); + + gDecodedFrames += frames; + + if (frames == 0) { + gDecodeDone = true; + break; + } + + int16_t *outBuf = gAudioBuf[idx]; + + if (flac->channels == 1) { + for (size_t i = 0; i < frames; i++) { + outBuf[i*2+0] = inBuf[i]; + outBuf[i*2+1] = inBuf[i]; + } + } else { + for (size_t i = 0; i < frames; i++) { + outBuf[i*2+0] = inBuf[i*flac->channels + 0]; + outBuf[i*2+1] = inBuf[i*flac->channels + 1]; + } + } + + if (frames < FRAMES_PER_CHUNK) { + memset(&outBuf[frames * 2], 0, (FRAMES_PER_CHUNK - frames) * 4); + } + + gBufReady[idx] = 1; + gDecodeIndex ^= 1; + } + + drflac_close(flac); + return 0; +} + + +// ======================================================================= +// Public API +// ======================================================================= + +float Audio_GetLengthSeconds() +{ + if (!gHasLength) return 0.0f; + return (float)gTotalFrames / (float)gSampleRate; +} + +float Audio_GetProgress() +{ + if (!gHasLength || gTotalFrames == 0) return 0.0f; + return (float)gDecodedFrames / (float)gTotalFrames; +} + +bool Audio_IsPlaying() +{ + if(gStopRequest) { + return false; + } else { + return !gDecodeDone && gRunning; + } +} +float Audio_SetProgress(float p) +{ + if (p < 0.0f) p = 0.0f; + if (p > 1.0f) p = 1.0f; + + if (!gHasLength || gTotalFrames == 0) + return 0.0f; + + return ((float)gTotalFrames / (float)gSampleRate) * p; +} + +float Audio_GetPositionSeconds() +{ + return (float)gDecodedFrames / (float)gSampleRate; +} + +void Audio_Init() +{ + curl_global_init(CURL_GLOBAL_DEFAULT); +} + +void Audio_Play(const char *url) +{ + if (gRunning) Audio_Stop(); + + gDecodedFrames = 0; + gTotalFrames = 0; + gHasLength = false; + gSampleRate = 44100; + + gStopRequest = false; + gRunning = true; + gPaused = false; + stream_eof = false; + wb = rb = 0; + + gCurlThread = sceKernelCreateThread("curl_stream", curl_thread, 0x40, 0x10000, 0, 0, NULL); + sceKernelStartThread(gCurlThread, strlen(url)+1, (void*)url); + + gDecodeThread = sceKernelCreateThread("decode_thread_flac", decode_flac_thread, 0x40, 0x20000, 0, 0, NULL); + sceKernelStartThread(gDecodeThread, 0, NULL); +} +void Audio_PlayMP3(const char *url) +{ + if (gRunning) Audio_Stop(); + + gDecodedFrames = 0; + gTotalFrames = 0; + gHasLength = false; + gSampleRate = 44100; + + gStopRequest = false; + gRunning = true; + gPaused = false; + stream_eof = false; + wb = rb = 0; + + gCurlThread = sceKernelCreateThread("curl_stream_mp3", curl_thread, 0x40, 0x10000, 0, 0, NULL); + sceKernelStartThread(gCurlThread, strlen(url)+1, (void*)url); + + gDecodeThread = sceKernelCreateThread("decode_thread_mp3", decode_mp3_thread, 0x40, 0x20000, 0, 0, NULL); + sceKernelStartThread(gDecodeThread, 0, NULL); +} + +bool Audio_IsPaused() { + return gPaused; +} + +void Audio_Pause() +{ + gPaused = true; +} + +void Audio_Resume() +{ + gPaused = false; +} + +void Audio_Stop() +{ + if (!gRunning) return; + + gStopRequest = true; + + sceKernelDelayThread(50000); + + if (audio_port >= 0) + sceAudioOutReleasePort(audio_port); + + audio_port = -1; + + gRunning = false; +} + +void Audio_Shutdown() +{ + Audio_Stop(); + curl_global_cleanup(); +} diff --git a/src/lib/audio.hpp b/src/lib/audio.hpp new file mode 100644 index 0000000..17619af --- /dev/null +++ b/src/lib/audio.hpp @@ -0,0 +1,35 @@ +#pragma once +#include + +#ifdef __cplusplus +extern "C" { +#endif + +// Call once at program start +void Audio_Init(); + +// Start playing a URL (starts CURL + decode threads) +void Audio_Play(const char *url); +void Audio_PlayMP3(const char *url); // MP3 +// Pause audio output +void Audio_Pause(); + +// Resume audio output +void Audio_Resume(); + +// Stop playback (kills threads + closes audio) +void Audio_Stop(); + +// Cleanup on exit +void Audio_Shutdown(); +bool Audio_IsPlaying(); +bool Audio_IsPaused(); +float Audio_SetProgress(float); + +float Audio_GetPositionSeconds(); +float Audio_GetLengthSeconds(); +float Audio_GetProgress(); // 0.0 – 1.0 + +#ifdef __cplusplus +} +#endif diff --git a/src/lib/debug/debugScreen.c b/src/lib/debug/debugScreen.c new file mode 100644 index 0000000..9637542 --- /dev/null +++ b/src/lib/debug/debugScreen.c @@ -0,0 +1,732 @@ +#ifndef DEBUG_SCREEN_C +#define DEBUG_SCREEN_C + +/* +* debugScreen.c of Vita SDK +* +* - psvDebugScreenInit() +* Initializes debug screen for output. +* +* - psvDebugScreenPuts() +* Similar to the C library function puts() writes a string to the debug +* screen up to but not including the NUL character. +* Supports the most important CSI sequences of ECMA-48 / ISO/IEC 6429:1992. +* Graphic Rendition Combination Mode (GRCM) supported is Cumulative. +* Modifications: +* - CSI SGR codes 30-37/38/39 & 40-47/48/49 set standard/fitting/default intensity, so instead of "\e[1;31m" use "\e31;1m" +* - ANSI color #8 is made darker (40<>80), so that "dark" white is still lighter than "bright" dark +* - support 16 save storages for CSI s and CSI u, e.g "\e[8s" and "\e[8u" +* [1] https://en.wikipedia.org/wiki/ANSI_escape_code#CSI_sequences +* [2] https://jonasjacek.github.io/colors/ +* [3] https://www.ecma-international.org/publications/standards/Ecma-048.htm +* [4] https://invisible-island.net/xterm/ctlseqs/ctlseqs.html +* [5] http://man7.org/linux/man-pages/man4/console_codes.4.html +* +* (CSI = "\e[") +* CSI [n] s = Save Cursor Position to slot #n (0-15). Default 0. +* CSI [n] u = Restore Cursor Position from slot #n (0-15). Default 0. +* CSI n A = Cursor Up times. +* CSI n B = Cursor Down times. +* CSI n C = Cursor Forward times. +* CSI n D = Cursor Back times. +* CSI n E = Cursor Next Line times and to Beginning of that Line. +* CSI n F = Cursor Previous Line times and to Beginning of that Line. +* CSI n G = Cursor to Column . The value is 1-based and defaults to 1 (first column) if omitted. +* CSI n ; m H = Cursor to Row and Column . The values are 1-based and default to 1 (top left corner) if omitted. +* CSI n ; m f = Cursor to Row and Column . The values are 1-based and default to 1 (top left corner) if omitted. +* CSI [n] J = Clears part of the screen. Cursor position does not change. +* 0 (default) from cursor to end of screen. +* 1 from cursor to beginning of the screen. +* 2 entire screen +* CSI [n] K = Clears part of the line. Cursor position does not change. +* 0 (default) from cursor to end of line. +* 1 from cursor to beginning of line. +* 2 clear entire line. +* CSI [n] m = Sets the appearance of the following characters. +* 0 Reset all (colors and inversion) (default) +* 1 Increased intensity ("bright" color) +* 2 Decreased intensity ("faint"/"dark" color) +* 7 Enable inversion +* 22 Standard intensity ("normal" color) +* 27 Disable inversion +* 30–37 Set ANSI foreground color with standard intensity +* 38 Set foreground color. Arguments are 5; or 2;;; +* 39 Default foreground color +* 40–47 Set standard ANSI background color with standard intensity +* 48 Set background color. Arguments are 5; or 2;;; +* 49 Default background color +* 90–97 Set ANSI foreground color with increased intensity +* 100–107 Set ANSI background color with increased intensity +* +* - psvDebugScreenPrintf() +* Similar to the C library function printf() formats a string and ouputs +* it via psvDebugScreenPuts() to the debug screen. +* +* - psvDebugScreenGetColorStateCopy(ColorState *copy) +* Get copy of current color state. +* +* - psvDebugScreenGetCoordsXY(int *x, int *y) +* Get copy of current pixel coordinates. +* Allows for multiple and custom position stores. +* Allows correct positioning when using different font sizes. +* +* - psvDebugScreenSetCoordsXY(int *x, int *y) +* Set pixel coordinates. +* Allows for multiple and custom position stores. +* Allows correct positioning when using different font sizes. +* +* - PsvDebugScreenFont *psvDebugScreenGetFont() +* Get current font. +* +* - PsvDebugScreenFont *psvDebugScreenSetFont(PsvDebugScreenFont *font) { +* Set font. Returns current font. +* +* - PsvDebugScreenFont *psvDebugScreenScaleFont2x(PsvDebugScreenFont *source_font) { +* Scales a font by 2 (e.g. 8x8 to 16x16) and returns new scaled font. +* +* Also see the following samples: +* - debugscreen +* - debug_print +* +*/ + +#include // for malloc(), free() +#include // for vsnprintf() +#include // for memset(), memcpy() +#include // for va_list, va_start(), va_end() +#include + +#include "debugScreen.h" + +#include "debugScreenFont.c" + +#define SCREEN_FB_WIDTH (960) // frame buffer aligned width for accessing vram +#define SCREEN_FB_SIZE (2 * 1024 * 1024) // Must be 256KB aligned +#ifndef SCREEN_TAB_SIZE // this allows easy overriding +#define SCREEN_TAB_SIZE (8) +#endif +#define SCREEN_TAB_W ((F)->size_w * (SCREEN_TAB_SIZE)) +#define F psvDebugScreenFontCurrent + +#define FROM_FULL_RGB(r,g,b ) ( ((b)<<16) | ((g)<<8) | (r) ) +#define CONVERT_RGB_BGR(rgb) rgb = ( (((rgb)&0x0000FF)<<16) | ((rgb)&0x00FF00) | (((rgb)&0xFF0000)>>16) ) + +#define CLEARSCRNBLOCK(H,toH,W,toW,color) for (int h = (H); h < (toH); h++) for (int w = (W); w < (toW); w++) ((uint32_t*)base)[h*(SCREEN_FB_WIDTH) + w] = (color); +#define CLEARSCRNLINES(H,toH,color) { uint32_t *pixel = (uint32_t *)base + ((H) * (SCREEN_FB_WIDTH)); int i = (((toH) - (H)) * (SCREEN_FB_WIDTH)); for (; i > 0; i--) *pixel++ = (color); } + +#define SAVE_STORAGES 16 + +static int initialized = 0; +static int mutex, coordX, coordY; +static int savedX[SAVE_STORAGES] = { 0 }, savedY[SAVE_STORAGES] = { 0 }; +static ColorState colors = { + 0, 0, // truecolor flags + 0, 0, // truecolors + 0, 0, 0, 0, 0, // ANSI/VTERM/GREYSCALE colors + 7, 22, 0, 22, 0, // default colors (ANSI/VTERM/GREYSCALE) + 0, 0 // current colors +}; + +static PsvDebugScreenFont *psvDebugScreenFontCurrent = &psvDebugScreenFont; + +#ifdef __vita__ +#include +#include +#include +static SceUID displayblock; +static void* base; // pointer to frame buffer +#else +#define NO_psvDebugScreenInit +#ifndef psvDebugScreenInitReplacement +#define psvDebugScreenInitReplacement(...) +#endif +#define sceKernelLockMutex(m,v,x) m=v +#define sceKernelUnlockMutex(m,v) m=v +static char base[(SCREEN_FB_WIDTH) * (SCREEN_HEIGHT) * 4]; +#endif + +static uint32_t DARK_COLORS_BGR[8] = { + 0x000000, 0x000040, 0x004000, 0x004040, 0x400000, 0x400040, 0x404000, 0x808080, // 0-7 +}; + +// ANSI/VTERM/GREYSCALE palette: https://en.wikipedia.org/wiki/ANSI_escape_code#8-bit +// modifications: +// - #8 is made darker (40<>80), so that "dark" white is still lighter than "bright" dark +static uint32_t ANSI_COLORS_BGR[256] = { + 0x000000, 0x000080, 0x008000, 0x008080, 0x800000, 0x800080, 0x808000, 0xc0c0c0, // 0-7 + 0x404040, 0x0000ff, 0x00ff00, 0x00ffff, 0xff0000, 0xff00ff, 0xffff00, 0xffffff, // 8-15 + 0x000000, 0x5f0000, 0x870000, 0xaf0000, 0xd70000, 0xff0000, 0x005f00, 0x5f5f00, // 16-23 + 0x875f00, 0xaf5f00, 0xd75f00, 0xff5f00, 0x008700, 0x5f8700, 0x878700, 0xaf8700, // 24-31 + 0xd78700, 0xff8700, 0x00af00, 0x5faf00, 0x87af00, 0xafaf00, 0xd7af00, 0xffaf00, // 32-39 + 0x00d700, 0x5fd700, 0x87d700, 0xafd700, 0xd7d700, 0xffd700, 0x00ff00, 0x5fff00, // 40-47 + 0x87ff00, 0xafff00, 0xd7ff00, 0xffff00, 0x00005f, 0x5f005f, 0x87005f, 0xaf005f, // 48-55 + 0xd7005f, 0xff005f, 0x005f5f, 0x5f5f5f, 0x875f5f, 0xaf5f5f, 0xd75f5f, 0xff5f5f, // 56-63 + 0x00875f, 0x5f875f, 0x87875f, 0xaf875f, 0xd7875f, 0xff875f, 0x00af5f, 0x5faf5f, // 64-71 + 0x87af5f, 0xafaf5f, 0xd7af5f, 0xffaf5f, 0x00d75f, 0x5fd75f, 0x87d75f, 0xafd75f, // 72-79 + 0xd7d75f, 0xffd75f, 0x00ff5f, 0x5fff5f, 0x87ff5f, 0xafff5f, 0xd7ff5f, 0xffff5f, // 80-87 + 0x000087, 0x5f0087, 0x870087, 0xaf0087, 0xd70087, 0xff0087, 0x005f87, 0x5f5f87, // 88-95 + 0x875f87, 0xaf5f87, 0xd75f87, 0xff5f87, 0x008787, 0x5f8787, 0x878787, 0xaf8787, // 96-103 + 0xd78787, 0xff8787, 0x00af87, 0x5faf87, 0x87af87, 0xafaf87, 0xd7af87, 0xffaf87, // 104-111 + 0x00d787, 0x5fd787, 0x87d787, 0xafd787, 0xd7d787, 0xffd787, 0x00ff87, 0x5fff87, // 112-119 + 0x87ff87, 0xafff87, 0xd7ff87, 0xffff87, 0x0000af, 0x5f00af, 0x8700af, 0xaf00af, // 120-127 + 0xd700af, 0xff00af, 0x005faf, 0x5f5faf, 0x875faf, 0xaf5faf, 0xd75faf, 0xff5faf, // 128-135 + 0x0087af, 0x5f87af, 0x8787af, 0xaf87af, 0xd787af, 0xff87af, 0x00afaf, 0x5fafaf, // 136-143 + 0x87afaf, 0xafafaf, 0xd7afaf, 0xffafaf, 0x00d7af, 0x5fd7af, 0x87d7af, 0xafd7af, // 144-151 + 0xd7d7af, 0xffd7af, 0x00ffaf, 0x5fffaf, 0x87ffaf, 0xafffaf, 0xd7ffaf, 0xffffaf, // 152-159 + 0x0000d7, 0x5f00d7, 0x8700d7, 0xaf00d7, 0xd700d7, 0xff00d7, 0x005fd7, 0x5f5fd7, // 160-167 + 0x875fd7, 0xaf5fd7, 0xd75fd7, 0xff5fd7, 0x0087d7, 0x5f87d7, 0x8787d7, 0xaf87d7, // 168-175 + 0xd787d7, 0xff87d7, 0x00afd7, 0x5fafd7, 0x87afd7, 0xafafd7, 0xd7afd7, 0xffafd7, // 176-183 + 0x00d7d7, 0x5fd7d7, 0x87d7d7, 0xafd7d7, 0xd7d7d7, 0xffd7d7, 0x00ffd7, 0x5fffd7, // 184-191 + 0x87ffd7, 0xafffd7, 0xd7ffd7, 0xffffd7, 0x0000ff, 0x5f00ff, 0x8700ff, 0xaf00ff, // 192-199 + 0xd700ff, 0xff00ff, 0x005fff, 0x5f5fff, 0x875fff, 0xaf5fff, 0xd75fff, 0xff5fff, // 200-207 + 0x0087ff, 0x5f87ff, 0x8787ff, 0xaf87ff, 0xd787ff, 0xff87ff, 0x00afff, 0x5fafff, // 208-215 + 0x87afff, 0xafafff, 0xd7afff, 0xffafff, 0x00d7ff, 0x5fd7ff, 0x87d7ff, 0xafd7ff, // 216-223 + 0xd7d7ff, 0xffd7ff, 0x00ffff, 0x5fffff, 0x87ffff, 0xafffff, 0xd7ffff, 0xffffff, // 224-231 + 0x080808, 0x121212, 0x1c1c1c, 0x262626, 0x303030, 0x3a3a3a, 0x444444, 0x4e4e4e, // 232-239 + 0x585858, 0x626262, 0x6c6c6c, 0x767676, 0x808080, 0x8a8a8a, 0x949494, 0x9e9e9e, // 240-247 + 0xa8a8a8, 0xb2b2b2, 0xbcbcbc, 0xc6c6c6, 0xd0d0d0, 0xdadada, 0xe4e4e4, 0xeeeeee, // 248-255 +}; + +/* +* Reset foreground color to default +*/ +static void psvDebugScreenResetFgColor(void) { + colors.fgTrueColorFlag = 0; + colors.fgTrueColor = 0; + colors.fgIndex = colors.fgIndexDefault; + colors.fgIntensity = colors.fgIntensityDefault; +} + +/* +* Reset background color to default +*/ +static void psvDebugScreenResetBgColor(void) { + colors.bgTrueColorFlag = 0; + colors.bgTrueColor = 0; + colors.bgIndex = colors.bgIndexDefault; + colors.bgIntensity = colors.bgIntensityDefault; +} + +/* +* Reset inversion state to default +*/ +static void psvDebugScreenResetInversion(void) { + colors.inversion = colors.inversionDefault; +} + +/* +* Determine colors according to current color state +*/ +static void psvDebugScreenSetColors(void) { + uint32_t *color_fg, *color_bg; + + // special case: inversion + if (!colors.inversion) { + color_fg = &colors.color_fg; + color_bg = &colors.color_bg; + } else { + color_fg = &colors.color_bg; + color_bg = &colors.color_fg; + } + + // foregound color + if ((colors.fgIndex<=7) && (colors.fgIntensity==1)) { // ANSI palette with increased intensity + colors.fgIndex |= 0x8; + } else if ((colors.fgIndex<=15) && (colors.fgIntensity!=1)) { // ANSI palette with standard/decreased intensity + colors.fgIndex &= 0x7; + } + if (colors.fgTrueColorFlag) { + *color_fg = colors.fgTrueColor; + } else { + if ((colors.fgIndex<=7) && (colors.fgIntensity==2)) { // "ANSI" palette with decreased intensity + *color_fg = DARK_COLORS_BGR[colors.fgIndex]; + } else { // ANSI/VTERM/GREYSCALE palette + *color_fg = ANSI_COLORS_BGR[colors.fgIndex]; + } + } + *color_fg |= 0xFF000000; // opaque + + // backgound color + if ((colors.bgIndex<=7) && (colors.bgIntensity==1)) { // ANSI palette with increased intensity + colors.bgIndex |= 0x8; + } else if ((colors.bgIndex<=15) && (colors.bgIntensity!=1)) { // ANSI palette with standard/decreased intensity + colors.bgIndex &= 0x7; + } + if (colors.bgTrueColorFlag) { + *color_bg = colors.bgTrueColor; + } else { + if ((colors.bgIndex<=7) && (colors.bgIntensity==2)) { // "ANSI" palette with decreased intensity + *color_bg = DARK_COLORS_BGR[colors.bgIndex]; + } else { // ANSI/VTERM/GREYSCALE palette + *color_bg = ANSI_COLORS_BGR[colors.bgIndex]; + } + } + *color_bg |= 0xFF000000; // opaque +} + +/* +* Parse CSI sequences +*/ +static size_t psvDebugScreenEscape(const unsigned char *str) { + unsigned int i, argc, arg[32] = { 0 }; + unsigned int c; + uint32_t unit, mode; + int *colorTrueColorFlag; + uint32_t *colorTrueColor; + unsigned char *colorIndex, *colorIntensity; + for (i = 0, argc = 0; (argc < (sizeof(arg)/sizeof(*arg))) && (str[i] != '\0'); i++) { + switch (str[i]) { + // numeric char + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + arg[argc] = (arg[argc] * 10) + (str[i] - '0'); + continue; + // argument separator + case ';': argc++; continue; + // CSI commands + // save/restore position + case 's': + if (arg[0]size_h; return i; + case 'B': coordY += arg[0] * (F)->size_h; return i; + case 'C': coordX += arg[0] * (F)->size_w; return i; + case 'D': coordX -= arg[0] * (F)->size_w; return i; + // cursor movement to beginning of next/previous line(s) + case 'E': coordY += arg[0] * (F)->size_h; coordX = 0; return i; + case 'F': coordY -= arg[0] * (F)->size_h; coordX = 0; return i; + // cursor positioning + case 'G': coordX = (arg[0]-1) * (F)->size_w; return i; + case 'H': + case 'f': + coordY = (arg[0]-1) * (F)->size_h; + coordX = (arg[1]-1) * (F)->size_w; + return i; + // clear part of "J"=screen or "K"=Line, so J code re-uses part of K + case 'J': + case 'K': + if (arg[0]==0) { // from cursor to end of line/screen + CLEARSCRNBLOCK(coordY, coordY + (F)->size_h, coordX, (SCREEN_WIDTH), colors.color_bg); // line + if (str[i]=='J') CLEARSCRNLINES(coordY + (F)->size_h, (SCREEN_HEIGHT), colors.color_bg); // screen + } else if (arg[0]==1) { // from beginning of line/screen to cursor + CLEARSCRNBLOCK(coordY, coordY + (F)->size_h, 0, coordX, colors.color_bg); // line + if (str[i]=='J') CLEARSCRNLINES(0, coordY, colors.color_bg); // screen + } else if (arg[0]==2) { // whole line/screen + if (str[i]=='K') CLEARSCRNLINES(coordY, coordY + (F)->size_h, colors.color_bg) // line + else if (str[i]=='J') CLEARSCRNLINES(0, (SCREEN_HEIGHT), colors.color_bg); // screen + } + return i; + // color + case 'm': + for (c = 0; c <= argc; c++) { + switch (arg[c]) { + // reset all + case 0: + psvDebugScreenResetFgColor(); + psvDebugScreenResetBgColor(); + psvDebugScreenResetInversion(); + continue; + break; + // intensity + case 1: // increased = "bright" color + case 2: // decreased = "dark" color + case 22: // standard = "normal" color + colors.fgIntensity = arg[c]; + continue; + break; + // inversion + case 7: // enable + colors.inversion = 1; + continue; + break; + case 27: // disable + colors.inversion = 0; + continue; + break; + // set from color map or truecolor + case 38: // foreground color + case 48: // background color + mode = arg[c] / 10; + colorTrueColorFlag = mode&1 ? &colors.fgTrueColorFlag : &colors.bgTrueColorFlag; + if (arg[c+1]==5) { // 8-bit: [0-15][16-231][232-255] color map + *colorTrueColorFlag = 0; + colorIndex = mode&1 ? &colors.fgIndex : &colors.bgIndex; + *colorIndex = arg[c+2] & 0xFF; + colorIntensity = mode&1 ? &colors.fgIntensity : &colors.bgIntensity; + *colorIntensity = ((*colorIndex>=8) && (*colorIndex<=15)) ? 1 : 22; + c+=2; // extra arguments + } else if (arg[c+1]==2) { // 24-bit color space + *colorTrueColorFlag = 1; + colorTrueColor = mode&1 ? &colors.fgTrueColor : &colors.bgTrueColor; + *colorTrueColor = FROM_FULL_RGB(arg[c+2], arg[c+3], arg[c+4]); + c+=4; // extra arguments + } + continue; + break; + // default color + case 39: // foreground color + psvDebugScreenResetFgColor(); + continue; + break; + case 49: // background color + psvDebugScreenResetBgColor(); + continue; + break; + // custom color reset + default: + // ANSI colors (30-37, 40-47, 90-97, 100-107) + mode = arg[c] / 10; + if ((mode!=3) && (mode!=4) && (mode!=9) && (mode!=10)) continue; // skip unsupported modes + unit = arg[c] % 10; + if (unit>7) continue; // skip unsupported modes + colorTrueColorFlag = mode&1 ? &colors.fgTrueColorFlag : &colors.bgTrueColorFlag; + *colorTrueColorFlag = 0; + colorIndex = mode&1 ? &colors.fgIndex : &colors.bgIndex; + *colorIndex = unit; + colorIntensity = mode&1 ? &colors.fgIntensity : &colors.bgIntensity; + *colorIntensity = mode&8 ? 1 : 22; + break; + } + } + psvDebugScreenSetColors(); + return i; + } + } + return 0; +} + +/* +* Initialize debug screen +*/ +int psvDebugScreenInit() { + psvDebugScreenResetFgColor(); + psvDebugScreenResetBgColor(); + psvDebugScreenResetInversion(); + psvDebugScreenSetColors(); + +#ifdef NO_psvDebugScreenInit + psvDebugScreenInitReplacement(); + initialized = 1; + return 0; // avoid linking non-initializer (prx) with sceDisplay/sceMemory +#else + mutex = sceKernelCreateMutex("log_mutex", 0, 0, NULL); + displayblock = sceKernelAllocMemBlock("display", SCE_KERNEL_MEMBLOCK_TYPE_USER_CDRAM_RW, (SCREEN_FB_SIZE), NULL); + if (displayblock < 0) + return displayblock; + sceKernelGetMemBlockBase(displayblock, (void**)&base); + SceDisplayFrameBuf frame = { sizeof(frame), base, (SCREEN_FB_WIDTH), 0, (SCREEN_WIDTH), (SCREEN_HEIGHT) }; + initialized = 1; + return sceDisplaySetFrameBuf(&frame, SCE_DISPLAY_SETBUF_NEXTFRAME); +#endif +} + +/* +* Finalize debug screen +*/ +int psvDebugScreenFinish() { + if (!initialized) + return -1; + + initialized = 0; + +#ifdef NO_psvDebugScreenInit + return 0; +#else + sceKernelDeleteMutex(mutex); + sceDisplaySetFrameBuf(NULL, SCE_DISPLAY_SETBUF_IMMEDIATE); + return sceKernelFreeMemBlock(displayblock); +#endif +} + +__attribute__((destructor)) static void psvDebugScreenDestructor() { + psvDebugScreenFinish(); +} + +/* +* Draw text onto debug screen +*/ +int psvDebugScreenPuts(const char * _text) { + const unsigned char*text = (const unsigned char*)_text; + int c; + unsigned char t; + unsigned char drawDummy; + // + uint32_t *vram; + int bits_per_glyph = ((F)->width * (F)->height); + int bitmap_offset; + unsigned char *font; + int row; + int max_row; + int col; + unsigned char mask; + uint32_t *pixel; + + sceKernelLockMutex(mutex, 1, NULL); + for (c = 0; text[c] ; c++) { + t = text[c]; + // handle CSI sequence + if ((t == '\e') && (text[c+1] == '[')) { + c += psvDebugScreenEscape(text + c + 2) + 2; + if (coordX < 0) coordX = 0; // CSI position are 1-based, + if (coordY < 0) coordY = 0; // prevent 0-based coordinate from producing a negative X/Y + continue; + } + // handle non-printable characters #1 (line-dependent codes) + if (t == '\n') { + coordX = 0; + coordY += (F)->size_h; + continue; + } + if (t == '\r') { + coordX = 0; + continue; + } + // check if glyph fits in line + if ((coordX + (F)->width) > (SCREEN_WIDTH)) { + coordY += (F)->size_h; + coordX = 0; + } + // check if glyph fits in screen + if ((coordY + (F)->height) > (SCREEN_HEIGHT)) { + coordX = coordY = 0; + } + // handle non-printable characters #2 + if (t == '\t') { + coordX += (SCREEN_TAB_W) - (coordX % (SCREEN_TAB_W)); + continue; + } + + // draw glyph or dummy glyph (dotted line in the middle) + // works also with not byte-aligned glyphs + vram = ((uint32_t*)base) + coordX + (coordY * (SCREEN_FB_WIDTH)); + row = 0; + // check if glyph is available in font + if ((t > (F)->last) || (t < (F)->first)) { + drawDummy = 1; + bitmap_offset = 0; + font = NULL; + mask = 1 << 7; + } else { + drawDummy = 0; + bitmap_offset = (t - (F)->first) * bits_per_glyph; + font = &(F)->glyphs[ (bitmap_offset / 8) ]; + mask = 1 << 7; + for (col = (bitmap_offset % 8); col > 0; col--, mask >>= 1); + } + // special case: dummy glyph, clear to middle height + max_row = 0; + if (drawDummy) { + max_row = (F)->height / 2; + for (; row < max_row; row++, vram += (SCREEN_FB_WIDTH)) { + pixel = vram; + col = 0; + for (; col < (F)->size_w ; col++) { + *pixel++ = colors.color_bg; + } + } + } + // draw font glyph or dummy glyph + if (drawDummy) { + max_row++; + if (max_row > (F)->height) max_row = (F)->height; + } else { + max_row = (F)->height; + } + for (; row < max_row; row++, vram += (SCREEN_FB_WIDTH)) { + pixel = vram; + col = 0; + for (; col < (F)->width ; col++, mask >>= 1) { + if (drawDummy) { + *pixel++ = (col&1) ? colors.color_fg : colors.color_bg; + } else { + if (!mask) { font++; mask = 1 << 7; } // no more bits: we exhausted this byte + *pixel++ = (*font&mask) ? colors.color_fg : colors.color_bg; + } + } + // right margin + for (; col < (F)->size_w ; col++) + *pixel++ = colors.color_bg; + } + // draw bottom margin + max_row = (F)->size_h; + for (; row < (F)->size_h; row++, vram += (SCREEN_FB_WIDTH)) + for (pixel = vram, col = 0; col < (F)->size_w ; col++) + *pixel++ = colors.color_bg; + // advance X position + coordX += (F)->size_w; + } + sceKernelUnlockMutex(mutex, 1); + return c; +} + + +/* +* Printf text onto debug screen +*/ +__attribute__((__format__ (__printf__, 1, 2))) +int psvDebugScreenPrintf(const char *format, ...) { + char buf[4096]; + + va_list opt; + va_start(opt, format); + int ret = vsnprintf(buf, sizeof(buf), format, opt); + psvDebugScreenPuts(buf); + va_end(opt); + + return ret; +} + +/* +* Return copy of color state +*/ +void psvDebugScreenGetColorStateCopy(ColorState *copy) { + if (copy) { + memcpy(copy, &colors, sizeof(ColorState)); + CONVERT_RGB_BGR(copy->fgTrueColor); + CONVERT_RGB_BGR(copy->bgTrueColor); + CONVERT_RGB_BGR(copy->color_fg); + CONVERT_RGB_BGR(copy->color_bg); + } +} + +/* +* Return copy of pixel coordinates +*/ +void psvDebugScreenGetCoordsXY(int *x, int *y) { + if (x) *x = coordX; + if (y) *y = coordY; +} + +/* +* Set pixel coordinates +*/ +void psvDebugScreenSetCoordsXY(int *x, int *y) { + if (x) { + coordX = *x; + if (coordX < 0) coordX = 0; + } + if (y) { + coordY = *y; + if (coordY < 0) coordY = 0; + } +} + +/* +* Return pointer to current font +*/ +PsvDebugScreenFont *psvDebugScreenGetFont(void) { + return F; +} + +/* +* Set font +*/ +PsvDebugScreenFont *psvDebugScreenSetFont(PsvDebugScreenFont *font) { + if ((font) && (font->glyphs)) F = font; + return F; +} + +/* +* Return scaled-by-2 copy of font +*/ +PsvDebugScreenFont *psvDebugScreenScaleFont2x(PsvDebugScreenFont *source_font) { + // works also with not byte-aligned glyphs + PsvDebugScreenFont *target_font; + size_t size; + size_t align; + int glyph; + int row; + int col; + int count; + unsigned char *source_bitmap; + unsigned char source_mask; + unsigned char *target_bitmap, *target_bitmap2; + unsigned char target_mask, target_mask2; + int target_next_row_bytes, target_next_row_bits; + unsigned char pixel; + + if (!source_font) return NULL; + + // allocate target structure and bitmap + target_font = (PsvDebugScreenFont *)malloc(sizeof(PsvDebugScreenFont)); + memset(target_font, 0, sizeof(PsvDebugScreenFont)); + // copy and scale meta information + target_font->width = 2 * source_font->width; + target_font->height = 2 * source_font->height; + target_font->first = source_font->first; + target_font->last = source_font->last; + target_font->size_w = 2 * source_font->size_w; + target_font->size_h = 2 * source_font->size_h; + + // calculate size of target bitmap + size = target_font->width * target_font->height * (target_font->last - target_font->first + 1); + if (size <= 0) { + free(target_font); + return NULL; + } + align = size % 8; + size /= 8; + if (align) size++; + + // allocate and initialize target bitmap + target_font->glyphs = (unsigned char *)malloc(size); + memset(target_font->glyphs, 0, size); + + // scale source bitmap and store in target bitmap + source_bitmap = source_font->glyphs; + source_mask = 1 << 7; + // + target_bitmap = target_font->glyphs; + target_mask = 1 << 7; + target_next_row_bytes = target_font->width / 8; + target_next_row_bits = target_font->width % 8; + // + for (glyph = source_font->first; glyph <= source_font->last; glyph++) { + for (row = source_font->height; row > 0; row--) { + // Find beginning of next target row + target_bitmap2 = target_bitmap + target_next_row_bytes; // advance full bytes + target_mask2 = target_mask; // advance remaining bits + for (col = target_next_row_bits; col > 0; col--, target_mask2 >>= 1) { + if (!target_mask2) { target_bitmap2++; target_mask2 = 1 << 7; } // no more bits: we advance to the next target byte + } + // Get pixel from source bitmap + for (col = source_font->width; col > 0; col--, source_mask >>= 1) { + if (!source_mask) { source_bitmap++; source_mask = 1 << 7; } // no more bits: we advance to the next source byte + pixel = *source_bitmap & source_mask; + // Put pixels into target bitmap + for (count = 2; count > 0; count--) { + // duplicate column in origial row + if (!target_mask) { target_bitmap++; target_mask = 1 << 7; } // no more bits: we advance to the next target byte + if (pixel) *target_bitmap |= target_mask; + target_mask >>= 1; + // duplicate column in duplicated row + if (!target_mask2) { target_bitmap2++; target_mask2 = 1 << 7; } // no more bits: we advance to the next target byte + if (pixel) *target_bitmap2 |= target_mask2; + target_mask2 >>= 1; + } + } + // Next target row is directly behind duplicated row + target_bitmap = target_bitmap2; + target_mask = target_mask2; + } + } + + return target_font; +} + +#undef SCREEN_TAB_W +#undef F + +#endif diff --git a/src/lib/debug/debugScreen.h b/src/lib/debug/debugScreen.h new file mode 100644 index 0000000..2db1354 --- /dev/null +++ b/src/lib/debug/debugScreen.h @@ -0,0 +1,59 @@ +#ifndef DEBUG_SCREEN_H +#define DEBUG_SCREEN_H + +#include "debugScreen_custom.h" + +typedef struct ColorState { + int fgTrueColorFlag; // flag if truecolors or ANSI/VTERM/GREYSCALE colors are used + int bgTrueColorFlag; // flag if truecolors or ANSI/VTERM/GREYSCALE colors are used + // truecolors + uint32_t fgTrueColor; // color in RGB (internal BGR) + uint32_t bgTrueColor; // color in RGB (internal BGR) + // ANSI/VTERM/GREYSCALE colors + unsigned char fgIndex; // ANSI/VTERM/GREYSCALE color code (0-255) + unsigned char fgIntensity; // 22=normal, 1=increased ("bright"), 2=decreased ("dark") + unsigned char bgIndex; // ANSI/VTERM/GREYSCALE color code (0-255) + unsigned char bgIntensity; // 22=normal, 1=increased ("bright") + int inversion; // flag if bg/fg colors are inverted + + // default colors (ANSI/VTERM/GREYSCALE) + unsigned char fgIndexDefault; // default ANSI/VTERM/GREYSCALE color code + unsigned char fgIntensityDefault; // 22=normal, 1=increased, 2=decreased + unsigned char bgIndexDefault; // default ANSI/VTERM/GREYSCALE color code + unsigned char bgIntensityDefault; // 22=normal, 1=increased + int inversionDefault; // flag if bg/fg colors are inverted + + // current colors (e.g. inverted) + uint32_t color_fg; // color in RGB (internal BGR) + uint32_t color_bg; // color in RGB (internal BGR) +} ColorState; + +typedef struct PsvDebugScreenFont { + unsigned char *glyphs, width, height, first, last, size_w, size_h; // only values 0-255 +} PsvDebugScreenFont; + +#define SCREEN_WIDTH (960) // screen resolution x +#define SCREEN_HEIGHT (544) // screen resolution y + +#ifdef DEBUG_SCREEN_CODE_INCLUDE // not recommended for your own projects, but for sake of backward compatibility +#include "debugScreen.c" +#else +#ifdef __cplusplus +extern "C" { +#endif +int psvDebugScreenInit(); +int psvDebugScreenFinish(); +int psvDebugScreenPuts(const char * _text); +int psvDebugScreenPrintf(const char *format, ...); +void psvDebugScreenGetColorStateCopy(ColorState *copy); +void psvDebugScreenGetCoordsXY(int *x, int *y); +void psvDebugScreenSetCoordsXY(int *x, int *y); +PsvDebugScreenFont *psvDebugScreenGetFont(void); +PsvDebugScreenFont *psvDebugScreenSetFont(PsvDebugScreenFont *font); +PsvDebugScreenFont *psvDebugScreenScaleFont2x(PsvDebugScreenFont *source_font); +#ifdef __cplusplus +} +#endif +#endif + +#endif /* DEBUG_SCREEN_H */ diff --git a/src/lib/debug/debugScreenFont.c b/src/lib/debug/debugScreenFont.c new file mode 100644 index 0000000..d528d32 --- /dev/null +++ b/src/lib/debug/debugScreenFont.c @@ -0,0 +1,144 @@ +/* + * PSP Software Development Kit - http://www.pspdev.org + * ----------------------------------------------------------------------- + * Licensed under the BSD license, see LICENSE in PSPSDK root for details. + * + * font.c - Debug Font. + * + * Copyright (c) 2005 Marcus R. Brown + * Copyright (c) 2005 James Forshaw + * Copyright (c) 2005 John Kelley + * + * $Id: font.c 540 2005-07-08 19:35:10Z warren $ + */ + +PsvDebugScreenFont psvDebugScreenFont = { glyphs:(unsigned char*) +"\x00\x00\x00\x00\x00\x00\x00\x00\x3c\x42\xa5\x81\xa5\x99\x42\x3c" +"\x3c\x7e\xdb\xff\xff\xdb\x66\x3c\x6c\xfe\xfe\xfe\x7c\x38\x10\x00" +"\x10\x38\x7c\xfe\x7c\x38\x10\x00\x10\x38\x54\xfe\x54\x10\x38\x00" +"\x10\x38\x7c\xfe\xfe\x10\x38\x00\x00\x00\x00\x30\x30\x00\x00\x00" +"\xff\xff\xff\xe7\xe7\xff\xff\xff\x38\x44\x82\x82\x82\x44\x38\x00" +"\xc7\xbb\x7d\x7d\x7d\xbb\xc7\xff\x0f\x03\x05\x79\x88\x88\x88\x70" +"\x38\x44\x44\x44\x38\x10\x7c\x10\x30\x28\x24\x24\x28\x20\xe0\xc0" +"\x3c\x24\x3c\x24\x24\xe4\xdc\x18\x10\x54\x38\xee\x38\x54\x10\x00" +"\x10\x10\x10\x7c\x10\x10\x10\x10\x10\x10\x10\xff\x00\x00\x00\x00" +"\x00\x00\x00\xff\x10\x10\x10\x10\x10\x10\x10\xf0\x10\x10\x10\x10" +"\x10\x10\x10\x1f\x10\x10\x10\x10\x10\x10\x10\xff\x10\x10\x10\x10" +"\x10\x10\x10\x10\x10\x10\x10\x10\x00\x00\x00\xff\x00\x00\x00\x00" +"\x00\x00\x00\x1f\x10\x10\x10\x10\x00\x00\x00\xf0\x10\x10\x10\x10" +"\x10\x10\x10\x1f\x00\x00\x00\x00\x10\x10\x10\xf0\x00\x00\x00\x00" +"\x81\x42\x24\x18\x18\x24\x42\x81\x01\x02\x04\x08\x10\x20\x40\x80" +"\x80\x40\x20\x10\x08\x04\x02\x01\x00\x10\x10\xff\x10\x10\x00\x00" +"\x00\x00\x00\x00\x00\x00\x00\x00\x20\x20\x20\x20\x00\x00\x20\x00" +"\x50\x50\x50\x00\x00\x00\x00\x00\x50\x50\xf8\x50\xf8\x50\x50\x00" +"\x20\x78\xa0\x70\x28\xf0\x20\x00\xc0\xc8\x10\x20\x40\x98\x18\x00" +"\x40\xa0\x40\xa8\x90\x98\x60\x00\x10\x20\x40\x00\x00\x00\x00\x00" +"\x10\x20\x40\x40\x40\x20\x10\x00\x40\x20\x10\x10\x10\x20\x40\x00" +"\x20\xa8\x70\x20\x70\xa8\x20\x00\x00\x20\x20\xf8\x20\x20\x00\x00" +"\x00\x00\x00\x00\x00\x20\x20\x40\x00\x00\x00\x78\x00\x00\x00\x00" +"\x00\x00\x00\x00\x00\x60\x60\x00\x00\x00\x08\x10\x20\x40\x80\x00" +"\x70\x88\x98\xa8\xc8\x88\x70\x00\x20\x60\xa0\x20\x20\x20\xf8\x00" +"\x70\x88\x08\x10\x60\x80\xf8\x00\x70\x88\x08\x30\x08\x88\x70\x00" +"\x10\x30\x50\x90\xf8\x10\x10\x00\xf8\x80\xe0\x10\x08\x10\xe0\x00" +"\x30\x40\x80\xf0\x88\x88\x70\x00\xf8\x88\x10\x20\x20\x20\x20\x00" +"\x70\x88\x88\x70\x88\x88\x70\x00\x70\x88\x88\x78\x08\x10\x60\x00" +"\x00\x00\x20\x00\x00\x20\x00\x00\x00\x00\x20\x00\x00\x20\x20\x40" +"\x18\x30\x60\xc0\x60\x30\x18\x00\x00\x00\xf8\x00\xf8\x00\x00\x00" +"\xc0\x60\x30\x18\x30\x60\xc0\x00\x70\x88\x08\x10\x20\x00\x20\x00" +"\x70\x88\x08\x68\xa8\xa8\x70\x00\x20\x50\x88\x88\xf8\x88\x88\x00" +"\xf0\x48\x48\x70\x48\x48\xf0\x00\x30\x48\x80\x80\x80\x48\x30\x00" +"\xe0\x50\x48\x48\x48\x50\xe0\x00\xf8\x80\x80\xf0\x80\x80\xf8\x00" +"\xf8\x80\x80\xf0\x80\x80\x80\x00\x70\x88\x80\xb8\x88\x88\x70\x00" +"\x88\x88\x88\xf8\x88\x88\x88\x00\x70\x20\x20\x20\x20\x20\x70\x00" +"\x38\x10\x10\x10\x90\x90\x60\x00\x88\x90\xa0\xc0\xa0\x90\x88\x00" +"\x80\x80\x80\x80\x80\x80\xf8\x00\x88\xd8\xa8\xa8\x88\x88\x88\x00" +"\x88\xc8\xc8\xa8\x98\x98\x88\x00\x70\x88\x88\x88\x88\x88\x70\x00" +"\xf0\x88\x88\xf0\x80\x80\x80\x00\x70\x88\x88\x88\xa8\x90\x68\x00" +"\xf0\x88\x88\xf0\xa0\x90\x88\x00\x70\x88\x80\x70\x08\x88\x70\x00" +"\xf8\x20\x20\x20\x20\x20\x20\x00\x88\x88\x88\x88\x88\x88\x70\x00" +"\x88\x88\x88\x88\x50\x50\x20\x00\x88\x88\x88\xa8\xa8\xd8\x88\x00" +"\x88\x88\x50\x20\x50\x88\x88\x00\x88\x88\x88\x70\x20\x20\x20\x00" +"\xf8\x08\x10\x20\x40\x80\xf8\x00\x70\x40\x40\x40\x40\x40\x70\x00" +"\x00\x00\x80\x40\x20\x10\x08\x00\x70\x10\x10\x10\x10\x10\x70\x00" +"\x20\x50\x88\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf8\x00" +"\x40\x20\x10\x00\x00\x00\x00\x00\x00\x00\x70\x08\x78\x88\x78\x00" +"\x80\x80\xb0\xc8\x88\xc8\xb0\x00\x00\x00\x70\x88\x80\x88\x70\x00" +"\x08\x08\x68\x98\x88\x98\x68\x00\x00\x00\x70\x88\xf8\x80\x70\x00" +"\x10\x28\x20\xf8\x20\x20\x20\x00\x00\x00\x68\x98\x98\x68\x08\x70" +"\x80\x80\xf0\x88\x88\x88\x88\x00\x20\x00\x60\x20\x20\x20\x70\x00" +"\x10\x00\x30\x10\x10\x10\x90\x60\x40\x40\x48\x50\x60\x50\x48\x00" +"\x60\x20\x20\x20\x20\x20\x70\x00\x00\x00\xd0\xa8\xa8\xa8\xa8\x00" +"\x00\x00\xb0\xc8\x88\x88\x88\x00\x00\x00\x70\x88\x88\x88\x70\x00" +"\x00\x00\xb0\xc8\xc8\xb0\x80\x80\x00\x00\x68\x98\x98\x68\x08\x08" +"\x00\x00\xb0\xc8\x80\x80\x80\x00\x00\x00\x78\x80\xf0\x08\xf0\x00" +"\x40\x40\xf0\x40\x40\x48\x30\x00\x00\x00\x90\x90\x90\x90\x68\x00" +"\x00\x00\x88\x88\x88\x50\x20\x00\x00\x00\x88\xa8\xa8\xa8\x50\x00" +"\x00\x00\x88\x50\x20\x50\x88\x00\x00\x00\x88\x88\x98\x68\x08\x70" +"\x00\x00\xf8\x10\x20\x40\xf8\x00\x18\x20\x20\x40\x20\x20\x18\x00" +"\x20\x20\x20\x00\x20\x20\x20\x00\xc0\x20\x20\x10\x20\x20\xc0\x00" +"\x40\xa8\x10\x00\x00\x00\x00\x00\x00\x00\x20\x50\xf8\x00\x00\x00" +"\x70\x88\x80\x80\x88\x70\x20\x60\x90\x00\x00\x90\x90\x90\x68\x00" +"\x10\x20\x70\x88\xf8\x80\x70\x00\x20\x50\x70\x08\x78\x88\x78\x00" +"\x48\x00\x70\x08\x78\x88\x78\x00\x20\x10\x70\x08\x78\x88\x78\x00" +"\x20\x00\x70\x08\x78\x88\x78\x00\x00\x70\x80\x80\x80\x70\x10\x60" +"\x20\x50\x70\x88\xf8\x80\x70\x00\x50\x00\x70\x88\xf8\x80\x70\x00" +"\x20\x10\x70\x88\xf8\x80\x70\x00\x50\x00\x00\x60\x20\x20\x70\x00" +"\x20\x50\x00\x60\x20\x20\x70\x00\x40\x20\x00\x60\x20\x20\x70\x00" +"\x50\x00\x20\x50\x88\xf8\x88\x00\x20\x00\x20\x50\x88\xf8\x88\x00" +"\x10\x20\xf8\x80\xf0\x80\xf8\x00\x00\x00\x6c\x12\x7e\x90\x6e\x00" +"\x3e\x50\x90\x9c\xf0\x90\x9e\x00\x60\x90\x00\x60\x90\x90\x60\x00" +"\x90\x00\x00\x60\x90\x90\x60\x00\x40\x20\x00\x60\x90\x90\x60\x00" +"\x40\xa0\x00\xa0\xa0\xa0\x50\x00\x40\x20\x00\xa0\xa0\xa0\x50\x00" +"\x90\x00\x90\x90\xb0\x50\x10\xe0\x50\x00\x70\x88\x88\x88\x70\x00" +"\x50\x00\x88\x88\x88\x88\x70\x00\x20\x20\x78\x80\x80\x78\x20\x20" +"\x18\x24\x20\xf8\x20\xe2\x5c\x00\x88\x50\x20\xf8\x20\xf8\x20\x00" +"\xc0\xa0\xa0\xc8\x9c\x88\x88\x8c\x18\x20\x20\xf8\x20\x20\x20\x40" +"\x10\x20\x70\x08\x78\x88\x78\x00\x10\x20\x00\x60\x20\x20\x70\x00" +"\x20\x40\x00\x60\x90\x90\x60\x00\x20\x40\x00\x90\x90\x90\x68\x00" +"\x50\xa0\x00\xa0\xd0\x90\x90\x00\x28\x50\x00\xc8\xa8\x98\x88\x00" +"\x00\x70\x08\x78\x88\x78\x00\xf8\x00\x60\x90\x90\x90\x60\x00\xf0" +"\x20\x00\x20\x40\x80\x88\x70\x00\x00\x00\x00\xf8\x80\x80\x00\x00" +"\x00\x00\x00\xf8\x08\x08\x00\x00\x84\x88\x90\xa8\x54\x84\x08\x1c" +"\x84\x88\x90\xa8\x58\xa8\x3c\x08\x20\x00\x00\x20\x20\x20\x20\x00" +"\x00\x00\x24\x48\x90\x48\x24\x00\x00\x00\x90\x48\x24\x48\x90\x00" +"\x28\x50\x20\x50\x88\xf8\x88\x00\x28\x50\x70\x08\x78\x88\x78\x00" +"\x28\x50\x00\x70\x20\x20\x70\x00\x28\x50\x00\x20\x20\x20\x70\x00" +"\x28\x50\x00\x70\x88\x88\x70\x00\x50\xa0\x00\x60\x90\x90\x60\x00" +"\x28\x50\x00\x88\x88\x88\x70\x00\x50\xa0\x00\xa0\xa0\xa0\x50\x00" +"\xfc\x48\x48\x48\xe8\x08\x50\x20\x00\x50\x00\x50\x50\x50\x10\x20" +"\xc0\x44\xc8\x54\xec\x54\x9e\x04\x10\xa8\x40\x00\x00\x00\x00\x00" +"\x00\x20\x50\x88\x50\x20\x00\x00\x88\x10\x20\x40\x80\x28\x00\x00" +"\x7c\xa8\xa8\x68\x28\x28\x28\x00\x38\x40\x30\x48\x48\x30\x08\x70" +"\x00\x00\x00\x00\x00\x00\xff\xff\xf0\xf0\xf0\xf0\x0f\x0f\x0f\x0f" +"\x00\x00\xff\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00" +"\x00\x00\x00\x3c\x3c\x00\x00\x00\xff\xff\xff\xff\xff\xff\x00\x00" +"\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\x0f\x0f\x0f\x0f\xf0\xf0\xf0\xf0" +"\xfc\xfc\xfc\xfc\xfc\xfc\xfc\xfc\x03\x03\x03\x03\x03\x03\x03\x03" +"\x3f\x3f\x3f\x3f\x3f\x3f\x3f\x3f\x11\x22\x44\x88\x11\x22\x44\x88" +"\x88\x44\x22\x11\x88\x44\x22\x11\xfe\x7c\x38\x10\x00\x00\x00\x00" +"\x00\x00\x00\x00\x10\x38\x7c\xfe\x80\xc0\xe0\xf0\xe0\xc0\x80\x00" +"\x01\x03\x07\x0f\x07\x03\x01\x00\xff\x7e\x3c\x18\x18\x3c\x7e\xff" +"\x81\xc3\xe7\xff\xff\xe7\xc3\x81\xf0\xf0\xf0\xf0\x00\x00\x00\x00" +"\x00\x00\x00\x00\x0f\x0f\x0f\x0f\x0f\x0f\x0f\x0f\x00\x00\x00\x00" +"\x00\x00\x00\x00\xf0\xf0\xf0\xf0\x33\x33\xcc\xcc\x33\x33\xcc\xcc" +"\x00\x20\x20\x50\x50\x88\xf8\x00\x20\x20\x70\x20\x70\x20\x20\x00" +"\x00\x00\x00\x50\x88\xa8\x50\x00\xff\xff\xff\xff\xff\xff\xff\xff" +"\x00\x00\x00\x00\xff\xff\xff\xff\xf0\xf0\xf0\xf0\xf0\xf0\xf0\xf0" +"\x0f\x0f\x0f\x0f\x0f\x0f\x0f\x0f\xff\xff\xff\xff\x00\x00\x00\x00" +"\x00\x00\x68\x90\x90\x90\x68\x00\x30\x48\x48\x70\x48\x48\x70\xc0" +"\xf8\x88\x80\x80\x80\x80\x80\x00\xf8\x50\x50\x50\x50\x50\x98\x00" +"\xf8\x88\x40\x20\x40\x88\xf8\x00\x00\x00\x78\x90\x90\x90\x60\x00" +"\x00\x50\x50\x50\x50\x68\x80\x80\x00\x50\xa0\x20\x20\x20\x20\x00" +"\xf8\x20\x70\xa8\xa8\x70\x20\xf8\x20\x50\x88\xf8\x88\x50\x20\x00" +"\x70\x88\x88\x88\x50\x50\xd8\x00\x30\x40\x40\x20\x50\x50\x50\x20" +"\x00\x00\x00\x50\xa8\xa8\x50\x00\x08\x70\xa8\xa8\xa8\x70\x80\x00" +"\x38\x40\x80\xf8\x80\x40\x38\x00\x70\x88\x88\x88\x88\x88\x88\x00" +"\x00\xf8\x00\xf8\x00\xf8\x00\x00\x20\x20\xf8\x20\x20\x00\xf8\x00" +"\xc0\x30\x08\x30\xc0\x00\xf8\x00\x18\x60\x80\x60\x18\x00\xf8\x00" +"\x10\x28\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\xa0\x40" +"\x00\x20\x00\xf8\x00\x20\x00\x00\x00\x50\xa0\x00\x50\xa0\x00\x00" +"\x00\x18\x24\x24\x18\x00\x00\x00\x00\x30\x78\x78\x30\x00\x00\x00" +"\x00\x00\x00\x00\x30\x00\x00\x00\x3e\x20\x20\x20\xa0\x60\x20\x00" +"\xa0\x50\x50\x50\x00\x00\x00\x00\x40\xa0\x20\x40\xe0\x00\x00\x00" +"\x00\x38\x38\x38\x38\x38\x38\x00\x00\x00\x00\x00\x00\x00\x00\x00", +width :8, height:8, first:0, last:255, size_w:8, size_h:8}; \ No newline at end of file diff --git a/src/lib/debug/debugScreen_custom.h b/src/lib/debug/debugScreen_custom.h new file mode 100644 index 0000000..0910028 --- /dev/null +++ b/src/lib/debug/debugScreen_custom.h @@ -0,0 +1,17 @@ +#ifndef DEBUG_SCREEN_CUSTOM_H +#define DEBUG_SCREEN_CUSTOM_H + +//#define SCREEN_TAB_SIZE (8) + +// backward compatibility for sources based on older Vita SDK versions +//#define DEBUG_SCREEN_CODE_INCLUDE // not recommended for your own projects, but for sake of backward compatibility +#define psvDebugScreenSetFgColor(rgb) psvDebugScreenPrintf("\e[38;2;%lu;%lu;%lum", ((uint32_t)(rgb)>>16)&0xFF, ((uint32_t)(rgb)>>8)&0xFF, (uint32_t)(rgb)&0xFF) +#define psvDebugScreenSetBgColor(rgb) psvDebugScreenPrintf("\e[48;2;%lu;%lu;%lum", ((uint32_t)(rgb)>>16)&0xFF, ((uint32_t)(rgb)>>8)&0xFF, (uint32_t)(rgb)&0xFF) +#define psvDebugScreenClear(rgb) psvDebugScreenSetBgColor(rgb); psvDebugScreenPuts("\e[H\e[2J") + +// custom changes for non-Vita builds +#ifndef __vita__ +#define psvDebugScreenInitReplacement(...) setvbuf(stdout,NULL,_IONBF,0) +#endif + +#endif diff --git a/src/lib/dr_flac.h b/src/lib/dr_flac.h new file mode 100644 index 0000000..a577882 --- /dev/null +++ b/src/lib/dr_flac.h @@ -0,0 +1,12659 @@ +/* +FLAC audio decoder. Choice of public domain or MIT-0. See license statements at the end of this file. +dr_flac - v0.13.2 - TBD + +David Reid - mackron@gmail.com + +GitHub: https://github.com/mackron/dr_libs +*/ + +/* +Introduction +============ +dr_flac is a single file library. To use it, do something like the following in one .c file. + + ```c + #define DR_FLAC_IMPLEMENTATION + #include "dr_flac.h" + ``` + +You can then #include this file in other parts of the program as you would with any other header file. To decode audio data, do something like the following: + + ```c + drflac* pFlac = drflac_open_file("MySong.flac", NULL); + if (pFlac == NULL) { + // Failed to open FLAC file + } + + drflac_int32* pSamples = malloc(pFlac->totalPCMFrameCount * pFlac->channels * sizeof(drflac_int32)); + drflac_uint64 numberOfInterleavedSamplesActuallyRead = drflac_read_pcm_frames_s32(pFlac, pFlac->totalPCMFrameCount, pSamples); + ``` + +The drflac object represents the decoder. It is a transparent type so all the information you need, such as the number of channels and the bits per sample, +should be directly accessible - just make sure you don't change their values. Samples are always output as interleaved signed 32-bit PCM. In the example above +a native FLAC stream was opened, however dr_flac has seamless support for Ogg encapsulated FLAC streams as well. + +You do not need to decode the entire stream in one go - you just specify how many samples you'd like at any given time and the decoder will give you as many +samples as it can, up to the amount requested. Later on when you need the next batch of samples, just call it again. Example: + + ```c + while (drflac_read_pcm_frames_s32(pFlac, chunkSizeInPCMFrames, pChunkSamples) > 0) { + do_something(); + } + ``` + +You can seek to a specific PCM frame with `drflac_seek_to_pcm_frame()`. + +If you just want to quickly decode an entire FLAC file in one go you can do something like this: + + ```c + unsigned int channels; + unsigned int sampleRate; + drflac_uint64 totalPCMFrameCount; + drflac_int32* pSampleData = drflac_open_file_and_read_pcm_frames_s32("MySong.flac", &channels, &sampleRate, &totalPCMFrameCount, NULL); + if (pSampleData == NULL) { + // Failed to open and decode FLAC file. + } + + ... + + drflac_free(pSampleData, NULL); + ``` + +You can read samples as signed 16-bit integer and 32-bit floating-point PCM with the *_s16() and *_f32() family of APIs respectively, but note that these +should be considered lossy. + + +If you need access to metadata (album art, etc.), use `drflac_open_with_metadata()`, `drflac_open_file_with_metdata()` or `drflac_open_memory_with_metadata()`. +The rationale for keeping these APIs separate is that they're slightly slower than the normal versions and also just a little bit harder to use. dr_flac +reports metadata to the application through the use of a callback, and every metadata block is reported before `drflac_open_with_metdata()` returns. + +The main opening APIs (`drflac_open()`, etc.) will fail if the header is not present. The presents a problem in certain scenarios such as broadcast style +streams or internet radio where the header may not be present because the user has started playback mid-stream. To handle this, use the relaxed APIs: + + `drflac_open_relaxed()` + `drflac_open_with_metadata_relaxed()` + +It is not recommended to use these APIs for file based streams because a missing header would usually indicate a corrupt or perverse file. In addition, these +APIs can take a long time to initialize because they may need to spend a lot of time finding the first frame. + + + +Build Options +============= +#define these options before including this file. + +#define DR_FLAC_NO_STDIO + Disable `drflac_open_file()` and family. + +#define DR_FLAC_NO_OGG + Disables support for Ogg/FLAC streams. + +#define DR_FLAC_BUFFER_SIZE + Defines the size of the internal buffer to store data from onRead(). This buffer is used to reduce the number of calls back to the client for more data. + Larger values means more memory, but better performance. My tests show diminishing returns after about 4KB (which is the default). Consider reducing this if + you have a very efficient implementation of onRead(), or increase it if it's very inefficient. Must be a multiple of 8. + +#define DR_FLAC_NO_CRC + Disables CRC checks. This will offer a performance boost when CRC is unnecessary. This will disable binary search seeking. When seeking, the seek table will + be used if available. Otherwise the seek will be performed using brute force. + +#define DR_FLAC_NO_SIMD + Disables SIMD optimizations (SSE on x86/x64 architectures, NEON on ARM architectures). Use this if you are having compatibility issues with your compiler. + +#define DR_FLAC_NO_WCHAR + Disables all functions ending with `_w`. Use this if your compiler does not provide wchar.h. Not required if DR_FLAC_NO_STDIO is also defined. + + + +Notes +===== +- dr_flac does not support changing the sample rate nor channel count mid stream. +- dr_flac is not thread-safe, but its APIs can be called from any thread so long as you do your own synchronization. +- When using Ogg encapsulation, a corrupted metadata block will result in `drflac_open_with_metadata()` and `drflac_open()` returning inconsistent samples due + to differences in corrupted stream recorvery logic between the two APIs. +*/ + +#ifndef dr_flac_h +#define dr_flac_h + +#ifdef __cplusplus +extern "C" { +#endif + +#define DRFLAC_STRINGIFY(x) #x +#define DRFLAC_XSTRINGIFY(x) DRFLAC_STRINGIFY(x) + +#define DRFLAC_VERSION_MAJOR 0 +#define DRFLAC_VERSION_MINOR 13 +#define DRFLAC_VERSION_REVISION 2 +#define DRFLAC_VERSION_STRING DRFLAC_XSTRINGIFY(DRFLAC_VERSION_MAJOR) "." DRFLAC_XSTRINGIFY(DRFLAC_VERSION_MINOR) "." DRFLAC_XSTRINGIFY(DRFLAC_VERSION_REVISION) + +#include /* For size_t. */ + +/* Sized Types */ +typedef signed char drflac_int8; +typedef unsigned char drflac_uint8; +typedef signed short drflac_int16; +typedef unsigned short drflac_uint16; +typedef signed int drflac_int32; +typedef unsigned int drflac_uint32; +#if defined(_MSC_VER) && !defined(__clang__) + typedef signed __int64 drflac_int64; + typedef unsigned __int64 drflac_uint64; +#else + #if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))) + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wlong-long" + #if defined(__clang__) + #pragma GCC diagnostic ignored "-Wc++11-long-long" + #endif + #endif + typedef signed long long drflac_int64; + typedef unsigned long long drflac_uint64; + #if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))) + #pragma GCC diagnostic pop + #endif +#endif +#if defined(__LP64__) || defined(_WIN64) || (defined(__x86_64__) && !defined(__ILP32__)) || defined(_M_X64) || defined(__ia64) || defined(_M_IA64) || defined(__aarch64__) || defined(_M_ARM64) || defined(__powerpc64__) + typedef drflac_uint64 drflac_uintptr; +#else + typedef drflac_uint32 drflac_uintptr; +#endif +typedef drflac_uint8 drflac_bool8; +typedef drflac_uint32 drflac_bool32; +#define DRFLAC_TRUE 1 +#define DRFLAC_FALSE 0 +/* End Sized Types */ + +/* Decorations */ +#if !defined(DRFLAC_API) + #if defined(DRFLAC_DLL) + #if defined(_WIN32) + #define DRFLAC_DLL_IMPORT __declspec(dllimport) + #define DRFLAC_DLL_EXPORT __declspec(dllexport) + #define DRFLAC_DLL_PRIVATE static + #else + #if defined(__GNUC__) && __GNUC__ >= 4 + #define DRFLAC_DLL_IMPORT __attribute__((visibility("default"))) + #define DRFLAC_DLL_EXPORT __attribute__((visibility("default"))) + #define DRFLAC_DLL_PRIVATE __attribute__((visibility("hidden"))) + #else + #define DRFLAC_DLL_IMPORT + #define DRFLAC_DLL_EXPORT + #define DRFLAC_DLL_PRIVATE static + #endif + #endif + + #if defined(DR_FLAC_IMPLEMENTATION) || defined(DRFLAC_IMPLEMENTATION) + #define DRFLAC_API DRFLAC_DLL_EXPORT + #else + #define DRFLAC_API DRFLAC_DLL_IMPORT + #endif + #define DRFLAC_PRIVATE DRFLAC_DLL_PRIVATE + #else + #define DRFLAC_API extern + #define DRFLAC_PRIVATE static + #endif +#endif +/* End Decorations */ + +#if defined(_MSC_VER) && _MSC_VER >= 1700 /* Visual Studio 2012 */ + #define DRFLAC_DEPRECATED __declspec(deprecated) +#elif (defined(__GNUC__) && __GNUC__ >= 4) /* GCC 4 */ + #define DRFLAC_DEPRECATED __attribute__((deprecated)) +#elif defined(__has_feature) /* Clang */ + #if __has_feature(attribute_deprecated) + #define DRFLAC_DEPRECATED __attribute__((deprecated)) + #else + #define DRFLAC_DEPRECATED + #endif +#else + #define DRFLAC_DEPRECATED +#endif + +DRFLAC_API void drflac_version(drflac_uint32* pMajor, drflac_uint32* pMinor, drflac_uint32* pRevision); +DRFLAC_API const char* drflac_version_string(void); + +/* Allocation Callbacks */ +typedef struct +{ + void* pUserData; + void* (* onMalloc)(size_t sz, void* pUserData); + void* (* onRealloc)(void* p, size_t sz, void* pUserData); + void (* onFree)(void* p, void* pUserData); +} drflac_allocation_callbacks; +/* End Allocation Callbacks */ + +/* +As data is read from the client it is placed into an internal buffer for fast access. This controls the size of that buffer. Larger values means more speed, +but also more memory. In my testing there is diminishing returns after about 4KB, but you can fiddle with this to suit your own needs. Must be a multiple of 8. +*/ +#ifndef DR_FLAC_BUFFER_SIZE +#define DR_FLAC_BUFFER_SIZE 4096 +#endif + + +/* Architecture Detection */ +#if defined(_WIN64) || defined(_LP64) || defined(__LP64__) +#define DRFLAC_64BIT +#endif + +#if defined(__x86_64__) || (defined(_M_X64) && !defined(_M_ARM64EC)) + #define DRFLAC_X64 +#elif defined(__i386) || defined(_M_IX86) + #define DRFLAC_X86 +#elif defined(__arm__) || defined(_M_ARM) || defined(__arm64) || defined(__arm64__) || defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) + #define DRFLAC_ARM +#endif +/* End Architecture Detection */ + + +#ifdef DRFLAC_64BIT +typedef drflac_uint64 drflac_cache_t; +#else +typedef drflac_uint32 drflac_cache_t; +#endif + +/* The various metadata block types. */ +#define DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO 0 +#define DRFLAC_METADATA_BLOCK_TYPE_PADDING 1 +#define DRFLAC_METADATA_BLOCK_TYPE_APPLICATION 2 +#define DRFLAC_METADATA_BLOCK_TYPE_SEEKTABLE 3 +#define DRFLAC_METADATA_BLOCK_TYPE_VORBIS_COMMENT 4 +#define DRFLAC_METADATA_BLOCK_TYPE_CUESHEET 5 +#define DRFLAC_METADATA_BLOCK_TYPE_PICTURE 6 +#define DRFLAC_METADATA_BLOCK_TYPE_INVALID 127 + +/* The various picture types specified in the PICTURE block. */ +#define DRFLAC_PICTURE_TYPE_OTHER 0 +#define DRFLAC_PICTURE_TYPE_FILE_ICON 1 +#define DRFLAC_PICTURE_TYPE_OTHER_FILE_ICON 2 +#define DRFLAC_PICTURE_TYPE_COVER_FRONT 3 +#define DRFLAC_PICTURE_TYPE_COVER_BACK 4 +#define DRFLAC_PICTURE_TYPE_LEAFLET_PAGE 5 +#define DRFLAC_PICTURE_TYPE_MEDIA 6 +#define DRFLAC_PICTURE_TYPE_LEAD_ARTIST 7 +#define DRFLAC_PICTURE_TYPE_ARTIST 8 +#define DRFLAC_PICTURE_TYPE_CONDUCTOR 9 +#define DRFLAC_PICTURE_TYPE_BAND 10 +#define DRFLAC_PICTURE_TYPE_COMPOSER 11 +#define DRFLAC_PICTURE_TYPE_LYRICIST 12 +#define DRFLAC_PICTURE_TYPE_RECORDING_LOCATION 13 +#define DRFLAC_PICTURE_TYPE_DURING_RECORDING 14 +#define DRFLAC_PICTURE_TYPE_DURING_PERFORMANCE 15 +#define DRFLAC_PICTURE_TYPE_SCREEN_CAPTURE 16 +#define DRFLAC_PICTURE_TYPE_BRIGHT_COLORED_FISH 17 +#define DRFLAC_PICTURE_TYPE_ILLUSTRATION 18 +#define DRFLAC_PICTURE_TYPE_BAND_LOGOTYPE 19 +#define DRFLAC_PICTURE_TYPE_PUBLISHER_LOGOTYPE 20 + +typedef enum +{ + drflac_container_native, + drflac_container_ogg, + drflac_container_unknown +} drflac_container; + +typedef enum +{ + DRFLAC_SEEK_SET, + DRFLAC_SEEK_CUR, + DRFLAC_SEEK_END +} drflac_seek_origin; + +/* The order of members in this structure is important because we map this directly to the raw data within the SEEKTABLE metadata block. */ +typedef struct +{ + drflac_uint64 firstPCMFrame; + drflac_uint64 flacFrameOffset; /* The offset from the first byte of the header of the first frame. */ + drflac_uint16 pcmFrameCount; +} drflac_seekpoint; + +typedef struct +{ + drflac_uint16 minBlockSizeInPCMFrames; + drflac_uint16 maxBlockSizeInPCMFrames; + drflac_uint32 minFrameSizeInPCMFrames; + drflac_uint32 maxFrameSizeInPCMFrames; + drflac_uint32 sampleRate; + drflac_uint8 channels; + drflac_uint8 bitsPerSample; + drflac_uint64 totalPCMFrameCount; + drflac_uint8 md5[16]; +} drflac_streaminfo; + +typedef struct +{ + /* + The metadata type. Use this to know how to interpret the data below. Will be set to one of the + DRFLAC_METADATA_BLOCK_TYPE_* tokens. + */ + drflac_uint32 type; + + /* The size in bytes of the block and the buffer pointed to by pRawData if it's non-NULL. */ + drflac_uint32 rawDataSize; + + /* The offset in the stream of the raw data. */ + drflac_uint64 rawDataOffset; + + /* + A pointer to the raw data. This points to a temporary buffer so don't hold on to it. It's best to + not modify the contents of this buffer. Use the structures below for more meaningful and structured + information about the metadata. It's possible for this to be null. + */ + const void* pRawData; + + union + { + drflac_streaminfo streaminfo; + + struct + { + int unused; + } padding; + + struct + { + drflac_uint32 id; + const void* pData; + drflac_uint32 dataSize; + } application; + + struct + { + drflac_uint32 seekpointCount; + const drflac_seekpoint* pSeekpoints; + } seektable; + + struct + { + drflac_uint32 vendorLength; + const char* vendor; + drflac_uint32 commentCount; + const void* pComments; + } vorbis_comment; + + struct + { + char catalog[128]; + drflac_uint64 leadInSampleCount; + drflac_bool32 isCD; + drflac_uint8 trackCount; + const void* pTrackData; + } cuesheet; + + struct + { + drflac_uint32 type; + drflac_uint32 mimeLength; + const char* mime; + drflac_uint32 descriptionLength; + const char* description; + drflac_uint32 width; + drflac_uint32 height; + drflac_uint32 colorDepth; + drflac_uint32 indexColorCount; + drflac_uint32 pictureDataSize; + drflac_uint64 pictureDataOffset; /* Offset from the start of the stream. */ + const drflac_uint8* pPictureData; + } picture; + } data; +} drflac_metadata; + + +/* +Callback for when data needs to be read from the client. + + +Parameters +---------- +pUserData (in) + The user data that was passed to drflac_open() and family. + +pBufferOut (out) + The output buffer. + +bytesToRead (in) + The number of bytes to read. + + +Return Value +------------ +The number of bytes actually read. + + +Remarks +------- +A return value of less than bytesToRead indicates the end of the stream. Do _not_ return from this callback until either the entire bytesToRead is filled or +you have reached the end of the stream. +*/ +typedef size_t (* drflac_read_proc)(void* pUserData, void* pBufferOut, size_t bytesToRead); + +/* +Callback for when data needs to be seeked. + + +Parameters +---------- +pUserData (in) + The user data that was passed to drflac_open() and family. + +offset (in) + The number of bytes to move, relative to the origin. Will never be negative. + +origin (in) + The origin of the seek - the current position, the start of the stream, or the end of the stream. + + +Return Value +------------ +Whether or not the seek was successful. + + +Remarks +------- +Seeking relative to the start and the current position must always be supported. If seeking from the end of the stream is not supported, return DRFLAC_FALSE. + +When seeking to a PCM frame using drflac_seek_to_pcm_frame(), dr_flac may call this with an offset beyond the end of the FLAC stream. This needs to be detected +and handled by returning DRFLAC_FALSE. +*/ +typedef drflac_bool32 (* drflac_seek_proc)(void* pUserData, int offset, drflac_seek_origin origin); + +/* +Callback for when the current position in the stream needs to be retrieved. + + +Parameters +---------- +pUserData (in) + The user data that was passed to drflac_open() and family. + +pCursor (out) + A pointer to a variable to receive the current position in the stream. + + +Return Value +------------ +Whether or not the operation was successful. +*/ +typedef drflac_bool32 (* drflac_tell_proc)(void* pUserData, drflac_int64* pCursor); + +/* +Callback for when a metadata block is read. + + +Parameters +---------- +pUserData (in) + The user data that was passed to drflac_open() and family. + +pMetadata (in) + A pointer to a structure containing the data of the metadata block. + + +Remarks +------- +Use pMetadata->type to determine which metadata block is being handled and how to read the data. This +will be set to one of the DRFLAC_METADATA_BLOCK_TYPE_* tokens. +*/ +typedef void (* drflac_meta_proc)(void* pUserData, drflac_metadata* pMetadata); + + +/* Structure for internal use. Only used for decoders opened with drflac_open_memory. */ +typedef struct +{ + const drflac_uint8* data; + size_t dataSize; + size_t currentReadPos; +} drflac__memory_stream; + +/* Structure for internal use. Used for bit streaming. */ +typedef struct +{ + /* The function to call when more data needs to be read. */ + drflac_read_proc onRead; + + /* The function to call when the current read position needs to be moved. */ + drflac_seek_proc onSeek; + + /* The function to call when the current read position needs to be retrieved. */ + drflac_tell_proc onTell; + + /* The user data to pass around to onRead and onSeek. */ + void* pUserData; + + + /* + The number of unaligned bytes in the L2 cache. This will always be 0 until the end of the stream is hit. At the end of the + stream there will be a number of bytes that don't cleanly fit in an L1 cache line, so we use this variable to know whether + or not the bistreamer needs to run on a slower path to read those last bytes. This will never be more than sizeof(drflac_cache_t). + */ + size_t unalignedByteCount; + + /* The content of the unaligned bytes. */ + drflac_cache_t unalignedCache; + + /* The index of the next valid cache line in the "L2" cache. */ + drflac_uint32 nextL2Line; + + /* The number of bits that have been consumed by the cache. This is used to determine how many valid bits are remaining. */ + drflac_uint32 consumedBits; + + /* + The cached data which was most recently read from the client. There are two levels of cache. Data flows as such: + Client -> L2 -> L1. The L2 -> L1 movement is aligned and runs on a fast path in just a few instructions. + */ + drflac_cache_t cacheL2[DR_FLAC_BUFFER_SIZE/sizeof(drflac_cache_t)]; + drflac_cache_t cache; + + /* + CRC-16. This is updated whenever bits are read from the bit stream. Manually set this to 0 to reset the CRC. For FLAC, this + is reset to 0 at the beginning of each frame. + */ + drflac_uint16 crc16; + drflac_cache_t crc16Cache; /* A cache for optimizing CRC calculations. This is filled when when the L1 cache is reloaded. */ + drflac_uint32 crc16CacheIgnoredBytes; /* The number of bytes to ignore when updating the CRC-16 from the CRC-16 cache. */ +} drflac_bs; + +typedef struct +{ + /* The type of the subframe: SUBFRAME_CONSTANT, SUBFRAME_VERBATIM, SUBFRAME_FIXED or SUBFRAME_LPC. */ + drflac_uint8 subframeType; + + /* The number of wasted bits per sample as specified by the sub-frame header. */ + drflac_uint8 wastedBitsPerSample; + + /* The order to use for the prediction stage for SUBFRAME_FIXED and SUBFRAME_LPC. */ + drflac_uint8 lpcOrder; + + /* A pointer to the buffer containing the decoded samples in the subframe. This pointer is an offset from drflac::pExtraData. */ + drflac_int32* pSamplesS32; +} drflac_subframe; + +typedef struct +{ + /* + If the stream uses variable block sizes, this will be set to the index of the first PCM frame. If fixed block sizes are used, this will + always be set to 0. This is 64-bit because the decoded PCM frame number will be 36 bits. + */ + drflac_uint64 pcmFrameNumber; + + /* + If the stream uses fixed block sizes, this will be set to the frame number. If variable block sizes are used, this will always be 0. This + is 32-bit because in fixed block sizes, the maximum frame number will be 31 bits. + */ + drflac_uint32 flacFrameNumber; + + /* The sample rate of this frame. */ + drflac_uint32 sampleRate; + + /* The number of PCM frames in each sub-frame within this frame. */ + drflac_uint16 blockSizeInPCMFrames; + + /* + The channel assignment of this frame. This is not always set to the channel count. If interchannel decorrelation is being used this + will be set to DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE, DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE or DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE. + */ + drflac_uint8 channelAssignment; + + /* The number of bits per sample within this frame. */ + drflac_uint8 bitsPerSample; + + /* The frame's CRC. */ + drflac_uint8 crc8; +} drflac_frame_header; + +typedef struct +{ + /* The header. */ + drflac_frame_header header; + + /* + The number of PCM frames left to be read in this FLAC frame. This is initially set to the block size. As PCM frames are read, + this will be decremented. When it reaches 0, the decoder will see this frame as fully consumed and load the next frame. + */ + drflac_uint32 pcmFramesRemaining; + + /* The list of sub-frames within the frame. There is one sub-frame for each channel, and there's a maximum of 8 channels. */ + drflac_subframe subframes[8]; +} drflac_frame; + +typedef struct +{ + /* The function to call when a metadata block is read. */ + drflac_meta_proc onMeta; + + /* The user data posted to the metadata callback function. */ + void* pUserDataMD; + + /* Memory allocation callbacks. */ + drflac_allocation_callbacks allocationCallbacks; + + + /* The sample rate. Will be set to something like 44100. */ + drflac_uint32 sampleRate; + + /* + The number of channels. This will be set to 1 for monaural streams, 2 for stereo, etc. Maximum 8. This is set based on the + value specified in the STREAMINFO block. + */ + drflac_uint8 channels; + + /* The bits per sample. Will be set to something like 16, 24, etc. */ + drflac_uint8 bitsPerSample; + + /* The maximum block size, in samples. This number represents the number of samples in each channel (not combined). */ + drflac_uint16 maxBlockSizeInPCMFrames; + + /* + The total number of PCM Frames making up the stream. Can be 0 in which case it's still a valid stream, but just means + the total PCM frame count is unknown. Likely the case with streams like internet radio. + */ + drflac_uint64 totalPCMFrameCount; + + + /* The container type. This is set based on whether or not the decoder was opened from a native or Ogg stream. */ + drflac_container container; + + /* The number of seekpoints in the seektable. */ + drflac_uint32 seekpointCount; + + + /* Information about the frame the decoder is currently sitting on. */ + drflac_frame currentFLACFrame; + + + /* The index of the PCM frame the decoder is currently sitting on. This is only used for seeking. */ + drflac_uint64 currentPCMFrame; + + /* The position of the first FLAC frame in the stream. This is only ever used for seeking. */ + drflac_uint64 firstFLACFramePosInBytes; + + + /* A hack to avoid a malloc() when opening a decoder with drflac_open_memory(). */ + drflac__memory_stream memoryStream; + + + /* A pointer to the decoded sample data. This is an offset of pExtraData. */ + drflac_int32* pDecodedSamples; + + /* A pointer to the seek table. This is an offset of pExtraData, or NULL if there is no seek table. */ + drflac_seekpoint* pSeekpoints; + + /* Internal use only. Only used with Ogg containers. Points to a drflac_oggbs object. This is an offset of pExtraData. */ + void* _oggbs; + + /* Internal use only. Used for profiling and testing different seeking modes. */ + drflac_bool32 _noSeekTableSeek : 1; + drflac_bool32 _noBinarySearchSeek : 1; + drflac_bool32 _noBruteForceSeek : 1; + + /* The bit streamer. The raw FLAC data is fed through this object. */ + drflac_bs bs; + + /* Variable length extra data. We attach this to the end of the object so we can avoid unnecessary mallocs. */ + drflac_uint8 pExtraData[1]; +} drflac; + + +/* +Opens a FLAC decoder. + + +Parameters +---------- +onRead (in) + The function to call when data needs to be read from the client. + +onSeek (in) + The function to call when the read position of the client data needs to move. + +pUserData (in, optional) + A pointer to application defined data that will be passed to onRead and onSeek. + +pAllocationCallbacks (in, optional) + A pointer to application defined callbacks for managing memory allocations. + + +Return Value +------------ +Returns a pointer to an object representing the decoder. + + +Remarks +------- +Close the decoder with `drflac_close()`. + +`pAllocationCallbacks` can be NULL in which case it will use `DRFLAC_MALLOC`, `DRFLAC_REALLOC` and `DRFLAC_FREE`. + +This function will automatically detect whether or not you are attempting to open a native or Ogg encapsulated FLAC, both of which should work seamlessly +without any manual intervention. Ogg encapsulation also works with multiplexed streams which basically means it can play FLAC encoded audio tracks in videos. + +This is the lowest level function for opening a FLAC stream. You can also use `drflac_open_file()` and `drflac_open_memory()` to open the stream from a file or +from a block of memory respectively. + +The STREAMINFO block must be present for this to succeed. Use `drflac_open_relaxed()` to open a FLAC stream where the header may not be present. + +Use `drflac_open_with_metadata()` if you need access to metadata. + + +Seek Also +--------- +drflac_open_file() +drflac_open_memory() +drflac_open_with_metadata() +drflac_close() +*/ +DRFLAC_API drflac* drflac_open(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_tell_proc onTell, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks); + +/* +Opens a FLAC stream with relaxed validation of the header block. + + +Parameters +---------- +onRead (in) + The function to call when data needs to be read from the client. + +onSeek (in) + The function to call when the read position of the client data needs to move. + +container (in) + Whether or not the FLAC stream is encapsulated using standard FLAC encapsulation or Ogg encapsulation. + +pUserData (in, optional) + A pointer to application defined data that will be passed to onRead and onSeek. + +pAllocationCallbacks (in, optional) + A pointer to application defined callbacks for managing memory allocations. + + +Return Value +------------ +A pointer to an object representing the decoder. + + +Remarks +------- +The same as drflac_open(), except attempts to open the stream even when a header block is not present. + +Because the header is not necessarily available, the caller must explicitly define the container (Native or Ogg). Do not set this to `drflac_container_unknown` +as that is for internal use only. + +Opening in relaxed mode will continue reading data from onRead until it finds a valid frame. If a frame is never found it will continue forever. To abort, +force your `onRead` callback to return 0, which dr_flac will use as an indicator that the end of the stream was found. + +Use `drflac_open_with_metadata_relaxed()` if you need access to metadata. +*/ +DRFLAC_API drflac* drflac_open_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_tell_proc onTell, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks); + +/* +Opens a FLAC decoder and notifies the caller of the metadata chunks (album art, etc.). + + +Parameters +---------- +onRead (in) + The function to call when data needs to be read from the client. + +onSeek (in) + The function to call when the read position of the client data needs to move. + +onMeta (in) + The function to call for every metadata block. + +pUserData (in, optional) + A pointer to application defined data that will be passed to onRead, onSeek and onMeta. + +pAllocationCallbacks (in, optional) + A pointer to application defined callbacks for managing memory allocations. + + +Return Value +------------ +A pointer to an object representing the decoder. + + +Remarks +------- +Close the decoder with `drflac_close()`. + +`pAllocationCallbacks` can be NULL in which case it will use `DRFLAC_MALLOC`, `DRFLAC_REALLOC` and `DRFLAC_FREE`. + +This is slower than `drflac_open()`, so avoid this one if you don't need metadata. Internally, this will allocate and free memory on the heap for every +metadata block except for STREAMINFO and PADDING blocks. + +The caller is notified of the metadata via the `onMeta` callback. All metadata blocks will be handled before the function returns. This callback takes a +pointer to a `drflac_metadata` object which is a union containing the data of all relevant metadata blocks. Use the `type` member to discriminate against +the different metadata types. + +The STREAMINFO block must be present for this to succeed. Use `drflac_open_with_metadata_relaxed()` to open a FLAC stream where the header may not be present. + +Note that this will behave inconsistently with `drflac_open()` if the stream is an Ogg encapsulated stream and a metadata block is corrupted. This is due to +the way the Ogg stream recovers from corrupted pages. When `drflac_open_with_metadata()` is being used, the open routine will try to read the contents of the +metadata block, whereas `drflac_open()` will simply seek past it (for the sake of efficiency). This inconsistency can result in different samples being +returned depending on whether or not the stream is being opened with metadata. + + +Seek Also +--------- +drflac_open_file_with_metadata() +drflac_open_memory_with_metadata() +drflac_open() +drflac_close() +*/ +DRFLAC_API drflac* drflac_open_with_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_tell_proc onTell, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks); + +/* +The same as drflac_open_with_metadata(), except attempts to open the stream even when a header block is not present. + +See Also +-------- +drflac_open_with_metadata() +drflac_open_relaxed() +*/ +DRFLAC_API drflac* drflac_open_with_metadata_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_tell_proc onTell, drflac_meta_proc onMeta, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks); + +/* +Closes the given FLAC decoder. + + +Parameters +---------- +pFlac (in) + The decoder to close. + + +Remarks +------- +This will destroy the decoder object. + + +See Also +-------- +drflac_open() +drflac_open_with_metadata() +drflac_open_file() +drflac_open_file_w() +drflac_open_file_with_metadata() +drflac_open_file_with_metadata_w() +drflac_open_memory() +drflac_open_memory_with_metadata() +*/ +DRFLAC_API void drflac_close(drflac* pFlac); + + +/* +Reads sample data from the given FLAC decoder, output as interleaved signed 32-bit PCM. + + +Parameters +---------- +pFlac (in) + The decoder. + +framesToRead (in) + The number of PCM frames to read. + +pBufferOut (out, optional) + A pointer to the buffer that will receive the decoded samples. + + +Return Value +------------ +Returns the number of PCM frames actually read. If the return value is less than `framesToRead` it has reached the end. + + +Remarks +------- +pBufferOut can be null, in which case the call will act as a seek, and the return value will be the number of frames seeked. +*/ +DRFLAC_API drflac_uint64 drflac_read_pcm_frames_s32(drflac* pFlac, drflac_uint64 framesToRead, drflac_int32* pBufferOut); + + +/* +Reads sample data from the given FLAC decoder, output as interleaved signed 16-bit PCM. + + +Parameters +---------- +pFlac (in) + The decoder. + +framesToRead (in) + The number of PCM frames to read. + +pBufferOut (out, optional) + A pointer to the buffer that will receive the decoded samples. + + +Return Value +------------ +Returns the number of PCM frames actually read. If the return value is less than `framesToRead` it has reached the end. + + +Remarks +------- +pBufferOut can be null, in which case the call will act as a seek, and the return value will be the number of frames seeked. + +Note that this is lossy for streams where the bits per sample is larger than 16. +*/ +DRFLAC_API drflac_uint64 drflac_read_pcm_frames_s16(drflac* pFlac, drflac_uint64 framesToRead, drflac_int16* pBufferOut); + +/* +Reads sample data from the given FLAC decoder, output as interleaved 32-bit floating point PCM. + + +Parameters +---------- +pFlac (in) + The decoder. + +framesToRead (in) + The number of PCM frames to read. + +pBufferOut (out, optional) + A pointer to the buffer that will receive the decoded samples. + + +Return Value +------------ +Returns the number of PCM frames actually read. If the return value is less than `framesToRead` it has reached the end. + + +Remarks +------- +pBufferOut can be null, in which case the call will act as a seek, and the return value will be the number of frames seeked. + +Note that this should be considered lossy due to the nature of floating point numbers not being able to exactly represent every possible number. +*/ +DRFLAC_API drflac_uint64 drflac_read_pcm_frames_f32(drflac* pFlac, drflac_uint64 framesToRead, float* pBufferOut); + +/* +Seeks to the PCM frame at the given index. + + +Parameters +---------- +pFlac (in) + The decoder. + +pcmFrameIndex (in) + The index of the PCM frame to seek to. See notes below. + + +Return Value +------------- +`DRFLAC_TRUE` if successful; `DRFLAC_FALSE` otherwise. +*/ +DRFLAC_API drflac_bool32 drflac_seek_to_pcm_frame(drflac* pFlac, drflac_uint64 pcmFrameIndex); + + + +#ifndef DR_FLAC_NO_STDIO +/* +Opens a FLAC decoder from the file at the given path. + + +Parameters +---------- +pFileName (in) + The path of the file to open, either absolute or relative to the current directory. + +pAllocationCallbacks (in, optional) + A pointer to application defined callbacks for managing memory allocations. + + +Return Value +------------ +A pointer to an object representing the decoder. + + +Remarks +------- +Close the decoder with drflac_close(). + + +Remarks +------- +This will hold a handle to the file until the decoder is closed with drflac_close(). Some platforms will restrict the number of files a process can have open +at any given time, so keep this mind if you have many decoders open at the same time. + + +See Also +-------- +drflac_open_file_with_metadata() +drflac_open() +drflac_close() +*/ +DRFLAC_API drflac* drflac_open_file(const char* pFileName, const drflac_allocation_callbacks* pAllocationCallbacks); +DRFLAC_API drflac* drflac_open_file_w(const wchar_t* pFileName, const drflac_allocation_callbacks* pAllocationCallbacks); + +/* +Opens a FLAC decoder from the file at the given path and notifies the caller of the metadata chunks (album art, etc.) + + +Parameters +---------- +pFileName (in) + The path of the file to open, either absolute or relative to the current directory. + +pAllocationCallbacks (in, optional) + A pointer to application defined callbacks for managing memory allocations. + +onMeta (in) + The callback to fire for each metadata block. + +pUserData (in) + A pointer to the user data to pass to the metadata callback. + +pAllocationCallbacks (in) + A pointer to application defined callbacks for managing memory allocations. + + +Remarks +------- +Look at the documentation for drflac_open_with_metadata() for more information on how metadata is handled. + + +See Also +-------- +drflac_open_with_metadata() +drflac_open() +drflac_close() +*/ +DRFLAC_API drflac* drflac_open_file_with_metadata(const char* pFileName, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks); +DRFLAC_API drflac* drflac_open_file_with_metadata_w(const wchar_t* pFileName, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks); +#endif + +/* +Opens a FLAC decoder from a pre-allocated block of memory + + +Parameters +---------- +pData (in) + A pointer to the raw encoded FLAC data. + +dataSize (in) + The size in bytes of `data`. + +pAllocationCallbacks (in) + A pointer to application defined callbacks for managing memory allocations. + + +Return Value +------------ +A pointer to an object representing the decoder. + + +Remarks +------- +This does not create a copy of the data. It is up to the application to ensure the buffer remains valid for the lifetime of the decoder. + + +See Also +-------- +drflac_open() +drflac_close() +*/ +DRFLAC_API drflac* drflac_open_memory(const void* pData, size_t dataSize, const drflac_allocation_callbacks* pAllocationCallbacks); + +/* +Opens a FLAC decoder from a pre-allocated block of memory and notifies the caller of the metadata chunks (album art, etc.) + + +Parameters +---------- +pData (in) + A pointer to the raw encoded FLAC data. + +dataSize (in) + The size in bytes of `data`. + +onMeta (in) + The callback to fire for each metadata block. + +pUserData (in) + A pointer to the user data to pass to the metadata callback. + +pAllocationCallbacks (in) + A pointer to application defined callbacks for managing memory allocations. + + +Remarks +------- +Look at the documentation for drflac_open_with_metadata() for more information on how metadata is handled. + + +See Also +------- +drflac_open_with_metadata() +drflac_open() +drflac_close() +*/ +DRFLAC_API drflac* drflac_open_memory_with_metadata(const void* pData, size_t dataSize, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks); + + + +/* High Level APIs */ + +/* +Opens a FLAC stream from the given callbacks and fully decodes it in a single operation. The return value is a +pointer to the sample data as interleaved signed 32-bit PCM. The returned data must be freed with drflac_free(). + +You can pass in custom memory allocation callbacks via the pAllocationCallbacks parameter. This can be NULL in which +case it will use DRFLAC_MALLOC, DRFLAC_REALLOC and DRFLAC_FREE. + +Sometimes a FLAC file won't keep track of the total sample count. In this situation the function will continuously +read samples into a dynamically sized buffer on the heap until no samples are left. + +Do not call this function on a broadcast type of stream (like internet radio streams and whatnot). +*/ +DRFLAC_API drflac_int32* drflac_open_and_read_pcm_frames_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_tell_proc onTell, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks); + +/* Same as drflac_open_and_read_pcm_frames_s32(), except returns signed 16-bit integer samples. */ +DRFLAC_API drflac_int16* drflac_open_and_read_pcm_frames_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_tell_proc onTell, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks); + +/* Same as drflac_open_and_read_pcm_frames_s32(), except returns 32-bit floating-point samples. */ +DRFLAC_API float* drflac_open_and_read_pcm_frames_f32(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_tell_proc onTell, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks); + +#ifndef DR_FLAC_NO_STDIO +/* Same as drflac_open_and_read_pcm_frames_s32() except opens the decoder from a file. */ +DRFLAC_API drflac_int32* drflac_open_file_and_read_pcm_frames_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks); + +/* Same as drflac_open_file_and_read_pcm_frames_s32(), except returns signed 16-bit integer samples. */ +DRFLAC_API drflac_int16* drflac_open_file_and_read_pcm_frames_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks); + +/* Same as drflac_open_file_and_read_pcm_frames_s32(), except returns 32-bit floating-point samples. */ +DRFLAC_API float* drflac_open_file_and_read_pcm_frames_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks); +#endif + +/* Same as drflac_open_and_read_pcm_frames_s32() except opens the decoder from a block of memory. */ +DRFLAC_API drflac_int32* drflac_open_memory_and_read_pcm_frames_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks); + +/* Same as drflac_open_memory_and_read_pcm_frames_s32(), except returns signed 16-bit integer samples. */ +DRFLAC_API drflac_int16* drflac_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks); + +/* Same as drflac_open_memory_and_read_pcm_frames_s32(), except returns 32-bit floating-point samples. */ +DRFLAC_API float* drflac_open_memory_and_read_pcm_frames_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks); + +/* +Frees memory that was allocated internally by dr_flac. + +Set pAllocationCallbacks to the same object that was passed to drflac_open_*_and_read_pcm_frames_*(). If you originally passed in NULL, pass in NULL for this. +*/ +DRFLAC_API void drflac_free(void* p, const drflac_allocation_callbacks* pAllocationCallbacks); + + +/* Structure representing an iterator for vorbis comments in a VORBIS_COMMENT metadata block. */ +typedef struct +{ + drflac_uint32 countRemaining; + const char* pRunningData; +} drflac_vorbis_comment_iterator; + +/* +Initializes a vorbis comment iterator. This can be used for iterating over the vorbis comments in a VORBIS_COMMENT +metadata block. +*/ +DRFLAC_API void drflac_init_vorbis_comment_iterator(drflac_vorbis_comment_iterator* pIter, drflac_uint32 commentCount, const void* pComments); + +/* +Goes to the next vorbis comment in the given iterator. If null is returned it means there are no more comments. The +returned string is NOT null terminated. +*/ +DRFLAC_API const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator* pIter, drflac_uint32* pCommentLengthOut); + + +/* Structure representing an iterator for cuesheet tracks in a CUESHEET metadata block. */ +typedef struct +{ + drflac_uint32 countRemaining; + const char* pRunningData; +} drflac_cuesheet_track_iterator; + +/* The order of members here is important because we map this directly to the raw data within the CUESHEET metadata block. */ +typedef struct +{ + drflac_uint64 offset; + drflac_uint8 index; + drflac_uint8 reserved[3]; +} drflac_cuesheet_track_index; + +typedef struct +{ + drflac_uint64 offset; + drflac_uint8 trackNumber; + char ISRC[12]; + drflac_bool8 isAudio; + drflac_bool8 preEmphasis; + drflac_uint8 indexCount; + const drflac_cuesheet_track_index* pIndexPoints; +} drflac_cuesheet_track; + +/* +Initializes a cuesheet track iterator. This can be used for iterating over the cuesheet tracks in a CUESHEET metadata +block. +*/ +DRFLAC_API void drflac_init_cuesheet_track_iterator(drflac_cuesheet_track_iterator* pIter, drflac_uint32 trackCount, const void* pTrackData); + +/* Goes to the next cuesheet track in the given iterator. If DRFLAC_FALSE is returned it means there are no more comments. */ +DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterator* pIter, drflac_cuesheet_track* pCuesheetTrack); + + +#ifdef __cplusplus +} +#endif +#endif /* dr_flac_h */ + + +/************************************************************************************************************************************************************ + ************************************************************************************************************************************************************ + + IMPLEMENTATION + + ************************************************************************************************************************************************************ + ************************************************************************************************************************************************************/ +#if defined(DR_FLAC_IMPLEMENTATION) || defined(DRFLAC_IMPLEMENTATION) +#ifndef dr_flac_c +#define dr_flac_c + +/* Disable some annoying warnings. */ +#if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))) + #pragma GCC diagnostic push + #if __GNUC__ >= 7 + #pragma GCC diagnostic ignored "-Wimplicit-fallthrough" + #endif +#endif + +#ifdef __linux__ + #ifndef _BSD_SOURCE + #define _BSD_SOURCE + #endif + #ifndef _DEFAULT_SOURCE + #define _DEFAULT_SOURCE + #endif + #ifndef __USE_BSD + #define __USE_BSD + #endif + #include +#endif + +#include +#include + +/* Inline */ +#ifdef _MSC_VER + #define DRFLAC_INLINE __forceinline +#elif defined(__GNUC__) + /* + I've had a bug report where GCC is emitting warnings about functions possibly not being inlineable. This warning happens when + the __attribute__((always_inline)) attribute is defined without an "inline" statement. I think therefore there must be some + case where "__inline__" is not always defined, thus the compiler emitting these warnings. When using -std=c89 or -ansi on the + command line, we cannot use the "inline" keyword and instead need to use "__inline__". In an attempt to work around this issue + I am using "__inline__" only when we're compiling in strict ANSI mode. + */ + #if defined(__STRICT_ANSI__) + #define DRFLAC_GNUC_INLINE_HINT __inline__ + #else + #define DRFLAC_GNUC_INLINE_HINT inline + #endif + + #if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 2)) || defined(__clang__) + #define DRFLAC_INLINE DRFLAC_GNUC_INLINE_HINT __attribute__((always_inline)) + #else + #define DRFLAC_INLINE DRFLAC_GNUC_INLINE_HINT + #endif +#elif defined(__WATCOMC__) + #define DRFLAC_INLINE __inline +#else + #define DRFLAC_INLINE +#endif +/* End Inline */ + +/* +Intrinsics Support + +There's a bug in GCC 4.2.x which results in an incorrect compilation error when using _mm_slli_epi32() where it complains with + + "error: shift must be an immediate" + +Unfortuantely dr_flac depends on this for a few things so we're just going to disable SSE on GCC 4.2 and below. +*/ +#if !defined(DR_FLAC_NO_SIMD) + #if defined(DRFLAC_X64) || defined(DRFLAC_X86) + #if defined(_MSC_VER) && !defined(__clang__) + /* MSVC. */ + #if _MSC_VER >= 1400 && !defined(DRFLAC_NO_SSE2) /* 2005 */ + #define DRFLAC_SUPPORT_SSE2 + #endif + #if _MSC_VER >= 1600 && !defined(DRFLAC_NO_SSE41) /* 2010 */ + #define DRFLAC_SUPPORT_SSE41 + #endif + #elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) + /* Assume GNUC-style. */ + #if defined(__SSE2__) && !defined(DRFLAC_NO_SSE2) + #define DRFLAC_SUPPORT_SSE2 + #endif + #if defined(__SSE4_1__) && !defined(DRFLAC_NO_SSE41) + #define DRFLAC_SUPPORT_SSE41 + #endif + #endif + + /* If at this point we still haven't determined compiler support for the intrinsics just fall back to __has_include. */ + #if !defined(__GNUC__) && !defined(__clang__) && defined(__has_include) + #if !defined(DRFLAC_SUPPORT_SSE2) && !defined(DRFLAC_NO_SSE2) && __has_include() + #define DRFLAC_SUPPORT_SSE2 + #endif + #if !defined(DRFLAC_SUPPORT_SSE41) && !defined(DRFLAC_NO_SSE41) && __has_include() + #define DRFLAC_SUPPORT_SSE41 + #endif + #endif + + #if defined(DRFLAC_SUPPORT_SSE41) + #include + #elif defined(DRFLAC_SUPPORT_SSE2) + #include + #endif + #endif + + #if defined(DRFLAC_ARM) + #if !defined(DRFLAC_NO_NEON) && (defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64)) + #define DRFLAC_SUPPORT_NEON + #include + #endif + #endif +#endif + +/* Compile-time CPU feature support. */ +#if !defined(DR_FLAC_NO_SIMD) && (defined(DRFLAC_X86) || defined(DRFLAC_X64)) + #if defined(_MSC_VER) && !defined(__clang__) + #if _MSC_VER >= 1400 + #include + static void drflac__cpuid(int info[4], int fid) + { + __cpuid(info, fid); + } + #else + #define DRFLAC_NO_CPUID + #endif + #else + #if defined(__GNUC__) || defined(__clang__) + static void drflac__cpuid(int info[4], int fid) + { + /* + It looks like the -fPIC option uses the ebx register which GCC complains about. We can work around this by just using a different register, the + specific register of which I'm letting the compiler decide on. The "k" prefix is used to specify a 32-bit register. The {...} syntax is for + supporting different assembly dialects. + + What's basically happening is that we're saving and restoring the ebx register manually. + */ + #if defined(DRFLAC_X86) && defined(__PIC__) + __asm__ __volatile__ ( + "xchg{l} {%%}ebx, %k1;" + "cpuid;" + "xchg{l} {%%}ebx, %k1;" + : "=a"(info[0]), "=&r"(info[1]), "=c"(info[2]), "=d"(info[3]) : "a"(fid), "c"(0) + ); + #else + __asm__ __volatile__ ( + "cpuid" : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3]) : "a"(fid), "c"(0) + ); + #endif + } + #else + #define DRFLAC_NO_CPUID + #endif + #endif +#else + #define DRFLAC_NO_CPUID +#endif + +static DRFLAC_INLINE drflac_bool32 drflac_has_sse2(void) +{ +#if defined(DRFLAC_SUPPORT_SSE2) + #if (defined(DRFLAC_X64) || defined(DRFLAC_X86)) && !defined(DRFLAC_NO_SSE2) + #if defined(DRFLAC_X64) + return DRFLAC_TRUE; /* 64-bit targets always support SSE2. */ + #elif (defined(_M_IX86_FP) && _M_IX86_FP == 2) || defined(__SSE2__) + return DRFLAC_TRUE; /* If the compiler is allowed to freely generate SSE2 code we can assume support. */ + #else + #if defined(DRFLAC_NO_CPUID) + return DRFLAC_FALSE; + #else + int info[4]; + drflac__cpuid(info, 1); + return (info[3] & (1 << 26)) != 0; + #endif + #endif + #else + return DRFLAC_FALSE; /* SSE2 is only supported on x86 and x64 architectures. */ + #endif +#else + return DRFLAC_FALSE; /* No compiler support. */ +#endif +} + +static DRFLAC_INLINE drflac_bool32 drflac_has_sse41(void) +{ +#if defined(DRFLAC_SUPPORT_SSE41) + #if (defined(DRFLAC_X64) || defined(DRFLAC_X86)) && !defined(DRFLAC_NO_SSE41) + #if defined(__SSE4_1__) || defined(__AVX__) + return DRFLAC_TRUE; /* If the compiler is allowed to freely generate SSE41 code we can assume support. */ + #else + #if defined(DRFLAC_NO_CPUID) + return DRFLAC_FALSE; + #else + int info[4]; + drflac__cpuid(info, 1); + return (info[2] & (1 << 19)) != 0; + #endif + #endif + #else + return DRFLAC_FALSE; /* SSE41 is only supported on x86 and x64 architectures. */ + #endif +#else + return DRFLAC_FALSE; /* No compiler support. */ +#endif +} + + +#if defined(_MSC_VER) && _MSC_VER >= 1500 && (defined(DRFLAC_X86) || defined(DRFLAC_X64)) && !defined(__clang__) + #define DRFLAC_HAS_LZCNT_INTRINSIC +#elif (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))) + #define DRFLAC_HAS_LZCNT_INTRINSIC +#elif defined(__clang__) + #if defined(__has_builtin) + #if __has_builtin(__builtin_clzll) || __has_builtin(__builtin_clzl) + #define DRFLAC_HAS_LZCNT_INTRINSIC + #endif + #endif +#endif + +#if defined(_MSC_VER) && _MSC_VER >= 1400 && !defined(__clang__) + #define DRFLAC_HAS_BYTESWAP16_INTRINSIC + #define DRFLAC_HAS_BYTESWAP32_INTRINSIC + #define DRFLAC_HAS_BYTESWAP64_INTRINSIC +#elif defined(__clang__) + #if defined(__has_builtin) + #if __has_builtin(__builtin_bswap16) + #define DRFLAC_HAS_BYTESWAP16_INTRINSIC + #endif + #if __has_builtin(__builtin_bswap32) + #define DRFLAC_HAS_BYTESWAP32_INTRINSIC + #endif + #if __has_builtin(__builtin_bswap64) + #define DRFLAC_HAS_BYTESWAP64_INTRINSIC + #endif + #endif +#elif defined(__GNUC__) + #if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) + #define DRFLAC_HAS_BYTESWAP32_INTRINSIC + #define DRFLAC_HAS_BYTESWAP64_INTRINSIC + #endif + #if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) + #define DRFLAC_HAS_BYTESWAP16_INTRINSIC + #endif +#elif defined(__WATCOMC__) && defined(__386__) + #define DRFLAC_HAS_BYTESWAP16_INTRINSIC + #define DRFLAC_HAS_BYTESWAP32_INTRINSIC + #define DRFLAC_HAS_BYTESWAP64_INTRINSIC + extern __inline drflac_uint16 _watcom_bswap16(drflac_uint16); + extern __inline drflac_uint32 _watcom_bswap32(drflac_uint32); + extern __inline drflac_uint64 _watcom_bswap64(drflac_uint64); +#pragma aux _watcom_bswap16 = \ + "xchg al, ah" \ + parm [ax] \ + value [ax] \ + modify nomemory; +#pragma aux _watcom_bswap32 = \ + "bswap eax" \ + parm [eax] \ + value [eax] \ + modify nomemory; +#pragma aux _watcom_bswap64 = \ + "bswap eax" \ + "bswap edx" \ + "xchg eax,edx" \ + parm [eax edx] \ + value [eax edx] \ + modify nomemory; +#endif + + +/* Standard library stuff. */ +#ifndef DRFLAC_ASSERT +#include +#define DRFLAC_ASSERT(expression) assert(expression) +#endif +#ifndef DRFLAC_MALLOC +#define DRFLAC_MALLOC(sz) malloc((sz)) +#endif +#ifndef DRFLAC_REALLOC +#define DRFLAC_REALLOC(p, sz) realloc((p), (sz)) +#endif +#ifndef DRFLAC_FREE +#define DRFLAC_FREE(p) free((p)) +#endif +#ifndef DRFLAC_COPY_MEMORY +#define DRFLAC_COPY_MEMORY(dst, src, sz) memcpy((dst), (src), (sz)) +#endif +#ifndef DRFLAC_ZERO_MEMORY +#define DRFLAC_ZERO_MEMORY(p, sz) memset((p), 0, (sz)) +#endif +#ifndef DRFLAC_ZERO_OBJECT +#define DRFLAC_ZERO_OBJECT(p) DRFLAC_ZERO_MEMORY((p), sizeof(*(p))) +#endif + +#define DRFLAC_MAX_SIMD_VECTOR_SIZE 64 /* 64 for AVX-512 in the future. */ + +/* Result Codes */ +typedef drflac_int32 drflac_result; +#define DRFLAC_SUCCESS 0 +#define DRFLAC_ERROR -1 /* A generic error. */ +#define DRFLAC_INVALID_ARGS -2 +#define DRFLAC_INVALID_OPERATION -3 +#define DRFLAC_OUT_OF_MEMORY -4 +#define DRFLAC_OUT_OF_RANGE -5 +#define DRFLAC_ACCESS_DENIED -6 +#define DRFLAC_DOES_NOT_EXIST -7 +#define DRFLAC_ALREADY_EXISTS -8 +#define DRFLAC_TOO_MANY_OPEN_FILES -9 +#define DRFLAC_INVALID_FILE -10 +#define DRFLAC_TOO_BIG -11 +#define DRFLAC_PATH_TOO_LONG -12 +#define DRFLAC_NAME_TOO_LONG -13 +#define DRFLAC_NOT_DIRECTORY -14 +#define DRFLAC_IS_DIRECTORY -15 +#define DRFLAC_DIRECTORY_NOT_EMPTY -16 +#define DRFLAC_END_OF_FILE -17 +#define DRFLAC_NO_SPACE -18 +#define DRFLAC_BUSY -19 +#define DRFLAC_IO_ERROR -20 +#define DRFLAC_INTERRUPT -21 +#define DRFLAC_UNAVAILABLE -22 +#define DRFLAC_ALREADY_IN_USE -23 +#define DRFLAC_BAD_ADDRESS -24 +#define DRFLAC_BAD_SEEK -25 +#define DRFLAC_BAD_PIPE -26 +#define DRFLAC_DEADLOCK -27 +#define DRFLAC_TOO_MANY_LINKS -28 +#define DRFLAC_NOT_IMPLEMENTED -29 +#define DRFLAC_NO_MESSAGE -30 +#define DRFLAC_BAD_MESSAGE -31 +#define DRFLAC_NO_DATA_AVAILABLE -32 +#define DRFLAC_INVALID_DATA -33 +#define DRFLAC_TIMEOUT -34 +#define DRFLAC_NO_NETWORK -35 +#define DRFLAC_NOT_UNIQUE -36 +#define DRFLAC_NOT_SOCKET -37 +#define DRFLAC_NO_ADDRESS -38 +#define DRFLAC_BAD_PROTOCOL -39 +#define DRFLAC_PROTOCOL_UNAVAILABLE -40 +#define DRFLAC_PROTOCOL_NOT_SUPPORTED -41 +#define DRFLAC_PROTOCOL_FAMILY_NOT_SUPPORTED -42 +#define DRFLAC_ADDRESS_FAMILY_NOT_SUPPORTED -43 +#define DRFLAC_SOCKET_NOT_SUPPORTED -44 +#define DRFLAC_CONNECTION_RESET -45 +#define DRFLAC_ALREADY_CONNECTED -46 +#define DRFLAC_NOT_CONNECTED -47 +#define DRFLAC_CONNECTION_REFUSED -48 +#define DRFLAC_NO_HOST -49 +#define DRFLAC_IN_PROGRESS -50 +#define DRFLAC_CANCELLED -51 +#define DRFLAC_MEMORY_ALREADY_MAPPED -52 +#define DRFLAC_AT_END -53 + +#define DRFLAC_CRC_MISMATCH -100 +/* End Result Codes */ + + +#define DRFLAC_SUBFRAME_CONSTANT 0 +#define DRFLAC_SUBFRAME_VERBATIM 1 +#define DRFLAC_SUBFRAME_FIXED 8 +#define DRFLAC_SUBFRAME_LPC 32 +#define DRFLAC_SUBFRAME_RESERVED 255 + +#define DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE 0 +#define DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2 1 + +#define DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT 0 +#define DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE 8 +#define DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE 9 +#define DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE 10 + +#define DRFLAC_SEEKPOINT_SIZE_IN_BYTES 18 +#define DRFLAC_CUESHEET_TRACK_SIZE_IN_BYTES 36 +#define DRFLAC_CUESHEET_TRACK_INDEX_SIZE_IN_BYTES 12 + +#define drflac_align(x, a) ((((x) + (a) - 1) / (a)) * (a)) + + +DRFLAC_API void drflac_version(drflac_uint32* pMajor, drflac_uint32* pMinor, drflac_uint32* pRevision) +{ + if (pMajor) { + *pMajor = DRFLAC_VERSION_MAJOR; + } + + if (pMinor) { + *pMinor = DRFLAC_VERSION_MINOR; + } + + if (pRevision) { + *pRevision = DRFLAC_VERSION_REVISION; + } +} + +DRFLAC_API const char* drflac_version_string(void) +{ + return DRFLAC_VERSION_STRING; +} + + +/* CPU caps. */ +#if defined(__has_feature) + #if __has_feature(thread_sanitizer) + #define DRFLAC_NO_THREAD_SANITIZE __attribute__((no_sanitize("thread"))) + #else + #define DRFLAC_NO_THREAD_SANITIZE + #endif +#else + #define DRFLAC_NO_THREAD_SANITIZE +#endif + +#if defined(DRFLAC_HAS_LZCNT_INTRINSIC) +static drflac_bool32 drflac__gIsLZCNTSupported = DRFLAC_FALSE; +#endif + +#ifndef DRFLAC_NO_CPUID +static drflac_bool32 drflac__gIsSSE2Supported = DRFLAC_FALSE; +static drflac_bool32 drflac__gIsSSE41Supported = DRFLAC_FALSE; + +/* +I've had a bug report that Clang's ThreadSanitizer presents a warning in this function. Having reviewed this, this does +actually make sense. However, since CPU caps should never differ for a running process, I don't think the trade off of +complicating internal API's by passing around CPU caps versus just disabling the warnings is worthwhile. I'm therefore +just going to disable these warnings. This is disabled via the DRFLAC_NO_THREAD_SANITIZE attribute. +*/ +DRFLAC_NO_THREAD_SANITIZE static void drflac__init_cpu_caps(void) +{ + static drflac_bool32 isCPUCapsInitialized = DRFLAC_FALSE; + + if (!isCPUCapsInitialized) { + /* LZCNT */ +#if defined(DRFLAC_HAS_LZCNT_INTRINSIC) + int info[4] = {0}; + drflac__cpuid(info, 0x80000001); + drflac__gIsLZCNTSupported = (info[2] & (1 << 5)) != 0; +#endif + + /* SSE2 */ + drflac__gIsSSE2Supported = drflac_has_sse2(); + + /* SSE4.1 */ + drflac__gIsSSE41Supported = drflac_has_sse41(); + + /* Initialized. */ + isCPUCapsInitialized = DRFLAC_TRUE; + } +} +#else +static drflac_bool32 drflac__gIsNEONSupported = DRFLAC_FALSE; + +static DRFLAC_INLINE drflac_bool32 drflac__has_neon(void) +{ +#if defined(DRFLAC_SUPPORT_NEON) + #if defined(DRFLAC_ARM) && !defined(DRFLAC_NO_NEON) + #if (defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64)) + return DRFLAC_TRUE; /* If the compiler is allowed to freely generate NEON code we can assume support. */ + #else + /* TODO: Runtime check. */ + return DRFLAC_FALSE; + #endif + #else + return DRFLAC_FALSE; /* NEON is only supported on ARM architectures. */ + #endif +#else + return DRFLAC_FALSE; /* No compiler support. */ +#endif +} + +DRFLAC_NO_THREAD_SANITIZE static void drflac__init_cpu_caps(void) +{ + drflac__gIsNEONSupported = drflac__has_neon(); + +#if defined(DRFLAC_HAS_LZCNT_INTRINSIC) && defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5) + drflac__gIsLZCNTSupported = DRFLAC_TRUE; +#endif +} +#endif + + +/* Endian Management */ +static DRFLAC_INLINE drflac_bool32 drflac__is_little_endian(void) +{ +#if defined(DRFLAC_X86) || defined(DRFLAC_X64) + return DRFLAC_TRUE; +#elif defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && __BYTE_ORDER == __LITTLE_ENDIAN + return DRFLAC_TRUE; +#else + int n = 1; + return (*(char*)&n) == 1; +#endif +} + +static DRFLAC_INLINE drflac_uint16 drflac__swap_endian_uint16(drflac_uint16 n) +{ +#ifdef DRFLAC_HAS_BYTESWAP16_INTRINSIC + #if defined(_MSC_VER) && !defined(__clang__) + return _byteswap_ushort(n); + #elif defined(__GNUC__) || defined(__clang__) + return __builtin_bswap16(n); + #elif defined(__WATCOMC__) && defined(__386__) + return _watcom_bswap16(n); + #else + #error "This compiler does not support the byte swap intrinsic." + #endif +#else + return ((n & 0xFF00) >> 8) | + ((n & 0x00FF) << 8); +#endif +} + +static DRFLAC_INLINE drflac_uint32 drflac__swap_endian_uint32(drflac_uint32 n) +{ +#ifdef DRFLAC_HAS_BYTESWAP32_INTRINSIC + #if defined(_MSC_VER) && !defined(__clang__) + return _byteswap_ulong(n); + #elif defined(__GNUC__) || defined(__clang__) + #if defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 6) && !defined(__ARM_ARCH_6M__) && !defined(DRFLAC_64BIT) /* <-- 64-bit inline assembly has not been tested, so disabling for now. */ + /* Inline assembly optimized implementation for ARM. In my testing, GCC does not generate optimized code with __builtin_bswap32(). */ + drflac_uint32 r; + __asm__ __volatile__ ( + #if defined(DRFLAC_64BIT) + "rev %w[out], %w[in]" : [out]"=r"(r) : [in]"r"(n) /* <-- This is untested. If someone in the community could test this, that would be appreciated! */ + #else + "rev %[out], %[in]" : [out]"=r"(r) : [in]"r"(n) + #endif + ); + return r; + #else + return __builtin_bswap32(n); + #endif + #elif defined(__WATCOMC__) && defined(__386__) + return _watcom_bswap32(n); + #else + #error "This compiler does not support the byte swap intrinsic." + #endif +#else + return ((n & 0xFF000000) >> 24) | + ((n & 0x00FF0000) >> 8) | + ((n & 0x0000FF00) << 8) | + ((n & 0x000000FF) << 24); +#endif +} + +static DRFLAC_INLINE drflac_uint64 drflac__swap_endian_uint64(drflac_uint64 n) +{ +#ifdef DRFLAC_HAS_BYTESWAP64_INTRINSIC + #if defined(_MSC_VER) && !defined(__clang__) + return _byteswap_uint64(n); + #elif defined(__GNUC__) || defined(__clang__) + return __builtin_bswap64(n); + #elif defined(__WATCOMC__) && defined(__386__) + return _watcom_bswap64(n); + #else + #error "This compiler does not support the byte swap intrinsic." + #endif +#else + /* Weird "<< 32" bitshift is required for C89 because it doesn't support 64-bit constants. Should be optimized out by a good compiler. */ + return ((n & ((drflac_uint64)0xFF000000 << 32)) >> 56) | + ((n & ((drflac_uint64)0x00FF0000 << 32)) >> 40) | + ((n & ((drflac_uint64)0x0000FF00 << 32)) >> 24) | + ((n & ((drflac_uint64)0x000000FF << 32)) >> 8) | + ((n & ((drflac_uint64)0xFF000000 )) << 8) | + ((n & ((drflac_uint64)0x00FF0000 )) << 24) | + ((n & ((drflac_uint64)0x0000FF00 )) << 40) | + ((n & ((drflac_uint64)0x000000FF )) << 56); +#endif +} + + +static DRFLAC_INLINE drflac_uint16 drflac__be2host_16(drflac_uint16 n) +{ + if (drflac__is_little_endian()) { + return drflac__swap_endian_uint16(n); + } + + return n; +} + +static DRFLAC_INLINE drflac_uint32 drflac__be2host_32(drflac_uint32 n) +{ + if (drflac__is_little_endian()) { + return drflac__swap_endian_uint32(n); + } + + return n; +} + +static DRFLAC_INLINE drflac_uint32 drflac__be2host_32_ptr_unaligned(const void* pData) +{ + const drflac_uint8* pNum = (drflac_uint8*)pData; + return *(pNum) << 24 | *(pNum+1) << 16 | *(pNum+2) << 8 | *(pNum+3); +} + +static DRFLAC_INLINE drflac_uint64 drflac__be2host_64(drflac_uint64 n) +{ + if (drflac__is_little_endian()) { + return drflac__swap_endian_uint64(n); + } + + return n; +} + + +static DRFLAC_INLINE drflac_uint32 drflac__le2host_32(drflac_uint32 n) +{ + if (!drflac__is_little_endian()) { + return drflac__swap_endian_uint32(n); + } + + return n; +} + +static DRFLAC_INLINE drflac_uint32 drflac__le2host_32_ptr_unaligned(const void* pData) +{ + const drflac_uint8* pNum = (drflac_uint8*)pData; + return *pNum | *(pNum+1) << 8 | *(pNum+2) << 16 | *(pNum+3) << 24; +} + + +static DRFLAC_INLINE drflac_uint32 drflac__unsynchsafe_32(drflac_uint32 n) +{ + drflac_uint32 result = 0; + result |= (n & 0x7F000000) >> 3; + result |= (n & 0x007F0000) >> 2; + result |= (n & 0x00007F00) >> 1; + result |= (n & 0x0000007F) >> 0; + + return result; +} + + + +/* The CRC code below is based on this document: http://zlib.net/crc_v3.txt */ +static drflac_uint8 drflac__crc8_table[] = { + 0x00, 0x07, 0x0E, 0x09, 0x1C, 0x1B, 0x12, 0x15, 0x38, 0x3F, 0x36, 0x31, 0x24, 0x23, 0x2A, 0x2D, + 0x70, 0x77, 0x7E, 0x79, 0x6C, 0x6B, 0x62, 0x65, 0x48, 0x4F, 0x46, 0x41, 0x54, 0x53, 0x5A, 0x5D, + 0xE0, 0xE7, 0xEE, 0xE9, 0xFC, 0xFB, 0xF2, 0xF5, 0xD8, 0xDF, 0xD6, 0xD1, 0xC4, 0xC3, 0xCA, 0xCD, + 0x90, 0x97, 0x9E, 0x99, 0x8C, 0x8B, 0x82, 0x85, 0xA8, 0xAF, 0xA6, 0xA1, 0xB4, 0xB3, 0xBA, 0xBD, + 0xC7, 0xC0, 0xC9, 0xCE, 0xDB, 0xDC, 0xD5, 0xD2, 0xFF, 0xF8, 0xF1, 0xF6, 0xE3, 0xE4, 0xED, 0xEA, + 0xB7, 0xB0, 0xB9, 0xBE, 0xAB, 0xAC, 0xA5, 0xA2, 0x8F, 0x88, 0x81, 0x86, 0x93, 0x94, 0x9D, 0x9A, + 0x27, 0x20, 0x29, 0x2E, 0x3B, 0x3C, 0x35, 0x32, 0x1F, 0x18, 0x11, 0x16, 0x03, 0x04, 0x0D, 0x0A, + 0x57, 0x50, 0x59, 0x5E, 0x4B, 0x4C, 0x45, 0x42, 0x6F, 0x68, 0x61, 0x66, 0x73, 0x74, 0x7D, 0x7A, + 0x89, 0x8E, 0x87, 0x80, 0x95, 0x92, 0x9B, 0x9C, 0xB1, 0xB6, 0xBF, 0xB8, 0xAD, 0xAA, 0xA3, 0xA4, + 0xF9, 0xFE, 0xF7, 0xF0, 0xE5, 0xE2, 0xEB, 0xEC, 0xC1, 0xC6, 0xCF, 0xC8, 0xDD, 0xDA, 0xD3, 0xD4, + 0x69, 0x6E, 0x67, 0x60, 0x75, 0x72, 0x7B, 0x7C, 0x51, 0x56, 0x5F, 0x58, 0x4D, 0x4A, 0x43, 0x44, + 0x19, 0x1E, 0x17, 0x10, 0x05, 0x02, 0x0B, 0x0C, 0x21, 0x26, 0x2F, 0x28, 0x3D, 0x3A, 0x33, 0x34, + 0x4E, 0x49, 0x40, 0x47, 0x52, 0x55, 0x5C, 0x5B, 0x76, 0x71, 0x78, 0x7F, 0x6A, 0x6D, 0x64, 0x63, + 0x3E, 0x39, 0x30, 0x37, 0x22, 0x25, 0x2C, 0x2B, 0x06, 0x01, 0x08, 0x0F, 0x1A, 0x1D, 0x14, 0x13, + 0xAE, 0xA9, 0xA0, 0xA7, 0xB2, 0xB5, 0xBC, 0xBB, 0x96, 0x91, 0x98, 0x9F, 0x8A, 0x8D, 0x84, 0x83, + 0xDE, 0xD9, 0xD0, 0xD7, 0xC2, 0xC5, 0xCC, 0xCB, 0xE6, 0xE1, 0xE8, 0xEF, 0xFA, 0xFD, 0xF4, 0xF3 +}; + +static drflac_uint16 drflac__crc16_table[] = { + 0x0000, 0x8005, 0x800F, 0x000A, 0x801B, 0x001E, 0x0014, 0x8011, + 0x8033, 0x0036, 0x003C, 0x8039, 0x0028, 0x802D, 0x8027, 0x0022, + 0x8063, 0x0066, 0x006C, 0x8069, 0x0078, 0x807D, 0x8077, 0x0072, + 0x0050, 0x8055, 0x805F, 0x005A, 0x804B, 0x004E, 0x0044, 0x8041, + 0x80C3, 0x00C6, 0x00CC, 0x80C9, 0x00D8, 0x80DD, 0x80D7, 0x00D2, + 0x00F0, 0x80F5, 0x80FF, 0x00FA, 0x80EB, 0x00EE, 0x00E4, 0x80E1, + 0x00A0, 0x80A5, 0x80AF, 0x00AA, 0x80BB, 0x00BE, 0x00B4, 0x80B1, + 0x8093, 0x0096, 0x009C, 0x8099, 0x0088, 0x808D, 0x8087, 0x0082, + 0x8183, 0x0186, 0x018C, 0x8189, 0x0198, 0x819D, 0x8197, 0x0192, + 0x01B0, 0x81B5, 0x81BF, 0x01BA, 0x81AB, 0x01AE, 0x01A4, 0x81A1, + 0x01E0, 0x81E5, 0x81EF, 0x01EA, 0x81FB, 0x01FE, 0x01F4, 0x81F1, + 0x81D3, 0x01D6, 0x01DC, 0x81D9, 0x01C8, 0x81CD, 0x81C7, 0x01C2, + 0x0140, 0x8145, 0x814F, 0x014A, 0x815B, 0x015E, 0x0154, 0x8151, + 0x8173, 0x0176, 0x017C, 0x8179, 0x0168, 0x816D, 0x8167, 0x0162, + 0x8123, 0x0126, 0x012C, 0x8129, 0x0138, 0x813D, 0x8137, 0x0132, + 0x0110, 0x8115, 0x811F, 0x011A, 0x810B, 0x010E, 0x0104, 0x8101, + 0x8303, 0x0306, 0x030C, 0x8309, 0x0318, 0x831D, 0x8317, 0x0312, + 0x0330, 0x8335, 0x833F, 0x033A, 0x832B, 0x032E, 0x0324, 0x8321, + 0x0360, 0x8365, 0x836F, 0x036A, 0x837B, 0x037E, 0x0374, 0x8371, + 0x8353, 0x0356, 0x035C, 0x8359, 0x0348, 0x834D, 0x8347, 0x0342, + 0x03C0, 0x83C5, 0x83CF, 0x03CA, 0x83DB, 0x03DE, 0x03D4, 0x83D1, + 0x83F3, 0x03F6, 0x03FC, 0x83F9, 0x03E8, 0x83ED, 0x83E7, 0x03E2, + 0x83A3, 0x03A6, 0x03AC, 0x83A9, 0x03B8, 0x83BD, 0x83B7, 0x03B2, + 0x0390, 0x8395, 0x839F, 0x039A, 0x838B, 0x038E, 0x0384, 0x8381, + 0x0280, 0x8285, 0x828F, 0x028A, 0x829B, 0x029E, 0x0294, 0x8291, + 0x82B3, 0x02B6, 0x02BC, 0x82B9, 0x02A8, 0x82AD, 0x82A7, 0x02A2, + 0x82E3, 0x02E6, 0x02EC, 0x82E9, 0x02F8, 0x82FD, 0x82F7, 0x02F2, + 0x02D0, 0x82D5, 0x82DF, 0x02DA, 0x82CB, 0x02CE, 0x02C4, 0x82C1, + 0x8243, 0x0246, 0x024C, 0x8249, 0x0258, 0x825D, 0x8257, 0x0252, + 0x0270, 0x8275, 0x827F, 0x027A, 0x826B, 0x026E, 0x0264, 0x8261, + 0x0220, 0x8225, 0x822F, 0x022A, 0x823B, 0x023E, 0x0234, 0x8231, + 0x8213, 0x0216, 0x021C, 0x8219, 0x0208, 0x820D, 0x8207, 0x0202 +}; + +static DRFLAC_INLINE drflac_uint8 drflac_crc8_byte(drflac_uint8 crc, drflac_uint8 data) +{ + return drflac__crc8_table[crc ^ data]; +} + +static DRFLAC_INLINE drflac_uint8 drflac_crc8(drflac_uint8 crc, drflac_uint32 data, drflac_uint32 count) +{ +#ifdef DR_FLAC_NO_CRC + (void)crc; + (void)data; + (void)count; + return 0; +#else +#if 0 + /* REFERENCE (use of this implementation requires an explicit flush by doing "drflac_crc8(crc, 0, 8);") */ + drflac_uint8 p = 0x07; + for (int i = count-1; i >= 0; --i) { + drflac_uint8 bit = (data & (1 << i)) >> i; + if (crc & 0x80) { + crc = ((crc << 1) | bit) ^ p; + } else { + crc = ((crc << 1) | bit); + } + } + return crc; +#else + drflac_uint32 wholeBytes; + drflac_uint32 leftoverBits; + drflac_uint64 leftoverDataMask; + + static drflac_uint64 leftoverDataMaskTable[8] = { + 0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F + }; + + DRFLAC_ASSERT(count <= 32); + + wholeBytes = count >> 3; + leftoverBits = count - (wholeBytes*8); + leftoverDataMask = leftoverDataMaskTable[leftoverBits]; + + switch (wholeBytes) { + case 4: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0xFF000000UL << leftoverBits)) >> (24 + leftoverBits))); + case 3: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0x00FF0000UL << leftoverBits)) >> (16 + leftoverBits))); + case 2: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0x0000FF00UL << leftoverBits)) >> ( 8 + leftoverBits))); + case 1: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0x000000FFUL << leftoverBits)) >> ( 0 + leftoverBits))); + case 0: if (leftoverBits > 0) crc = (drflac_uint8)((crc << leftoverBits) ^ drflac__crc8_table[(crc >> (8 - leftoverBits)) ^ (data & leftoverDataMask)]); + } + return crc; +#endif +#endif +} + +static DRFLAC_INLINE drflac_uint16 drflac_crc16_byte(drflac_uint16 crc, drflac_uint8 data) +{ + return (crc << 8) ^ drflac__crc16_table[(drflac_uint8)(crc >> 8) ^ data]; +} + +static DRFLAC_INLINE drflac_uint16 drflac_crc16_cache(drflac_uint16 crc, drflac_cache_t data) +{ +#ifdef DRFLAC_64BIT + crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 56) & 0xFF)); + crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 48) & 0xFF)); + crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 40) & 0xFF)); + crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 32) & 0xFF)); +#endif + crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 24) & 0xFF)); + crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 16) & 0xFF)); + crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 8) & 0xFF)); + crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 0) & 0xFF)); + + return crc; +} + +static DRFLAC_INLINE drflac_uint16 drflac_crc16_bytes(drflac_uint16 crc, drflac_cache_t data, drflac_uint32 byteCount) +{ + switch (byteCount) + { +#ifdef DRFLAC_64BIT + case 8: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 56) & 0xFF)); + case 7: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 48) & 0xFF)); + case 6: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 40) & 0xFF)); + case 5: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 32) & 0xFF)); +#endif + case 4: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 24) & 0xFF)); + case 3: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 16) & 0xFF)); + case 2: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 8) & 0xFF)); + case 1: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 0) & 0xFF)); + } + + return crc; +} + +#if 0 +static DRFLAC_INLINE drflac_uint16 drflac_crc16__32bit(drflac_uint16 crc, drflac_uint32 data, drflac_uint32 count) +{ +#ifdef DR_FLAC_NO_CRC + (void)crc; + (void)data; + (void)count; + return 0; +#else +#if 0 + /* REFERENCE (use of this implementation requires an explicit flush by doing "drflac_crc16(crc, 0, 16);") */ + drflac_uint16 p = 0x8005; + for (int i = count-1; i >= 0; --i) { + drflac_uint16 bit = (data & (1ULL << i)) >> i; + if (r & 0x8000) { + r = ((r << 1) | bit) ^ p; + } else { + r = ((r << 1) | bit); + } + } + + return crc; +#else + drflac_uint32 wholeBytes; + drflac_uint32 leftoverBits; + drflac_uint64 leftoverDataMask; + + static drflac_uint64 leftoverDataMaskTable[8] = { + 0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F + }; + + DRFLAC_ASSERT(count <= 64); + + wholeBytes = count >> 3; + leftoverBits = count & 7; + leftoverDataMask = leftoverDataMaskTable[leftoverBits]; + + switch (wholeBytes) { + default: + case 4: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (0xFF000000UL << leftoverBits)) >> (24 + leftoverBits))); + case 3: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (0x00FF0000UL << leftoverBits)) >> (16 + leftoverBits))); + case 2: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (0x0000FF00UL << leftoverBits)) >> ( 8 + leftoverBits))); + case 1: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (0x000000FFUL << leftoverBits)) >> ( 0 + leftoverBits))); + case 0: if (leftoverBits > 0) crc = (crc << leftoverBits) ^ drflac__crc16_table[(crc >> (16 - leftoverBits)) ^ (data & leftoverDataMask)]; + } + return crc; +#endif +#endif +} + +static DRFLAC_INLINE drflac_uint16 drflac_crc16__64bit(drflac_uint16 crc, drflac_uint64 data, drflac_uint32 count) +{ +#ifdef DR_FLAC_NO_CRC + (void)crc; + (void)data; + (void)count; + return 0; +#else + drflac_uint32 wholeBytes; + drflac_uint32 leftoverBits; + drflac_uint64 leftoverDataMask; + + static drflac_uint64 leftoverDataMaskTable[8] = { + 0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F + }; + + DRFLAC_ASSERT(count <= 64); + + wholeBytes = count >> 3; + leftoverBits = count & 7; + leftoverDataMask = leftoverDataMaskTable[leftoverBits]; + + switch (wholeBytes) { + default: + case 8: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0xFF000000 << 32) << leftoverBits)) >> (56 + leftoverBits))); /* Weird "<< 32" bitshift is required for C89 because it doesn't support 64-bit constants. Should be optimized out by a good compiler. */ + case 7: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x00FF0000 << 32) << leftoverBits)) >> (48 + leftoverBits))); + case 6: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x0000FF00 << 32) << leftoverBits)) >> (40 + leftoverBits))); + case 5: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x000000FF << 32) << leftoverBits)) >> (32 + leftoverBits))); + case 4: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0xFF000000 ) << leftoverBits)) >> (24 + leftoverBits))); + case 3: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x00FF0000 ) << leftoverBits)) >> (16 + leftoverBits))); + case 2: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x0000FF00 ) << leftoverBits)) >> ( 8 + leftoverBits))); + case 1: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x000000FF ) << leftoverBits)) >> ( 0 + leftoverBits))); + case 0: if (leftoverBits > 0) crc = (crc << leftoverBits) ^ drflac__crc16_table[(crc >> (16 - leftoverBits)) ^ (data & leftoverDataMask)]; + } + return crc; +#endif +} + + +static DRFLAC_INLINE drflac_uint16 drflac_crc16(drflac_uint16 crc, drflac_cache_t data, drflac_uint32 count) +{ +#ifdef DRFLAC_64BIT + return drflac_crc16__64bit(crc, data, count); +#else + return drflac_crc16__32bit(crc, data, count); +#endif +} +#endif + + +#ifdef DRFLAC_64BIT +#define drflac__be2host__cache_line drflac__be2host_64 +#else +#define drflac__be2host__cache_line drflac__be2host_32 +#endif + +/* +BIT READING ATTEMPT #2 + +This uses a 32- or 64-bit bit-shifted cache - as bits are read, the cache is shifted such that the first valid bit is sitting +on the most significant bit. It uses the notion of an L1 and L2 cache (borrowed from CPU architecture), where the L1 cache +is a 32- or 64-bit unsigned integer (depending on whether or not a 32- or 64-bit build is being compiled) and the L2 is an +array of "cache lines", with each cache line being the same size as the L1. The L2 is a buffer of about 4KB and is where data +from onRead() is read into. +*/ +#define DRFLAC_CACHE_L1_SIZE_BYTES(bs) (sizeof((bs)->cache)) +#define DRFLAC_CACHE_L1_SIZE_BITS(bs) (sizeof((bs)->cache)*8) +#define DRFLAC_CACHE_L1_BITS_REMAINING(bs) (DRFLAC_CACHE_L1_SIZE_BITS(bs) - (bs)->consumedBits) +#define DRFLAC_CACHE_L1_SELECTION_MASK(_bitCount) (~((~(drflac_cache_t)0) >> (_bitCount))) +#define DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, _bitCount) (DRFLAC_CACHE_L1_SIZE_BITS(bs) - (_bitCount)) +#define DRFLAC_CACHE_L1_SELECT(bs, _bitCount) (((bs)->cache) & DRFLAC_CACHE_L1_SELECTION_MASK(_bitCount)) +#define DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, _bitCount) (DRFLAC_CACHE_L1_SELECT((bs), (_bitCount)) >> DRFLAC_CACHE_L1_SELECTION_SHIFT((bs), (_bitCount))) +#define DRFLAC_CACHE_L1_SELECT_AND_SHIFT_SAFE(bs, _bitCount)(DRFLAC_CACHE_L1_SELECT((bs), (_bitCount)) >> (DRFLAC_CACHE_L1_SELECTION_SHIFT((bs), (_bitCount)) & (DRFLAC_CACHE_L1_SIZE_BITS(bs)-1))) +#define DRFLAC_CACHE_L2_SIZE_BYTES(bs) (sizeof((bs)->cacheL2)) +#define DRFLAC_CACHE_L2_LINE_COUNT(bs) (DRFLAC_CACHE_L2_SIZE_BYTES(bs) / sizeof((bs)->cacheL2[0])) +#define DRFLAC_CACHE_L2_LINES_REMAINING(bs) (DRFLAC_CACHE_L2_LINE_COUNT(bs) - (bs)->nextL2Line) + + +#ifndef DR_FLAC_NO_CRC +static DRFLAC_INLINE void drflac__reset_crc16(drflac_bs* bs) +{ + bs->crc16 = 0; + bs->crc16CacheIgnoredBytes = bs->consumedBits >> 3; +} + +static DRFLAC_INLINE void drflac__update_crc16(drflac_bs* bs) +{ + if (bs->crc16CacheIgnoredBytes == 0) { + bs->crc16 = drflac_crc16_cache(bs->crc16, bs->crc16Cache); + } else { + bs->crc16 = drflac_crc16_bytes(bs->crc16, bs->crc16Cache, DRFLAC_CACHE_L1_SIZE_BYTES(bs) - bs->crc16CacheIgnoredBytes); + bs->crc16CacheIgnoredBytes = 0; + } +} + +static DRFLAC_INLINE drflac_uint16 drflac__flush_crc16(drflac_bs* bs) +{ + /* We should never be flushing in a situation where we are not aligned on a byte boundary. */ + DRFLAC_ASSERT((DRFLAC_CACHE_L1_BITS_REMAINING(bs) & 7) == 0); + + /* + The bits that were read from the L1 cache need to be accumulated. The number of bytes needing to be accumulated is determined + by the number of bits that have been consumed. + */ + if (DRFLAC_CACHE_L1_BITS_REMAINING(bs) == 0) { + drflac__update_crc16(bs); + } else { + /* We only accumulate the consumed bits. */ + bs->crc16 = drflac_crc16_bytes(bs->crc16, bs->crc16Cache >> DRFLAC_CACHE_L1_BITS_REMAINING(bs), (bs->consumedBits >> 3) - bs->crc16CacheIgnoredBytes); + + /* + The bits that we just accumulated should never be accumulated again. We need to keep track of how many bytes were accumulated + so we can handle that later. + */ + bs->crc16CacheIgnoredBytes = bs->consumedBits >> 3; + } + + return bs->crc16; +} +#endif + +static DRFLAC_INLINE drflac_bool32 drflac__reload_l1_cache_from_l2(drflac_bs* bs) +{ + size_t bytesRead; + size_t alignedL1LineCount; + + /* Fast path. Try loading straight from L2. */ + if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) { + bs->cache = bs->cacheL2[bs->nextL2Line++]; + return DRFLAC_TRUE; + } + + /* + If we get here it means we've run out of data in the L2 cache. We'll need to fetch more from the client, if there's + any left. + */ + if (bs->unalignedByteCount > 0) { + return DRFLAC_FALSE; /* If we have any unaligned bytes it means there's no more aligned bytes left in the client. */ + } + + bytesRead = bs->onRead(bs->pUserData, bs->cacheL2, DRFLAC_CACHE_L2_SIZE_BYTES(bs)); + + bs->nextL2Line = 0; + if (bytesRead == DRFLAC_CACHE_L2_SIZE_BYTES(bs)) { + bs->cache = bs->cacheL2[bs->nextL2Line++]; + return DRFLAC_TRUE; + } + + + /* + If we get here it means we were unable to retrieve enough data to fill the entire L2 cache. It probably + means we've just reached the end of the file. We need to move the valid data down to the end of the buffer + and adjust the index of the next line accordingly. Also keep in mind that the L2 cache must be aligned to + the size of the L1 so we'll need to seek backwards by any misaligned bytes. + */ + alignedL1LineCount = bytesRead / DRFLAC_CACHE_L1_SIZE_BYTES(bs); + + /* We need to keep track of any unaligned bytes for later use. */ + bs->unalignedByteCount = bytesRead - (alignedL1LineCount * DRFLAC_CACHE_L1_SIZE_BYTES(bs)); + if (bs->unalignedByteCount > 0) { + bs->unalignedCache = bs->cacheL2[alignedL1LineCount]; + } + + if (alignedL1LineCount > 0) { + size_t offset = DRFLAC_CACHE_L2_LINE_COUNT(bs) - alignedL1LineCount; + size_t i; + for (i = alignedL1LineCount; i > 0; --i) { + bs->cacheL2[i-1 + offset] = bs->cacheL2[i-1]; + } + + bs->nextL2Line = (drflac_uint32)offset; + bs->cache = bs->cacheL2[bs->nextL2Line++]; + return DRFLAC_TRUE; + } else { + /* If we get into this branch it means we weren't able to load any L1-aligned data. */ + bs->nextL2Line = DRFLAC_CACHE_L2_LINE_COUNT(bs); + return DRFLAC_FALSE; + } +} + +static drflac_bool32 drflac__reload_cache(drflac_bs* bs) +{ + size_t bytesRead; + +#ifndef DR_FLAC_NO_CRC + drflac__update_crc16(bs); +#endif + + /* Fast path. Try just moving the next value in the L2 cache to the L1 cache. */ + if (drflac__reload_l1_cache_from_l2(bs)) { + bs->cache = drflac__be2host__cache_line(bs->cache); + bs->consumedBits = 0; +#ifndef DR_FLAC_NO_CRC + bs->crc16Cache = bs->cache; +#endif + return DRFLAC_TRUE; + } + + /* Slow path. */ + + /* + If we get here it means we have failed to load the L1 cache from the L2. Likely we've just reached the end of the stream and the last + few bytes did not meet the alignment requirements for the L2 cache. In this case we need to fall back to a slower path and read the + data from the unaligned cache. + */ + bytesRead = bs->unalignedByteCount; + if (bytesRead == 0) { + bs->consumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs); /* <-- The stream has been exhausted, so marked the bits as consumed. */ + return DRFLAC_FALSE; + } + + DRFLAC_ASSERT(bytesRead < DRFLAC_CACHE_L1_SIZE_BYTES(bs)); + bs->consumedBits = (drflac_uint32)(DRFLAC_CACHE_L1_SIZE_BYTES(bs) - bytesRead) * 8; + + bs->cache = drflac__be2host__cache_line(bs->unalignedCache); + bs->cache &= DRFLAC_CACHE_L1_SELECTION_MASK(DRFLAC_CACHE_L1_BITS_REMAINING(bs)); /* <-- Make sure the consumed bits are always set to zero. Other parts of the library depend on this property. */ + bs->unalignedByteCount = 0; /* <-- At this point the unaligned bytes have been moved into the cache and we thus have no more unaligned bytes. */ + +#ifndef DR_FLAC_NO_CRC + bs->crc16Cache = bs->cache >> bs->consumedBits; + bs->crc16CacheIgnoredBytes = bs->consumedBits >> 3; +#endif + return DRFLAC_TRUE; +} + +static void drflac__reset_cache(drflac_bs* bs) +{ + bs->nextL2Line = DRFLAC_CACHE_L2_LINE_COUNT(bs); /* <-- This clears the L2 cache. */ + bs->consumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs); /* <-- This clears the L1 cache. */ + bs->cache = 0; + bs->unalignedByteCount = 0; /* <-- This clears the trailing unaligned bytes. */ + bs->unalignedCache = 0; + +#ifndef DR_FLAC_NO_CRC + bs->crc16Cache = 0; + bs->crc16CacheIgnoredBytes = 0; +#endif +} + + +static DRFLAC_INLINE drflac_bool32 drflac__read_uint32(drflac_bs* bs, unsigned int bitCount, drflac_uint32* pResultOut) +{ + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(pResultOut != NULL); + DRFLAC_ASSERT(bitCount > 0); + DRFLAC_ASSERT(bitCount <= 32); + + if (bs->consumedBits == DRFLAC_CACHE_L1_SIZE_BITS(bs)) { + if (!drflac__reload_cache(bs)) { + return DRFLAC_FALSE; + } + } + + if (bitCount <= DRFLAC_CACHE_L1_BITS_REMAINING(bs)) { + /* + If we want to load all 32-bits from a 32-bit cache we need to do it slightly differently because we can't do + a 32-bit shift on a 32-bit integer. This will never be the case on 64-bit caches, so we can have a slightly + more optimal solution for this. + */ +#ifdef DRFLAC_64BIT + *pResultOut = (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCount); + bs->consumedBits += bitCount; + bs->cache <<= bitCount; +#else + if (bitCount < DRFLAC_CACHE_L1_SIZE_BITS(bs)) { + *pResultOut = (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCount); + bs->consumedBits += bitCount; + bs->cache <<= bitCount; + } else { + /* Cannot shift by 32-bits, so need to do it differently. */ + *pResultOut = (drflac_uint32)bs->cache; + bs->consumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs); + bs->cache = 0; + } +#endif + + return DRFLAC_TRUE; + } else { + /* It straddles the cached data. It will never cover more than the next chunk. We just read the number in two parts and combine them. */ + drflac_uint32 bitCountHi = DRFLAC_CACHE_L1_BITS_REMAINING(bs); + drflac_uint32 bitCountLo = bitCount - bitCountHi; + drflac_uint32 resultHi; + + DRFLAC_ASSERT(bitCountHi > 0); + DRFLAC_ASSERT(bitCountHi < 32); + resultHi = (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCountHi); + + if (!drflac__reload_cache(bs)) { + return DRFLAC_FALSE; + } + if (bitCountLo > DRFLAC_CACHE_L1_BITS_REMAINING(bs)) { + /* This happens when we get to end of stream */ + return DRFLAC_FALSE; + } + + *pResultOut = (resultHi << bitCountLo) | (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCountLo); + bs->consumedBits += bitCountLo; + bs->cache <<= bitCountLo; + return DRFLAC_TRUE; + } +} + +static drflac_bool32 drflac__read_int32(drflac_bs* bs, unsigned int bitCount, drflac_int32* pResult) +{ + drflac_uint32 result; + + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(pResult != NULL); + DRFLAC_ASSERT(bitCount > 0); + DRFLAC_ASSERT(bitCount <= 32); + + if (!drflac__read_uint32(bs, bitCount, &result)) { + return DRFLAC_FALSE; + } + + /* Do not attempt to shift by 32 as it's undefined. */ + if (bitCount < 32) { + drflac_uint32 signbit; + signbit = ((result >> (bitCount-1)) & 0x01); + result |= (~signbit + 1) << bitCount; + } + + *pResult = (drflac_int32)result; + return DRFLAC_TRUE; +} + +#ifdef DRFLAC_64BIT +static drflac_bool32 drflac__read_uint64(drflac_bs* bs, unsigned int bitCount, drflac_uint64* pResultOut) +{ + drflac_uint32 resultHi; + drflac_uint32 resultLo; + + DRFLAC_ASSERT(bitCount <= 64); + DRFLAC_ASSERT(bitCount > 32); + + if (!drflac__read_uint32(bs, bitCount - 32, &resultHi)) { + return DRFLAC_FALSE; + } + + if (!drflac__read_uint32(bs, 32, &resultLo)) { + return DRFLAC_FALSE; + } + + *pResultOut = (((drflac_uint64)resultHi) << 32) | ((drflac_uint64)resultLo); + return DRFLAC_TRUE; +} +#endif + +/* Function below is unused, but leaving it here in case I need to quickly add it again. */ +#if 0 +static drflac_bool32 drflac__read_int64(drflac_bs* bs, unsigned int bitCount, drflac_int64* pResultOut) +{ + drflac_uint64 result; + drflac_uint64 signbit; + + DRFLAC_ASSERT(bitCount <= 64); + + if (!drflac__read_uint64(bs, bitCount, &result)) { + return DRFLAC_FALSE; + } + + signbit = ((result >> (bitCount-1)) & 0x01); + result |= (~signbit + 1) << bitCount; + + *pResultOut = (drflac_int64)result; + return DRFLAC_TRUE; +} +#endif + +static drflac_bool32 drflac__read_uint16(drflac_bs* bs, unsigned int bitCount, drflac_uint16* pResult) +{ + drflac_uint32 result; + + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(pResult != NULL); + DRFLAC_ASSERT(bitCount > 0); + DRFLAC_ASSERT(bitCount <= 16); + + if (!drflac__read_uint32(bs, bitCount, &result)) { + return DRFLAC_FALSE; + } + + *pResult = (drflac_uint16)result; + return DRFLAC_TRUE; +} + +#if 0 +static drflac_bool32 drflac__read_int16(drflac_bs* bs, unsigned int bitCount, drflac_int16* pResult) +{ + drflac_int32 result; + + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(pResult != NULL); + DRFLAC_ASSERT(bitCount > 0); + DRFLAC_ASSERT(bitCount <= 16); + + if (!drflac__read_int32(bs, bitCount, &result)) { + return DRFLAC_FALSE; + } + + *pResult = (drflac_int16)result; + return DRFLAC_TRUE; +} +#endif + +static drflac_bool32 drflac__read_uint8(drflac_bs* bs, unsigned int bitCount, drflac_uint8* pResult) +{ + drflac_uint32 result; + + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(pResult != NULL); + DRFLAC_ASSERT(bitCount > 0); + DRFLAC_ASSERT(bitCount <= 8); + + if (!drflac__read_uint32(bs, bitCount, &result)) { + return DRFLAC_FALSE; + } + + *pResult = (drflac_uint8)result; + return DRFLAC_TRUE; +} + +static drflac_bool32 drflac__read_int8(drflac_bs* bs, unsigned int bitCount, drflac_int8* pResult) +{ + drflac_int32 result; + + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(pResult != NULL); + DRFLAC_ASSERT(bitCount > 0); + DRFLAC_ASSERT(bitCount <= 8); + + if (!drflac__read_int32(bs, bitCount, &result)) { + return DRFLAC_FALSE; + } + + *pResult = (drflac_int8)result; + return DRFLAC_TRUE; +} + + +static drflac_bool32 drflac__seek_bits(drflac_bs* bs, size_t bitsToSeek) +{ + if (bitsToSeek <= DRFLAC_CACHE_L1_BITS_REMAINING(bs)) { + bs->consumedBits += (drflac_uint32)bitsToSeek; + bs->cache <<= bitsToSeek; + return DRFLAC_TRUE; + } else { + /* It straddles the cached data. This function isn't called too frequently so I'm favouring simplicity here. */ + bitsToSeek -= DRFLAC_CACHE_L1_BITS_REMAINING(bs); + bs->consumedBits += DRFLAC_CACHE_L1_BITS_REMAINING(bs); + bs->cache = 0; + + /* Simple case. Seek in groups of the same number as bits that fit within a cache line. */ +#ifdef DRFLAC_64BIT + while (bitsToSeek >= DRFLAC_CACHE_L1_SIZE_BITS(bs)) { + drflac_uint64 bin; + if (!drflac__read_uint64(bs, DRFLAC_CACHE_L1_SIZE_BITS(bs), &bin)) { + return DRFLAC_FALSE; + } + bitsToSeek -= DRFLAC_CACHE_L1_SIZE_BITS(bs); + } +#else + while (bitsToSeek >= DRFLAC_CACHE_L1_SIZE_BITS(bs)) { + drflac_uint32 bin; + if (!drflac__read_uint32(bs, DRFLAC_CACHE_L1_SIZE_BITS(bs), &bin)) { + return DRFLAC_FALSE; + } + bitsToSeek -= DRFLAC_CACHE_L1_SIZE_BITS(bs); + } +#endif + + /* Whole leftover bytes. */ + while (bitsToSeek >= 8) { + drflac_uint8 bin; + if (!drflac__read_uint8(bs, 8, &bin)) { + return DRFLAC_FALSE; + } + bitsToSeek -= 8; + } + + /* Leftover bits. */ + if (bitsToSeek > 0) { + drflac_uint8 bin; + if (!drflac__read_uint8(bs, (drflac_uint32)bitsToSeek, &bin)) { + return DRFLAC_FALSE; + } + bitsToSeek = 0; /* <-- Necessary for the assert below. */ + } + + DRFLAC_ASSERT(bitsToSeek == 0); + return DRFLAC_TRUE; + } +} + + +/* This function moves the bit streamer to the first bit after the sync code (bit 15 of the of the frame header). It will also update the CRC-16. */ +static drflac_bool32 drflac__find_and_seek_to_next_sync_code(drflac_bs* bs) +{ + DRFLAC_ASSERT(bs != NULL); + + /* + The sync code is always aligned to 8 bits. This is convenient for us because it means we can do byte-aligned movements. The first + thing to do is align to the next byte. + */ + if (!drflac__seek_bits(bs, DRFLAC_CACHE_L1_BITS_REMAINING(bs) & 7)) { + return DRFLAC_FALSE; + } + + for (;;) { + drflac_uint8 hi; + +#ifndef DR_FLAC_NO_CRC + drflac__reset_crc16(bs); +#endif + + if (!drflac__read_uint8(bs, 8, &hi)) { + return DRFLAC_FALSE; + } + + if (hi == 0xFF) { + drflac_uint8 lo; + if (!drflac__read_uint8(bs, 6, &lo)) { + return DRFLAC_FALSE; + } + + if (lo == 0x3E) { + return DRFLAC_TRUE; + } else { + if (!drflac__seek_bits(bs, DRFLAC_CACHE_L1_BITS_REMAINING(bs) & 7)) { + return DRFLAC_FALSE; + } + } + } + } + + /* Should never get here. */ + /*return DRFLAC_FALSE;*/ +} + + +#if defined(DRFLAC_HAS_LZCNT_INTRINSIC) +#define DRFLAC_IMPLEMENT_CLZ_LZCNT +#endif +#if defined(_MSC_VER) && _MSC_VER >= 1400 && (defined(DRFLAC_X64) || defined(DRFLAC_X86)) && !defined(__clang__) +#define DRFLAC_IMPLEMENT_CLZ_MSVC +#endif +#if defined(__WATCOMC__) && defined(__386__) +#define DRFLAC_IMPLEMENT_CLZ_WATCOM +#endif +#ifdef __MRC__ +#include +#define DRFLAC_IMPLEMENT_CLZ_MRC +#endif + +static DRFLAC_INLINE drflac_uint32 drflac__clz_software(drflac_cache_t x) +{ + drflac_uint32 n; + static drflac_uint32 clz_table_4[] = { + 0, + 4, + 3, 3, + 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1 + }; + + if (x == 0) { + return sizeof(x)*8; + } + + n = clz_table_4[x >> (sizeof(x)*8 - 4)]; + if (n == 0) { +#ifdef DRFLAC_64BIT + if ((x & ((drflac_uint64)0xFFFFFFFF << 32)) == 0) { n = 32; x <<= 32; } + if ((x & ((drflac_uint64)0xFFFF0000 << 32)) == 0) { n += 16; x <<= 16; } + if ((x & ((drflac_uint64)0xFF000000 << 32)) == 0) { n += 8; x <<= 8; } + if ((x & ((drflac_uint64)0xF0000000 << 32)) == 0) { n += 4; x <<= 4; } +#else + if ((x & 0xFFFF0000) == 0) { n = 16; x <<= 16; } + if ((x & 0xFF000000) == 0) { n += 8; x <<= 8; } + if ((x & 0xF0000000) == 0) { n += 4; x <<= 4; } +#endif + n += clz_table_4[x >> (sizeof(x)*8 - 4)]; + } + + return n - 1; +} + +#ifdef DRFLAC_IMPLEMENT_CLZ_LZCNT +static DRFLAC_INLINE drflac_bool32 drflac__is_lzcnt_supported(void) +{ + /* Fast compile time check for ARM. */ +#if defined(DRFLAC_HAS_LZCNT_INTRINSIC) && defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5) + return DRFLAC_TRUE; +#elif defined(__MRC__) + return DRFLAC_TRUE; +#else + /* If the compiler itself does not support the intrinsic then we'll need to return false. */ + #ifdef DRFLAC_HAS_LZCNT_INTRINSIC + return drflac__gIsLZCNTSupported; + #else + return DRFLAC_FALSE; + #endif +#endif +} + +static DRFLAC_INLINE drflac_uint32 drflac__clz_lzcnt(drflac_cache_t x) +{ + /* + It's critical for competitive decoding performance that this function be highly optimal. With MSVC we can use the __lzcnt64() and __lzcnt() intrinsics + to achieve good performance, however on GCC and Clang it's a little bit more annoying. The __builtin_clzl() and __builtin_clzll() intrinsics leave + it undefined as to the return value when `x` is 0. We need this to be well defined as returning 32 or 64, depending on whether or not it's a 32- or + 64-bit build. To work around this we would need to add a conditional to check for the x = 0 case, but this creates unnecessary inefficiency. To work + around this problem I have written some inline assembly to emit the LZCNT (x86) or CLZ (ARM) instruction directly which removes the need to include + the conditional. This has worked well in the past, but for some reason Clang's MSVC compatible driver, clang-cl, does not seem to be handling this + in the same way as the normal Clang driver. It seems that `clang-cl` is just outputting the wrong results sometimes, maybe due to some register + getting clobbered? + + I'm not sure if this is a bug with dr_flac's inlined assembly (most likely), a bug in `clang-cl` or just a misunderstanding on my part with inline + assembly rules for `clang-cl`. If somebody can identify an error in dr_flac's inlined assembly I'm happy to get that fixed. + + Fortunately there is an easy workaround for this. Clang implements MSVC-specific intrinsics for compatibility. It also defines _MSC_VER for extra + compatibility. We can therefore just check for _MSC_VER and use the MSVC intrinsic which, fortunately for us, Clang supports. It would still be nice + to know how to fix the inlined assembly for correctness sake, however. + */ + +#if defined(_MSC_VER) /*&& !defined(__clang__)*/ /* <-- Intentionally wanting Clang to use the MSVC __lzcnt64/__lzcnt intrinsics due to above ^. */ + #ifdef DRFLAC_64BIT + return (drflac_uint32)__lzcnt64(x); + #else + return (drflac_uint32)__lzcnt(x); + #endif +#else + #if defined(__GNUC__) || defined(__clang__) + #if defined(DRFLAC_X64) + { + drflac_uint64 r; + __asm__ __volatile__ ( + "lzcnt{ %1, %0| %0, %1}" : "=r"(r) : "r"(x) : "cc" + ); + + return (drflac_uint32)r; + } + #elif defined(DRFLAC_X86) + { + drflac_uint32 r; + __asm__ __volatile__ ( + "lzcnt{l %1, %0| %0, %1}" : "=r"(r) : "r"(x) : "cc" + ); + + return r; + } + #elif defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5) && !defined(__ARM_ARCH_6M__) && !(defined(__thumb__) && !defined(__thumb2__)) && !defined(DRFLAC_64BIT) /* <-- I haven't tested 64-bit inline assembly, so only enabling this for the 32-bit build for now. */ + { + unsigned int r; + __asm__ __volatile__ ( + #if defined(DRFLAC_64BIT) + "clz %w[out], %w[in]" : [out]"=r"(r) : [in]"r"(x) /* <-- This is untested. If someone in the community could test this, that would be appreciated! */ + #else + "clz %[out], %[in]" : [out]"=r"(r) : [in]"r"(x) + #endif + ); + + return r; + } + #else + if (x == 0) { + return sizeof(x)*8; + } + #ifdef DRFLAC_64BIT + return (drflac_uint32)__builtin_clzll((drflac_uint64)x); + #else + return (drflac_uint32)__builtin_clzl((drflac_uint32)x); + #endif + #endif + #else + /* Unsupported compiler. */ + #error "This compiler does not support the lzcnt intrinsic." + #endif +#endif +} +#endif + +#ifdef DRFLAC_IMPLEMENT_CLZ_MSVC +#include /* For BitScanReverse(). */ + +static DRFLAC_INLINE drflac_uint32 drflac__clz_msvc(drflac_cache_t x) +{ + drflac_uint32 n; + + if (x == 0) { + return sizeof(x)*8; + } + +#ifdef DRFLAC_64BIT + _BitScanReverse64((unsigned long*)&n, x); +#else + _BitScanReverse((unsigned long*)&n, x); +#endif + return sizeof(x)*8 - n - 1; +} +#endif + +#ifdef DRFLAC_IMPLEMENT_CLZ_WATCOM +static __inline drflac_uint32 drflac__clz_watcom (drflac_uint32); +#ifdef DRFLAC_IMPLEMENT_CLZ_WATCOM_LZCNT +/* Use the LZCNT instruction (only available on some processors since the 2010s). */ +#pragma aux drflac__clz_watcom_lzcnt = \ + "db 0F3h, 0Fh, 0BDh, 0C0h" /* lzcnt eax, eax */ \ + parm [eax] \ + value [eax] \ + modify nomemory; +#else +/* Use the 386+-compatible implementation. */ +#pragma aux drflac__clz_watcom = \ + "bsr eax, eax" \ + "xor eax, 31" \ + parm [eax] nomemory \ + value [eax] \ + modify exact [eax] nomemory; +#endif +#endif + +static DRFLAC_INLINE drflac_uint32 drflac__clz(drflac_cache_t x) +{ +#ifdef DRFLAC_IMPLEMENT_CLZ_LZCNT + if (drflac__is_lzcnt_supported()) { + return drflac__clz_lzcnt(x); + } else +#endif + { +#ifdef DRFLAC_IMPLEMENT_CLZ_MSVC + return drflac__clz_msvc(x); +#elif defined(DRFLAC_IMPLEMENT_CLZ_WATCOM_LZCNT) + return drflac__clz_watcom_lzcnt(x); +#elif defined(DRFLAC_IMPLEMENT_CLZ_WATCOM) + return (x == 0) ? sizeof(x)*8 : drflac__clz_watcom(x); +#elif defined(__MRC__) + return __cntlzw(x); +#else + return drflac__clz_software(x); +#endif + } +} + + +static DRFLAC_INLINE drflac_bool32 drflac__seek_past_next_set_bit(drflac_bs* bs, unsigned int* pOffsetOut) +{ + drflac_uint32 zeroCounter = 0; + drflac_uint32 setBitOffsetPlus1; + + while (bs->cache == 0) { + zeroCounter += (drflac_uint32)DRFLAC_CACHE_L1_BITS_REMAINING(bs); + if (!drflac__reload_cache(bs)) { + return DRFLAC_FALSE; + } + } + + if (bs->cache == 1) { + /* Not catching this would lead to undefined behaviour: a shift of a 32-bit number by 32 or more is undefined */ + *pOffsetOut = zeroCounter + (drflac_uint32)DRFLAC_CACHE_L1_BITS_REMAINING(bs) - 1; + if (!drflac__reload_cache(bs)) { + return DRFLAC_FALSE; + } + + return DRFLAC_TRUE; + } + + setBitOffsetPlus1 = drflac__clz(bs->cache); + setBitOffsetPlus1 += 1; + + if (setBitOffsetPlus1 > DRFLAC_CACHE_L1_BITS_REMAINING(bs)) { + /* This happens when we get to end of stream */ + return DRFLAC_FALSE; + } + + bs->consumedBits += setBitOffsetPlus1; + bs->cache <<= setBitOffsetPlus1; + + *pOffsetOut = zeroCounter + setBitOffsetPlus1 - 1; + return DRFLAC_TRUE; +} + + + +static drflac_bool32 drflac__seek_to_byte(drflac_bs* bs, drflac_uint64 offsetFromStart) +{ + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(offsetFromStart > 0); + + /* + Seeking from the start is not quite as trivial as it sounds because the onSeek callback takes a signed 32-bit integer (which + is intentional because it simplifies the implementation of the onSeek callbacks), however offsetFromStart is unsigned 64-bit. + To resolve we just need to do an initial seek from the start, and then a series of offset seeks to make up the remainder. + */ + if (offsetFromStart > 0x7FFFFFFF) { + drflac_uint64 bytesRemaining = offsetFromStart; + if (!bs->onSeek(bs->pUserData, 0x7FFFFFFF, DRFLAC_SEEK_SET)) { + return DRFLAC_FALSE; + } + bytesRemaining -= 0x7FFFFFFF; + + while (bytesRemaining > 0x7FFFFFFF) { + if (!bs->onSeek(bs->pUserData, 0x7FFFFFFF, DRFLAC_SEEK_CUR)) { + return DRFLAC_FALSE; + } + bytesRemaining -= 0x7FFFFFFF; + } + + if (bytesRemaining > 0) { + if (!bs->onSeek(bs->pUserData, (int)bytesRemaining, DRFLAC_SEEK_CUR)) { + return DRFLAC_FALSE; + } + } + } else { + if (!bs->onSeek(bs->pUserData, (int)offsetFromStart, DRFLAC_SEEK_SET)) { + return DRFLAC_FALSE; + } + } + + /* The cache should be reset to force a reload of fresh data from the client. */ + drflac__reset_cache(bs); + return DRFLAC_TRUE; +} + + +static drflac_result drflac__read_utf8_coded_number(drflac_bs* bs, drflac_uint64* pNumberOut, drflac_uint8* pCRCOut) +{ + drflac_uint8 crc; + drflac_uint64 result; + drflac_uint8 utf8[7] = {0}; + int byteCount; + int i; + + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(pNumberOut != NULL); + DRFLAC_ASSERT(pCRCOut != NULL); + + crc = *pCRCOut; + + if (!drflac__read_uint8(bs, 8, utf8)) { + *pNumberOut = 0; + return DRFLAC_AT_END; + } + crc = drflac_crc8(crc, utf8[0], 8); + + if ((utf8[0] & 0x80) == 0) { + *pNumberOut = utf8[0]; + *pCRCOut = crc; + return DRFLAC_SUCCESS; + } + + /*byteCount = 1;*/ + if ((utf8[0] & 0xE0) == 0xC0) { + byteCount = 2; + } else if ((utf8[0] & 0xF0) == 0xE0) { + byteCount = 3; + } else if ((utf8[0] & 0xF8) == 0xF0) { + byteCount = 4; + } else if ((utf8[0] & 0xFC) == 0xF8) { + byteCount = 5; + } else if ((utf8[0] & 0xFE) == 0xFC) { + byteCount = 6; + } else if ((utf8[0] & 0xFF) == 0xFE) { + byteCount = 7; + } else { + *pNumberOut = 0; + return DRFLAC_CRC_MISMATCH; /* Bad UTF-8 encoding. */ + } + + /* Read extra bytes. */ + DRFLAC_ASSERT(byteCount > 1); + + result = (drflac_uint64)(utf8[0] & (0xFF >> (byteCount + 1))); + for (i = 1; i < byteCount; ++i) { + if (!drflac__read_uint8(bs, 8, utf8 + i)) { + *pNumberOut = 0; + return DRFLAC_AT_END; + } + crc = drflac_crc8(crc, utf8[i], 8); + + result = (result << 6) | (utf8[i] & 0x3F); + } + + *pNumberOut = result; + *pCRCOut = crc; + return DRFLAC_SUCCESS; +} + + +static DRFLAC_INLINE drflac_uint32 drflac__ilog2_u32(drflac_uint32 x) +{ +#if 1 /* Needs optimizing. */ + drflac_uint32 result = 0; + while (x > 0) { + result += 1; + x >>= 1; + } + + return result; +#endif +} + +static DRFLAC_INLINE drflac_bool32 drflac__use_64_bit_prediction(drflac_uint32 bitsPerSample, drflac_uint32 order, drflac_uint32 precision) +{ + /* https://web.archive.org/web/20220205005724/https://github.com/ietf-wg-cellar/flac-specification/blob/37a49aa48ba4ba12e8757badfc59c0df35435fec/rfc_backmatter.md */ + return bitsPerSample + precision + drflac__ilog2_u32(order) > 32; +} + + +/* +The next two functions are responsible for calculating the prediction. + +When the bits per sample is >16 we need to use 64-bit integer arithmetic because otherwise we'll run out of precision. It's +safe to assume this will be slower on 32-bit platforms so we use a more optimal solution when the bits per sample is <=16. +*/ +#if defined(__clang__) +__attribute__((no_sanitize("signed-integer-overflow"))) +#endif +static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_32(drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples) +{ + drflac_int32 prediction = 0; + + DRFLAC_ASSERT(order <= 32); + + /* 32-bit version. */ + + /* VC++ optimizes this to a single jmp. I've not yet verified this for other compilers. */ + switch (order) + { + case 32: prediction += coefficients[31] * pDecodedSamples[-32]; + case 31: prediction += coefficients[30] * pDecodedSamples[-31]; + case 30: prediction += coefficients[29] * pDecodedSamples[-30]; + case 29: prediction += coefficients[28] * pDecodedSamples[-29]; + case 28: prediction += coefficients[27] * pDecodedSamples[-28]; + case 27: prediction += coefficients[26] * pDecodedSamples[-27]; + case 26: prediction += coefficients[25] * pDecodedSamples[-26]; + case 25: prediction += coefficients[24] * pDecodedSamples[-25]; + case 24: prediction += coefficients[23] * pDecodedSamples[-24]; + case 23: prediction += coefficients[22] * pDecodedSamples[-23]; + case 22: prediction += coefficients[21] * pDecodedSamples[-22]; + case 21: prediction += coefficients[20] * pDecodedSamples[-21]; + case 20: prediction += coefficients[19] * pDecodedSamples[-20]; + case 19: prediction += coefficients[18] * pDecodedSamples[-19]; + case 18: prediction += coefficients[17] * pDecodedSamples[-18]; + case 17: prediction += coefficients[16] * pDecodedSamples[-17]; + case 16: prediction += coefficients[15] * pDecodedSamples[-16]; + case 15: prediction += coefficients[14] * pDecodedSamples[-15]; + case 14: prediction += coefficients[13] * pDecodedSamples[-14]; + case 13: prediction += coefficients[12] * pDecodedSamples[-13]; + case 12: prediction += coefficients[11] * pDecodedSamples[-12]; + case 11: prediction += coefficients[10] * pDecodedSamples[-11]; + case 10: prediction += coefficients[ 9] * pDecodedSamples[-10]; + case 9: prediction += coefficients[ 8] * pDecodedSamples[- 9]; + case 8: prediction += coefficients[ 7] * pDecodedSamples[- 8]; + case 7: prediction += coefficients[ 6] * pDecodedSamples[- 7]; + case 6: prediction += coefficients[ 5] * pDecodedSamples[- 6]; + case 5: prediction += coefficients[ 4] * pDecodedSamples[- 5]; + case 4: prediction += coefficients[ 3] * pDecodedSamples[- 4]; + case 3: prediction += coefficients[ 2] * pDecodedSamples[- 3]; + case 2: prediction += coefficients[ 1] * pDecodedSamples[- 2]; + case 1: prediction += coefficients[ 0] * pDecodedSamples[- 1]; + } + + return (drflac_int32)(prediction >> shift); +} + +static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_64(drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples) +{ + drflac_int64 prediction; + + DRFLAC_ASSERT(order <= 32); + + /* 64-bit version. */ + + /* This method is faster on the 32-bit build when compiling with VC++. See note below. */ +#ifndef DRFLAC_64BIT + if (order == 8) + { + prediction = coefficients[0] * (drflac_int64)pDecodedSamples[-1]; + prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2]; + prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3]; + prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4]; + prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5]; + prediction += coefficients[5] * (drflac_int64)pDecodedSamples[-6]; + prediction += coefficients[6] * (drflac_int64)pDecodedSamples[-7]; + prediction += coefficients[7] * (drflac_int64)pDecodedSamples[-8]; + } + else if (order == 7) + { + prediction = coefficients[0] * (drflac_int64)pDecodedSamples[-1]; + prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2]; + prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3]; + prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4]; + prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5]; + prediction += coefficients[5] * (drflac_int64)pDecodedSamples[-6]; + prediction += coefficients[6] * (drflac_int64)pDecodedSamples[-7]; + } + else if (order == 3) + { + prediction = coefficients[0] * (drflac_int64)pDecodedSamples[-1]; + prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2]; + prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3]; + } + else if (order == 6) + { + prediction = coefficients[0] * (drflac_int64)pDecodedSamples[-1]; + prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2]; + prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3]; + prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4]; + prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5]; + prediction += coefficients[5] * (drflac_int64)pDecodedSamples[-6]; + } + else if (order == 5) + { + prediction = coefficients[0] * (drflac_int64)pDecodedSamples[-1]; + prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2]; + prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3]; + prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4]; + prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5]; + } + else if (order == 4) + { + prediction = coefficients[0] * (drflac_int64)pDecodedSamples[-1]; + prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2]; + prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3]; + prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4]; + } + else if (order == 12) + { + prediction = coefficients[0] * (drflac_int64)pDecodedSamples[-1]; + prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2]; + prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3]; + prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4]; + prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5]; + prediction += coefficients[5] * (drflac_int64)pDecodedSamples[-6]; + prediction += coefficients[6] * (drflac_int64)pDecodedSamples[-7]; + prediction += coefficients[7] * (drflac_int64)pDecodedSamples[-8]; + prediction += coefficients[8] * (drflac_int64)pDecodedSamples[-9]; + prediction += coefficients[9] * (drflac_int64)pDecodedSamples[-10]; + prediction += coefficients[10] * (drflac_int64)pDecodedSamples[-11]; + prediction += coefficients[11] * (drflac_int64)pDecodedSamples[-12]; + } + else if (order == 2) + { + prediction = coefficients[0] * (drflac_int64)pDecodedSamples[-1]; + prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2]; + } + else if (order == 1) + { + prediction = coefficients[0] * (drflac_int64)pDecodedSamples[-1]; + } + else if (order == 10) + { + prediction = coefficients[0] * (drflac_int64)pDecodedSamples[-1]; + prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2]; + prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3]; + prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4]; + prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5]; + prediction += coefficients[5] * (drflac_int64)pDecodedSamples[-6]; + prediction += coefficients[6] * (drflac_int64)pDecodedSamples[-7]; + prediction += coefficients[7] * (drflac_int64)pDecodedSamples[-8]; + prediction += coefficients[8] * (drflac_int64)pDecodedSamples[-9]; + prediction += coefficients[9] * (drflac_int64)pDecodedSamples[-10]; + } + else if (order == 9) + { + prediction = coefficients[0] * (drflac_int64)pDecodedSamples[-1]; + prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2]; + prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3]; + prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4]; + prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5]; + prediction += coefficients[5] * (drflac_int64)pDecodedSamples[-6]; + prediction += coefficients[6] * (drflac_int64)pDecodedSamples[-7]; + prediction += coefficients[7] * (drflac_int64)pDecodedSamples[-8]; + prediction += coefficients[8] * (drflac_int64)pDecodedSamples[-9]; + } + else if (order == 11) + { + prediction = coefficients[0] * (drflac_int64)pDecodedSamples[-1]; + prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2]; + prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3]; + prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4]; + prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5]; + prediction += coefficients[5] * (drflac_int64)pDecodedSamples[-6]; + prediction += coefficients[6] * (drflac_int64)pDecodedSamples[-7]; + prediction += coefficients[7] * (drflac_int64)pDecodedSamples[-8]; + prediction += coefficients[8] * (drflac_int64)pDecodedSamples[-9]; + prediction += coefficients[9] * (drflac_int64)pDecodedSamples[-10]; + prediction += coefficients[10] * (drflac_int64)pDecodedSamples[-11]; + } + else + { + int j; + + prediction = 0; + for (j = 0; j < (int)order; ++j) { + prediction += coefficients[j] * (drflac_int64)pDecodedSamples[-j-1]; + } + } +#endif + + /* + VC++ optimizes this to a single jmp instruction, but only the 64-bit build. The 32-bit build generates less efficient code for some + reason. The ugly version above is faster so we'll just switch between the two depending on the target platform. + */ +#ifdef DRFLAC_64BIT + prediction = 0; + switch (order) + { + case 32: prediction += coefficients[31] * (drflac_int64)pDecodedSamples[-32]; + case 31: prediction += coefficients[30] * (drflac_int64)pDecodedSamples[-31]; + case 30: prediction += coefficients[29] * (drflac_int64)pDecodedSamples[-30]; + case 29: prediction += coefficients[28] * (drflac_int64)pDecodedSamples[-29]; + case 28: prediction += coefficients[27] * (drflac_int64)pDecodedSamples[-28]; + case 27: prediction += coefficients[26] * (drflac_int64)pDecodedSamples[-27]; + case 26: prediction += coefficients[25] * (drflac_int64)pDecodedSamples[-26]; + case 25: prediction += coefficients[24] * (drflac_int64)pDecodedSamples[-25]; + case 24: prediction += coefficients[23] * (drflac_int64)pDecodedSamples[-24]; + case 23: prediction += coefficients[22] * (drflac_int64)pDecodedSamples[-23]; + case 22: prediction += coefficients[21] * (drflac_int64)pDecodedSamples[-22]; + case 21: prediction += coefficients[20] * (drflac_int64)pDecodedSamples[-21]; + case 20: prediction += coefficients[19] * (drflac_int64)pDecodedSamples[-20]; + case 19: prediction += coefficients[18] * (drflac_int64)pDecodedSamples[-19]; + case 18: prediction += coefficients[17] * (drflac_int64)pDecodedSamples[-18]; + case 17: prediction += coefficients[16] * (drflac_int64)pDecodedSamples[-17]; + case 16: prediction += coefficients[15] * (drflac_int64)pDecodedSamples[-16]; + case 15: prediction += coefficients[14] * (drflac_int64)pDecodedSamples[-15]; + case 14: prediction += coefficients[13] * (drflac_int64)pDecodedSamples[-14]; + case 13: prediction += coefficients[12] * (drflac_int64)pDecodedSamples[-13]; + case 12: prediction += coefficients[11] * (drflac_int64)pDecodedSamples[-12]; + case 11: prediction += coefficients[10] * (drflac_int64)pDecodedSamples[-11]; + case 10: prediction += coefficients[ 9] * (drflac_int64)pDecodedSamples[-10]; + case 9: prediction += coefficients[ 8] * (drflac_int64)pDecodedSamples[- 9]; + case 8: prediction += coefficients[ 7] * (drflac_int64)pDecodedSamples[- 8]; + case 7: prediction += coefficients[ 6] * (drflac_int64)pDecodedSamples[- 7]; + case 6: prediction += coefficients[ 5] * (drflac_int64)pDecodedSamples[- 6]; + case 5: prediction += coefficients[ 4] * (drflac_int64)pDecodedSamples[- 5]; + case 4: prediction += coefficients[ 3] * (drflac_int64)pDecodedSamples[- 4]; + case 3: prediction += coefficients[ 2] * (drflac_int64)pDecodedSamples[- 3]; + case 2: prediction += coefficients[ 1] * (drflac_int64)pDecodedSamples[- 2]; + case 1: prediction += coefficients[ 0] * (drflac_int64)pDecodedSamples[- 1]; + } +#endif + + return (drflac_int32)(prediction >> shift); +} + + +#if 0 +/* +Reference implementation for reading and decoding samples with residual. This is intentionally left unoptimized for the +sake of readability and should only be used as a reference. +*/ +static drflac_bool32 drflac__decode_samples_with_residual__rice__reference(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut) +{ + drflac_uint32 i; + + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(pSamplesOut != NULL); + + for (i = 0; i < count; ++i) { + drflac_uint32 zeroCounter = 0; + for (;;) { + drflac_uint8 bit; + if (!drflac__read_uint8(bs, 1, &bit)) { + return DRFLAC_FALSE; + } + + if (bit == 0) { + zeroCounter += 1; + } else { + break; + } + } + + drflac_uint32 decodedRice; + if (riceParam > 0) { + if (!drflac__read_uint32(bs, riceParam, &decodedRice)) { + return DRFLAC_FALSE; + } + } else { + decodedRice = 0; + } + + decodedRice |= (zeroCounter << riceParam); + if ((decodedRice & 0x01)) { + decodedRice = ~(decodedRice >> 1); + } else { + decodedRice = (decodedRice >> 1); + } + + + if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) { + pSamplesOut[i] = decodedRice + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + i); + } else { + pSamplesOut[i] = decodedRice + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + i); + } + } + + return DRFLAC_TRUE; +} +#endif + +#if 0 +static drflac_bool32 drflac__read_rice_parts__reference(drflac_bs* bs, drflac_uint8 riceParam, drflac_uint32* pZeroCounterOut, drflac_uint32* pRiceParamPartOut) +{ + drflac_uint32 zeroCounter = 0; + drflac_uint32 decodedRice; + + for (;;) { + drflac_uint8 bit; + if (!drflac__read_uint8(bs, 1, &bit)) { + return DRFLAC_FALSE; + } + + if (bit == 0) { + zeroCounter += 1; + } else { + break; + } + } + + if (riceParam > 0) { + if (!drflac__read_uint32(bs, riceParam, &decodedRice)) { + return DRFLAC_FALSE; + } + } else { + decodedRice = 0; + } + + *pZeroCounterOut = zeroCounter; + *pRiceParamPartOut = decodedRice; + return DRFLAC_TRUE; +} +#endif + +#if 0 +static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts(drflac_bs* bs, drflac_uint8 riceParam, drflac_uint32* pZeroCounterOut, drflac_uint32* pRiceParamPartOut) +{ + drflac_cache_t riceParamMask; + drflac_uint32 zeroCounter; + drflac_uint32 setBitOffsetPlus1; + drflac_uint32 riceParamPart; + drflac_uint32 riceLength; + + DRFLAC_ASSERT(riceParam > 0); /* <-- riceParam should never be 0. drflac__read_rice_parts__param_equals_zero() should be used instead for this case. */ + + riceParamMask = DRFLAC_CACHE_L1_SELECTION_MASK(riceParam); + + zeroCounter = 0; + while (bs->cache == 0) { + zeroCounter += (drflac_uint32)DRFLAC_CACHE_L1_BITS_REMAINING(bs); + if (!drflac__reload_cache(bs)) { + return DRFLAC_FALSE; + } + } + + setBitOffsetPlus1 = drflac__clz(bs->cache); + zeroCounter += setBitOffsetPlus1; + setBitOffsetPlus1 += 1; + + riceLength = setBitOffsetPlus1 + riceParam; + if (riceLength < DRFLAC_CACHE_L1_BITS_REMAINING(bs)) { + riceParamPart = (drflac_uint32)((bs->cache & (riceParamMask >> setBitOffsetPlus1)) >> DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, riceLength)); + + bs->consumedBits += riceLength; + bs->cache <<= riceLength; + } else { + drflac_uint32 bitCountLo; + drflac_cache_t resultHi; + + bs->consumedBits += riceLength; + bs->cache <<= setBitOffsetPlus1 & (DRFLAC_CACHE_L1_SIZE_BITS(bs)-1); /* <-- Equivalent to "if (setBitOffsetPlus1 < DRFLAC_CACHE_L1_SIZE_BITS(bs)) { bs->cache <<= setBitOffsetPlus1; }" */ + + /* It straddles the cached data. It will never cover more than the next chunk. We just read the number in two parts and combine them. */ + bitCountLo = bs->consumedBits - DRFLAC_CACHE_L1_SIZE_BITS(bs); + resultHi = DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, riceParam); /* <-- Use DRFLAC_CACHE_L1_SELECT_AND_SHIFT_SAFE() if ever this function allows riceParam=0. */ + + if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) { +#ifndef DR_FLAC_NO_CRC + drflac__update_crc16(bs); +#endif + bs->cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]); + bs->consumedBits = 0; +#ifndef DR_FLAC_NO_CRC + bs->crc16Cache = bs->cache; +#endif + } else { + /* Slow path. We need to fetch more data from the client. */ + if (!drflac__reload_cache(bs)) { + return DRFLAC_FALSE; + } + if (bitCountLo > DRFLAC_CACHE_L1_BITS_REMAINING(bs)) { + /* This happens when we get to end of stream */ + return DRFLAC_FALSE; + } + } + + riceParamPart = (drflac_uint32)(resultHi | DRFLAC_CACHE_L1_SELECT_AND_SHIFT_SAFE(bs, bitCountLo)); + + bs->consumedBits += bitCountLo; + bs->cache <<= bitCountLo; + } + + pZeroCounterOut[0] = zeroCounter; + pRiceParamPartOut[0] = riceParamPart; + + return DRFLAC_TRUE; +} +#endif + +static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts_x1(drflac_bs* bs, drflac_uint8 riceParam, drflac_uint32* pZeroCounterOut, drflac_uint32* pRiceParamPartOut) +{ + drflac_uint32 riceParamPlus1 = riceParam + 1; + /*drflac_cache_t riceParamPlus1Mask = DRFLAC_CACHE_L1_SELECTION_MASK(riceParamPlus1);*/ + drflac_uint32 riceParamPlus1Shift = DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, riceParamPlus1); + drflac_uint32 riceParamPlus1MaxConsumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs) - riceParamPlus1; + + /* + The idea here is to use local variables for the cache in an attempt to encourage the compiler to store them in registers. I have + no idea how this will work in practice... + */ + drflac_cache_t bs_cache = bs->cache; + drflac_uint32 bs_consumedBits = bs->consumedBits; + + /* The first thing to do is find the first unset bit. Most likely a bit will be set in the current cache line. */ + drflac_uint32 lzcount = drflac__clz(bs_cache); + if (lzcount < sizeof(bs_cache)*8) { + pZeroCounterOut[0] = lzcount; + + /* + It is most likely that the riceParam part (which comes after the zero counter) is also on this cache line. When extracting + this, we include the set bit from the unary coded part because it simplifies cache management. This bit will be handled + outside of this function at a higher level. + */ + extract_rice_param_part: + bs_cache <<= lzcount; + bs_consumedBits += lzcount; + + if (bs_consumedBits <= riceParamPlus1MaxConsumedBits) { + /* Getting here means the rice parameter part is wholly contained within the current cache line. */ + pRiceParamPartOut[0] = (drflac_uint32)(bs_cache >> riceParamPlus1Shift); + bs_cache <<= riceParamPlus1; + bs_consumedBits += riceParamPlus1; + } else { + drflac_uint32 riceParamPartHi; + drflac_uint32 riceParamPartLo; + drflac_uint32 riceParamPartLoBitCount; + + /* + Getting here means the rice parameter part straddles the cache line. We need to read from the tail of the current cache + line, reload the cache, and then combine it with the head of the next cache line. + */ + + /* Grab the high part of the rice parameter part. */ + riceParamPartHi = (drflac_uint32)(bs_cache >> riceParamPlus1Shift); + + /* Before reloading the cache we need to grab the size in bits of the low part. */ + riceParamPartLoBitCount = bs_consumedBits - riceParamPlus1MaxConsumedBits; + DRFLAC_ASSERT(riceParamPartLoBitCount > 0 && riceParamPartLoBitCount < 32); + + /* Now reload the cache. */ + if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) { + #ifndef DR_FLAC_NO_CRC + drflac__update_crc16(bs); + #endif + bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]); + bs_consumedBits = riceParamPartLoBitCount; + #ifndef DR_FLAC_NO_CRC + bs->crc16Cache = bs_cache; + #endif + } else { + /* Slow path. We need to fetch more data from the client. */ + if (!drflac__reload_cache(bs)) { + return DRFLAC_FALSE; + } + if (riceParamPartLoBitCount > DRFLAC_CACHE_L1_BITS_REMAINING(bs)) { + /* This happens when we get to end of stream */ + return DRFLAC_FALSE; + } + + bs_cache = bs->cache; + bs_consumedBits = bs->consumedBits + riceParamPartLoBitCount; + } + + /* We should now have enough information to construct the rice parameter part. */ + riceParamPartLo = (drflac_uint32)(bs_cache >> (DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, riceParamPartLoBitCount))); + pRiceParamPartOut[0] = riceParamPartHi | riceParamPartLo; + + bs_cache <<= riceParamPartLoBitCount; + } + } else { + /* + Getting here means there are no bits set on the cache line. This is a less optimal case because we just wasted a call + to drflac__clz() and we need to reload the cache. + */ + drflac_uint32 zeroCounter = (drflac_uint32)(DRFLAC_CACHE_L1_SIZE_BITS(bs) - bs_consumedBits); + for (;;) { + if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) { + #ifndef DR_FLAC_NO_CRC + drflac__update_crc16(bs); + #endif + bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]); + bs_consumedBits = 0; + #ifndef DR_FLAC_NO_CRC + bs->crc16Cache = bs_cache; + #endif + } else { + /* Slow path. We need to fetch more data from the client. */ + if (!drflac__reload_cache(bs)) { + return DRFLAC_FALSE; + } + + bs_cache = bs->cache; + bs_consumedBits = bs->consumedBits; + } + + lzcount = drflac__clz(bs_cache); + zeroCounter += lzcount; + + if (lzcount < sizeof(bs_cache)*8) { + break; + } + } + + pZeroCounterOut[0] = zeroCounter; + goto extract_rice_param_part; + } + + /* Make sure the cache is restored at the end of it all. */ + bs->cache = bs_cache; + bs->consumedBits = bs_consumedBits; + + return DRFLAC_TRUE; +} + +static DRFLAC_INLINE drflac_bool32 drflac__seek_rice_parts(drflac_bs* bs, drflac_uint8 riceParam) +{ + drflac_uint32 riceParamPlus1 = riceParam + 1; + drflac_uint32 riceParamPlus1MaxConsumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs) - riceParamPlus1; + + /* + The idea here is to use local variables for the cache in an attempt to encourage the compiler to store them in registers. I have + no idea how this will work in practice... + */ + drflac_cache_t bs_cache = bs->cache; + drflac_uint32 bs_consumedBits = bs->consumedBits; + + /* The first thing to do is find the first unset bit. Most likely a bit will be set in the current cache line. */ + drflac_uint32 lzcount = drflac__clz(bs_cache); + if (lzcount < sizeof(bs_cache)*8) { + /* + It is most likely that the riceParam part (which comes after the zero counter) is also on this cache line. When extracting + this, we include the set bit from the unary coded part because it simplifies cache management. This bit will be handled + outside of this function at a higher level. + */ + extract_rice_param_part: + bs_cache <<= lzcount; + bs_consumedBits += lzcount; + + if (bs_consumedBits <= riceParamPlus1MaxConsumedBits) { + /* Getting here means the rice parameter part is wholly contained within the current cache line. */ + bs_cache <<= riceParamPlus1; + bs_consumedBits += riceParamPlus1; + } else { + /* + Getting here means the rice parameter part straddles the cache line. We need to read from the tail of the current cache + line, reload the cache, and then combine it with the head of the next cache line. + */ + + /* Before reloading the cache we need to grab the size in bits of the low part. */ + drflac_uint32 riceParamPartLoBitCount = bs_consumedBits - riceParamPlus1MaxConsumedBits; + DRFLAC_ASSERT(riceParamPartLoBitCount > 0 && riceParamPartLoBitCount < 32); + + /* Now reload the cache. */ + if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) { + #ifndef DR_FLAC_NO_CRC + drflac__update_crc16(bs); + #endif + bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]); + bs_consumedBits = riceParamPartLoBitCount; + #ifndef DR_FLAC_NO_CRC + bs->crc16Cache = bs_cache; + #endif + } else { + /* Slow path. We need to fetch more data from the client. */ + if (!drflac__reload_cache(bs)) { + return DRFLAC_FALSE; + } + + if (riceParamPartLoBitCount > DRFLAC_CACHE_L1_BITS_REMAINING(bs)) { + /* This happens when we get to end of stream */ + return DRFLAC_FALSE; + } + + bs_cache = bs->cache; + bs_consumedBits = bs->consumedBits + riceParamPartLoBitCount; + } + + bs_cache <<= riceParamPartLoBitCount; + } + } else { + /* + Getting here means there are no bits set on the cache line. This is a less optimal case because we just wasted a call + to drflac__clz() and we need to reload the cache. + */ + for (;;) { + if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) { + #ifndef DR_FLAC_NO_CRC + drflac__update_crc16(bs); + #endif + bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]); + bs_consumedBits = 0; + #ifndef DR_FLAC_NO_CRC + bs->crc16Cache = bs_cache; + #endif + } else { + /* Slow path. We need to fetch more data from the client. */ + if (!drflac__reload_cache(bs)) { + return DRFLAC_FALSE; + } + + bs_cache = bs->cache; + bs_consumedBits = bs->consumedBits; + } + + lzcount = drflac__clz(bs_cache); + if (lzcount < sizeof(bs_cache)*8) { + break; + } + } + + goto extract_rice_param_part; + } + + /* Make sure the cache is restored at the end of it all. */ + bs->cache = bs_cache; + bs->consumedBits = bs_consumedBits; + + return DRFLAC_TRUE; +} + + +static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar_zeroorder(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) +{ + drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF}; + drflac_uint32 zeroCountPart0; + drflac_uint32 riceParamPart0; + drflac_uint32 riceParamMask; + drflac_uint32 i; + + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(pSamplesOut != NULL); + + (void)bitsPerSample; + (void)order; + (void)shift; + (void)coefficients; + + riceParamMask = (drflac_uint32)~((~0UL) << riceParam); + + i = 0; + while (i < count) { + /* Rice extraction. */ + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0)) { + return DRFLAC_FALSE; + } + + /* Rice reconstruction. */ + riceParamPart0 &= riceParamMask; + riceParamPart0 |= (zeroCountPart0 << riceParam); + riceParamPart0 = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01]; + + pSamplesOut[i] = riceParamPart0; + + i += 1; + } + + return DRFLAC_TRUE; +} + +static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut) +{ + drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF}; + drflac_uint32 zeroCountPart0 = 0; + drflac_uint32 zeroCountPart1 = 0; + drflac_uint32 zeroCountPart2 = 0; + drflac_uint32 zeroCountPart3 = 0; + drflac_uint32 riceParamPart0 = 0; + drflac_uint32 riceParamPart1 = 0; + drflac_uint32 riceParamPart2 = 0; + drflac_uint32 riceParamPart3 = 0; + drflac_uint32 riceParamMask; + const drflac_int32* pSamplesOutEnd; + drflac_uint32 i; + + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(pSamplesOut != NULL); + + if (lpcOrder == 0) { + return drflac__decode_samples_with_residual__rice__scalar_zeroorder(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, coefficients, pSamplesOut); + } + + riceParamMask = (drflac_uint32)~((~0UL) << riceParam); + pSamplesOutEnd = pSamplesOut + (count & ~3); + + if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) { + while (pSamplesOut < pSamplesOutEnd) { + /* + Rice extraction. It's faster to do this one at a time against local variables than it is to use the x4 version + against an array. Not sure why, but perhaps it's making more efficient use of registers? + */ + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart1, &riceParamPart1) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart2, &riceParamPart2) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart3, &riceParamPart3)) { + return DRFLAC_FALSE; + } + + riceParamPart0 &= riceParamMask; + riceParamPart1 &= riceParamMask; + riceParamPart2 &= riceParamMask; + riceParamPart3 &= riceParamMask; + + riceParamPart0 |= (zeroCountPart0 << riceParam); + riceParamPart1 |= (zeroCountPart1 << riceParam); + riceParamPart2 |= (zeroCountPart2 << riceParam); + riceParamPart3 |= (zeroCountPart3 << riceParam); + + riceParamPart0 = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01]; + riceParamPart1 = (riceParamPart1 >> 1) ^ t[riceParamPart1 & 0x01]; + riceParamPart2 = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01]; + riceParamPart3 = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01]; + + pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 0); + pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 1); + pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 2); + pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 3); + + pSamplesOut += 4; + } + } else { + while (pSamplesOut < pSamplesOutEnd) { + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart1, &riceParamPart1) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart2, &riceParamPart2) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart3, &riceParamPart3)) { + return DRFLAC_FALSE; + } + + riceParamPart0 &= riceParamMask; + riceParamPart1 &= riceParamMask; + riceParamPart2 &= riceParamMask; + riceParamPart3 &= riceParamMask; + + riceParamPart0 |= (zeroCountPart0 << riceParam); + riceParamPart1 |= (zeroCountPart1 << riceParam); + riceParamPart2 |= (zeroCountPart2 << riceParam); + riceParamPart3 |= (zeroCountPart3 << riceParam); + + riceParamPart0 = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01]; + riceParamPart1 = (riceParamPart1 >> 1) ^ t[riceParamPart1 & 0x01]; + riceParamPart2 = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01]; + riceParamPart3 = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01]; + + pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 0); + pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 1); + pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 2); + pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 3); + + pSamplesOut += 4; + } + } + + i = (count & ~3); + while (i < count) { + /* Rice extraction. */ + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0)) { + return DRFLAC_FALSE; + } + + /* Rice reconstruction. */ + riceParamPart0 &= riceParamMask; + riceParamPart0 |= (zeroCountPart0 << riceParam); + riceParamPart0 = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01]; + /*riceParamPart0 = (riceParamPart0 >> 1) ^ (~(riceParamPart0 & 0x01) + 1);*/ + + /* Sample reconstruction. */ + if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) { + pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 0); + } else { + pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 0); + } + + i += 1; + pSamplesOut += 1; + } + + return DRFLAC_TRUE; +} + +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE __m128i drflac__mm_packs_interleaved_epi32(__m128i a, __m128i b) +{ + __m128i r; + + /* Pack. */ + r = _mm_packs_epi32(a, b); + + /* a3a2 a1a0 b3b2 b1b0 -> a3a2 b3b2 a1a0 b1b0 */ + r = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 1, 2, 0)); + + /* a3a2 b3b2 a1a0 b1b0 -> a3b3 a2b2 a1b1 a0b0 */ + r = _mm_shufflehi_epi16(r, _MM_SHUFFLE(3, 1, 2, 0)); + r = _mm_shufflelo_epi16(r, _MM_SHUFFLE(3, 1, 2, 0)); + + return r; +} +#endif + +#if defined(DRFLAC_SUPPORT_SSE41) +static DRFLAC_INLINE __m128i drflac__mm_not_si128(__m128i a) +{ + return _mm_xor_si128(a, _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); +} + +static DRFLAC_INLINE __m128i drflac__mm_hadd_epi32(__m128i x) +{ + __m128i x64 = _mm_add_epi32(x, _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2))); + __m128i x32 = _mm_shufflelo_epi16(x64, _MM_SHUFFLE(1, 0, 3, 2)); + return _mm_add_epi32(x64, x32); +} + +static DRFLAC_INLINE __m128i drflac__mm_hadd_epi64(__m128i x) +{ + return _mm_add_epi64(x, _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2))); +} + +static DRFLAC_INLINE __m128i drflac__mm_srai_epi64(__m128i x, int count) +{ + /* + To simplify this we are assuming count < 32. This restriction allows us to work on a low side and a high side. The low side + is shifted with zero bits, whereas the right side is shifted with sign bits. + */ + __m128i lo = _mm_srli_epi64(x, count); + __m128i hi = _mm_srai_epi32(x, count); + + hi = _mm_and_si128(hi, _mm_set_epi32(0xFFFFFFFF, 0, 0xFFFFFFFF, 0)); /* The high part needs to have the low part cleared. */ + + return _mm_or_si128(lo, hi); +} + +static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41_32(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) +{ + int i; + drflac_uint32 riceParamMask; + drflac_int32* pDecodedSamples = pSamplesOut; + drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3); + drflac_uint32 zeroCountParts0 = 0; + drflac_uint32 zeroCountParts1 = 0; + drflac_uint32 zeroCountParts2 = 0; + drflac_uint32 zeroCountParts3 = 0; + drflac_uint32 riceParamParts0 = 0; + drflac_uint32 riceParamParts1 = 0; + drflac_uint32 riceParamParts2 = 0; + drflac_uint32 riceParamParts3 = 0; + __m128i coefficients128_0; + __m128i coefficients128_4; + __m128i coefficients128_8; + __m128i samples128_0; + __m128i samples128_4; + __m128i samples128_8; + __m128i riceParamMask128; + + const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF}; + + riceParamMask = (drflac_uint32)~((~0UL) << riceParam); + riceParamMask128 = _mm_set1_epi32(riceParamMask); + + /* Pre-load. */ + coefficients128_0 = _mm_setzero_si128(); + coefficients128_4 = _mm_setzero_si128(); + coefficients128_8 = _mm_setzero_si128(); + + samples128_0 = _mm_setzero_si128(); + samples128_4 = _mm_setzero_si128(); + samples128_8 = _mm_setzero_si128(); + + /* + Pre-loading the coefficients and prior samples is annoying because we need to ensure we don't try reading more than + what's available in the input buffers. It would be convenient to use a fall-through switch to do this, but this results + in strict aliasing warnings with GCC. To work around this I'm just doing something hacky. This feels a bit convoluted + so I think there's opportunity for this to be simplified. + */ +#if 1 + { + int runningOrder = order; + + /* 0 - 3. */ + if (runningOrder >= 4) { + coefficients128_0 = _mm_loadu_si128((const __m128i*)(coefficients + 0)); + samples128_0 = _mm_loadu_si128((const __m128i*)(pSamplesOut - 4)); + runningOrder -= 4; + } else { + switch (runningOrder) { + case 3: coefficients128_0 = _mm_set_epi32(0, coefficients[2], coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], pSamplesOut[-3], 0); break; + case 2: coefficients128_0 = _mm_set_epi32(0, 0, coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], 0, 0); break; + case 1: coefficients128_0 = _mm_set_epi32(0, 0, 0, coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], 0, 0, 0); break; + } + runningOrder = 0; + } + + /* 4 - 7 */ + if (runningOrder >= 4) { + coefficients128_4 = _mm_loadu_si128((const __m128i*)(coefficients + 4)); + samples128_4 = _mm_loadu_si128((const __m128i*)(pSamplesOut - 8)); + runningOrder -= 4; + } else { + switch (runningOrder) { + case 3: coefficients128_4 = _mm_set_epi32(0, coefficients[6], coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], pSamplesOut[-7], 0); break; + case 2: coefficients128_4 = _mm_set_epi32(0, 0, coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], 0, 0); break; + case 1: coefficients128_4 = _mm_set_epi32(0, 0, 0, coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], 0, 0, 0); break; + } + runningOrder = 0; + } + + /* 8 - 11 */ + if (runningOrder == 4) { + coefficients128_8 = _mm_loadu_si128((const __m128i*)(coefficients + 8)); + samples128_8 = _mm_loadu_si128((const __m128i*)(pSamplesOut - 12)); + runningOrder -= 4; + } else { + switch (runningOrder) { + case 3: coefficients128_8 = _mm_set_epi32(0, coefficients[10], coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], pSamplesOut[-11], 0); break; + case 2: coefficients128_8 = _mm_set_epi32(0, 0, coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], 0, 0); break; + case 1: coefficients128_8 = _mm_set_epi32(0, 0, 0, coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], 0, 0, 0); break; + } + runningOrder = 0; + } + + /* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. */ + coefficients128_0 = _mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(0, 1, 2, 3)); + coefficients128_4 = _mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(0, 1, 2, 3)); + coefficients128_8 = _mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(0, 1, 2, 3)); + } +#else + /* This causes strict-aliasing warnings with GCC. */ + switch (order) + { + case 12: ((drflac_int32*)&coefficients128_8)[0] = coefficients[11]; ((drflac_int32*)&samples128_8)[0] = pDecodedSamples[-12]; + case 11: ((drflac_int32*)&coefficients128_8)[1] = coefficients[10]; ((drflac_int32*)&samples128_8)[1] = pDecodedSamples[-11]; + case 10: ((drflac_int32*)&coefficients128_8)[2] = coefficients[ 9]; ((drflac_int32*)&samples128_8)[2] = pDecodedSamples[-10]; + case 9: ((drflac_int32*)&coefficients128_8)[3] = coefficients[ 8]; ((drflac_int32*)&samples128_8)[3] = pDecodedSamples[- 9]; + case 8: ((drflac_int32*)&coefficients128_4)[0] = coefficients[ 7]; ((drflac_int32*)&samples128_4)[0] = pDecodedSamples[- 8]; + case 7: ((drflac_int32*)&coefficients128_4)[1] = coefficients[ 6]; ((drflac_int32*)&samples128_4)[1] = pDecodedSamples[- 7]; + case 6: ((drflac_int32*)&coefficients128_4)[2] = coefficients[ 5]; ((drflac_int32*)&samples128_4)[2] = pDecodedSamples[- 6]; + case 5: ((drflac_int32*)&coefficients128_4)[3] = coefficients[ 4]; ((drflac_int32*)&samples128_4)[3] = pDecodedSamples[- 5]; + case 4: ((drflac_int32*)&coefficients128_0)[0] = coefficients[ 3]; ((drflac_int32*)&samples128_0)[0] = pDecodedSamples[- 4]; + case 3: ((drflac_int32*)&coefficients128_0)[1] = coefficients[ 2]; ((drflac_int32*)&samples128_0)[1] = pDecodedSamples[- 3]; + case 2: ((drflac_int32*)&coefficients128_0)[2] = coefficients[ 1]; ((drflac_int32*)&samples128_0)[2] = pDecodedSamples[- 2]; + case 1: ((drflac_int32*)&coefficients128_0)[3] = coefficients[ 0]; ((drflac_int32*)&samples128_0)[3] = pDecodedSamples[- 1]; + } +#endif + + /* For this version we are doing one sample at a time. */ + while (pDecodedSamples < pDecodedSamplesEnd) { + __m128i prediction128; + __m128i zeroCountPart128; + __m128i riceParamPart128; + + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts1, &riceParamParts1) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts2, &riceParamParts2) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts3, &riceParamParts3)) { + return DRFLAC_FALSE; + } + + zeroCountPart128 = _mm_set_epi32(zeroCountParts3, zeroCountParts2, zeroCountParts1, zeroCountParts0); + riceParamPart128 = _mm_set_epi32(riceParamParts3, riceParamParts2, riceParamParts1, riceParamParts0); + + riceParamPart128 = _mm_and_si128(riceParamPart128, riceParamMask128); + riceParamPart128 = _mm_or_si128(riceParamPart128, _mm_slli_epi32(zeroCountPart128, riceParam)); + riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_add_epi32(drflac__mm_not_si128(_mm_and_si128(riceParamPart128, _mm_set1_epi32(0x01))), _mm_set1_epi32(0x01))); /* <-- SSE2 compatible */ + /*riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_mullo_epi32(_mm_and_si128(riceParamPart128, _mm_set1_epi32(0x01)), _mm_set1_epi32(0xFFFFFFFF)));*/ /* <-- Only supported from SSE4.1 and is slower in my testing... */ + + if (order <= 4) { + for (i = 0; i < 4; i += 1) { + prediction128 = _mm_mullo_epi32(coefficients128_0, samples128_0); + + /* Horizontal add and shift. */ + prediction128 = drflac__mm_hadd_epi32(prediction128); + prediction128 = _mm_srai_epi32(prediction128, shift); + prediction128 = _mm_add_epi32(riceParamPart128, prediction128); + + samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4); + riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4); + } + } else if (order <= 8) { + for (i = 0; i < 4; i += 1) { + prediction128 = _mm_mullo_epi32(coefficients128_4, samples128_4); + prediction128 = _mm_add_epi32(prediction128, _mm_mullo_epi32(coefficients128_0, samples128_0)); + + /* Horizontal add and shift. */ + prediction128 = drflac__mm_hadd_epi32(prediction128); + prediction128 = _mm_srai_epi32(prediction128, shift); + prediction128 = _mm_add_epi32(riceParamPart128, prediction128); + + samples128_4 = _mm_alignr_epi8(samples128_0, samples128_4, 4); + samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4); + riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4); + } + } else { + for (i = 0; i < 4; i += 1) { + prediction128 = _mm_mullo_epi32(coefficients128_8, samples128_8); + prediction128 = _mm_add_epi32(prediction128, _mm_mullo_epi32(coefficients128_4, samples128_4)); + prediction128 = _mm_add_epi32(prediction128, _mm_mullo_epi32(coefficients128_0, samples128_0)); + + /* Horizontal add and shift. */ + prediction128 = drflac__mm_hadd_epi32(prediction128); + prediction128 = _mm_srai_epi32(prediction128, shift); + prediction128 = _mm_add_epi32(riceParamPart128, prediction128); + + samples128_8 = _mm_alignr_epi8(samples128_4, samples128_8, 4); + samples128_4 = _mm_alignr_epi8(samples128_0, samples128_4, 4); + samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4); + riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4); + } + } + + /* We store samples in groups of 4. */ + _mm_storeu_si128((__m128i*)pDecodedSamples, samples128_0); + pDecodedSamples += 4; + } + + /* Make sure we process the last few samples. */ + i = (count & ~3); + while (i < (int)count) { + /* Rice extraction. */ + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0)) { + return DRFLAC_FALSE; + } + + /* Rice reconstruction. */ + riceParamParts0 &= riceParamMask; + riceParamParts0 |= (zeroCountParts0 << riceParam); + riceParamParts0 = (riceParamParts0 >> 1) ^ t[riceParamParts0 & 0x01]; + + /* Sample reconstruction. */ + pDecodedSamples[0] = riceParamParts0 + drflac__calculate_prediction_32(order, shift, coefficients, pDecodedSamples); + + i += 1; + pDecodedSamples += 1; + } + + return DRFLAC_TRUE; +} + +static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41_64(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) +{ + int i; + drflac_uint32 riceParamMask; + drflac_int32* pDecodedSamples = pSamplesOut; + drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3); + drflac_uint32 zeroCountParts0 = 0; + drflac_uint32 zeroCountParts1 = 0; + drflac_uint32 zeroCountParts2 = 0; + drflac_uint32 zeroCountParts3 = 0; + drflac_uint32 riceParamParts0 = 0; + drflac_uint32 riceParamParts1 = 0; + drflac_uint32 riceParamParts2 = 0; + drflac_uint32 riceParamParts3 = 0; + __m128i coefficients128_0; + __m128i coefficients128_4; + __m128i coefficients128_8; + __m128i samples128_0; + __m128i samples128_4; + __m128i samples128_8; + __m128i prediction128; + __m128i riceParamMask128; + + const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF}; + + DRFLAC_ASSERT(order <= 12); + + riceParamMask = (drflac_uint32)~((~0UL) << riceParam); + riceParamMask128 = _mm_set1_epi32(riceParamMask); + + prediction128 = _mm_setzero_si128(); + + /* Pre-load. */ + coefficients128_0 = _mm_setzero_si128(); + coefficients128_4 = _mm_setzero_si128(); + coefficients128_8 = _mm_setzero_si128(); + + samples128_0 = _mm_setzero_si128(); + samples128_4 = _mm_setzero_si128(); + samples128_8 = _mm_setzero_si128(); + +#if 1 + { + int runningOrder = order; + + /* 0 - 3. */ + if (runningOrder >= 4) { + coefficients128_0 = _mm_loadu_si128((const __m128i*)(coefficients + 0)); + samples128_0 = _mm_loadu_si128((const __m128i*)(pSamplesOut - 4)); + runningOrder -= 4; + } else { + switch (runningOrder) { + case 3: coefficients128_0 = _mm_set_epi32(0, coefficients[2], coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], pSamplesOut[-3], 0); break; + case 2: coefficients128_0 = _mm_set_epi32(0, 0, coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], 0, 0); break; + case 1: coefficients128_0 = _mm_set_epi32(0, 0, 0, coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], 0, 0, 0); break; + } + runningOrder = 0; + } + + /* 4 - 7 */ + if (runningOrder >= 4) { + coefficients128_4 = _mm_loadu_si128((const __m128i*)(coefficients + 4)); + samples128_4 = _mm_loadu_si128((const __m128i*)(pSamplesOut - 8)); + runningOrder -= 4; + } else { + switch (runningOrder) { + case 3: coefficients128_4 = _mm_set_epi32(0, coefficients[6], coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], pSamplesOut[-7], 0); break; + case 2: coefficients128_4 = _mm_set_epi32(0, 0, coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], 0, 0); break; + case 1: coefficients128_4 = _mm_set_epi32(0, 0, 0, coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], 0, 0, 0); break; + } + runningOrder = 0; + } + + /* 8 - 11 */ + if (runningOrder == 4) { + coefficients128_8 = _mm_loadu_si128((const __m128i*)(coefficients + 8)); + samples128_8 = _mm_loadu_si128((const __m128i*)(pSamplesOut - 12)); + runningOrder -= 4; + } else { + switch (runningOrder) { + case 3: coefficients128_8 = _mm_set_epi32(0, coefficients[10], coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], pSamplesOut[-11], 0); break; + case 2: coefficients128_8 = _mm_set_epi32(0, 0, coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], 0, 0); break; + case 1: coefficients128_8 = _mm_set_epi32(0, 0, 0, coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], 0, 0, 0); break; + } + runningOrder = 0; + } + + /* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. */ + coefficients128_0 = _mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(0, 1, 2, 3)); + coefficients128_4 = _mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(0, 1, 2, 3)); + coefficients128_8 = _mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(0, 1, 2, 3)); + } +#else + switch (order) + { + case 12: ((drflac_int32*)&coefficients128_8)[0] = coefficients[11]; ((drflac_int32*)&samples128_8)[0] = pDecodedSamples[-12]; + case 11: ((drflac_int32*)&coefficients128_8)[1] = coefficients[10]; ((drflac_int32*)&samples128_8)[1] = pDecodedSamples[-11]; + case 10: ((drflac_int32*)&coefficients128_8)[2] = coefficients[ 9]; ((drflac_int32*)&samples128_8)[2] = pDecodedSamples[-10]; + case 9: ((drflac_int32*)&coefficients128_8)[3] = coefficients[ 8]; ((drflac_int32*)&samples128_8)[3] = pDecodedSamples[- 9]; + case 8: ((drflac_int32*)&coefficients128_4)[0] = coefficients[ 7]; ((drflac_int32*)&samples128_4)[0] = pDecodedSamples[- 8]; + case 7: ((drflac_int32*)&coefficients128_4)[1] = coefficients[ 6]; ((drflac_int32*)&samples128_4)[1] = pDecodedSamples[- 7]; + case 6: ((drflac_int32*)&coefficients128_4)[2] = coefficients[ 5]; ((drflac_int32*)&samples128_4)[2] = pDecodedSamples[- 6]; + case 5: ((drflac_int32*)&coefficients128_4)[3] = coefficients[ 4]; ((drflac_int32*)&samples128_4)[3] = pDecodedSamples[- 5]; + case 4: ((drflac_int32*)&coefficients128_0)[0] = coefficients[ 3]; ((drflac_int32*)&samples128_0)[0] = pDecodedSamples[- 4]; + case 3: ((drflac_int32*)&coefficients128_0)[1] = coefficients[ 2]; ((drflac_int32*)&samples128_0)[1] = pDecodedSamples[- 3]; + case 2: ((drflac_int32*)&coefficients128_0)[2] = coefficients[ 1]; ((drflac_int32*)&samples128_0)[2] = pDecodedSamples[- 2]; + case 1: ((drflac_int32*)&coefficients128_0)[3] = coefficients[ 0]; ((drflac_int32*)&samples128_0)[3] = pDecodedSamples[- 1]; + } +#endif + + /* For this version we are doing one sample at a time. */ + while (pDecodedSamples < pDecodedSamplesEnd) { + __m128i zeroCountPart128; + __m128i riceParamPart128; + + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts1, &riceParamParts1) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts2, &riceParamParts2) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts3, &riceParamParts3)) { + return DRFLAC_FALSE; + } + + zeroCountPart128 = _mm_set_epi32(zeroCountParts3, zeroCountParts2, zeroCountParts1, zeroCountParts0); + riceParamPart128 = _mm_set_epi32(riceParamParts3, riceParamParts2, riceParamParts1, riceParamParts0); + + riceParamPart128 = _mm_and_si128(riceParamPart128, riceParamMask128); + riceParamPart128 = _mm_or_si128(riceParamPart128, _mm_slli_epi32(zeroCountPart128, riceParam)); + riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_add_epi32(drflac__mm_not_si128(_mm_and_si128(riceParamPart128, _mm_set1_epi32(1))), _mm_set1_epi32(1))); + + for (i = 0; i < 4; i += 1) { + prediction128 = _mm_xor_si128(prediction128, prediction128); /* Reset to 0. */ + + switch (order) + { + case 12: + case 11: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(1, 1, 0, 0)), _mm_shuffle_epi32(samples128_8, _MM_SHUFFLE(1, 1, 0, 0)))); + case 10: + case 9: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(3, 3, 2, 2)), _mm_shuffle_epi32(samples128_8, _MM_SHUFFLE(3, 3, 2, 2)))); + case 8: + case 7: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(1, 1, 0, 0)), _mm_shuffle_epi32(samples128_4, _MM_SHUFFLE(1, 1, 0, 0)))); + case 6: + case 5: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(3, 3, 2, 2)), _mm_shuffle_epi32(samples128_4, _MM_SHUFFLE(3, 3, 2, 2)))); + case 4: + case 3: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(1, 1, 0, 0)), _mm_shuffle_epi32(samples128_0, _MM_SHUFFLE(1, 1, 0, 0)))); + case 2: + case 1: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(3, 3, 2, 2)), _mm_shuffle_epi32(samples128_0, _MM_SHUFFLE(3, 3, 2, 2)))); + } + + /* Horizontal add and shift. */ + prediction128 = drflac__mm_hadd_epi64(prediction128); + prediction128 = drflac__mm_srai_epi64(prediction128, shift); + prediction128 = _mm_add_epi32(riceParamPart128, prediction128); + + /* Our value should be sitting in prediction128[0]. We need to combine this with our SSE samples. */ + samples128_8 = _mm_alignr_epi8(samples128_4, samples128_8, 4); + samples128_4 = _mm_alignr_epi8(samples128_0, samples128_4, 4); + samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4); + + /* Slide our rice parameter down so that the value in position 0 contains the next one to process. */ + riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4); + } + + /* We store samples in groups of 4. */ + _mm_storeu_si128((__m128i*)pDecodedSamples, samples128_0); + pDecodedSamples += 4; + } + + /* Make sure we process the last few samples. */ + i = (count & ~3); + while (i < (int)count) { + /* Rice extraction. */ + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0)) { + return DRFLAC_FALSE; + } + + /* Rice reconstruction. */ + riceParamParts0 &= riceParamMask; + riceParamParts0 |= (zeroCountParts0 << riceParam); + riceParamParts0 = (riceParamParts0 >> 1) ^ t[riceParamParts0 & 0x01]; + + /* Sample reconstruction. */ + pDecodedSamples[0] = riceParamParts0 + drflac__calculate_prediction_64(order, shift, coefficients, pDecodedSamples); + + i += 1; + pDecodedSamples += 1; + } + + return DRFLAC_TRUE; +} + +static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut) +{ + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(pSamplesOut != NULL); + + /* In my testing the order is rarely > 12, so in this case I'm going to simplify the SSE implementation by only handling order <= 12. */ + if (lpcOrder > 0 && lpcOrder <= 12) { + if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) { + return drflac__decode_samples_with_residual__rice__sse41_64(bs, count, riceParam, lpcOrder, lpcShift, coefficients, pSamplesOut); + } else { + return drflac__decode_samples_with_residual__rice__sse41_32(bs, count, riceParam, lpcOrder, lpcShift, coefficients, pSamplesOut); + } + } else { + return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut); + } +} +#endif + +#if defined(DRFLAC_SUPPORT_NEON) +static DRFLAC_INLINE void drflac__vst2q_s32(drflac_int32* p, int32x4x2_t x) +{ + vst1q_s32(p+0, x.val[0]); + vst1q_s32(p+4, x.val[1]); +} + +static DRFLAC_INLINE void drflac__vst2q_u32(drflac_uint32* p, uint32x4x2_t x) +{ + vst1q_u32(p+0, x.val[0]); + vst1q_u32(p+4, x.val[1]); +} + +static DRFLAC_INLINE void drflac__vst2q_f32(float* p, float32x4x2_t x) +{ + vst1q_f32(p+0, x.val[0]); + vst1q_f32(p+4, x.val[1]); +} + +static DRFLAC_INLINE void drflac__vst2q_s16(drflac_int16* p, int16x4x2_t x) +{ + vst1q_s16(p, vcombine_s16(x.val[0], x.val[1])); +} + +static DRFLAC_INLINE void drflac__vst2q_u16(drflac_uint16* p, uint16x4x2_t x) +{ + vst1q_u16(p, vcombine_u16(x.val[0], x.val[1])); +} + +static DRFLAC_INLINE int32x4_t drflac__vdupq_n_s32x4(drflac_int32 x3, drflac_int32 x2, drflac_int32 x1, drflac_int32 x0) +{ + drflac_int32 x[4]; + x[3] = x3; + x[2] = x2; + x[1] = x1; + x[0] = x0; + return vld1q_s32(x); +} + +static DRFLAC_INLINE int32x4_t drflac__valignrq_s32_1(int32x4_t a, int32x4_t b) +{ + /* Equivalent to SSE's _mm_alignr_epi8(a, b, 4) */ + + /* Reference */ + /*return drflac__vdupq_n_s32x4( + vgetq_lane_s32(a, 0), + vgetq_lane_s32(b, 3), + vgetq_lane_s32(b, 2), + vgetq_lane_s32(b, 1) + );*/ + + return vextq_s32(b, a, 1); +} + +static DRFLAC_INLINE uint32x4_t drflac__valignrq_u32_1(uint32x4_t a, uint32x4_t b) +{ + /* Equivalent to SSE's _mm_alignr_epi8(a, b, 4) */ + + /* Reference */ + /*return drflac__vdupq_n_s32x4( + vgetq_lane_s32(a, 0), + vgetq_lane_s32(b, 3), + vgetq_lane_s32(b, 2), + vgetq_lane_s32(b, 1) + );*/ + + return vextq_u32(b, a, 1); +} + +static DRFLAC_INLINE int32x2_t drflac__vhaddq_s32(int32x4_t x) +{ + /* The sum must end up in position 0. */ + + /* Reference */ + /*return vdupq_n_s32( + vgetq_lane_s32(x, 3) + + vgetq_lane_s32(x, 2) + + vgetq_lane_s32(x, 1) + + vgetq_lane_s32(x, 0) + );*/ + + int32x2_t r = vadd_s32(vget_high_s32(x), vget_low_s32(x)); + return vpadd_s32(r, r); +} + +static DRFLAC_INLINE int64x1_t drflac__vhaddq_s64(int64x2_t x) +{ + return vadd_s64(vget_high_s64(x), vget_low_s64(x)); +} + +static DRFLAC_INLINE int32x4_t drflac__vrevq_s32(int32x4_t x) +{ + /* Reference */ + /*return drflac__vdupq_n_s32x4( + vgetq_lane_s32(x, 0), + vgetq_lane_s32(x, 1), + vgetq_lane_s32(x, 2), + vgetq_lane_s32(x, 3) + );*/ + + return vrev64q_s32(vcombine_s32(vget_high_s32(x), vget_low_s32(x))); +} + +static DRFLAC_INLINE int32x4_t drflac__vnotq_s32(int32x4_t x) +{ + return veorq_s32(x, vdupq_n_s32(0xFFFFFFFF)); +} + +static DRFLAC_INLINE uint32x4_t drflac__vnotq_u32(uint32x4_t x) +{ + return veorq_u32(x, vdupq_n_u32(0xFFFFFFFF)); +} + +static drflac_bool32 drflac__decode_samples_with_residual__rice__neon_32(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) +{ + int i; + drflac_uint32 riceParamMask; + drflac_int32* pDecodedSamples = pSamplesOut; + drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3); + drflac_uint32 zeroCountParts[4]; + drflac_uint32 riceParamParts[4]; + int32x4_t coefficients128_0; + int32x4_t coefficients128_4; + int32x4_t coefficients128_8; + int32x4_t samples128_0; + int32x4_t samples128_4; + int32x4_t samples128_8; + uint32x4_t riceParamMask128; + int32x4_t riceParam128; + int32x2_t shift64; + uint32x4_t one128; + + const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF}; + + riceParamMask = (drflac_uint32)~((~0UL) << riceParam); + riceParamMask128 = vdupq_n_u32(riceParamMask); + + riceParam128 = vdupq_n_s32(riceParam); + shift64 = vdup_n_s32(-shift); /* Negate the shift because we'll be doing a variable shift using vshlq_s32(). */ + one128 = vdupq_n_u32(1); + + /* + Pre-loading the coefficients and prior samples is annoying because we need to ensure we don't try reading more than + what's available in the input buffers. It would be conenient to use a fall-through switch to do this, but this results + in strict aliasing warnings with GCC. To work around this I'm just doing something hacky. This feels a bit convoluted + so I think there's opportunity for this to be simplified. + */ + { + int runningOrder = order; + drflac_int32 tempC[4] = {0, 0, 0, 0}; + drflac_int32 tempS[4] = {0, 0, 0, 0}; + + /* 0 - 3. */ + if (runningOrder >= 4) { + coefficients128_0 = vld1q_s32(coefficients + 0); + samples128_0 = vld1q_s32(pSamplesOut - 4); + runningOrder -= 4; + } else { + switch (runningOrder) { + case 3: tempC[2] = coefficients[2]; tempS[1] = pSamplesOut[-3]; /* fallthrough */ + case 2: tempC[1] = coefficients[1]; tempS[2] = pSamplesOut[-2]; /* fallthrough */ + case 1: tempC[0] = coefficients[0]; tempS[3] = pSamplesOut[-1]; /* fallthrough */ + } + + coefficients128_0 = vld1q_s32(tempC); + samples128_0 = vld1q_s32(tempS); + runningOrder = 0; + } + + /* 4 - 7 */ + if (runningOrder >= 4) { + coefficients128_4 = vld1q_s32(coefficients + 4); + samples128_4 = vld1q_s32(pSamplesOut - 8); + runningOrder -= 4; + } else { + switch (runningOrder) { + case 3: tempC[2] = coefficients[6]; tempS[1] = pSamplesOut[-7]; /* fallthrough */ + case 2: tempC[1] = coefficients[5]; tempS[2] = pSamplesOut[-6]; /* fallthrough */ + case 1: tempC[0] = coefficients[4]; tempS[3] = pSamplesOut[-5]; /* fallthrough */ + } + + coefficients128_4 = vld1q_s32(tempC); + samples128_4 = vld1q_s32(tempS); + runningOrder = 0; + } + + /* 8 - 11 */ + if (runningOrder == 4) { + coefficients128_8 = vld1q_s32(coefficients + 8); + samples128_8 = vld1q_s32(pSamplesOut - 12); + runningOrder -= 4; + } else { + switch (runningOrder) { + case 3: tempC[2] = coefficients[10]; tempS[1] = pSamplesOut[-11]; /* fallthrough */ + case 2: tempC[1] = coefficients[ 9]; tempS[2] = pSamplesOut[-10]; /* fallthrough */ + case 1: tempC[0] = coefficients[ 8]; tempS[3] = pSamplesOut[- 9]; /* fallthrough */ + } + + coefficients128_8 = vld1q_s32(tempC); + samples128_8 = vld1q_s32(tempS); + runningOrder = 0; + } + + /* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. */ + coefficients128_0 = drflac__vrevq_s32(coefficients128_0); + coefficients128_4 = drflac__vrevq_s32(coefficients128_4); + coefficients128_8 = drflac__vrevq_s32(coefficients128_8); + } + + /* For this version we are doing one sample at a time. */ + while (pDecodedSamples < pDecodedSamplesEnd) { + int32x4_t prediction128; + int32x2_t prediction64; + uint32x4_t zeroCountPart128; + uint32x4_t riceParamPart128; + + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0]) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[1], &riceParamParts[1]) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[2], &riceParamParts[2]) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[3], &riceParamParts[3])) { + return DRFLAC_FALSE; + } + + zeroCountPart128 = vld1q_u32(zeroCountParts); + riceParamPart128 = vld1q_u32(riceParamParts); + + riceParamPart128 = vandq_u32(riceParamPart128, riceParamMask128); + riceParamPart128 = vorrq_u32(riceParamPart128, vshlq_u32(zeroCountPart128, riceParam128)); + riceParamPart128 = veorq_u32(vshrq_n_u32(riceParamPart128, 1), vaddq_u32(drflac__vnotq_u32(vandq_u32(riceParamPart128, one128)), one128)); + + if (order <= 4) { + for (i = 0; i < 4; i += 1) { + prediction128 = vmulq_s32(coefficients128_0, samples128_0); + + /* Horizontal add and shift. */ + prediction64 = drflac__vhaddq_s32(prediction128); + prediction64 = vshl_s32(prediction64, shift64); + prediction64 = vadd_s32(prediction64, vget_low_s32(vreinterpretq_s32_u32(riceParamPart128))); + + samples128_0 = drflac__valignrq_s32_1(vcombine_s32(prediction64, vdup_n_s32(0)), samples128_0); + riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128); + } + } else if (order <= 8) { + for (i = 0; i < 4; i += 1) { + prediction128 = vmulq_s32(coefficients128_4, samples128_4); + prediction128 = vmlaq_s32(prediction128, coefficients128_0, samples128_0); + + /* Horizontal add and shift. */ + prediction64 = drflac__vhaddq_s32(prediction128); + prediction64 = vshl_s32(prediction64, shift64); + prediction64 = vadd_s32(prediction64, vget_low_s32(vreinterpretq_s32_u32(riceParamPart128))); + + samples128_4 = drflac__valignrq_s32_1(samples128_0, samples128_4); + samples128_0 = drflac__valignrq_s32_1(vcombine_s32(prediction64, vdup_n_s32(0)), samples128_0); + riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128); + } + } else { + for (i = 0; i < 4; i += 1) { + prediction128 = vmulq_s32(coefficients128_8, samples128_8); + prediction128 = vmlaq_s32(prediction128, coefficients128_4, samples128_4); + prediction128 = vmlaq_s32(prediction128, coefficients128_0, samples128_0); + + /* Horizontal add and shift. */ + prediction64 = drflac__vhaddq_s32(prediction128); + prediction64 = vshl_s32(prediction64, shift64); + prediction64 = vadd_s32(prediction64, vget_low_s32(vreinterpretq_s32_u32(riceParamPart128))); + + samples128_8 = drflac__valignrq_s32_1(samples128_4, samples128_8); + samples128_4 = drflac__valignrq_s32_1(samples128_0, samples128_4); + samples128_0 = drflac__valignrq_s32_1(vcombine_s32(prediction64, vdup_n_s32(0)), samples128_0); + riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128); + } + } + + /* We store samples in groups of 4. */ + vst1q_s32(pDecodedSamples, samples128_0); + pDecodedSamples += 4; + } + + /* Make sure we process the last few samples. */ + i = (count & ~3); + while (i < (int)count) { + /* Rice extraction. */ + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0])) { + return DRFLAC_FALSE; + } + + /* Rice reconstruction. */ + riceParamParts[0] &= riceParamMask; + riceParamParts[0] |= (zeroCountParts[0] << riceParam); + riceParamParts[0] = (riceParamParts[0] >> 1) ^ t[riceParamParts[0] & 0x01]; + + /* Sample reconstruction. */ + pDecodedSamples[0] = riceParamParts[0] + drflac__calculate_prediction_32(order, shift, coefficients, pDecodedSamples); + + i += 1; + pDecodedSamples += 1; + } + + return DRFLAC_TRUE; +} + +static drflac_bool32 drflac__decode_samples_with_residual__rice__neon_64(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) +{ + int i; + drflac_uint32 riceParamMask; + drflac_int32* pDecodedSamples = pSamplesOut; + drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3); + drflac_uint32 zeroCountParts[4]; + drflac_uint32 riceParamParts[4]; + int32x4_t coefficients128_0; + int32x4_t coefficients128_4; + int32x4_t coefficients128_8; + int32x4_t samples128_0; + int32x4_t samples128_4; + int32x4_t samples128_8; + uint32x4_t riceParamMask128; + int32x4_t riceParam128; + int64x1_t shift64; + uint32x4_t one128; + int64x2_t prediction128 = { 0 }; + uint32x4_t zeroCountPart128; + uint32x4_t riceParamPart128; + + const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF}; + + riceParamMask = (drflac_uint32)~((~0UL) << riceParam); + riceParamMask128 = vdupq_n_u32(riceParamMask); + + riceParam128 = vdupq_n_s32(riceParam); + shift64 = vdup_n_s64(-shift); /* Negate the shift because we'll be doing a variable shift using vshlq_s32(). */ + one128 = vdupq_n_u32(1); + + /* + Pre-loading the coefficients and prior samples is annoying because we need to ensure we don't try reading more than + what's available in the input buffers. It would be convenient to use a fall-through switch to do this, but this results + in strict aliasing warnings with GCC. To work around this I'm just doing something hacky. This feels a bit convoluted + so I think there's opportunity for this to be simplified. + */ + { + int runningOrder = order; + drflac_int32 tempC[4] = {0, 0, 0, 0}; + drflac_int32 tempS[4] = {0, 0, 0, 0}; + + /* 0 - 3. */ + if (runningOrder >= 4) { + coefficients128_0 = vld1q_s32(coefficients + 0); + samples128_0 = vld1q_s32(pSamplesOut - 4); + runningOrder -= 4; + } else { + switch (runningOrder) { + case 3: tempC[2] = coefficients[2]; tempS[1] = pSamplesOut[-3]; /* fallthrough */ + case 2: tempC[1] = coefficients[1]; tempS[2] = pSamplesOut[-2]; /* fallthrough */ + case 1: tempC[0] = coefficients[0]; tempS[3] = pSamplesOut[-1]; /* fallthrough */ + } + + coefficients128_0 = vld1q_s32(tempC); + samples128_0 = vld1q_s32(tempS); + runningOrder = 0; + } + + /* 4 - 7 */ + if (runningOrder >= 4) { + coefficients128_4 = vld1q_s32(coefficients + 4); + samples128_4 = vld1q_s32(pSamplesOut - 8); + runningOrder -= 4; + } else { + switch (runningOrder) { + case 3: tempC[2] = coefficients[6]; tempS[1] = pSamplesOut[-7]; /* fallthrough */ + case 2: tempC[1] = coefficients[5]; tempS[2] = pSamplesOut[-6]; /* fallthrough */ + case 1: tempC[0] = coefficients[4]; tempS[3] = pSamplesOut[-5]; /* fallthrough */ + } + + coefficients128_4 = vld1q_s32(tempC); + samples128_4 = vld1q_s32(tempS); + runningOrder = 0; + } + + /* 8 - 11 */ + if (runningOrder == 4) { + coefficients128_8 = vld1q_s32(coefficients + 8); + samples128_8 = vld1q_s32(pSamplesOut - 12); + runningOrder -= 4; + } else { + switch (runningOrder) { + case 3: tempC[2] = coefficients[10]; tempS[1] = pSamplesOut[-11]; /* fallthrough */ + case 2: tempC[1] = coefficients[ 9]; tempS[2] = pSamplesOut[-10]; /* fallthrough */ + case 1: tempC[0] = coefficients[ 8]; tempS[3] = pSamplesOut[- 9]; /* fallthrough */ + } + + coefficients128_8 = vld1q_s32(tempC); + samples128_8 = vld1q_s32(tempS); + runningOrder = 0; + } + + /* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. */ + coefficients128_0 = drflac__vrevq_s32(coefficients128_0); + coefficients128_4 = drflac__vrevq_s32(coefficients128_4); + coefficients128_8 = drflac__vrevq_s32(coefficients128_8); + } + + /* For this version we are doing one sample at a time. */ + while (pDecodedSamples < pDecodedSamplesEnd) { + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0]) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[1], &riceParamParts[1]) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[2], &riceParamParts[2]) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[3], &riceParamParts[3])) { + return DRFLAC_FALSE; + } + + zeroCountPart128 = vld1q_u32(zeroCountParts); + riceParamPart128 = vld1q_u32(riceParamParts); + + riceParamPart128 = vandq_u32(riceParamPart128, riceParamMask128); + riceParamPart128 = vorrq_u32(riceParamPart128, vshlq_u32(zeroCountPart128, riceParam128)); + riceParamPart128 = veorq_u32(vshrq_n_u32(riceParamPart128, 1), vaddq_u32(drflac__vnotq_u32(vandq_u32(riceParamPart128, one128)), one128)); + + for (i = 0; i < 4; i += 1) { + int64x1_t prediction64; + + prediction128 = veorq_s64(prediction128, prediction128); /* Reset to 0. */ + switch (order) + { + case 12: + case 11: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_low_s32(coefficients128_8), vget_low_s32(samples128_8))); + case 10: + case 9: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_high_s32(coefficients128_8), vget_high_s32(samples128_8))); + case 8: + case 7: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_low_s32(coefficients128_4), vget_low_s32(samples128_4))); + case 6: + case 5: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_high_s32(coefficients128_4), vget_high_s32(samples128_4))); + case 4: + case 3: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_low_s32(coefficients128_0), vget_low_s32(samples128_0))); + case 2: + case 1: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_high_s32(coefficients128_0), vget_high_s32(samples128_0))); + } + + /* Horizontal add and shift. */ + prediction64 = drflac__vhaddq_s64(prediction128); + prediction64 = vshl_s64(prediction64, shift64); + prediction64 = vadd_s64(prediction64, vdup_n_s64(vgetq_lane_u32(riceParamPart128, 0))); + + /* Our value should be sitting in prediction64[0]. We need to combine this with our SSE samples. */ + samples128_8 = drflac__valignrq_s32_1(samples128_4, samples128_8); + samples128_4 = drflac__valignrq_s32_1(samples128_0, samples128_4); + samples128_0 = drflac__valignrq_s32_1(vcombine_s32(vreinterpret_s32_s64(prediction64), vdup_n_s32(0)), samples128_0); + + /* Slide our rice parameter down so that the value in position 0 contains the next one to process. */ + riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128); + } + + /* We store samples in groups of 4. */ + vst1q_s32(pDecodedSamples, samples128_0); + pDecodedSamples += 4; + } + + /* Make sure we process the last few samples. */ + i = (count & ~3); + while (i < (int)count) { + /* Rice extraction. */ + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0])) { + return DRFLAC_FALSE; + } + + /* Rice reconstruction. */ + riceParamParts[0] &= riceParamMask; + riceParamParts[0] |= (zeroCountParts[0] << riceParam); + riceParamParts[0] = (riceParamParts[0] >> 1) ^ t[riceParamParts[0] & 0x01]; + + /* Sample reconstruction. */ + pDecodedSamples[0] = riceParamParts[0] + drflac__calculate_prediction_64(order, shift, coefficients, pDecodedSamples); + + i += 1; + pDecodedSamples += 1; + } + + return DRFLAC_TRUE; +} + +static drflac_bool32 drflac__decode_samples_with_residual__rice__neon(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut) +{ + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(pSamplesOut != NULL); + + /* In my testing the order is rarely > 12, so in this case I'm going to simplify the NEON implementation by only handling order <= 12. */ + if (lpcOrder > 0 && lpcOrder <= 12) { + if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) { + return drflac__decode_samples_with_residual__rice__neon_64(bs, count, riceParam, lpcOrder, lpcShift, coefficients, pSamplesOut); + } else { + return drflac__decode_samples_with_residual__rice__neon_32(bs, count, riceParam, lpcOrder, lpcShift, coefficients, pSamplesOut); + } + } else { + return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut); + } +} +#endif + +static drflac_bool32 drflac__decode_samples_with_residual__rice(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut) +{ +#if defined(DRFLAC_SUPPORT_SSE41) + if (drflac__gIsSSE41Supported) { + return drflac__decode_samples_with_residual__rice__sse41(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut); + } else +#elif defined(DRFLAC_SUPPORT_NEON) + if (drflac__gIsNEONSupported) { + return drflac__decode_samples_with_residual__rice__neon(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut); + } else +#endif + { + /* Scalar fallback. */ + #if 0 + return drflac__decode_samples_with_residual__rice__reference(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut); + #else + return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut); + #endif + } +} + +/* Reads and seeks past a string of residual values as Rice codes. The decoder should be sitting on the first bit of the Rice codes. */ +static drflac_bool32 drflac__read_and_seek_residual__rice(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam) +{ + drflac_uint32 i; + + DRFLAC_ASSERT(bs != NULL); + + for (i = 0; i < count; ++i) { + if (!drflac__seek_rice_parts(bs, riceParam)) { + return DRFLAC_FALSE; + } + } + + return DRFLAC_TRUE; +} + +#if defined(__clang__) +__attribute__((no_sanitize("signed-integer-overflow"))) +#endif +static drflac_bool32 drflac__decode_samples_with_residual__unencoded(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 unencodedBitsPerSample, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut) +{ + drflac_uint32 i; + + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(unencodedBitsPerSample <= 31); /* <-- unencodedBitsPerSample is a 5 bit number, so cannot exceed 31. */ + DRFLAC_ASSERT(pSamplesOut != NULL); + + for (i = 0; i < count; ++i) { + if (unencodedBitsPerSample > 0) { + if (!drflac__read_int32(bs, unencodedBitsPerSample, pSamplesOut + i)) { + return DRFLAC_FALSE; + } + } else { + pSamplesOut[i] = 0; + } + + if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) { + pSamplesOut[i] += drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + i); + } else { + pSamplesOut[i] += drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + i); + } + } + + return DRFLAC_TRUE; +} + + +/* +Reads and decodes the residual for the sub-frame the decoder is currently sitting on. This function should be called +when the decoder is sitting at the very start of the RESIDUAL block. The first residuals will be ignored. The + and parameters are used to determine how many residual values need to be decoded. +*/ +static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 blockSize, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pDecodedSamples) +{ + drflac_uint8 residualMethod; + drflac_uint8 partitionOrder; + drflac_uint32 samplesInPartition; + drflac_uint32 partitionsRemaining; + + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(blockSize != 0); + DRFLAC_ASSERT(pDecodedSamples != NULL); /* <-- Should we allow NULL, in which case we just seek past the residual rather than do a full decode? */ + + if (!drflac__read_uint8(bs, 2, &residualMethod)) { + return DRFLAC_FALSE; + } + + if (residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE && residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) { + return DRFLAC_FALSE; /* Unknown or unsupported residual coding method. */ + } + + /* Ignore the first values. */ + pDecodedSamples += lpcOrder; + + if (!drflac__read_uint8(bs, 4, &partitionOrder)) { + return DRFLAC_FALSE; + } + + /* + From the FLAC spec: + The Rice partition order in a Rice-coded residual section must be less than or equal to 8. + */ + if (partitionOrder > 8) { + return DRFLAC_FALSE; + } + + /* Validation check. */ + if ((blockSize / (1 << partitionOrder)) < lpcOrder) { + return DRFLAC_FALSE; + } + + samplesInPartition = (blockSize / (1 << partitionOrder)) - lpcOrder; + partitionsRemaining = (1 << partitionOrder); + for (;;) { + drflac_uint8 riceParam = 0; + if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE) { + if (!drflac__read_uint8(bs, 4, &riceParam)) { + return DRFLAC_FALSE; + } + if (riceParam == 15) { + riceParam = 0xFF; + } + } else if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) { + if (!drflac__read_uint8(bs, 5, &riceParam)) { + return DRFLAC_FALSE; + } + if (riceParam == 31) { + riceParam = 0xFF; + } + } + + if (riceParam != 0xFF) { + if (!drflac__decode_samples_with_residual__rice(bs, bitsPerSample, samplesInPartition, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pDecodedSamples)) { + return DRFLAC_FALSE; + } + } else { + drflac_uint8 unencodedBitsPerSample = 0; + if (!drflac__read_uint8(bs, 5, &unencodedBitsPerSample)) { + return DRFLAC_FALSE; + } + + if (!drflac__decode_samples_with_residual__unencoded(bs, bitsPerSample, samplesInPartition, unencodedBitsPerSample, lpcOrder, lpcShift, lpcPrecision, coefficients, pDecodedSamples)) { + return DRFLAC_FALSE; + } + } + + pDecodedSamples += samplesInPartition; + + if (partitionsRemaining == 1) { + break; + } + + partitionsRemaining -= 1; + + if (partitionOrder != 0) { + samplesInPartition = blockSize / (1 << partitionOrder); + } + } + + return DRFLAC_TRUE; +} + +/* +Reads and seeks past the residual for the sub-frame the decoder is currently sitting on. This function should be called +when the decoder is sitting at the very start of the RESIDUAL block. The first residuals will be set to 0. The + and parameters are used to determine how many residual values need to be decoded. +*/ +static drflac_bool32 drflac__read_and_seek_residual(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 order) +{ + drflac_uint8 residualMethod; + drflac_uint8 partitionOrder; + drflac_uint32 samplesInPartition; + drflac_uint32 partitionsRemaining; + + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(blockSize != 0); + + if (!drflac__read_uint8(bs, 2, &residualMethod)) { + return DRFLAC_FALSE; + } + + if (residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE && residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) { + return DRFLAC_FALSE; /* Unknown or unsupported residual coding method. */ + } + + if (!drflac__read_uint8(bs, 4, &partitionOrder)) { + return DRFLAC_FALSE; + } + + /* + From the FLAC spec: + The Rice partition order in a Rice-coded residual section must be less than or equal to 8. + */ + if (partitionOrder > 8) { + return DRFLAC_FALSE; + } + + /* Validation check. */ + if ((blockSize / (1 << partitionOrder)) <= order) { + return DRFLAC_FALSE; + } + + samplesInPartition = (blockSize / (1 << partitionOrder)) - order; + partitionsRemaining = (1 << partitionOrder); + for (;;) + { + drflac_uint8 riceParam = 0; + if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE) { + if (!drflac__read_uint8(bs, 4, &riceParam)) { + return DRFLAC_FALSE; + } + if (riceParam == 15) { + riceParam = 0xFF; + } + } else if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) { + if (!drflac__read_uint8(bs, 5, &riceParam)) { + return DRFLAC_FALSE; + } + if (riceParam == 31) { + riceParam = 0xFF; + } + } + + if (riceParam != 0xFF) { + if (!drflac__read_and_seek_residual__rice(bs, samplesInPartition, riceParam)) { + return DRFLAC_FALSE; + } + } else { + drflac_uint8 unencodedBitsPerSample = 0; + if (!drflac__read_uint8(bs, 5, &unencodedBitsPerSample)) { + return DRFLAC_FALSE; + } + + if (!drflac__seek_bits(bs, unencodedBitsPerSample * samplesInPartition)) { + return DRFLAC_FALSE; + } + } + + + if (partitionsRemaining == 1) { + break; + } + + partitionsRemaining -= 1; + samplesInPartition = blockSize / (1 << partitionOrder); + } + + return DRFLAC_TRUE; +} + + +static drflac_bool32 drflac__decode_samples__constant(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 subframeBitsPerSample, drflac_int32* pDecodedSamples) +{ + drflac_uint32 i; + + /* Only a single sample needs to be decoded here. */ + drflac_int32 sample; + if (!drflac__read_int32(bs, subframeBitsPerSample, &sample)) { + return DRFLAC_FALSE; + } + + /* + We don't really need to expand this, but it does simplify the process of reading samples. If this becomes a performance issue (unlikely) + we'll want to look at a more efficient way. + */ + for (i = 0; i < blockSize; ++i) { + pDecodedSamples[i] = sample; + } + + return DRFLAC_TRUE; +} + +static drflac_bool32 drflac__decode_samples__verbatim(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 subframeBitsPerSample, drflac_int32* pDecodedSamples) +{ + drflac_uint32 i; + + for (i = 0; i < blockSize; ++i) { + drflac_int32 sample; + if (!drflac__read_int32(bs, subframeBitsPerSample, &sample)) { + return DRFLAC_FALSE; + } + + pDecodedSamples[i] = sample; + } + + return DRFLAC_TRUE; +} + +static drflac_bool32 drflac__decode_samples__fixed(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 subframeBitsPerSample, drflac_uint8 lpcOrder, drflac_int32* pDecodedSamples) +{ + drflac_uint32 i; + + static drflac_int32 lpcCoefficientsTable[5][4] = { + {0, 0, 0, 0}, + {1, 0, 0, 0}, + {2, -1, 0, 0}, + {3, -3, 1, 0}, + {4, -6, 4, -1} + }; + + /* Warm up samples and coefficients. */ + for (i = 0; i < lpcOrder; ++i) { + drflac_int32 sample; + if (!drflac__read_int32(bs, subframeBitsPerSample, &sample)) { + return DRFLAC_FALSE; + } + + pDecodedSamples[i] = sample; + } + + if (!drflac__decode_samples_with_residual(bs, subframeBitsPerSample, blockSize, lpcOrder, 0, 4, lpcCoefficientsTable[lpcOrder], pDecodedSamples)) { + return DRFLAC_FALSE; + } + + return DRFLAC_TRUE; +} + +static drflac_bool32 drflac__decode_samples__lpc(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 bitsPerSample, drflac_uint8 lpcOrder, drflac_int32* pDecodedSamples) +{ + drflac_uint8 i; + drflac_uint8 lpcPrecision; + drflac_int8 lpcShift; + drflac_int32 coefficients[32]; + + /* Warm up samples. */ + for (i = 0; i < lpcOrder; ++i) { + drflac_int32 sample; + if (!drflac__read_int32(bs, bitsPerSample, &sample)) { + return DRFLAC_FALSE; + } + + pDecodedSamples[i] = sample; + } + + if (!drflac__read_uint8(bs, 4, &lpcPrecision)) { + return DRFLAC_FALSE; + } + if (lpcPrecision == 15) { + return DRFLAC_FALSE; /* Invalid. */ + } + lpcPrecision += 1; + + if (!drflac__read_int8(bs, 5, &lpcShift)) { + return DRFLAC_FALSE; + } + + /* + From the FLAC specification: + + Quantized linear predictor coefficient shift needed in bits (NOTE: this number is signed two's-complement) + + Emphasis on the "signed two's-complement". In practice there does not seem to be any encoders nor decoders supporting negative shifts. For now dr_flac is + not going to support negative shifts as I don't have any reference files. However, when a reference file comes through I will consider adding support. + */ + if (lpcShift < 0) { + return DRFLAC_FALSE; + } + + DRFLAC_ZERO_MEMORY(coefficients, sizeof(coefficients)); + for (i = 0; i < lpcOrder; ++i) { + if (!drflac__read_int32(bs, lpcPrecision, coefficients + i)) { + return DRFLAC_FALSE; + } + } + + if (!drflac__decode_samples_with_residual(bs, bitsPerSample, blockSize, lpcOrder, lpcShift, lpcPrecision, coefficients, pDecodedSamples)) { + return DRFLAC_FALSE; + } + + return DRFLAC_TRUE; +} + + +static drflac_bool32 drflac__read_next_flac_frame_header(drflac_bs* bs, drflac_uint8 streaminfoBitsPerSample, drflac_frame_header* header) +{ + const drflac_uint32 sampleRateTable[12] = {0, 88200, 176400, 192000, 8000, 16000, 22050, 24000, 32000, 44100, 48000, 96000}; + const drflac_uint8 bitsPerSampleTable[8] = {0, 8, 12, (drflac_uint8)-1, 16, 20, 24, (drflac_uint8)-1}; /* -1 = reserved. */ + + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(header != NULL); + + /* Keep looping until we find a valid sync code. */ + for (;;) { + drflac_uint8 crc8 = 0xCE; /* 0xCE = drflac_crc8(0, 0x3FFE, 14); */ + drflac_uint8 reserved = 0; + drflac_uint8 blockingStrategy = 0; + drflac_uint8 blockSize = 0; + drflac_uint8 sampleRate = 0; + drflac_uint8 channelAssignment = 0; + drflac_uint8 bitsPerSample = 0; + drflac_bool32 isVariableBlockSize; + + if (!drflac__find_and_seek_to_next_sync_code(bs)) { + return DRFLAC_FALSE; + } + + if (!drflac__read_uint8(bs, 1, &reserved)) { + return DRFLAC_FALSE; + } + if (reserved == 1) { + continue; + } + crc8 = drflac_crc8(crc8, reserved, 1); + + if (!drflac__read_uint8(bs, 1, &blockingStrategy)) { + return DRFLAC_FALSE; + } + crc8 = drflac_crc8(crc8, blockingStrategy, 1); + + if (!drflac__read_uint8(bs, 4, &blockSize)) { + return DRFLAC_FALSE; + } + if (blockSize == 0) { + continue; + } + crc8 = drflac_crc8(crc8, blockSize, 4); + + if (!drflac__read_uint8(bs, 4, &sampleRate)) { + return DRFLAC_FALSE; + } + crc8 = drflac_crc8(crc8, sampleRate, 4); + + if (!drflac__read_uint8(bs, 4, &channelAssignment)) { + return DRFLAC_FALSE; + } + if (channelAssignment > 10) { + continue; + } + crc8 = drflac_crc8(crc8, channelAssignment, 4); + + if (!drflac__read_uint8(bs, 3, &bitsPerSample)) { + return DRFLAC_FALSE; + } + if (bitsPerSample == 3 || bitsPerSample == 7) { + continue; + } + crc8 = drflac_crc8(crc8, bitsPerSample, 3); + + + if (!drflac__read_uint8(bs, 1, &reserved)) { + return DRFLAC_FALSE; + } + if (reserved == 1) { + continue; + } + crc8 = drflac_crc8(crc8, reserved, 1); + + + isVariableBlockSize = blockingStrategy == 1; + if (isVariableBlockSize) { + drflac_uint64 pcmFrameNumber; + drflac_result result = drflac__read_utf8_coded_number(bs, &pcmFrameNumber, &crc8); + if (result != DRFLAC_SUCCESS) { + if (result == DRFLAC_AT_END) { + return DRFLAC_FALSE; + } else { + continue; + } + } + header->flacFrameNumber = 0; + header->pcmFrameNumber = pcmFrameNumber; + } else { + drflac_uint64 flacFrameNumber = 0; + drflac_result result = drflac__read_utf8_coded_number(bs, &flacFrameNumber, &crc8); + if (result != DRFLAC_SUCCESS) { + if (result == DRFLAC_AT_END) { + return DRFLAC_FALSE; + } else { + continue; + } + } + header->flacFrameNumber = (drflac_uint32)flacFrameNumber; /* <-- Safe cast. */ + header->pcmFrameNumber = 0; + } + + + DRFLAC_ASSERT(blockSize > 0); + if (blockSize == 1) { + header->blockSizeInPCMFrames = 192; + } else if (blockSize <= 5) { + DRFLAC_ASSERT(blockSize >= 2); + header->blockSizeInPCMFrames = 576 * (1 << (blockSize - 2)); + } else if (blockSize == 6) { + if (!drflac__read_uint16(bs, 8, &header->blockSizeInPCMFrames)) { + return DRFLAC_FALSE; + } + crc8 = drflac_crc8(crc8, header->blockSizeInPCMFrames, 8); + header->blockSizeInPCMFrames += 1; + } else if (blockSize == 7) { + if (!drflac__read_uint16(bs, 16, &header->blockSizeInPCMFrames)) { + return DRFLAC_FALSE; + } + crc8 = drflac_crc8(crc8, header->blockSizeInPCMFrames, 16); + if (header->blockSizeInPCMFrames == 0xFFFF) { + return DRFLAC_FALSE; /* Frame is too big. This is the size of the frame minus 1. The STREAMINFO block defines the max block size which is 16-bits. Adding one will make it 17 bits and therefore too big. */ + } + header->blockSizeInPCMFrames += 1; + } else { + DRFLAC_ASSERT(blockSize >= 8); + header->blockSizeInPCMFrames = 256 * (1 << (blockSize - 8)); + } + + + if (sampleRate <= 11) { + header->sampleRate = sampleRateTable[sampleRate]; + } else if (sampleRate == 12) { + if (!drflac__read_uint32(bs, 8, &header->sampleRate)) { + return DRFLAC_FALSE; + } + crc8 = drflac_crc8(crc8, header->sampleRate, 8); + header->sampleRate *= 1000; + } else if (sampleRate == 13) { + if (!drflac__read_uint32(bs, 16, &header->sampleRate)) { + return DRFLAC_FALSE; + } + crc8 = drflac_crc8(crc8, header->sampleRate, 16); + } else if (sampleRate == 14) { + if (!drflac__read_uint32(bs, 16, &header->sampleRate)) { + return DRFLAC_FALSE; + } + crc8 = drflac_crc8(crc8, header->sampleRate, 16); + header->sampleRate *= 10; + } else { + continue; /* Invalid. Assume an invalid block. */ + } + + + header->channelAssignment = channelAssignment; + + header->bitsPerSample = bitsPerSampleTable[bitsPerSample]; + if (header->bitsPerSample == 0) { + header->bitsPerSample = streaminfoBitsPerSample; + } + + if (header->bitsPerSample != streaminfoBitsPerSample) { + /* If this subframe has a different bitsPerSample then streaminfo or the first frame, reject it */ + return DRFLAC_FALSE; + } + + if (!drflac__read_uint8(bs, 8, &header->crc8)) { + return DRFLAC_FALSE; + } + +#ifndef DR_FLAC_NO_CRC + if (header->crc8 != crc8) { + continue; /* CRC mismatch. Loop back to the top and find the next sync code. */ + } +#endif + return DRFLAC_TRUE; + } +} + +static drflac_bool32 drflac__read_subframe_header(drflac_bs* bs, drflac_subframe* pSubframe) +{ + drflac_uint8 header; + int type; + + if (!drflac__read_uint8(bs, 8, &header)) { + return DRFLAC_FALSE; + } + + /* First bit should always be 0. */ + if ((header & 0x80) != 0) { + return DRFLAC_FALSE; + } + + /* + Default to 0 for the LPC order. It's important that we always set this to 0 for non LPC + and FIXED subframes because we'll be using it in a generic validation check later. + */ + pSubframe->lpcOrder = 0; + + type = (header & 0x7E) >> 1; + if (type == 0) { + pSubframe->subframeType = DRFLAC_SUBFRAME_CONSTANT; + } else if (type == 1) { + pSubframe->subframeType = DRFLAC_SUBFRAME_VERBATIM; + } else { + if ((type & 0x20) != 0) { + pSubframe->subframeType = DRFLAC_SUBFRAME_LPC; + pSubframe->lpcOrder = (drflac_uint8)(type & 0x1F) + 1; + } else if ((type & 0x08) != 0) { + pSubframe->subframeType = DRFLAC_SUBFRAME_FIXED; + pSubframe->lpcOrder = (drflac_uint8)(type & 0x07); + if (pSubframe->lpcOrder > 4) { + pSubframe->subframeType = DRFLAC_SUBFRAME_RESERVED; + pSubframe->lpcOrder = 0; + } + } else { + pSubframe->subframeType = DRFLAC_SUBFRAME_RESERVED; + } + } + + if (pSubframe->subframeType == DRFLAC_SUBFRAME_RESERVED) { + return DRFLAC_FALSE; + } + + /* Wasted bits per sample. */ + pSubframe->wastedBitsPerSample = 0; + if ((header & 0x01) == 1) { + unsigned int wastedBitsPerSample; + if (!drflac__seek_past_next_set_bit(bs, &wastedBitsPerSample)) { + return DRFLAC_FALSE; + } + pSubframe->wastedBitsPerSample = (drflac_uint8)wastedBitsPerSample + 1; + } + + return DRFLAC_TRUE; +} + +static drflac_bool32 drflac__decode_subframe(drflac_bs* bs, drflac_frame* frame, int subframeIndex, drflac_int32* pDecodedSamplesOut) +{ + drflac_subframe* pSubframe; + drflac_uint32 subframeBitsPerSample; + + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(frame != NULL); + + pSubframe = frame->subframes + subframeIndex; + if (!drflac__read_subframe_header(bs, pSubframe)) { + return DRFLAC_FALSE; + } + + /* Side channels require an extra bit per sample. Took a while to figure that one out... */ + subframeBitsPerSample = frame->header.bitsPerSample; + if ((frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE || frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE) && subframeIndex == 1) { + subframeBitsPerSample += 1; + } else if (frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE && subframeIndex == 0) { + subframeBitsPerSample += 1; + } + + if (subframeBitsPerSample > 32) { + /* libFLAC and ffmpeg reject 33-bit subframes as well */ + return DRFLAC_FALSE; + } + + /* Need to handle wasted bits per sample. */ + if (pSubframe->wastedBitsPerSample >= subframeBitsPerSample) { + return DRFLAC_FALSE; + } + subframeBitsPerSample -= pSubframe->wastedBitsPerSample; + + pSubframe->pSamplesS32 = pDecodedSamplesOut; + + /* + pDecodedSamplesOut will be pointing to a buffer that was allocated with enough memory to store + maxBlockSizeInPCMFrames samples (as specified in the FLAC header). We need to guard against an + overflow here. At a higher level we are checking maxBlockSizeInPCMFrames from the header, but + here we need to do an additional check to ensure this frame's block size fully encompasses any + warmup samples which is determined by the LPC order. For non LPC and FIXED subframes, the LPC + order will be have been set to 0 in drflac__read_subframe_header(). + */ + if (frame->header.blockSizeInPCMFrames < pSubframe->lpcOrder) { + return DRFLAC_FALSE; + } + + switch (pSubframe->subframeType) + { + case DRFLAC_SUBFRAME_CONSTANT: + { + drflac__decode_samples__constant(bs, frame->header.blockSizeInPCMFrames, subframeBitsPerSample, pSubframe->pSamplesS32); + } break; + + case DRFLAC_SUBFRAME_VERBATIM: + { + drflac__decode_samples__verbatim(bs, frame->header.blockSizeInPCMFrames, subframeBitsPerSample, pSubframe->pSamplesS32); + } break; + + case DRFLAC_SUBFRAME_FIXED: + { + drflac__decode_samples__fixed(bs, frame->header.blockSizeInPCMFrames, subframeBitsPerSample, pSubframe->lpcOrder, pSubframe->pSamplesS32); + } break; + + case DRFLAC_SUBFRAME_LPC: + { + drflac__decode_samples__lpc(bs, frame->header.blockSizeInPCMFrames, subframeBitsPerSample, pSubframe->lpcOrder, pSubframe->pSamplesS32); + } break; + + default: return DRFLAC_FALSE; + } + + return DRFLAC_TRUE; +} + +static drflac_bool32 drflac__seek_subframe(drflac_bs* bs, drflac_frame* frame, int subframeIndex) +{ + drflac_subframe* pSubframe; + drflac_uint32 subframeBitsPerSample; + + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(frame != NULL); + + pSubframe = frame->subframes + subframeIndex; + if (!drflac__read_subframe_header(bs, pSubframe)) { + return DRFLAC_FALSE; + } + + /* Side channels require an extra bit per sample. Took a while to figure that one out... */ + subframeBitsPerSample = frame->header.bitsPerSample; + if ((frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE || frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE) && subframeIndex == 1) { + subframeBitsPerSample += 1; + } else if (frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE && subframeIndex == 0) { + subframeBitsPerSample += 1; + } + + /* Need to handle wasted bits per sample. */ + if (pSubframe->wastedBitsPerSample >= subframeBitsPerSample) { + return DRFLAC_FALSE; + } + subframeBitsPerSample -= pSubframe->wastedBitsPerSample; + + pSubframe->pSamplesS32 = NULL; + + switch (pSubframe->subframeType) + { + case DRFLAC_SUBFRAME_CONSTANT: + { + if (!drflac__seek_bits(bs, subframeBitsPerSample)) { + return DRFLAC_FALSE; + } + } break; + + case DRFLAC_SUBFRAME_VERBATIM: + { + unsigned int bitsToSeek = frame->header.blockSizeInPCMFrames * subframeBitsPerSample; + if (!drflac__seek_bits(bs, bitsToSeek)) { + return DRFLAC_FALSE; + } + } break; + + case DRFLAC_SUBFRAME_FIXED: + { + unsigned int bitsToSeek = pSubframe->lpcOrder * subframeBitsPerSample; + if (!drflac__seek_bits(bs, bitsToSeek)) { + return DRFLAC_FALSE; + } + + if (!drflac__read_and_seek_residual(bs, frame->header.blockSizeInPCMFrames, pSubframe->lpcOrder)) { + return DRFLAC_FALSE; + } + } break; + + case DRFLAC_SUBFRAME_LPC: + { + drflac_uint8 lpcPrecision; + + unsigned int bitsToSeek = pSubframe->lpcOrder * subframeBitsPerSample; + if (!drflac__seek_bits(bs, bitsToSeek)) { + return DRFLAC_FALSE; + } + + if (!drflac__read_uint8(bs, 4, &lpcPrecision)) { + return DRFLAC_FALSE; + } + if (lpcPrecision == 15) { + return DRFLAC_FALSE; /* Invalid. */ + } + lpcPrecision += 1; + + + bitsToSeek = (pSubframe->lpcOrder * lpcPrecision) + 5; /* +5 for shift. */ + if (!drflac__seek_bits(bs, bitsToSeek)) { + return DRFLAC_FALSE; + } + + if (!drflac__read_and_seek_residual(bs, frame->header.blockSizeInPCMFrames, pSubframe->lpcOrder)) { + return DRFLAC_FALSE; + } + } break; + + default: return DRFLAC_FALSE; + } + + return DRFLAC_TRUE; +} + + +static DRFLAC_INLINE drflac_uint8 drflac__get_channel_count_from_channel_assignment(drflac_int8 channelAssignment) +{ + drflac_uint8 lookup[] = {1, 2, 3, 4, 5, 6, 7, 8, 2, 2, 2}; + + DRFLAC_ASSERT(channelAssignment <= 10); + return lookup[channelAssignment]; +} + +static drflac_result drflac__decode_flac_frame(drflac* pFlac) +{ + int channelCount; + int i; + drflac_uint8 paddingSizeInBits; + drflac_uint16 desiredCRC16; +#ifndef DR_FLAC_NO_CRC + drflac_uint16 actualCRC16; +#endif + + /* This function should be called while the stream is sitting on the first byte after the frame header. */ + DRFLAC_ZERO_MEMORY(pFlac->currentFLACFrame.subframes, sizeof(pFlac->currentFLACFrame.subframes)); + + /* The frame block size must never be larger than the maximum block size defined by the FLAC stream. */ + if (pFlac->currentFLACFrame.header.blockSizeInPCMFrames > pFlac->maxBlockSizeInPCMFrames) { + return DRFLAC_ERROR; + } + + /* The number of channels in the frame must match the channel count from the STREAMINFO block. */ + channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment); + if (channelCount != (int)pFlac->channels) { + return DRFLAC_ERROR; + } + + for (i = 0; i < channelCount; ++i) { + if (!drflac__decode_subframe(&pFlac->bs, &pFlac->currentFLACFrame, i, pFlac->pDecodedSamples + (pFlac->currentFLACFrame.header.blockSizeInPCMFrames * i))) { + return DRFLAC_ERROR; + } + } + + paddingSizeInBits = (drflac_uint8)(DRFLAC_CACHE_L1_BITS_REMAINING(&pFlac->bs) & 7); + if (paddingSizeInBits > 0) { + drflac_uint8 padding = 0; + if (!drflac__read_uint8(&pFlac->bs, paddingSizeInBits, &padding)) { + return DRFLAC_AT_END; + } + } + +#ifndef DR_FLAC_NO_CRC + actualCRC16 = drflac__flush_crc16(&pFlac->bs); +#endif + if (!drflac__read_uint16(&pFlac->bs, 16, &desiredCRC16)) { + return DRFLAC_AT_END; + } + +#ifndef DR_FLAC_NO_CRC + if (actualCRC16 != desiredCRC16) { + return DRFLAC_CRC_MISMATCH; /* CRC mismatch. */ + } +#endif + + pFlac->currentFLACFrame.pcmFramesRemaining = pFlac->currentFLACFrame.header.blockSizeInPCMFrames; + + return DRFLAC_SUCCESS; +} + +static drflac_result drflac__seek_flac_frame(drflac* pFlac) +{ + int channelCount; + int i; + drflac_uint16 desiredCRC16; +#ifndef DR_FLAC_NO_CRC + drflac_uint16 actualCRC16; +#endif + + channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment); + for (i = 0; i < channelCount; ++i) { + if (!drflac__seek_subframe(&pFlac->bs, &pFlac->currentFLACFrame, i)) { + return DRFLAC_ERROR; + } + } + + /* Padding. */ + if (!drflac__seek_bits(&pFlac->bs, DRFLAC_CACHE_L1_BITS_REMAINING(&pFlac->bs) & 7)) { + return DRFLAC_ERROR; + } + + /* CRC. */ +#ifndef DR_FLAC_NO_CRC + actualCRC16 = drflac__flush_crc16(&pFlac->bs); +#endif + if (!drflac__read_uint16(&pFlac->bs, 16, &desiredCRC16)) { + return DRFLAC_AT_END; + } + +#ifndef DR_FLAC_NO_CRC + if (actualCRC16 != desiredCRC16) { + return DRFLAC_CRC_MISMATCH; /* CRC mismatch. */ + } +#endif + + return DRFLAC_SUCCESS; +} + +static drflac_bool32 drflac__read_and_decode_next_flac_frame(drflac* pFlac) +{ + DRFLAC_ASSERT(pFlac != NULL); + + for (;;) { + drflac_result result; + + if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { + return DRFLAC_FALSE; + } + + result = drflac__decode_flac_frame(pFlac); + if (result != DRFLAC_SUCCESS) { + if (result == DRFLAC_CRC_MISMATCH) { + continue; /* CRC mismatch. Skip to the next frame. */ + } else { + return DRFLAC_FALSE; + } + } + + return DRFLAC_TRUE; + } +} + +static void drflac__get_pcm_frame_range_of_current_flac_frame(drflac* pFlac, drflac_uint64* pFirstPCMFrame, drflac_uint64* pLastPCMFrame) +{ + drflac_uint64 firstPCMFrame; + drflac_uint64 lastPCMFrame; + + DRFLAC_ASSERT(pFlac != NULL); + + firstPCMFrame = pFlac->currentFLACFrame.header.pcmFrameNumber; + if (firstPCMFrame == 0) { + firstPCMFrame = ((drflac_uint64)pFlac->currentFLACFrame.header.flacFrameNumber) * pFlac->maxBlockSizeInPCMFrames; + } + + lastPCMFrame = firstPCMFrame + pFlac->currentFLACFrame.header.blockSizeInPCMFrames; + if (lastPCMFrame > 0) { + lastPCMFrame -= 1; /* Needs to be zero based. */ + } + + if (pFirstPCMFrame) { + *pFirstPCMFrame = firstPCMFrame; + } + if (pLastPCMFrame) { + *pLastPCMFrame = lastPCMFrame; + } +} + +static drflac_bool32 drflac__seek_to_first_frame(drflac* pFlac) +{ + drflac_bool32 result; + + DRFLAC_ASSERT(pFlac != NULL); + + result = drflac__seek_to_byte(&pFlac->bs, pFlac->firstFLACFramePosInBytes); + + DRFLAC_ZERO_MEMORY(&pFlac->currentFLACFrame, sizeof(pFlac->currentFLACFrame)); + pFlac->currentPCMFrame = 0; + + return result; +} + +static DRFLAC_INLINE drflac_result drflac__seek_to_next_flac_frame(drflac* pFlac) +{ + /* This function should only ever be called while the decoder is sitting on the first byte past the FRAME_HEADER section. */ + DRFLAC_ASSERT(pFlac != NULL); + return drflac__seek_flac_frame(pFlac); +} + + +static drflac_uint64 drflac__seek_forward_by_pcm_frames(drflac* pFlac, drflac_uint64 pcmFramesToSeek) +{ + drflac_uint64 pcmFramesRead = 0; + while (pcmFramesToSeek > 0) { + if (pFlac->currentFLACFrame.pcmFramesRemaining == 0) { + if (!drflac__read_and_decode_next_flac_frame(pFlac)) { + break; /* Couldn't read the next frame, so just break from the loop and return. */ + } + } else { + if (pFlac->currentFLACFrame.pcmFramesRemaining > pcmFramesToSeek) { + pcmFramesRead += pcmFramesToSeek; + pFlac->currentFLACFrame.pcmFramesRemaining -= (drflac_uint32)pcmFramesToSeek; /* <-- Safe cast. Will always be < currentFrame.pcmFramesRemaining < 65536. */ + pcmFramesToSeek = 0; + } else { + pcmFramesRead += pFlac->currentFLACFrame.pcmFramesRemaining; + pcmFramesToSeek -= pFlac->currentFLACFrame.pcmFramesRemaining; + pFlac->currentFLACFrame.pcmFramesRemaining = 0; + } + } + } + + pFlac->currentPCMFrame += pcmFramesRead; + return pcmFramesRead; +} + + +static drflac_bool32 drflac__seek_to_pcm_frame__brute_force(drflac* pFlac, drflac_uint64 pcmFrameIndex) +{ + drflac_bool32 isMidFrame = DRFLAC_FALSE; + drflac_uint64 runningPCMFrameCount; + + DRFLAC_ASSERT(pFlac != NULL); + + /* If we are seeking forward we start from the current position. Otherwise we need to start all the way from the start of the file. */ + if (pcmFrameIndex >= pFlac->currentPCMFrame) { + /* Seeking forward. Need to seek from the current position. */ + runningPCMFrameCount = pFlac->currentPCMFrame; + + /* The frame header for the first frame may not yet have been read. We need to do that if necessary. */ + if (pFlac->currentPCMFrame == 0 && pFlac->currentFLACFrame.pcmFramesRemaining == 0) { + if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { + return DRFLAC_FALSE; + } + } else { + isMidFrame = DRFLAC_TRUE; + } + } else { + /* Seeking backwards. Need to seek from the start of the file. */ + runningPCMFrameCount = 0; + + /* Move back to the start. */ + if (!drflac__seek_to_first_frame(pFlac)) { + return DRFLAC_FALSE; + } + + /* Decode the first frame in preparation for sample-exact seeking below. */ + if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { + return DRFLAC_FALSE; + } + } + + /* + We need to as quickly as possible find the frame that contains the target sample. To do this, we iterate over each frame and inspect its + header. If based on the header we can determine that the frame contains the sample, we do a full decode of that frame. + */ + for (;;) { + drflac_uint64 pcmFrameCountInThisFLACFrame; + drflac_uint64 firstPCMFrameInFLACFrame = 0; + drflac_uint64 lastPCMFrameInFLACFrame = 0; + + drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &firstPCMFrameInFLACFrame, &lastPCMFrameInFLACFrame); + + pcmFrameCountInThisFLACFrame = (lastPCMFrameInFLACFrame - firstPCMFrameInFLACFrame) + 1; + if (pcmFrameIndex < (runningPCMFrameCount + pcmFrameCountInThisFLACFrame)) { + /* + The sample should be in this frame. We need to fully decode it, however if it's an invalid frame (a CRC mismatch), we need to pretend + it never existed and keep iterating. + */ + drflac_uint64 pcmFramesToDecode = pcmFrameIndex - runningPCMFrameCount; + + if (!isMidFrame) { + drflac_result result = drflac__decode_flac_frame(pFlac); + if (result == DRFLAC_SUCCESS) { + /* The frame is valid. We just need to skip over some samples to ensure it's sample-exact. */ + return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode; /* <-- If this fails, something bad has happened (it should never fail). */ + } else { + if (result == DRFLAC_CRC_MISMATCH) { + goto next_iteration; /* CRC mismatch. Pretend this frame never existed. */ + } else { + return DRFLAC_FALSE; + } + } + } else { + /* We started seeking mid-frame which means we need to skip the frame decoding part. */ + return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode; + } + } else { + /* + It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this + frame never existed and leave the running sample count untouched. + */ + if (!isMidFrame) { + drflac_result result = drflac__seek_to_next_flac_frame(pFlac); + if (result == DRFLAC_SUCCESS) { + runningPCMFrameCount += pcmFrameCountInThisFLACFrame; + } else { + if (result == DRFLAC_CRC_MISMATCH) { + goto next_iteration; /* CRC mismatch. Pretend this frame never existed. */ + } else { + return DRFLAC_FALSE; + } + } + } else { + /* + We started seeking mid-frame which means we need to seek by reading to the end of the frame instead of with + drflac__seek_to_next_flac_frame() which only works if the decoder is sitting on the byte just after the frame header. + */ + runningPCMFrameCount += pFlac->currentFLACFrame.pcmFramesRemaining; + pFlac->currentFLACFrame.pcmFramesRemaining = 0; + isMidFrame = DRFLAC_FALSE; + } + + /* If we are seeking to the end of the file and we've just hit it, we're done. */ + if (pcmFrameIndex == pFlac->totalPCMFrameCount && runningPCMFrameCount == pFlac->totalPCMFrameCount) { + return DRFLAC_TRUE; + } + } + + next_iteration: + /* Grab the next frame in preparation for the next iteration. */ + if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { + return DRFLAC_FALSE; + } + } +} + + +#if !defined(DR_FLAC_NO_CRC) +/* +We use an average compression ratio to determine our approximate start location. FLAC files are generally about 50%-70% the size of their +uncompressed counterparts so we'll use this as a basis. I'm going to split the middle and use a factor of 0.6 to determine the starting +location. +*/ +#define DRFLAC_BINARY_SEARCH_APPROX_COMPRESSION_RATIO 0.6f + +static drflac_bool32 drflac__seek_to_approximate_flac_frame_to_byte(drflac* pFlac, drflac_uint64 targetByte, drflac_uint64 rangeLo, drflac_uint64 rangeHi, drflac_uint64* pLastSuccessfulSeekOffset) +{ + DRFLAC_ASSERT(pFlac != NULL); + DRFLAC_ASSERT(pLastSuccessfulSeekOffset != NULL); + DRFLAC_ASSERT(targetByte >= rangeLo); + DRFLAC_ASSERT(targetByte <= rangeHi); + + *pLastSuccessfulSeekOffset = pFlac->firstFLACFramePosInBytes; + + for (;;) { + /* After rangeLo == rangeHi == targetByte fails, we need to break out. */ + drflac_uint64 lastTargetByte = targetByte; + + /* When seeking to a byte, failure probably means we've attempted to seek beyond the end of the stream. To counter this we just halve it each attempt. */ + if (!drflac__seek_to_byte(&pFlac->bs, targetByte)) { + /* If we couldn't even seek to the first byte in the stream we have a problem. Just abandon the whole thing. */ + if (targetByte == 0) { + drflac__seek_to_first_frame(pFlac); /* Try to recover. */ + return DRFLAC_FALSE; + } + + /* Halve the byte location and continue. */ + targetByte = rangeLo + ((rangeHi - rangeLo)/2); + rangeHi = targetByte; + } else { + /* Getting here should mean that we have seeked to an appropriate byte. */ + + /* Clear the details of the FLAC frame so we don't misreport data. */ + DRFLAC_ZERO_MEMORY(&pFlac->currentFLACFrame, sizeof(pFlac->currentFLACFrame)); + + /* + Now seek to the next FLAC frame. We need to decode the entire frame (not just the header) because it's possible for the header to incorrectly pass the + CRC check and return bad data. We need to decode the entire frame to be more certain. Although this seems unlikely, this has happened to me in testing + so it needs to stay this way for now. + */ +#if 1 + if (!drflac__read_and_decode_next_flac_frame(pFlac)) { + /* Halve the byte location and continue. */ + targetByte = rangeLo + ((rangeHi - rangeLo)/2); + rangeHi = targetByte; + } else { + break; + } +#else + if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { + /* Halve the byte location and continue. */ + targetByte = rangeLo + ((rangeHi - rangeLo)/2); + rangeHi = targetByte; + } else { + break; + } +#endif + } + + /* We already tried this byte and there are no more to try, break out. */ + if(targetByte == lastTargetByte) { + return DRFLAC_FALSE; + } + } + + /* The current PCM frame needs to be updated based on the frame we just seeked to. */ + drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &pFlac->currentPCMFrame, NULL); + + DRFLAC_ASSERT(targetByte <= rangeHi); + + *pLastSuccessfulSeekOffset = targetByte; + return DRFLAC_TRUE; +} + +static drflac_bool32 drflac__decode_flac_frame_and_seek_forward_by_pcm_frames(drflac* pFlac, drflac_uint64 offset) +{ + /* This section of code would be used if we were only decoding the FLAC frame header when calling drflac__seek_to_approximate_flac_frame_to_byte(). */ +#if 0 + if (drflac__decode_flac_frame(pFlac) != DRFLAC_SUCCESS) { + /* We failed to decode this frame which may be due to it being corrupt. We'll just use the next valid FLAC frame. */ + if (drflac__read_and_decode_next_flac_frame(pFlac) == DRFLAC_FALSE) { + return DRFLAC_FALSE; + } + } +#endif + + return drflac__seek_forward_by_pcm_frames(pFlac, offset) == offset; +} + + +static drflac_bool32 drflac__seek_to_pcm_frame__binary_search_internal(drflac* pFlac, drflac_uint64 pcmFrameIndex, drflac_uint64 byteRangeLo, drflac_uint64 byteRangeHi) +{ + /* This assumes pFlac->currentPCMFrame is sitting on byteRangeLo upon entry. */ + + drflac_uint64 targetByte; + drflac_uint64 pcmRangeLo = pFlac->totalPCMFrameCount; + drflac_uint64 pcmRangeHi = 0; + drflac_uint64 lastSuccessfulSeekOffset = (drflac_uint64)-1; + drflac_uint64 closestSeekOffsetBeforeTargetPCMFrame = byteRangeLo; + drflac_uint32 seekForwardThreshold = (pFlac->maxBlockSizeInPCMFrames != 0) ? pFlac->maxBlockSizeInPCMFrames*2 : 4096; + + targetByte = byteRangeLo + (drflac_uint64)(((drflac_int64)((pcmFrameIndex - pFlac->currentPCMFrame) * pFlac->channels * pFlac->bitsPerSample)/8.0f) * DRFLAC_BINARY_SEARCH_APPROX_COMPRESSION_RATIO); + if (targetByte > byteRangeHi) { + targetByte = byteRangeHi; + } + + for (;;) { + if (drflac__seek_to_approximate_flac_frame_to_byte(pFlac, targetByte, byteRangeLo, byteRangeHi, &lastSuccessfulSeekOffset)) { + /* We found a FLAC frame. We need to check if it contains the sample we're looking for. */ + drflac_uint64 newPCMRangeLo; + drflac_uint64 newPCMRangeHi; + drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &newPCMRangeLo, &newPCMRangeHi); + + /* If we selected the same frame, it means we should be pretty close. Just decode the rest. */ + if (pcmRangeLo == newPCMRangeLo) { + if (!drflac__seek_to_approximate_flac_frame_to_byte(pFlac, closestSeekOffsetBeforeTargetPCMFrame, closestSeekOffsetBeforeTargetPCMFrame, byteRangeHi, &lastSuccessfulSeekOffset)) { + break; /* Failed to seek to closest frame. */ + } + + if (drflac__decode_flac_frame_and_seek_forward_by_pcm_frames(pFlac, pcmFrameIndex - pFlac->currentPCMFrame)) { + return DRFLAC_TRUE; + } else { + break; /* Failed to seek forward. */ + } + } + + pcmRangeLo = newPCMRangeLo; + pcmRangeHi = newPCMRangeHi; + + if (pcmRangeLo <= pcmFrameIndex && pcmRangeHi >= pcmFrameIndex) { + /* The target PCM frame is in this FLAC frame. */ + if (drflac__decode_flac_frame_and_seek_forward_by_pcm_frames(pFlac, pcmFrameIndex - pFlac->currentPCMFrame) ) { + return DRFLAC_TRUE; + } else { + break; /* Failed to seek to FLAC frame. */ + } + } else { + const float approxCompressionRatio = (drflac_int64)(lastSuccessfulSeekOffset - pFlac->firstFLACFramePosInBytes) / ((drflac_int64)(pcmRangeLo * pFlac->channels * pFlac->bitsPerSample)/8.0f); + + if (pcmRangeLo > pcmFrameIndex) { + /* We seeked too far forward. We need to move our target byte backward and try again. */ + byteRangeHi = lastSuccessfulSeekOffset; + if (byteRangeLo > byteRangeHi) { + byteRangeLo = byteRangeHi; + } + + targetByte = byteRangeLo + ((byteRangeHi - byteRangeLo) / 2); + if (targetByte < byteRangeLo) { + targetByte = byteRangeLo; + } + } else /*if (pcmRangeHi < pcmFrameIndex)*/ { + /* We didn't seek far enough. We need to move our target byte forward and try again. */ + + /* If we're close enough we can just seek forward. */ + if ((pcmFrameIndex - pcmRangeLo) < seekForwardThreshold) { + if (drflac__decode_flac_frame_and_seek_forward_by_pcm_frames(pFlac, pcmFrameIndex - pFlac->currentPCMFrame)) { + return DRFLAC_TRUE; + } else { + break; /* Failed to seek to FLAC frame. */ + } + } else { + byteRangeLo = lastSuccessfulSeekOffset; + if (byteRangeHi < byteRangeLo) { + byteRangeHi = byteRangeLo; + } + + targetByte = lastSuccessfulSeekOffset + (drflac_uint64)(((drflac_int64)((pcmFrameIndex-pcmRangeLo) * pFlac->channels * pFlac->bitsPerSample)/8.0f) * approxCompressionRatio); + if (targetByte > byteRangeHi) { + targetByte = byteRangeHi; + } + + if (closestSeekOffsetBeforeTargetPCMFrame < lastSuccessfulSeekOffset) { + closestSeekOffsetBeforeTargetPCMFrame = lastSuccessfulSeekOffset; + } + } + } + } + } else { + /* Getting here is really bad. We just recover as best we can, but moving to the first frame in the stream, and then abort. */ + break; + } + } + + drflac__seek_to_first_frame(pFlac); /* <-- Try to recover. */ + return DRFLAC_FALSE; +} + +static drflac_bool32 drflac__seek_to_pcm_frame__binary_search(drflac* pFlac, drflac_uint64 pcmFrameIndex) +{ + drflac_uint64 byteRangeLo; + drflac_uint64 byteRangeHi; + drflac_uint32 seekForwardThreshold = (pFlac->maxBlockSizeInPCMFrames != 0) ? pFlac->maxBlockSizeInPCMFrames*2 : 4096; + + /* Our algorithm currently assumes the FLAC stream is currently sitting at the start. */ + if (drflac__seek_to_first_frame(pFlac) == DRFLAC_FALSE) { + return DRFLAC_FALSE; + } + + /* If we're close enough to the start, just move to the start and seek forward. */ + if (pcmFrameIndex < seekForwardThreshold) { + return drflac__seek_forward_by_pcm_frames(pFlac, pcmFrameIndex) == pcmFrameIndex; + } + + /* + Our starting byte range is the byte position of the first FLAC frame and the approximate end of the file as if it were completely uncompressed. This ensures + the entire file is included, even though most of the time it'll exceed the end of the actual stream. This is OK as the frame searching logic will handle it. + */ + byteRangeLo = pFlac->firstFLACFramePosInBytes; + byteRangeHi = pFlac->firstFLACFramePosInBytes + (drflac_uint64)((drflac_int64)(pFlac->totalPCMFrameCount * pFlac->channels * pFlac->bitsPerSample)/8.0f); + + return drflac__seek_to_pcm_frame__binary_search_internal(pFlac, pcmFrameIndex, byteRangeLo, byteRangeHi); +} +#endif /* !DR_FLAC_NO_CRC */ + +static drflac_bool32 drflac__seek_to_pcm_frame__seek_table(drflac* pFlac, drflac_uint64 pcmFrameIndex) +{ + drflac_uint32 iClosestSeekpoint = 0; + drflac_bool32 isMidFrame = DRFLAC_FALSE; + drflac_uint64 runningPCMFrameCount; + drflac_uint32 iSeekpoint; + + + DRFLAC_ASSERT(pFlac != NULL); + + if (pFlac->pSeekpoints == NULL || pFlac->seekpointCount == 0) { + return DRFLAC_FALSE; + } + + /* Do not use the seektable if pcmFramIndex is not coverd by it. */ + if (pFlac->pSeekpoints[0].firstPCMFrame > pcmFrameIndex) { + return DRFLAC_FALSE; + } + + for (iSeekpoint = 0; iSeekpoint < pFlac->seekpointCount; ++iSeekpoint) { + if (pFlac->pSeekpoints[iSeekpoint].firstPCMFrame >= pcmFrameIndex) { + break; + } + + iClosestSeekpoint = iSeekpoint; + } + + /* There's been cases where the seek table contains only zeros. We need to do some basic validation on the closest seekpoint. */ + if (pFlac->pSeekpoints[iClosestSeekpoint].pcmFrameCount == 0 || pFlac->pSeekpoints[iClosestSeekpoint].pcmFrameCount > pFlac->maxBlockSizeInPCMFrames) { + return DRFLAC_FALSE; + } + if (pFlac->pSeekpoints[iClosestSeekpoint].firstPCMFrame > pFlac->totalPCMFrameCount && pFlac->totalPCMFrameCount > 0) { + return DRFLAC_FALSE; + } + +#if !defined(DR_FLAC_NO_CRC) + /* At this point we should know the closest seek point. We can use a binary search for this. We need to know the total sample count for this. */ + if (pFlac->totalPCMFrameCount > 0) { + drflac_uint64 byteRangeLo; + drflac_uint64 byteRangeHi; + + byteRangeHi = pFlac->firstFLACFramePosInBytes + (drflac_uint64)((drflac_int64)(pFlac->totalPCMFrameCount * pFlac->channels * pFlac->bitsPerSample)/8.0f); + byteRangeLo = pFlac->firstFLACFramePosInBytes + pFlac->pSeekpoints[iClosestSeekpoint].flacFrameOffset; + + /* + If our closest seek point is not the last one, we only need to search between it and the next one. The section below calculates an appropriate starting + value for byteRangeHi which will clamp it appropriately. + + Note that the next seekpoint must have an offset greater than the closest seekpoint because otherwise our binary search algorithm will break down. There + have been cases where a seektable consists of seek points where every byte offset is set to 0 which causes problems. If this happens we need to abort. + */ + if (iClosestSeekpoint < pFlac->seekpointCount-1) { + drflac_uint32 iNextSeekpoint = iClosestSeekpoint + 1; + + /* Basic validation on the seekpoints to ensure they're usable. */ + if (pFlac->pSeekpoints[iClosestSeekpoint].flacFrameOffset >= pFlac->pSeekpoints[iNextSeekpoint].flacFrameOffset || pFlac->pSeekpoints[iNextSeekpoint].pcmFrameCount == 0) { + return DRFLAC_FALSE; /* The next seekpoint doesn't look right. The seek table cannot be trusted from here. Abort. */ + } + + if (pFlac->pSeekpoints[iNextSeekpoint].firstPCMFrame != (((drflac_uint64)0xFFFFFFFF << 32) | 0xFFFFFFFF)) { /* Make sure it's not a placeholder seekpoint. */ + byteRangeHi = pFlac->firstFLACFramePosInBytes + pFlac->pSeekpoints[iNextSeekpoint].flacFrameOffset - 1; /* byteRangeHi must be zero based. */ + } + } + + if (drflac__seek_to_byte(&pFlac->bs, pFlac->firstFLACFramePosInBytes + pFlac->pSeekpoints[iClosestSeekpoint].flacFrameOffset)) { + if (drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { + drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &pFlac->currentPCMFrame, NULL); + + if (drflac__seek_to_pcm_frame__binary_search_internal(pFlac, pcmFrameIndex, byteRangeLo, byteRangeHi)) { + return DRFLAC_TRUE; + } + } + } + } +#endif /* !DR_FLAC_NO_CRC */ + + /* Getting here means we need to use a slower algorithm because the binary search method failed or cannot be used. */ + + /* + If we are seeking forward and the closest seekpoint is _before_ the current sample, we just seek forward from where we are. Otherwise we start seeking + from the seekpoint's first sample. + */ + if (pcmFrameIndex >= pFlac->currentPCMFrame && pFlac->pSeekpoints[iClosestSeekpoint].firstPCMFrame <= pFlac->currentPCMFrame) { + /* Optimized case. Just seek forward from where we are. */ + runningPCMFrameCount = pFlac->currentPCMFrame; + + /* The frame header for the first frame may not yet have been read. We need to do that if necessary. */ + if (pFlac->currentPCMFrame == 0 && pFlac->currentFLACFrame.pcmFramesRemaining == 0) { + if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { + return DRFLAC_FALSE; + } + } else { + isMidFrame = DRFLAC_TRUE; + } + } else { + /* Slower case. Seek to the start of the seekpoint and then seek forward from there. */ + runningPCMFrameCount = pFlac->pSeekpoints[iClosestSeekpoint].firstPCMFrame; + + if (!drflac__seek_to_byte(&pFlac->bs, pFlac->firstFLACFramePosInBytes + pFlac->pSeekpoints[iClosestSeekpoint].flacFrameOffset)) { + return DRFLAC_FALSE; + } + + /* Grab the frame the seekpoint is sitting on in preparation for the sample-exact seeking below. */ + if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { + return DRFLAC_FALSE; + } + } + + for (;;) { + drflac_uint64 pcmFrameCountInThisFLACFrame; + drflac_uint64 firstPCMFrameInFLACFrame = 0; + drflac_uint64 lastPCMFrameInFLACFrame = 0; + + drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &firstPCMFrameInFLACFrame, &lastPCMFrameInFLACFrame); + + pcmFrameCountInThisFLACFrame = (lastPCMFrameInFLACFrame - firstPCMFrameInFLACFrame) + 1; + if (pcmFrameIndex < (runningPCMFrameCount + pcmFrameCountInThisFLACFrame)) { + /* + The sample should be in this frame. We need to fully decode it, but if it's an invalid frame (a CRC mismatch) we need to pretend + it never existed and keep iterating. + */ + drflac_uint64 pcmFramesToDecode = pcmFrameIndex - runningPCMFrameCount; + + if (!isMidFrame) { + drflac_result result = drflac__decode_flac_frame(pFlac); + if (result == DRFLAC_SUCCESS) { + /* The frame is valid. We just need to skip over some samples to ensure it's sample-exact. */ + return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode; /* <-- If this fails, something bad has happened (it should never fail). */ + } else { + if (result == DRFLAC_CRC_MISMATCH) { + goto next_iteration; /* CRC mismatch. Pretend this frame never existed. */ + } else { + return DRFLAC_FALSE; + } + } + } else { + /* We started seeking mid-frame which means we need to skip the frame decoding part. */ + return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode; + } + } else { + /* + It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this + frame never existed and leave the running sample count untouched. + */ + if (!isMidFrame) { + drflac_result result = drflac__seek_to_next_flac_frame(pFlac); + if (result == DRFLAC_SUCCESS) { + runningPCMFrameCount += pcmFrameCountInThisFLACFrame; + } else { + if (result == DRFLAC_CRC_MISMATCH) { + goto next_iteration; /* CRC mismatch. Pretend this frame never existed. */ + } else { + return DRFLAC_FALSE; + } + } + } else { + /* + We started seeking mid-frame which means we need to seek by reading to the end of the frame instead of with + drflac__seek_to_next_flac_frame() which only works if the decoder is sitting on the byte just after the frame header. + */ + runningPCMFrameCount += pFlac->currentFLACFrame.pcmFramesRemaining; + pFlac->currentFLACFrame.pcmFramesRemaining = 0; + isMidFrame = DRFLAC_FALSE; + } + + /* If we are seeking to the end of the file and we've just hit it, we're done. */ + if (pcmFrameIndex == pFlac->totalPCMFrameCount && runningPCMFrameCount == pFlac->totalPCMFrameCount) { + return DRFLAC_TRUE; + } + } + + next_iteration: + /* Grab the next frame in preparation for the next iteration. */ + if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { + return DRFLAC_FALSE; + } + } +} + + +#ifndef DR_FLAC_NO_OGG +typedef struct +{ + drflac_uint8 capturePattern[4]; /* Should be "OggS" */ + drflac_uint8 structureVersion; /* Always 0. */ + drflac_uint8 headerType; + drflac_uint64 granulePosition; + drflac_uint32 serialNumber; + drflac_uint32 sequenceNumber; + drflac_uint32 checksum; + drflac_uint8 segmentCount; + drflac_uint8 segmentTable[255]; +} drflac_ogg_page_header; +#endif + +typedef struct +{ + drflac_read_proc onRead; + drflac_seek_proc onSeek; + drflac_tell_proc onTell; + drflac_meta_proc onMeta; + drflac_container container; + void* pUserData; + void* pUserDataMD; + drflac_uint32 sampleRate; + drflac_uint8 channels; + drflac_uint8 bitsPerSample; + drflac_uint64 totalPCMFrameCount; + drflac_uint16 maxBlockSizeInPCMFrames; + drflac_uint64 runningFilePos; + drflac_bool32 hasStreamInfoBlock; + drflac_bool32 hasMetadataBlocks; + drflac_bs bs; /* <-- A bit streamer is required for loading data during initialization. */ + drflac_frame_header firstFrameHeader; /* <-- The header of the first frame that was read during relaxed initalization. Only set if there is no STREAMINFO block. */ + +#ifndef DR_FLAC_NO_OGG + drflac_uint32 oggSerial; + drflac_uint64 oggFirstBytePos; + drflac_ogg_page_header oggBosHeader; +#endif +} drflac_init_info; + +static DRFLAC_INLINE void drflac__decode_block_header(drflac_uint32 blockHeader, drflac_uint8* isLastBlock, drflac_uint8* blockType, drflac_uint32* blockSize) +{ + blockHeader = drflac__be2host_32(blockHeader); + *isLastBlock = (drflac_uint8)((blockHeader & 0x80000000UL) >> 31); + *blockType = (drflac_uint8)((blockHeader & 0x7F000000UL) >> 24); + *blockSize = (blockHeader & 0x00FFFFFFUL); +} + +static DRFLAC_INLINE drflac_bool32 drflac__read_and_decode_block_header(drflac_read_proc onRead, void* pUserData, drflac_uint8* isLastBlock, drflac_uint8* blockType, drflac_uint32* blockSize) +{ + drflac_uint32 blockHeader; + + *blockSize = 0; + if (onRead(pUserData, &blockHeader, 4) != 4) { + return DRFLAC_FALSE; + } + + drflac__decode_block_header(blockHeader, isLastBlock, blockType, blockSize); + return DRFLAC_TRUE; +} + +static drflac_bool32 drflac__read_streaminfo(drflac_read_proc onRead, void* pUserData, drflac_streaminfo* pStreamInfo) +{ + drflac_uint32 blockSizes; + drflac_uint64 frameSizes = 0; + drflac_uint64 importantProps; + drflac_uint8 md5[16]; + + /* min/max block size. */ + if (onRead(pUserData, &blockSizes, 4) != 4) { + return DRFLAC_FALSE; + } + + /* min/max frame size. */ + if (onRead(pUserData, &frameSizes, 6) != 6) { + return DRFLAC_FALSE; + } + + /* Sample rate, channels, bits per sample and total sample count. */ + if (onRead(pUserData, &importantProps, 8) != 8) { + return DRFLAC_FALSE; + } + + /* MD5 */ + if (onRead(pUserData, md5, sizeof(md5)) != sizeof(md5)) { + return DRFLAC_FALSE; + } + + blockSizes = drflac__be2host_32(blockSizes); + frameSizes = drflac__be2host_64(frameSizes); + importantProps = drflac__be2host_64(importantProps); + + pStreamInfo->minBlockSizeInPCMFrames = (drflac_uint16)((blockSizes & 0xFFFF0000) >> 16); + pStreamInfo->maxBlockSizeInPCMFrames = (drflac_uint16) (blockSizes & 0x0000FFFF); + pStreamInfo->minFrameSizeInPCMFrames = (drflac_uint32)((frameSizes & (((drflac_uint64)0x00FFFFFF << 16) << 24)) >> 40); + pStreamInfo->maxFrameSizeInPCMFrames = (drflac_uint32)((frameSizes & (((drflac_uint64)0x00FFFFFF << 16) << 0)) >> 16); + pStreamInfo->sampleRate = (drflac_uint32)((importantProps & (((drflac_uint64)0x000FFFFF << 16) << 28)) >> 44); + pStreamInfo->channels = (drflac_uint8 )((importantProps & (((drflac_uint64)0x0000000E << 16) << 24)) >> 41) + 1; + pStreamInfo->bitsPerSample = (drflac_uint8 )((importantProps & (((drflac_uint64)0x0000001F << 16) << 20)) >> 36) + 1; + pStreamInfo->totalPCMFrameCount = ((importantProps & ((((drflac_uint64)0x0000000F << 16) << 16) | 0xFFFFFFFF))); + DRFLAC_COPY_MEMORY(pStreamInfo->md5, md5, sizeof(md5)); + + return DRFLAC_TRUE; +} + + +static void* drflac__malloc_default(size_t sz, void* pUserData) +{ + (void)pUserData; + return DRFLAC_MALLOC(sz); +} + +static void* drflac__realloc_default(void* p, size_t sz, void* pUserData) +{ + (void)pUserData; + return DRFLAC_REALLOC(p, sz); +} + +static void drflac__free_default(void* p, void* pUserData) +{ + (void)pUserData; + DRFLAC_FREE(p); +} + + +static void* drflac__malloc_from_callbacks(size_t sz, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + if (pAllocationCallbacks == NULL) { + return NULL; + } + + if (pAllocationCallbacks->onMalloc != NULL) { + return pAllocationCallbacks->onMalloc(sz, pAllocationCallbacks->pUserData); + } + + /* Try using realloc(). */ + if (pAllocationCallbacks->onRealloc != NULL) { + return pAllocationCallbacks->onRealloc(NULL, sz, pAllocationCallbacks->pUserData); + } + + return NULL; +} + +static void* drflac__realloc_from_callbacks(void* p, size_t szNew, size_t szOld, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + if (pAllocationCallbacks == NULL) { + return NULL; + } + + if (pAllocationCallbacks->onRealloc != NULL) { + return pAllocationCallbacks->onRealloc(p, szNew, pAllocationCallbacks->pUserData); + } + + /* Try emulating realloc() in terms of malloc()/free(). */ + if (pAllocationCallbacks->onMalloc != NULL && pAllocationCallbacks->onFree != NULL) { + void* p2; + + p2 = pAllocationCallbacks->onMalloc(szNew, pAllocationCallbacks->pUserData); + if (p2 == NULL) { + return NULL; + } + + if (p != NULL) { + DRFLAC_COPY_MEMORY(p2, p, szOld); + pAllocationCallbacks->onFree(p, pAllocationCallbacks->pUserData); + } + + return p2; + } + + return NULL; +} + +static void drflac__free_from_callbacks(void* p, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + if (p == NULL || pAllocationCallbacks == NULL) { + return; + } + + if (pAllocationCallbacks->onFree != NULL) { + pAllocationCallbacks->onFree(p, pAllocationCallbacks->pUserData); + } +} + + +static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_tell_proc onTell, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_uint64* pFirstFramePos, drflac_uint64* pSeektablePos, drflac_uint32* pSeekpointCount, drflac_allocation_callbacks* pAllocationCallbacks) +{ + /* + We want to keep track of the byte position in the stream of the seektable. At the time of calling this function we know that + we'll be sitting on byte 42. + */ + drflac_uint64 runningFilePos = 42; + drflac_uint64 seektablePos = 0; + drflac_uint32 seektableSize = 0; + + (void)onTell; + + for (;;) { + drflac_metadata metadata; + drflac_uint8 isLastBlock = 0; + drflac_uint8 blockType = 0; + drflac_uint32 blockSize; + if (drflac__read_and_decode_block_header(onRead, pUserData, &isLastBlock, &blockType, &blockSize) == DRFLAC_FALSE) { + return DRFLAC_FALSE; + } + runningFilePos += 4; + + metadata.type = blockType; + metadata.rawDataSize = 0; + metadata.rawDataOffset = runningFilePos; + metadata.pRawData = NULL; + + switch (blockType) + { + case DRFLAC_METADATA_BLOCK_TYPE_APPLICATION: + { + if (blockSize < 4) { + return DRFLAC_FALSE; + } + + if (onMeta) { + void* pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks); + if (pRawData == NULL) { + return DRFLAC_FALSE; + } + + if (onRead(pUserData, pRawData, blockSize) != blockSize) { + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + + metadata.pRawData = pRawData; + metadata.rawDataSize = blockSize; + metadata.data.application.id = drflac__be2host_32(*(drflac_uint32*)pRawData); + metadata.data.application.pData = (const void*)((drflac_uint8*)pRawData + sizeof(drflac_uint32)); + metadata.data.application.dataSize = blockSize - sizeof(drflac_uint32); + onMeta(pUserDataMD, &metadata); + + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + } + } break; + + case DRFLAC_METADATA_BLOCK_TYPE_SEEKTABLE: + { + seektablePos = runningFilePos; + seektableSize = blockSize; + + if (onMeta) { + drflac_uint32 seekpointCount; + drflac_uint32 iSeekpoint; + void* pRawData; + + seekpointCount = blockSize/DRFLAC_SEEKPOINT_SIZE_IN_BYTES; + + pRawData = drflac__malloc_from_callbacks(seekpointCount * sizeof(drflac_seekpoint), pAllocationCallbacks); + if (pRawData == NULL) { + return DRFLAC_FALSE; + } + + /* We need to read seekpoint by seekpoint and do some processing. */ + for (iSeekpoint = 0; iSeekpoint < seekpointCount; ++iSeekpoint) { + drflac_seekpoint* pSeekpoint = (drflac_seekpoint*)pRawData + iSeekpoint; + + if (onRead(pUserData, pSeekpoint, DRFLAC_SEEKPOINT_SIZE_IN_BYTES) != DRFLAC_SEEKPOINT_SIZE_IN_BYTES) { + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + + /* Endian swap. */ + pSeekpoint->firstPCMFrame = drflac__be2host_64(pSeekpoint->firstPCMFrame); + pSeekpoint->flacFrameOffset = drflac__be2host_64(pSeekpoint->flacFrameOffset); + pSeekpoint->pcmFrameCount = drflac__be2host_16(pSeekpoint->pcmFrameCount); + } + + metadata.pRawData = pRawData; + metadata.rawDataSize = blockSize; + metadata.data.seektable.seekpointCount = seekpointCount; + metadata.data.seektable.pSeekpoints = (const drflac_seekpoint*)pRawData; + + onMeta(pUserDataMD, &metadata); + + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + } + } break; + + case DRFLAC_METADATA_BLOCK_TYPE_VORBIS_COMMENT: + { + if (blockSize < 8) { + return DRFLAC_FALSE; + } + + if (onMeta) { + void* pRawData; + const char* pRunningData; + const char* pRunningDataEnd; + drflac_uint32 i; + + pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks); + if (pRawData == NULL) { + return DRFLAC_FALSE; + } + + if (onRead(pUserData, pRawData, blockSize) != blockSize) { + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + + metadata.pRawData = pRawData; + metadata.rawDataSize = blockSize; + + pRunningData = (const char*)pRawData; + pRunningDataEnd = (const char*)pRawData + blockSize; + + metadata.data.vorbis_comment.vendorLength = drflac__le2host_32_ptr_unaligned(pRunningData); pRunningData += 4; + + /* Need space for the rest of the block */ + if ((pRunningDataEnd - pRunningData) - 4 < (drflac_int64)metadata.data.vorbis_comment.vendorLength) { /* <-- Note the order of operations to avoid overflow to a valid value */ + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + metadata.data.vorbis_comment.vendor = pRunningData; pRunningData += metadata.data.vorbis_comment.vendorLength; + metadata.data.vorbis_comment.commentCount = drflac__le2host_32_ptr_unaligned(pRunningData); pRunningData += 4; + + /* Need space for 'commentCount' comments after the block, which at minimum is a drflac_uint32 per comment */ + if ((pRunningDataEnd - pRunningData) / sizeof(drflac_uint32) < metadata.data.vorbis_comment.commentCount) { /* <-- Note the order of operations to avoid overflow to a valid value */ + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + metadata.data.vorbis_comment.pComments = pRunningData; + + /* Check that the comments section is valid before passing it to the callback */ + for (i = 0; i < metadata.data.vorbis_comment.commentCount; ++i) { + drflac_uint32 commentLength; + + if (pRunningDataEnd - pRunningData < 4) { + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + + commentLength = drflac__le2host_32_ptr_unaligned(pRunningData); pRunningData += 4; + if (pRunningDataEnd - pRunningData < (drflac_int64)commentLength) { /* <-- Note the order of operations to avoid overflow to a valid value */ + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + pRunningData += commentLength; + } + + onMeta(pUserDataMD, &metadata); + + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + } + } break; + + case DRFLAC_METADATA_BLOCK_TYPE_CUESHEET: + { + if (blockSize < 396) { + return DRFLAC_FALSE; + } + + if (onMeta) { + void* pRawData; + const char* pRunningData; + const char* pRunningDataEnd; + size_t bufferSize; + drflac_uint8 iTrack; + drflac_uint8 iIndex; + void* pTrackData; + + /* + This needs to be loaded in two passes. The first pass is used to calculate the size of the memory allocation + we need for storing the necessary data. The second pass will fill that buffer with usable data. + */ + pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks); + if (pRawData == NULL) { + return DRFLAC_FALSE; + } + + if (onRead(pUserData, pRawData, blockSize) != blockSize) { + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + + metadata.pRawData = pRawData; + metadata.rawDataSize = blockSize; + + pRunningData = (const char*)pRawData; + pRunningDataEnd = (const char*)pRawData + blockSize; + + DRFLAC_COPY_MEMORY(metadata.data.cuesheet.catalog, pRunningData, 128); pRunningData += 128; + metadata.data.cuesheet.leadInSampleCount = drflac__be2host_64(*(const drflac_uint64*)pRunningData); pRunningData += 8; + metadata.data.cuesheet.isCD = (pRunningData[0] & 0x80) != 0; pRunningData += 259; + metadata.data.cuesheet.trackCount = pRunningData[0]; pRunningData += 1; + metadata.data.cuesheet.pTrackData = NULL; /* Will be filled later. */ + + /* Pass 1: Calculate the size of the buffer for the track data. */ + { + const char* pRunningDataSaved = pRunningData; /* Will be restored at the end in preparation for the second pass. */ + + bufferSize = metadata.data.cuesheet.trackCount * DRFLAC_CUESHEET_TRACK_SIZE_IN_BYTES; + + for (iTrack = 0; iTrack < metadata.data.cuesheet.trackCount; ++iTrack) { + drflac_uint8 indexCount; + drflac_uint32 indexPointSize; + + if (pRunningDataEnd - pRunningData < DRFLAC_CUESHEET_TRACK_SIZE_IN_BYTES) { + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + + /* Skip to the index point count */ + pRunningData += 35; + + indexCount = pRunningData[0]; + pRunningData += 1; + + bufferSize += indexCount * sizeof(drflac_cuesheet_track_index); + + /* Quick validation check. */ + indexPointSize = indexCount * DRFLAC_CUESHEET_TRACK_INDEX_SIZE_IN_BYTES; + if (pRunningDataEnd - pRunningData < (drflac_int64)indexPointSize) { + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + + pRunningData += indexPointSize; + } + + pRunningData = pRunningDataSaved; + } + + /* Pass 2: Allocate a buffer and fill the data. Validation was done in the step above so can be skipped. */ + { + char* pRunningTrackData; + + pTrackData = drflac__malloc_from_callbacks(bufferSize, pAllocationCallbacks); + if (pTrackData == NULL) { + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + + pRunningTrackData = (char*)pTrackData; + + for (iTrack = 0; iTrack < metadata.data.cuesheet.trackCount; ++iTrack) { + drflac_uint8 indexCount; + + DRFLAC_COPY_MEMORY(pRunningTrackData, pRunningData, DRFLAC_CUESHEET_TRACK_SIZE_IN_BYTES); + pRunningData += DRFLAC_CUESHEET_TRACK_SIZE_IN_BYTES-1; /* Skip forward, but not beyond the last byte in the CUESHEET_TRACK block which is the index count. */ + pRunningTrackData += DRFLAC_CUESHEET_TRACK_SIZE_IN_BYTES-1; + + /* Grab the index count for the next part. */ + indexCount = pRunningData[0]; + pRunningData += 1; + pRunningTrackData += 1; + + /* Extract each track index. */ + for (iIndex = 0; iIndex < indexCount; ++iIndex) { + drflac_cuesheet_track_index* pTrackIndex = (drflac_cuesheet_track_index*)pRunningTrackData; + + DRFLAC_COPY_MEMORY(pRunningTrackData, pRunningData, DRFLAC_CUESHEET_TRACK_INDEX_SIZE_IN_BYTES); + pRunningData += DRFLAC_CUESHEET_TRACK_INDEX_SIZE_IN_BYTES; + pRunningTrackData += sizeof(drflac_cuesheet_track_index); + + pTrackIndex->offset = drflac__be2host_64(pTrackIndex->offset); + } + } + + metadata.data.cuesheet.pTrackData = pTrackData; + } + + /* The original data is no longer needed. */ + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + pRawData = NULL; + + onMeta(pUserDataMD, &metadata); + + drflac__free_from_callbacks(pTrackData, pAllocationCallbacks); + pTrackData = NULL; + } + } break; + + case DRFLAC_METADATA_BLOCK_TYPE_PICTURE: + { + if (blockSize < 32) { + return DRFLAC_FALSE; + } + + if (onMeta) { + drflac_bool32 result = DRFLAC_TRUE; + drflac_uint32 blockSizeRemaining = blockSize; + char* pMime = NULL; + char* pDescription = NULL; + void* pPictureData = NULL; + + if (blockSizeRemaining < 4 || onRead(pUserData, &metadata.data.picture.type, 4) != 4) { + result = DRFLAC_FALSE; + goto done_flac; + } + blockSizeRemaining -= 4; + metadata.data.picture.type = drflac__be2host_32(metadata.data.picture.type); + + + if (blockSizeRemaining < 4 || onRead(pUserData, &metadata.data.picture.mimeLength, 4) != 4) { + result = DRFLAC_FALSE; + goto done_flac; + } + blockSizeRemaining -= 4; + metadata.data.picture.mimeLength = drflac__be2host_32(metadata.data.picture.mimeLength); + + pMime = (char*)drflac__malloc_from_callbacks(metadata.data.picture.mimeLength + 1, pAllocationCallbacks); /* +1 for null terminator. */ + if (pMime == NULL) { + result = DRFLAC_FALSE; + goto done_flac; + } + + if (blockSizeRemaining < metadata.data.picture.mimeLength || onRead(pUserData, pMime, metadata.data.picture.mimeLength) != metadata.data.picture.mimeLength) { + result = DRFLAC_FALSE; + goto done_flac; + } + blockSizeRemaining -= metadata.data.picture.mimeLength; + pMime[metadata.data.picture.mimeLength] = '\0'; /* Null terminate for safety. */ + metadata.data.picture.mime = (const char*)pMime; + + + if (blockSizeRemaining < 4 || onRead(pUserData, &metadata.data.picture.descriptionLength, 4) != 4) { + result = DRFLAC_FALSE; + goto done_flac; + } + blockSizeRemaining -= 4; + metadata.data.picture.descriptionLength = drflac__be2host_32(metadata.data.picture.descriptionLength); + + pDescription = (char*)drflac__malloc_from_callbacks(metadata.data.picture.descriptionLength + 1, pAllocationCallbacks); /* +1 for null terminator. */ + if (pDescription == NULL) { + result = DRFLAC_FALSE; + goto done_flac; + } + + if (blockSizeRemaining < metadata.data.picture.descriptionLength || onRead(pUserData, pDescription, metadata.data.picture.descriptionLength) != metadata.data.picture.descriptionLength) { + result = DRFLAC_FALSE; + goto done_flac; + } + blockSizeRemaining -= metadata.data.picture.descriptionLength; + pDescription[metadata.data.picture.descriptionLength] = '\0'; /* Null terminate for safety. */ + metadata.data.picture.description = (const char*)pDescription; + + + if (blockSizeRemaining < 4 || onRead(pUserData, &metadata.data.picture.width, 4) != 4) { + result = DRFLAC_FALSE; + goto done_flac; + } + blockSizeRemaining -= 4; + metadata.data.picture.width = drflac__be2host_32(metadata.data.picture.width); + + if (blockSizeRemaining < 4 || onRead(pUserData, &metadata.data.picture.height, 4) != 4) { + result = DRFLAC_FALSE; + goto done_flac; + } + blockSizeRemaining -= 4; + metadata.data.picture.height = drflac__be2host_32(metadata.data.picture.height); + + if (blockSizeRemaining < 4 || onRead(pUserData, &metadata.data.picture.colorDepth, 4) != 4) { + result = DRFLAC_FALSE; + goto done_flac; + } + blockSizeRemaining -= 4; + metadata.data.picture.colorDepth = drflac__be2host_32(metadata.data.picture.colorDepth); + + if (blockSizeRemaining < 4 || onRead(pUserData, &metadata.data.picture.indexColorCount, 4) != 4) { + result = DRFLAC_FALSE; + goto done_flac; + } + blockSizeRemaining -= 4; + metadata.data.picture.indexColorCount = drflac__be2host_32(metadata.data.picture.indexColorCount); + + + /* Picture data. */ + if (blockSizeRemaining < 4 || onRead(pUserData, &metadata.data.picture.pictureDataSize, 4) != 4) { + result = DRFLAC_FALSE; + goto done_flac; + } + blockSizeRemaining -= 4; + metadata.data.picture.pictureDataSize = drflac__be2host_32(metadata.data.picture.pictureDataSize); + + if (blockSizeRemaining < metadata.data.picture.pictureDataSize) { + result = DRFLAC_FALSE; + goto done_flac; + } + + /* For the actual image data we want to store the offset to the start of the stream. */ + metadata.data.picture.pictureDataOffset = runningFilePos + (blockSize - blockSizeRemaining); + + /* + For the allocation of image data, we can allow memory allocation to fail, in which case we just leave + the pointer as null. If it fails, we need to fall back to seeking past the image data. + */ + #ifndef DR_FLAC_NO_PICTURE_METADATA_MALLOC + pPictureData = drflac__malloc_from_callbacks(metadata.data.picture.pictureDataSize, pAllocationCallbacks); + if (pPictureData != NULL) { + if (onRead(pUserData, pPictureData, metadata.data.picture.pictureDataSize) != metadata.data.picture.pictureDataSize) { + result = DRFLAC_FALSE; + goto done_flac; + } + } else + #endif + { + /* Allocation failed. We need to seek past the picture data. */ + if (!onSeek(pUserData, metadata.data.picture.pictureDataSize, DRFLAC_SEEK_CUR)) { + result = DRFLAC_FALSE; + goto done_flac; + } + } + + blockSizeRemaining -= metadata.data.picture.pictureDataSize; + metadata.data.picture.pPictureData = (const drflac_uint8*)pPictureData; + + + /* Only fire the callback if we actually have a way to read the image data. We must have either a valid offset, or a valid data pointer. */ + if (metadata.data.picture.pictureDataOffset != 0 || metadata.data.picture.pPictureData != NULL) { + onMeta(pUserDataMD, &metadata); + } else { + /* Don't have a valid offset or data pointer, so just pretend we don't have a picture metadata. */ + } + + done_flac: + drflac__free_from_callbacks(pMime, pAllocationCallbacks); + drflac__free_from_callbacks(pDescription, pAllocationCallbacks); + drflac__free_from_callbacks(pPictureData, pAllocationCallbacks); + + if (result != DRFLAC_TRUE) { + return DRFLAC_FALSE; + } + } + } break; + + case DRFLAC_METADATA_BLOCK_TYPE_PADDING: + { + if (onMeta) { + metadata.data.padding.unused = 0; + + /* Padding doesn't have anything meaningful in it, so just skip over it, but make sure the caller is aware of it by firing the callback. */ + if (!onSeek(pUserData, blockSize, DRFLAC_SEEK_CUR)) { + isLastBlock = DRFLAC_TRUE; /* An error occurred while seeking. Attempt to recover by treating this as the last block which will in turn terminate the loop. */ + } else { + onMeta(pUserDataMD, &metadata); + } + } + } break; + + case DRFLAC_METADATA_BLOCK_TYPE_INVALID: + { + /* Invalid chunk. Just skip over this one. */ + if (onMeta) { + if (!onSeek(pUserData, blockSize, DRFLAC_SEEK_CUR)) { + isLastBlock = DRFLAC_TRUE; /* An error occurred while seeking. Attempt to recover by treating this as the last block which will in turn terminate the loop. */ + } + } + } break; + + default: + { + /* + It's an unknown chunk, but not necessarily invalid. There's a chance more metadata blocks might be defined later on, so we + can at the very least report the chunk to the application and let it look at the raw data. + */ + if (onMeta) { + void* pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks); + if (pRawData != NULL) { + if (onRead(pUserData, pRawData, blockSize) != blockSize) { + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + } else { + /* Allocation failed. We need to seek past the block. */ + if (!onSeek(pUserData, blockSize, DRFLAC_SEEK_CUR)) { + return DRFLAC_FALSE; + } + } + + metadata.pRawData = pRawData; + metadata.rawDataSize = blockSize; + onMeta(pUserDataMD, &metadata); + + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + } + } break; + } + + /* If we're not handling metadata, just skip over the block. If we are, it will have been handled earlier in the switch statement above. */ + if (onMeta == NULL && blockSize > 0) { + if (!onSeek(pUserData, blockSize, DRFLAC_SEEK_CUR)) { + isLastBlock = DRFLAC_TRUE; + } + } + + runningFilePos += blockSize; + if (isLastBlock) { + break; + } + } + + *pSeektablePos = seektablePos; + *pSeekpointCount = seektableSize / DRFLAC_SEEKPOINT_SIZE_IN_BYTES; + *pFirstFramePos = runningFilePos; + + return DRFLAC_TRUE; +} + +static drflac_bool32 drflac__init_private__native(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_bool32 relaxed) +{ + /* Pre Condition: The bit stream should be sitting just past the 4-byte id header. */ + + drflac_uint8 isLastBlock; + drflac_uint8 blockType; + drflac_uint32 blockSize; + + (void)onSeek; + + pInit->container = drflac_container_native; + + /* The first metadata block should be the STREAMINFO block. */ + if (!drflac__read_and_decode_block_header(onRead, pUserData, &isLastBlock, &blockType, &blockSize)) { + return DRFLAC_FALSE; + } + + if (blockType != DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO || blockSize != 34) { + if (!relaxed) { + /* We're opening in strict mode and the first block is not the STREAMINFO block. Error. */ + return DRFLAC_FALSE; + } else { + /* + Relaxed mode. To open from here we need to just find the first frame and set the sample rate, etc. to whatever is defined + for that frame. + */ + pInit->hasStreamInfoBlock = DRFLAC_FALSE; + pInit->hasMetadataBlocks = DRFLAC_FALSE; + + if (!drflac__read_next_flac_frame_header(&pInit->bs, 0, &pInit->firstFrameHeader)) { + return DRFLAC_FALSE; /* Couldn't find a frame. */ + } + + if (pInit->firstFrameHeader.bitsPerSample == 0) { + return DRFLAC_FALSE; /* Failed to initialize because the first frame depends on the STREAMINFO block, which does not exist. */ + } + + pInit->sampleRate = pInit->firstFrameHeader.sampleRate; + pInit->channels = drflac__get_channel_count_from_channel_assignment(pInit->firstFrameHeader.channelAssignment); + pInit->bitsPerSample = pInit->firstFrameHeader.bitsPerSample; + pInit->maxBlockSizeInPCMFrames = 65535; /* <-- See notes here: https://xiph.org/flac/format.html#metadata_block_streaminfo */ + return DRFLAC_TRUE; + } + } else { + drflac_streaminfo streaminfo; + if (!drflac__read_streaminfo(onRead, pUserData, &streaminfo)) { + return DRFLAC_FALSE; + } + + pInit->hasStreamInfoBlock = DRFLAC_TRUE; + pInit->sampleRate = streaminfo.sampleRate; + pInit->channels = streaminfo.channels; + pInit->bitsPerSample = streaminfo.bitsPerSample; + pInit->totalPCMFrameCount = streaminfo.totalPCMFrameCount; + pInit->maxBlockSizeInPCMFrames = streaminfo.maxBlockSizeInPCMFrames; /* Don't care about the min block size - only the max (used for determining the size of the memory allocation). */ + pInit->hasMetadataBlocks = !isLastBlock; + + if (onMeta) { + drflac_metadata metadata; + metadata.type = DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO; + metadata.pRawData = NULL; + metadata.rawDataSize = 0; + metadata.data.streaminfo = streaminfo; + onMeta(pUserDataMD, &metadata); + } + + return DRFLAC_TRUE; + } +} + +#ifndef DR_FLAC_NO_OGG +#define DRFLAC_OGG_MAX_PAGE_SIZE 65307 +#define DRFLAC_OGG_CAPTURE_PATTERN_CRC32 1605413199 /* CRC-32 of "OggS". */ + +typedef enum +{ + drflac_ogg_recover_on_crc_mismatch, + drflac_ogg_fail_on_crc_mismatch +} drflac_ogg_crc_mismatch_recovery; + +#ifndef DR_FLAC_NO_CRC +static drflac_uint32 drflac__crc32_table[] = { + 0x00000000L, 0x04C11DB7L, 0x09823B6EL, 0x0D4326D9L, + 0x130476DCL, 0x17C56B6BL, 0x1A864DB2L, 0x1E475005L, + 0x2608EDB8L, 0x22C9F00FL, 0x2F8AD6D6L, 0x2B4BCB61L, + 0x350C9B64L, 0x31CD86D3L, 0x3C8EA00AL, 0x384FBDBDL, + 0x4C11DB70L, 0x48D0C6C7L, 0x4593E01EL, 0x4152FDA9L, + 0x5F15ADACL, 0x5BD4B01BL, 0x569796C2L, 0x52568B75L, + 0x6A1936C8L, 0x6ED82B7FL, 0x639B0DA6L, 0x675A1011L, + 0x791D4014L, 0x7DDC5DA3L, 0x709F7B7AL, 0x745E66CDL, + 0x9823B6E0L, 0x9CE2AB57L, 0x91A18D8EL, 0x95609039L, + 0x8B27C03CL, 0x8FE6DD8BL, 0x82A5FB52L, 0x8664E6E5L, + 0xBE2B5B58L, 0xBAEA46EFL, 0xB7A96036L, 0xB3687D81L, + 0xAD2F2D84L, 0xA9EE3033L, 0xA4AD16EAL, 0xA06C0B5DL, + 0xD4326D90L, 0xD0F37027L, 0xDDB056FEL, 0xD9714B49L, + 0xC7361B4CL, 0xC3F706FBL, 0xCEB42022L, 0xCA753D95L, + 0xF23A8028L, 0xF6FB9D9FL, 0xFBB8BB46L, 0xFF79A6F1L, + 0xE13EF6F4L, 0xE5FFEB43L, 0xE8BCCD9AL, 0xEC7DD02DL, + 0x34867077L, 0x30476DC0L, 0x3D044B19L, 0x39C556AEL, + 0x278206ABL, 0x23431B1CL, 0x2E003DC5L, 0x2AC12072L, + 0x128E9DCFL, 0x164F8078L, 0x1B0CA6A1L, 0x1FCDBB16L, + 0x018AEB13L, 0x054BF6A4L, 0x0808D07DL, 0x0CC9CDCAL, + 0x7897AB07L, 0x7C56B6B0L, 0x71159069L, 0x75D48DDEL, + 0x6B93DDDBL, 0x6F52C06CL, 0x6211E6B5L, 0x66D0FB02L, + 0x5E9F46BFL, 0x5A5E5B08L, 0x571D7DD1L, 0x53DC6066L, + 0x4D9B3063L, 0x495A2DD4L, 0x44190B0DL, 0x40D816BAL, + 0xACA5C697L, 0xA864DB20L, 0xA527FDF9L, 0xA1E6E04EL, + 0xBFA1B04BL, 0xBB60ADFCL, 0xB6238B25L, 0xB2E29692L, + 0x8AAD2B2FL, 0x8E6C3698L, 0x832F1041L, 0x87EE0DF6L, + 0x99A95DF3L, 0x9D684044L, 0x902B669DL, 0x94EA7B2AL, + 0xE0B41DE7L, 0xE4750050L, 0xE9362689L, 0xEDF73B3EL, + 0xF3B06B3BL, 0xF771768CL, 0xFA325055L, 0xFEF34DE2L, + 0xC6BCF05FL, 0xC27DEDE8L, 0xCF3ECB31L, 0xCBFFD686L, + 0xD5B88683L, 0xD1799B34L, 0xDC3ABDEDL, 0xD8FBA05AL, + 0x690CE0EEL, 0x6DCDFD59L, 0x608EDB80L, 0x644FC637L, + 0x7A089632L, 0x7EC98B85L, 0x738AAD5CL, 0x774BB0EBL, + 0x4F040D56L, 0x4BC510E1L, 0x46863638L, 0x42472B8FL, + 0x5C007B8AL, 0x58C1663DL, 0x558240E4L, 0x51435D53L, + 0x251D3B9EL, 0x21DC2629L, 0x2C9F00F0L, 0x285E1D47L, + 0x36194D42L, 0x32D850F5L, 0x3F9B762CL, 0x3B5A6B9BL, + 0x0315D626L, 0x07D4CB91L, 0x0A97ED48L, 0x0E56F0FFL, + 0x1011A0FAL, 0x14D0BD4DL, 0x19939B94L, 0x1D528623L, + 0xF12F560EL, 0xF5EE4BB9L, 0xF8AD6D60L, 0xFC6C70D7L, + 0xE22B20D2L, 0xE6EA3D65L, 0xEBA91BBCL, 0xEF68060BL, + 0xD727BBB6L, 0xD3E6A601L, 0xDEA580D8L, 0xDA649D6FL, + 0xC423CD6AL, 0xC0E2D0DDL, 0xCDA1F604L, 0xC960EBB3L, + 0xBD3E8D7EL, 0xB9FF90C9L, 0xB4BCB610L, 0xB07DABA7L, + 0xAE3AFBA2L, 0xAAFBE615L, 0xA7B8C0CCL, 0xA379DD7BL, + 0x9B3660C6L, 0x9FF77D71L, 0x92B45BA8L, 0x9675461FL, + 0x8832161AL, 0x8CF30BADL, 0x81B02D74L, 0x857130C3L, + 0x5D8A9099L, 0x594B8D2EL, 0x5408ABF7L, 0x50C9B640L, + 0x4E8EE645L, 0x4A4FFBF2L, 0x470CDD2BL, 0x43CDC09CL, + 0x7B827D21L, 0x7F436096L, 0x7200464FL, 0x76C15BF8L, + 0x68860BFDL, 0x6C47164AL, 0x61043093L, 0x65C52D24L, + 0x119B4BE9L, 0x155A565EL, 0x18197087L, 0x1CD86D30L, + 0x029F3D35L, 0x065E2082L, 0x0B1D065BL, 0x0FDC1BECL, + 0x3793A651L, 0x3352BBE6L, 0x3E119D3FL, 0x3AD08088L, + 0x2497D08DL, 0x2056CD3AL, 0x2D15EBE3L, 0x29D4F654L, + 0xC5A92679L, 0xC1683BCEL, 0xCC2B1D17L, 0xC8EA00A0L, + 0xD6AD50A5L, 0xD26C4D12L, 0xDF2F6BCBL, 0xDBEE767CL, + 0xE3A1CBC1L, 0xE760D676L, 0xEA23F0AFL, 0xEEE2ED18L, + 0xF0A5BD1DL, 0xF464A0AAL, 0xF9278673L, 0xFDE69BC4L, + 0x89B8FD09L, 0x8D79E0BEL, 0x803AC667L, 0x84FBDBD0L, + 0x9ABC8BD5L, 0x9E7D9662L, 0x933EB0BBL, 0x97FFAD0CL, + 0xAFB010B1L, 0xAB710D06L, 0xA6322BDFL, 0xA2F33668L, + 0xBCB4666DL, 0xB8757BDAL, 0xB5365D03L, 0xB1F740B4L +}; +#endif + +static DRFLAC_INLINE drflac_uint32 drflac_crc32_byte(drflac_uint32 crc32, drflac_uint8 data) +{ +#ifndef DR_FLAC_NO_CRC + return (crc32 << 8) ^ drflac__crc32_table[(drflac_uint8)((crc32 >> 24) & 0xFF) ^ data]; +#else + (void)data; + return crc32; +#endif +} + +#if 0 +static DRFLAC_INLINE drflac_uint32 drflac_crc32_uint32(drflac_uint32 crc32, drflac_uint32 data) +{ + crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >> 24) & 0xFF)); + crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >> 16) & 0xFF)); + crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >> 8) & 0xFF)); + crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >> 0) & 0xFF)); + return crc32; +} + +static DRFLAC_INLINE drflac_uint32 drflac_crc32_uint64(drflac_uint32 crc32, drflac_uint64 data) +{ + crc32 = drflac_crc32_uint32(crc32, (drflac_uint32)((data >> 32) & 0xFFFFFFFF)); + crc32 = drflac_crc32_uint32(crc32, (drflac_uint32)((data >> 0) & 0xFFFFFFFF)); + return crc32; +} +#endif + +static DRFLAC_INLINE drflac_uint32 drflac_crc32_buffer(drflac_uint32 crc32, drflac_uint8* pData, drflac_uint32 dataSize) +{ + /* This can be optimized. */ + drflac_uint32 i; + for (i = 0; i < dataSize; ++i) { + crc32 = drflac_crc32_byte(crc32, pData[i]); + } + return crc32; +} + + +static DRFLAC_INLINE drflac_bool32 drflac_ogg__is_capture_pattern(drflac_uint8 pattern[4]) +{ + return pattern[0] == 'O' && pattern[1] == 'g' && pattern[2] == 'g' && pattern[3] == 'S'; +} + +static DRFLAC_INLINE drflac_uint32 drflac_ogg__get_page_header_size(drflac_ogg_page_header* pHeader) +{ + return 27 + pHeader->segmentCount; +} + +static DRFLAC_INLINE drflac_uint32 drflac_ogg__get_page_body_size(drflac_ogg_page_header* pHeader) +{ + drflac_uint32 pageBodySize = 0; + int i; + + for (i = 0; i < pHeader->segmentCount; ++i) { + pageBodySize += pHeader->segmentTable[i]; + } + + return pageBodySize; +} + +static drflac_result drflac_ogg__read_page_header_after_capture_pattern(drflac_read_proc onRead, void* pUserData, drflac_ogg_page_header* pHeader, drflac_uint32* pBytesRead, drflac_uint32* pCRC32) +{ + drflac_uint8 data[23]; + drflac_uint32 i; + + DRFLAC_ASSERT(*pCRC32 == DRFLAC_OGG_CAPTURE_PATTERN_CRC32); + + if (onRead(pUserData, data, 23) != 23) { + return DRFLAC_AT_END; + } + *pBytesRead += 23; + + /* + It's not actually used, but set the capture pattern to 'OggS' for completeness. Not doing this will cause static analysers to complain about + us trying to access uninitialized data. We could alternatively just comment out this member of the drflac_ogg_page_header structure, but I + like to have it map to the structure of the underlying data. + */ + pHeader->capturePattern[0] = 'O'; + pHeader->capturePattern[1] = 'g'; + pHeader->capturePattern[2] = 'g'; + pHeader->capturePattern[3] = 'S'; + + pHeader->structureVersion = data[0]; + pHeader->headerType = data[1]; + DRFLAC_COPY_MEMORY(&pHeader->granulePosition, &data[ 2], 8); + DRFLAC_COPY_MEMORY(&pHeader->serialNumber, &data[10], 4); + DRFLAC_COPY_MEMORY(&pHeader->sequenceNumber, &data[14], 4); + DRFLAC_COPY_MEMORY(&pHeader->checksum, &data[18], 4); + pHeader->segmentCount = data[22]; + + /* Calculate the CRC. Note that for the calculation the checksum part of the page needs to be set to 0. */ + data[18] = 0; + data[19] = 0; + data[20] = 0; + data[21] = 0; + + for (i = 0; i < 23; ++i) { + *pCRC32 = drflac_crc32_byte(*pCRC32, data[i]); + } + + + if (onRead(pUserData, pHeader->segmentTable, pHeader->segmentCount) != pHeader->segmentCount) { + return DRFLAC_AT_END; + } + *pBytesRead += pHeader->segmentCount; + + for (i = 0; i < pHeader->segmentCount; ++i) { + *pCRC32 = drflac_crc32_byte(*pCRC32, pHeader->segmentTable[i]); + } + + return DRFLAC_SUCCESS; +} + +static drflac_result drflac_ogg__read_page_header(drflac_read_proc onRead, void* pUserData, drflac_ogg_page_header* pHeader, drflac_uint32* pBytesRead, drflac_uint32* pCRC32) +{ + drflac_uint8 id[4]; + + *pBytesRead = 0; + + if (onRead(pUserData, id, 4) != 4) { + return DRFLAC_AT_END; + } + *pBytesRead += 4; + + /* We need to read byte-by-byte until we find the OggS capture pattern. */ + for (;;) { + if (drflac_ogg__is_capture_pattern(id)) { + drflac_result result; + + *pCRC32 = DRFLAC_OGG_CAPTURE_PATTERN_CRC32; + + result = drflac_ogg__read_page_header_after_capture_pattern(onRead, pUserData, pHeader, pBytesRead, pCRC32); + if (result == DRFLAC_SUCCESS) { + return DRFLAC_SUCCESS; + } else { + if (result == DRFLAC_CRC_MISMATCH) { + continue; + } else { + return result; + } + } + } else { + /* The first 4 bytes did not equal the capture pattern. Read the next byte and try again. */ + id[0] = id[1]; + id[1] = id[2]; + id[2] = id[3]; + if (onRead(pUserData, &id[3], 1) != 1) { + return DRFLAC_AT_END; + } + *pBytesRead += 1; + } + } +} + + +/* +The main part of the Ogg encapsulation is the conversion from the physical Ogg bitstream to the native FLAC bitstream. It works +in three general stages: Ogg Physical Bitstream -> Ogg/FLAC Logical Bitstream -> FLAC Native Bitstream. dr_flac is designed +in such a way that the core sections assume everything is delivered in native format. Therefore, for each encapsulation type +dr_flac is supporting there needs to be a layer sitting on top of the onRead and onSeek callbacks that ensures the bits read from +the physical Ogg bitstream are converted and delivered in native FLAC format. +*/ +typedef struct +{ + drflac_read_proc onRead; /* The original onRead callback from drflac_open() and family. */ + drflac_seek_proc onSeek; /* The original onSeek callback from drflac_open() and family. */ + drflac_tell_proc onTell; /* The original onTell callback from drflac_open() and family. */ + void* pUserData; /* The user data passed on onRead and onSeek. This is the user data that was passed on drflac_open() and family. */ + drflac_uint64 currentBytePos; /* The position of the byte we are sitting on in the physical byte stream. Used for efficient seeking. */ + drflac_uint64 firstBytePos; /* The position of the first byte in the physical bitstream. Points to the start of the "OggS" identifier of the FLAC bos page. */ + drflac_uint32 serialNumber; /* The serial number of the FLAC audio pages. This is determined by the initial header page that was read during initialization. */ + drflac_ogg_page_header bosPageHeader; /* Used for seeking. */ + drflac_ogg_page_header currentPageHeader; + drflac_uint32 bytesRemainingInPage; + drflac_uint32 pageDataSize; + drflac_uint8 pageData[DRFLAC_OGG_MAX_PAGE_SIZE]; +} drflac_oggbs; /* oggbs = Ogg Bitstream */ + +static size_t drflac_oggbs__read_physical(drflac_oggbs* oggbs, void* bufferOut, size_t bytesToRead) +{ + size_t bytesActuallyRead = oggbs->onRead(oggbs->pUserData, bufferOut, bytesToRead); + oggbs->currentBytePos += bytesActuallyRead; + + return bytesActuallyRead; +} + +static drflac_bool32 drflac_oggbs__seek_physical(drflac_oggbs* oggbs, drflac_uint64 offset, drflac_seek_origin origin) +{ + if (origin == DRFLAC_SEEK_SET) { + if (offset <= 0x7FFFFFFF) { + if (!oggbs->onSeek(oggbs->pUserData, (int)offset, DRFLAC_SEEK_SET)) { + return DRFLAC_FALSE; + } + oggbs->currentBytePos = offset; + + return DRFLAC_TRUE; + } else { + if (!oggbs->onSeek(oggbs->pUserData, 0x7FFFFFFF, DRFLAC_SEEK_SET)) { + return DRFLAC_FALSE; + } + oggbs->currentBytePos = offset; + + return drflac_oggbs__seek_physical(oggbs, offset - 0x7FFFFFFF, DRFLAC_SEEK_CUR); + } + } else { + while (offset > 0x7FFFFFFF) { + if (!oggbs->onSeek(oggbs->pUserData, 0x7FFFFFFF, DRFLAC_SEEK_CUR)) { + return DRFLAC_FALSE; + } + oggbs->currentBytePos += 0x7FFFFFFF; + offset -= 0x7FFFFFFF; + } + + if (!oggbs->onSeek(oggbs->pUserData, (int)offset, DRFLAC_SEEK_CUR)) { /* <-- Safe cast thanks to the loop above. */ + return DRFLAC_FALSE; + } + oggbs->currentBytePos += offset; + + return DRFLAC_TRUE; + } +} + +static drflac_bool32 drflac_oggbs__goto_next_page(drflac_oggbs* oggbs, drflac_ogg_crc_mismatch_recovery recoveryMethod) +{ + drflac_ogg_page_header header; + for (;;) { + drflac_uint32 crc32 = 0; + drflac_uint32 bytesRead; + drflac_uint32 pageBodySize; +#ifndef DR_FLAC_NO_CRC + drflac_uint32 actualCRC32; +#endif + + if (drflac_ogg__read_page_header(oggbs->onRead, oggbs->pUserData, &header, &bytesRead, &crc32) != DRFLAC_SUCCESS) { + return DRFLAC_FALSE; + } + oggbs->currentBytePos += bytesRead; + + pageBodySize = drflac_ogg__get_page_body_size(&header); + if (pageBodySize > DRFLAC_OGG_MAX_PAGE_SIZE) { + continue; /* Invalid page size. Assume it's corrupted and just move to the next page. */ + } + + if (header.serialNumber != oggbs->serialNumber) { + /* It's not a FLAC page. Skip it. */ + if (pageBodySize > 0 && !drflac_oggbs__seek_physical(oggbs, pageBodySize, DRFLAC_SEEK_CUR)) { + return DRFLAC_FALSE; + } + continue; + } + + + /* We need to read the entire page and then do a CRC check on it. If there's a CRC mismatch we need to skip this page. */ + if (drflac_oggbs__read_physical(oggbs, oggbs->pageData, pageBodySize) != pageBodySize) { + return DRFLAC_FALSE; + } + oggbs->pageDataSize = pageBodySize; + +#ifndef DR_FLAC_NO_CRC + actualCRC32 = drflac_crc32_buffer(crc32, oggbs->pageData, oggbs->pageDataSize); + if (actualCRC32 != header.checksum) { + if (recoveryMethod == drflac_ogg_recover_on_crc_mismatch) { + continue; /* CRC mismatch. Skip this page. */ + } else { + /* + Even though we are failing on a CRC mismatch, we still want our stream to be in a good state. Therefore we + go to the next valid page to ensure we're in a good state, but return false to let the caller know that the + seek did not fully complete. + */ + drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch); + return DRFLAC_FALSE; + } + } +#else + (void)recoveryMethod; /* <-- Silence a warning. */ +#endif + + oggbs->currentPageHeader = header; + oggbs->bytesRemainingInPage = pageBodySize; + return DRFLAC_TRUE; + } +} + +/* Function below is unused at the moment, but I might be re-adding it later. */ +#if 0 +static drflac_uint8 drflac_oggbs__get_current_segment_index(drflac_oggbs* oggbs, drflac_uint8* pBytesRemainingInSeg) +{ + drflac_uint32 bytesConsumedInPage = drflac_ogg__get_page_body_size(&oggbs->currentPageHeader) - oggbs->bytesRemainingInPage; + drflac_uint8 iSeg = 0; + drflac_uint32 iByte = 0; + while (iByte < bytesConsumedInPage) { + drflac_uint8 segmentSize = oggbs->currentPageHeader.segmentTable[iSeg]; + if (iByte + segmentSize > bytesConsumedInPage) { + break; + } else { + iSeg += 1; + iByte += segmentSize; + } + } + + *pBytesRemainingInSeg = oggbs->currentPageHeader.segmentTable[iSeg] - (drflac_uint8)(bytesConsumedInPage - iByte); + return iSeg; +} + +static drflac_bool32 drflac_oggbs__seek_to_next_packet(drflac_oggbs* oggbs) +{ + /* The current packet ends when we get to the segment with a lacing value of < 255 which is not at the end of a page. */ + for (;;) { + drflac_bool32 atEndOfPage = DRFLAC_FALSE; + + drflac_uint8 bytesRemainingInSeg; + drflac_uint8 iFirstSeg = drflac_oggbs__get_current_segment_index(oggbs, &bytesRemainingInSeg); + + drflac_uint32 bytesToEndOfPacketOrPage = bytesRemainingInSeg; + for (drflac_uint8 iSeg = iFirstSeg; iSeg < oggbs->currentPageHeader.segmentCount; ++iSeg) { + drflac_uint8 segmentSize = oggbs->currentPageHeader.segmentTable[iSeg]; + if (segmentSize < 255) { + if (iSeg == oggbs->currentPageHeader.segmentCount-1) { + atEndOfPage = DRFLAC_TRUE; + } + + break; + } + + bytesToEndOfPacketOrPage += segmentSize; + } + + /* + At this point we will have found either the packet or the end of the page. If were at the end of the page we'll + want to load the next page and keep searching for the end of the packet. + */ + drflac_oggbs__seek_physical(oggbs, bytesToEndOfPacketOrPage, DRFLAC_SEEK_CUR); + oggbs->bytesRemainingInPage -= bytesToEndOfPacketOrPage; + + if (atEndOfPage) { + /* + We're potentially at the next packet, but we need to check the next page first to be sure because the packet may + straddle pages. + */ + if (!drflac_oggbs__goto_next_page(oggbs)) { + return DRFLAC_FALSE; + } + + /* If it's a fresh packet it most likely means we're at the next packet. */ + if ((oggbs->currentPageHeader.headerType & 0x01) == 0) { + return DRFLAC_TRUE; + } + } else { + /* We're at the next packet. */ + return DRFLAC_TRUE; + } + } +} + +static drflac_bool32 drflac_oggbs__seek_to_next_frame(drflac_oggbs* oggbs) +{ + /* The bitstream should be sitting on the first byte just after the header of the frame. */ + + /* What we're actually doing here is seeking to the start of the next packet. */ + return drflac_oggbs__seek_to_next_packet(oggbs); +} +#endif + +static size_t drflac__on_read_ogg(void* pUserData, void* bufferOut, size_t bytesToRead) +{ + drflac_oggbs* oggbs = (drflac_oggbs*)pUserData; + drflac_uint8* pRunningBufferOut = (drflac_uint8*)bufferOut; + size_t bytesRead = 0; + + DRFLAC_ASSERT(oggbs != NULL); + DRFLAC_ASSERT(pRunningBufferOut != NULL); + + /* Reading is done page-by-page. If we've run out of bytes in the page we need to move to the next one. */ + while (bytesRead < bytesToRead) { + size_t bytesRemainingToRead = bytesToRead - bytesRead; + + if (oggbs->bytesRemainingInPage >= bytesRemainingToRead) { + DRFLAC_COPY_MEMORY(pRunningBufferOut, oggbs->pageData + (oggbs->pageDataSize - oggbs->bytesRemainingInPage), bytesRemainingToRead); + bytesRead += bytesRemainingToRead; + oggbs->bytesRemainingInPage -= (drflac_uint32)bytesRemainingToRead; + break; + } + + /* If we get here it means some of the requested data is contained in the next pages. */ + if (oggbs->bytesRemainingInPage > 0) { + DRFLAC_COPY_MEMORY(pRunningBufferOut, oggbs->pageData + (oggbs->pageDataSize - oggbs->bytesRemainingInPage), oggbs->bytesRemainingInPage); + bytesRead += oggbs->bytesRemainingInPage; + pRunningBufferOut += oggbs->bytesRemainingInPage; + oggbs->bytesRemainingInPage = 0; + } + + DRFLAC_ASSERT(bytesRemainingToRead > 0); + if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch)) { + break; /* Failed to go to the next page. Might have simply hit the end of the stream. */ + } + } + + return bytesRead; +} + +static drflac_bool32 drflac__on_seek_ogg(void* pUserData, int offset, drflac_seek_origin origin) +{ + drflac_oggbs* oggbs = (drflac_oggbs*)pUserData; + int bytesSeeked = 0; + + DRFLAC_ASSERT(oggbs != NULL); + DRFLAC_ASSERT(offset >= 0); /* <-- Never seek backwards. */ + + /* Seeking is always forward which makes things a lot simpler. */ + if (origin == DRFLAC_SEEK_SET) { + if (!drflac_oggbs__seek_physical(oggbs, (int)oggbs->firstBytePos, DRFLAC_SEEK_SET)) { + return DRFLAC_FALSE; + } + + if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_fail_on_crc_mismatch)) { + return DRFLAC_FALSE; + } + + return drflac__on_seek_ogg(pUserData, offset, DRFLAC_SEEK_CUR); + } else if (origin == DRFLAC_SEEK_CUR) { + while (bytesSeeked < offset) { + int bytesRemainingToSeek = offset - bytesSeeked; + DRFLAC_ASSERT(bytesRemainingToSeek >= 0); + + if (oggbs->bytesRemainingInPage >= (size_t)bytesRemainingToSeek) { + bytesSeeked += bytesRemainingToSeek; + (void)bytesSeeked; /* <-- Silence a dead store warning emitted by Clang Static Analyzer. */ + oggbs->bytesRemainingInPage -= bytesRemainingToSeek; + break; + } + + /* If we get here it means some of the requested data is contained in the next pages. */ + if (oggbs->bytesRemainingInPage > 0) { + bytesSeeked += (int)oggbs->bytesRemainingInPage; + oggbs->bytesRemainingInPage = 0; + } + + DRFLAC_ASSERT(bytesRemainingToSeek > 0); + if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_fail_on_crc_mismatch)) { + /* Failed to go to the next page. We either hit the end of the stream or had a CRC mismatch. */ + return DRFLAC_FALSE; + } + } + } else if (origin == DRFLAC_SEEK_END) { + /* Seeking to the end is not supported. */ + return DRFLAC_FALSE; + } + + return DRFLAC_TRUE; +} + +static drflac_bool32 drflac__on_tell_ogg(void* pUserData, drflac_int64* pCursor) +{ + /* + Not implemented for Ogg containers because we don't currently track the byte position of the logical bitstream. To support this, we'll need + to track the position in drflac__on_read_ogg and drflac__on_seek_ogg. + */ + (void)pUserData; + (void)pCursor; + return DRFLAC_FALSE; +} + + +static drflac_bool32 drflac_ogg__seek_to_pcm_frame(drflac* pFlac, drflac_uint64 pcmFrameIndex) +{ + drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs; + drflac_uint64 originalBytePos; + drflac_uint64 runningGranulePosition; + drflac_uint64 runningFrameBytePos; + drflac_uint64 runningPCMFrameCount; + + DRFLAC_ASSERT(oggbs != NULL); + + originalBytePos = oggbs->currentBytePos; /* For recovery. Points to the OggS identifier. */ + + /* First seek to the first frame. */ + if (!drflac__seek_to_byte(&pFlac->bs, pFlac->firstFLACFramePosInBytes)) { + return DRFLAC_FALSE; + } + oggbs->bytesRemainingInPage = 0; + + runningGranulePosition = 0; + for (;;) { + if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch)) { + drflac_oggbs__seek_physical(oggbs, originalBytePos, DRFLAC_SEEK_SET); + return DRFLAC_FALSE; /* Never did find that sample... */ + } + + runningFrameBytePos = oggbs->currentBytePos - drflac_ogg__get_page_header_size(&oggbs->currentPageHeader) - oggbs->pageDataSize; + if (oggbs->currentPageHeader.granulePosition >= pcmFrameIndex) { + break; /* The sample is somewhere in the previous page. */ + } + + /* + At this point we know the sample is not in the previous page. It could possibly be in this page. For simplicity we + disregard any pages that do not begin a fresh packet. + */ + if ((oggbs->currentPageHeader.headerType & 0x01) == 0) { /* <-- Is it a fresh page? */ + if (oggbs->currentPageHeader.segmentTable[0] >= 2) { + drflac_uint8 firstBytesInPage[2]; + firstBytesInPage[0] = oggbs->pageData[0]; + firstBytesInPage[1] = oggbs->pageData[1]; + + if ((firstBytesInPage[0] == 0xFF) && (firstBytesInPage[1] & 0xFC) == 0xF8) { /* <-- Does the page begin with a frame's sync code? */ + runningGranulePosition = oggbs->currentPageHeader.granulePosition; + } + + continue; + } + } + } + + /* + We found the page that that is closest to the sample, so now we need to find it. The first thing to do is seek to the + start of that page. In the loop above we checked that it was a fresh page which means this page is also the start of + a new frame. This property means that after we've seeked to the page we can immediately start looping over frames until + we find the one containing the target sample. + */ + if (!drflac_oggbs__seek_physical(oggbs, runningFrameBytePos, DRFLAC_SEEK_SET)) { + return DRFLAC_FALSE; + } + if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch)) { + return DRFLAC_FALSE; + } + + /* + At this point we'll be sitting on the first byte of the frame header of the first frame in the page. We just keep + looping over these frames until we find the one containing the sample we're after. + */ + runningPCMFrameCount = runningGranulePosition; + for (;;) { + /* + There are two ways to find the sample and seek past irrelevant frames: + 1) Use the native FLAC decoder. + 2) Use Ogg's framing system. + + Both of these options have their own pros and cons. Using the native FLAC decoder is slower because it needs to + do a full decode of the frame. Using Ogg's framing system is faster, but more complicated and involves some code + duplication for the decoding of frame headers. + + Another thing to consider is that using the Ogg framing system will perform direct seeking of the physical Ogg + bitstream. This is important to consider because it means we cannot read data from the drflac_bs object using the + standard drflac__*() APIs because that will read in extra data for its own internal caching which in turn breaks + the positioning of the read pointer of the physical Ogg bitstream. Therefore, anything that would normally be read + using the native FLAC decoding APIs, such as drflac__read_next_flac_frame_header(), need to be re-implemented so as to + avoid the use of the drflac_bs object. + + Considering these issues, I have decided to use the slower native FLAC decoding method for the following reasons: + 1) Seeking is already partially accelerated using Ogg's paging system in the code block above. + 2) Seeking in an Ogg encapsulated FLAC stream is probably quite uncommon. + 3) Simplicity. + */ + drflac_uint64 firstPCMFrameInFLACFrame = 0; + drflac_uint64 lastPCMFrameInFLACFrame = 0; + drflac_uint64 pcmFrameCountInThisFrame; + + if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { + return DRFLAC_FALSE; + } + + drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &firstPCMFrameInFLACFrame, &lastPCMFrameInFLACFrame); + + pcmFrameCountInThisFrame = (lastPCMFrameInFLACFrame - firstPCMFrameInFLACFrame) + 1; + + /* If we are seeking to the end of the file and we've just hit it, we're done. */ + if (pcmFrameIndex == pFlac->totalPCMFrameCount && (runningPCMFrameCount + pcmFrameCountInThisFrame) == pFlac->totalPCMFrameCount) { + drflac_result result = drflac__decode_flac_frame(pFlac); + if (result == DRFLAC_SUCCESS) { + pFlac->currentPCMFrame = pcmFrameIndex; + pFlac->currentFLACFrame.pcmFramesRemaining = 0; + return DRFLAC_TRUE; + } else { + return DRFLAC_FALSE; + } + } + + if (pcmFrameIndex < (runningPCMFrameCount + pcmFrameCountInThisFrame)) { + /* + The sample should be in this FLAC frame. We need to fully decode it, however if it's an invalid frame (a CRC mismatch), we need to pretend + it never existed and keep iterating. + */ + drflac_result result = drflac__decode_flac_frame(pFlac); + if (result == DRFLAC_SUCCESS) { + /* The frame is valid. We just need to skip over some samples to ensure it's sample-exact. */ + drflac_uint64 pcmFramesToDecode = (size_t)(pcmFrameIndex - runningPCMFrameCount); /* <-- Safe cast because the maximum number of samples in a frame is 65535. */ + if (pcmFramesToDecode == 0) { + return DRFLAC_TRUE; + } + + pFlac->currentPCMFrame = runningPCMFrameCount; + + return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode; /* <-- If this fails, something bad has happened (it should never fail). */ + } else { + if (result == DRFLAC_CRC_MISMATCH) { + continue; /* CRC mismatch. Pretend this frame never existed. */ + } else { + return DRFLAC_FALSE; + } + } + } else { + /* + It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this + frame never existed and leave the running sample count untouched. + */ + drflac_result result = drflac__seek_to_next_flac_frame(pFlac); + if (result == DRFLAC_SUCCESS) { + runningPCMFrameCount += pcmFrameCountInThisFrame; + } else { + if (result == DRFLAC_CRC_MISMATCH) { + continue; /* CRC mismatch. Pretend this frame never existed. */ + } else { + return DRFLAC_FALSE; + } + } + } + } +} + + + +static drflac_bool32 drflac__init_private__ogg(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_bool32 relaxed) +{ + drflac_ogg_page_header header; + drflac_uint32 crc32 = DRFLAC_OGG_CAPTURE_PATTERN_CRC32; + drflac_uint32 bytesRead = 0; + + /* Pre Condition: The bit stream should be sitting just past the 4-byte OggS capture pattern. */ + (void)relaxed; + + pInit->container = drflac_container_ogg; + pInit->oggFirstBytePos = 0; + + /* + We'll get here if the first 4 bytes of the stream were the OggS capture pattern, however it doesn't necessarily mean the + stream includes FLAC encoded audio. To check for this we need to scan the beginning-of-stream page markers and check if + any match the FLAC specification. Important to keep in mind that the stream may be multiplexed. + */ + if (drflac_ogg__read_page_header_after_capture_pattern(onRead, pUserData, &header, &bytesRead, &crc32) != DRFLAC_SUCCESS) { + return DRFLAC_FALSE; + } + pInit->runningFilePos += bytesRead; + + for (;;) { + int pageBodySize; + + /* Break if we're past the beginning of stream page. */ + if ((header.headerType & 0x02) == 0) { + return DRFLAC_FALSE; + } + + /* Check if it's a FLAC header. */ + pageBodySize = drflac_ogg__get_page_body_size(&header); + if (pageBodySize == 51) { /* 51 = the lacing value of the FLAC header packet. */ + /* It could be a FLAC page... */ + drflac_uint32 bytesRemainingInPage = pageBodySize; + drflac_uint8 packetType; + + if (onRead(pUserData, &packetType, 1) != 1) { + return DRFLAC_FALSE; + } + + bytesRemainingInPage -= 1; + if (packetType == 0x7F) { + /* Increasingly more likely to be a FLAC page... */ + drflac_uint8 sig[4]; + if (onRead(pUserData, sig, 4) != 4) { + return DRFLAC_FALSE; + } + + bytesRemainingInPage -= 4; + if (sig[0] == 'F' && sig[1] == 'L' && sig[2] == 'A' && sig[3] == 'C') { + /* Almost certainly a FLAC page... */ + drflac_uint8 mappingVersion[2]; + if (onRead(pUserData, mappingVersion, 2) != 2) { + return DRFLAC_FALSE; + } + + if (mappingVersion[0] != 1) { + return DRFLAC_FALSE; /* Only supporting version 1.x of the Ogg mapping. */ + } + + /* + The next 2 bytes are the non-audio packets, not including this one. We don't care about this because we're going to + be handling it in a generic way based on the serial number and packet types. + */ + if (!onSeek(pUserData, 2, DRFLAC_SEEK_CUR)) { + return DRFLAC_FALSE; + } + + /* Expecting the native FLAC signature "fLaC". */ + if (onRead(pUserData, sig, 4) != 4) { + return DRFLAC_FALSE; + } + + if (sig[0] == 'f' && sig[1] == 'L' && sig[2] == 'a' && sig[3] == 'C') { + /* The remaining data in the page should be the STREAMINFO block. */ + drflac_streaminfo streaminfo; + drflac_uint8 isLastBlock; + drflac_uint8 blockType; + drflac_uint32 blockSize; + if (!drflac__read_and_decode_block_header(onRead, pUserData, &isLastBlock, &blockType, &blockSize)) { + return DRFLAC_FALSE; + } + + if (blockType != DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO || blockSize != 34) { + return DRFLAC_FALSE; /* Invalid block type. First block must be the STREAMINFO block. */ + } + + if (drflac__read_streaminfo(onRead, pUserData, &streaminfo)) { + /* Success! */ + pInit->hasStreamInfoBlock = DRFLAC_TRUE; + pInit->sampleRate = streaminfo.sampleRate; + pInit->channels = streaminfo.channels; + pInit->bitsPerSample = streaminfo.bitsPerSample; + pInit->totalPCMFrameCount = streaminfo.totalPCMFrameCount; + pInit->maxBlockSizeInPCMFrames = streaminfo.maxBlockSizeInPCMFrames; + pInit->hasMetadataBlocks = !isLastBlock; + + if (onMeta) { + drflac_metadata metadata; + metadata.type = DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO; + metadata.pRawData = NULL; + metadata.rawDataSize = 0; + metadata.data.streaminfo = streaminfo; + onMeta(pUserDataMD, &metadata); + } + + pInit->runningFilePos += pageBodySize; + pInit->oggFirstBytePos = pInit->runningFilePos - 79; /* Subtracting 79 will place us right on top of the "OggS" identifier of the FLAC bos page. */ + pInit->oggSerial = header.serialNumber; + pInit->oggBosHeader = header; + break; + } else { + /* Failed to read STREAMINFO block. Aww, so close... */ + return DRFLAC_FALSE; + } + } else { + /* Invalid file. */ + return DRFLAC_FALSE; + } + } else { + /* Not a FLAC header. Skip it. */ + if (!onSeek(pUserData, bytesRemainingInPage, DRFLAC_SEEK_CUR)) { + return DRFLAC_FALSE; + } + } + } else { + /* Not a FLAC header. Seek past the entire page and move on to the next. */ + if (!onSeek(pUserData, bytesRemainingInPage, DRFLAC_SEEK_CUR)) { + return DRFLAC_FALSE; + } + } + } else { + if (!onSeek(pUserData, pageBodySize, DRFLAC_SEEK_CUR)) { + return DRFLAC_FALSE; + } + } + + pInit->runningFilePos += pageBodySize; + + + /* Read the header of the next page. */ + if (drflac_ogg__read_page_header(onRead, pUserData, &header, &bytesRead, &crc32) != DRFLAC_SUCCESS) { + return DRFLAC_FALSE; + } + pInit->runningFilePos += bytesRead; + } + + /* + If we get here it means we found a FLAC audio stream. We should be sitting on the first byte of the header of the next page. The next + packets in the FLAC logical stream contain the metadata. The only thing left to do in the initialization phase for Ogg is to create the + Ogg bistream object. + */ + pInit->hasMetadataBlocks = DRFLAC_TRUE; /* <-- Always have at least VORBIS_COMMENT metadata block. */ + return DRFLAC_TRUE; +} +#endif + +static drflac_bool32 drflac__init_private(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_tell_proc onTell, drflac_meta_proc onMeta, drflac_container container, void* pUserData, void* pUserDataMD) +{ + drflac_bool32 relaxed; + drflac_uint8 id[4]; + + if (pInit == NULL || onRead == NULL || onSeek == NULL) { /* <-- onTell is optional. */ + return DRFLAC_FALSE; + } + + DRFLAC_ZERO_MEMORY(pInit, sizeof(*pInit)); + pInit->onRead = onRead; + pInit->onSeek = onSeek; + pInit->onTell = onTell; + pInit->onMeta = onMeta; + pInit->container = container; + pInit->pUserData = pUserData; + pInit->pUserDataMD = pUserDataMD; + + pInit->bs.onRead = onRead; + pInit->bs.onSeek = onSeek; + pInit->bs.onTell = onTell; + pInit->bs.pUserData = pUserData; + drflac__reset_cache(&pInit->bs); + + + /* If the container is explicitly defined then we can try opening in relaxed mode. */ + relaxed = container != drflac_container_unknown; + + /* Skip over any ID3 tags. */ + for (;;) { + if (onRead(pUserData, id, 4) != 4) { + return DRFLAC_FALSE; /* Ran out of data. */ + } + pInit->runningFilePos += 4; + + if (id[0] == 'I' && id[1] == 'D' && id[2] == '3') { + drflac_uint8 header[6]; + drflac_uint8 flags; + drflac_uint32 headerSize; + + if (onRead(pUserData, header, 6) != 6) { + return DRFLAC_FALSE; /* Ran out of data. */ + } + pInit->runningFilePos += 6; + + flags = header[1]; + + DRFLAC_COPY_MEMORY(&headerSize, header+2, 4); + headerSize = drflac__unsynchsafe_32(drflac__be2host_32(headerSize)); + if (flags & 0x10) { + headerSize += 10; + } + + if (!onSeek(pUserData, headerSize, DRFLAC_SEEK_CUR)) { + return DRFLAC_FALSE; /* Failed to seek past the tag. */ + } + pInit->runningFilePos += headerSize; + } else { + break; + } + } + + if (id[0] == 'f' && id[1] == 'L' && id[2] == 'a' && id[3] == 'C') { + return drflac__init_private__native(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed); + } +#ifndef DR_FLAC_NO_OGG + if (id[0] == 'O' && id[1] == 'g' && id[2] == 'g' && id[3] == 'S') { + return drflac__init_private__ogg(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed); + } +#endif + + /* If we get here it means we likely don't have a header. Try opening in relaxed mode, if applicable. */ + if (relaxed) { + if (container == drflac_container_native) { + return drflac__init_private__native(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed); + } +#ifndef DR_FLAC_NO_OGG + if (container == drflac_container_ogg) { + return drflac__init_private__ogg(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed); + } +#endif + } + + /* Unsupported container. */ + return DRFLAC_FALSE; +} + +static void drflac__init_from_info(drflac* pFlac, const drflac_init_info* pInit) +{ + DRFLAC_ASSERT(pFlac != NULL); + DRFLAC_ASSERT(pInit != NULL); + + DRFLAC_ZERO_MEMORY(pFlac, sizeof(*pFlac)); + pFlac->bs = pInit->bs; + pFlac->onMeta = pInit->onMeta; + pFlac->pUserDataMD = pInit->pUserDataMD; + pFlac->maxBlockSizeInPCMFrames = pInit->maxBlockSizeInPCMFrames; + pFlac->sampleRate = pInit->sampleRate; + pFlac->channels = (drflac_uint8)pInit->channels; + pFlac->bitsPerSample = (drflac_uint8)pInit->bitsPerSample; + pFlac->totalPCMFrameCount = pInit->totalPCMFrameCount; + pFlac->container = pInit->container; +} + + +static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_tell_proc onTell, drflac_meta_proc onMeta, drflac_container container, void* pUserData, void* pUserDataMD, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + drflac_init_info init; + drflac_uint32 allocationSize; + drflac_uint32 wholeSIMDVectorCountPerChannel; + drflac_uint32 decodedSamplesAllocationSize; +#ifndef DR_FLAC_NO_OGG + drflac_oggbs* pOggbs = NULL; +#endif + drflac_uint64 firstFramePos; + drflac_uint64 seektablePos; + drflac_uint32 seekpointCount; + drflac_allocation_callbacks allocationCallbacks; + drflac* pFlac; + + /* CPU support first. */ + drflac__init_cpu_caps(); + + if (!drflac__init_private(&init, onRead, onSeek, onTell, onMeta, container, pUserData, pUserDataMD)) { + return NULL; + } + + if (pAllocationCallbacks != NULL) { + allocationCallbacks = *pAllocationCallbacks; + if (allocationCallbacks.onFree == NULL || (allocationCallbacks.onMalloc == NULL && allocationCallbacks.onRealloc == NULL)) { + return NULL; /* Invalid allocation callbacks. */ + } + } else { + allocationCallbacks.pUserData = NULL; + allocationCallbacks.onMalloc = drflac__malloc_default; + allocationCallbacks.onRealloc = drflac__realloc_default; + allocationCallbacks.onFree = drflac__free_default; + } + + + /* + The size of the allocation for the drflac object needs to be large enough to fit the following: + 1) The main members of the drflac structure + 2) A block of memory large enough to store the decoded samples of the largest frame in the stream + 3) If the container is Ogg, a drflac_oggbs object + + The complicated part of the allocation is making sure there's enough room the decoded samples, taking into consideration + the different SIMD instruction sets. + */ + allocationSize = sizeof(drflac); + + /* + The allocation size for decoded frames depends on the number of 32-bit integers that fit inside the largest SIMD vector + we are supporting. + */ + if ((init.maxBlockSizeInPCMFrames % (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))) == 0) { + wholeSIMDVectorCountPerChannel = (init.maxBlockSizeInPCMFrames / (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))); + } else { + wholeSIMDVectorCountPerChannel = (init.maxBlockSizeInPCMFrames / (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))) + 1; + } + + decodedSamplesAllocationSize = wholeSIMDVectorCountPerChannel * DRFLAC_MAX_SIMD_VECTOR_SIZE * init.channels; + + allocationSize += decodedSamplesAllocationSize; + allocationSize += DRFLAC_MAX_SIMD_VECTOR_SIZE; /* Allocate extra bytes to ensure we have enough for alignment. */ + +#ifndef DR_FLAC_NO_OGG + /* There's additional data required for Ogg streams. */ + if (init.container == drflac_container_ogg) { + allocationSize += sizeof(drflac_oggbs); + + pOggbs = (drflac_oggbs*)drflac__malloc_from_callbacks(sizeof(*pOggbs), &allocationCallbacks); + if (pOggbs == NULL) { + return NULL; /*DRFLAC_OUT_OF_MEMORY;*/ + } + + DRFLAC_ZERO_MEMORY(pOggbs, sizeof(*pOggbs)); + pOggbs->onRead = onRead; + pOggbs->onSeek = onSeek; + pOggbs->onTell = onTell; + pOggbs->pUserData = pUserData; + pOggbs->currentBytePos = init.oggFirstBytePos; + pOggbs->firstBytePos = init.oggFirstBytePos; + pOggbs->serialNumber = init.oggSerial; + pOggbs->bosPageHeader = init.oggBosHeader; + pOggbs->bytesRemainingInPage = 0; + } +#endif + + /* + This part is a bit awkward. We need to load the seektable so that it can be referenced in-memory, but I want the drflac object to + consist of only a single heap allocation. To this, the size of the seek table needs to be known, which we determine when reading + and decoding the metadata. + */ + firstFramePos = 42; /* <-- We know we are at byte 42 at this point. */ + seektablePos = 0; + seekpointCount = 0; + if (init.hasMetadataBlocks) { + drflac_read_proc onReadOverride = onRead; + drflac_seek_proc onSeekOverride = onSeek; + drflac_tell_proc onTellOverride = onTell; + void* pUserDataOverride = pUserData; + +#ifndef DR_FLAC_NO_OGG + if (init.container == drflac_container_ogg) { + onReadOverride = drflac__on_read_ogg; + onSeekOverride = drflac__on_seek_ogg; + onTellOverride = drflac__on_tell_ogg; + pUserDataOverride = (void*)pOggbs; + } +#endif + + if (!drflac__read_and_decode_metadata(onReadOverride, onSeekOverride, onTellOverride, onMeta, pUserDataOverride, pUserDataMD, &firstFramePos, &seektablePos, &seekpointCount, &allocationCallbacks)) { + #ifndef DR_FLAC_NO_OGG + drflac__free_from_callbacks(pOggbs, &allocationCallbacks); + #endif + return NULL; + } + + allocationSize += seekpointCount * sizeof(drflac_seekpoint); + } + + + pFlac = (drflac*)drflac__malloc_from_callbacks(allocationSize, &allocationCallbacks); + if (pFlac == NULL) { + #ifndef DR_FLAC_NO_OGG + drflac__free_from_callbacks(pOggbs, &allocationCallbacks); + #endif + return NULL; + } + + drflac__init_from_info(pFlac, &init); + pFlac->allocationCallbacks = allocationCallbacks; + pFlac->pDecodedSamples = (drflac_int32*)drflac_align((size_t)pFlac->pExtraData, DRFLAC_MAX_SIMD_VECTOR_SIZE); + +#ifndef DR_FLAC_NO_OGG + if (init.container == drflac_container_ogg) { + drflac_oggbs* pInternalOggbs = (drflac_oggbs*)((drflac_uint8*)pFlac->pDecodedSamples + decodedSamplesAllocationSize + (seekpointCount * sizeof(drflac_seekpoint))); + DRFLAC_COPY_MEMORY(pInternalOggbs, pOggbs, sizeof(*pOggbs)); + + /* At this point the pOggbs object has been handed over to pInternalOggbs and can be freed. */ + drflac__free_from_callbacks(pOggbs, &allocationCallbacks); + pOggbs = NULL; + + /* The Ogg bistream needs to be layered on top of the original bitstream. */ + pFlac->bs.onRead = drflac__on_read_ogg; + pFlac->bs.onSeek = drflac__on_seek_ogg; + pFlac->bs.onTell = drflac__on_tell_ogg; + pFlac->bs.pUserData = (void*)pInternalOggbs; + pFlac->_oggbs = (void*)pInternalOggbs; + } +#endif + + pFlac->firstFLACFramePosInBytes = firstFramePos; + + /* NOTE: Seektables are not currently compatible with Ogg encapsulation (Ogg has its own accelerated seeking system). I may change this later, so I'm leaving this here for now. */ +#ifndef DR_FLAC_NO_OGG + if (init.container == drflac_container_ogg) + { + pFlac->pSeekpoints = NULL; + pFlac->seekpointCount = 0; + } + else +#endif + { + /* If we have a seektable we need to load it now, making sure we move back to where we were previously. */ + if (seektablePos != 0) { + pFlac->seekpointCount = seekpointCount; + pFlac->pSeekpoints = (drflac_seekpoint*)((drflac_uint8*)pFlac->pDecodedSamples + decodedSamplesAllocationSize); + + DRFLAC_ASSERT(pFlac->bs.onSeek != NULL); + DRFLAC_ASSERT(pFlac->bs.onRead != NULL); + + /* Seek to the seektable, then just read directly into our seektable buffer. */ + if (pFlac->bs.onSeek(pFlac->bs.pUserData, (int)seektablePos, DRFLAC_SEEK_SET)) { + drflac_uint32 iSeekpoint; + + for (iSeekpoint = 0; iSeekpoint < seekpointCount; iSeekpoint += 1) { + if (pFlac->bs.onRead(pFlac->bs.pUserData, pFlac->pSeekpoints + iSeekpoint, DRFLAC_SEEKPOINT_SIZE_IN_BYTES) == DRFLAC_SEEKPOINT_SIZE_IN_BYTES) { + /* Endian swap. */ + pFlac->pSeekpoints[iSeekpoint].firstPCMFrame = drflac__be2host_64(pFlac->pSeekpoints[iSeekpoint].firstPCMFrame); + pFlac->pSeekpoints[iSeekpoint].flacFrameOffset = drflac__be2host_64(pFlac->pSeekpoints[iSeekpoint].flacFrameOffset); + pFlac->pSeekpoints[iSeekpoint].pcmFrameCount = drflac__be2host_16(pFlac->pSeekpoints[iSeekpoint].pcmFrameCount); + } else { + /* Failed to read the seektable. Pretend we don't have one. */ + pFlac->pSeekpoints = NULL; + pFlac->seekpointCount = 0; + break; + } + } + + /* We need to seek back to where we were. If this fails it's a critical error. */ + if (!pFlac->bs.onSeek(pFlac->bs.pUserData, (int)pFlac->firstFLACFramePosInBytes, DRFLAC_SEEK_SET)) { + drflac__free_from_callbacks(pFlac, &allocationCallbacks); + return NULL; + } + } else { + /* Failed to seek to the seektable. Ominous sign, but for now we can just pretend we don't have one. */ + pFlac->pSeekpoints = NULL; + pFlac->seekpointCount = 0; + } + } + } + + + /* + If we get here, but don't have a STREAMINFO block, it means we've opened the stream in relaxed mode and need to decode + the first frame. + */ + if (!init.hasStreamInfoBlock) { + pFlac->currentFLACFrame.header = init.firstFrameHeader; + for (;;) { + drflac_result result = drflac__decode_flac_frame(pFlac); + if (result == DRFLAC_SUCCESS) { + break; + } else { + if (result == DRFLAC_CRC_MISMATCH) { + if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { + drflac__free_from_callbacks(pFlac, &allocationCallbacks); + return NULL; + } + continue; + } else { + drflac__free_from_callbacks(pFlac, &allocationCallbacks); + return NULL; + } + } + } + } + + return pFlac; +} + + + +#ifndef DR_FLAC_NO_STDIO +#include +#ifndef DR_FLAC_NO_WCHAR +#include /* For wcslen(), wcsrtombs() */ +#endif + +/* Errno */ +/* drflac_result_from_errno() is only used for fopen() and wfopen() so putting it inside DR_WAV_NO_STDIO for now. If something else needs this later we can move it out. */ +#include +static drflac_result drflac_result_from_errno(int e) +{ + switch (e) + { + case 0: return DRFLAC_SUCCESS; + #ifdef EPERM + case EPERM: return DRFLAC_INVALID_OPERATION; + #endif + #ifdef ENOENT + case ENOENT: return DRFLAC_DOES_NOT_EXIST; + #endif + #ifdef ESRCH + case ESRCH: return DRFLAC_DOES_NOT_EXIST; + #endif + #ifdef EINTR + case EINTR: return DRFLAC_INTERRUPT; + #endif + #ifdef EIO + case EIO: return DRFLAC_IO_ERROR; + #endif + #ifdef ENXIO + case ENXIO: return DRFLAC_DOES_NOT_EXIST; + #endif + #ifdef E2BIG + case E2BIG: return DRFLAC_INVALID_ARGS; + #endif + #ifdef ENOEXEC + case ENOEXEC: return DRFLAC_INVALID_FILE; + #endif + #ifdef EBADF + case EBADF: return DRFLAC_INVALID_FILE; + #endif + #ifdef ECHILD + case ECHILD: return DRFLAC_ERROR; + #endif + #ifdef EAGAIN + case EAGAIN: return DRFLAC_UNAVAILABLE; + #endif + #ifdef ENOMEM + case ENOMEM: return DRFLAC_OUT_OF_MEMORY; + #endif + #ifdef EACCES + case EACCES: return DRFLAC_ACCESS_DENIED; + #endif + #ifdef EFAULT + case EFAULT: return DRFLAC_BAD_ADDRESS; + #endif + #ifdef ENOTBLK + case ENOTBLK: return DRFLAC_ERROR; + #endif + #ifdef EBUSY + case EBUSY: return DRFLAC_BUSY; + #endif + #ifdef EEXIST + case EEXIST: return DRFLAC_ALREADY_EXISTS; + #endif + #ifdef EXDEV + case EXDEV: return DRFLAC_ERROR; + #endif + #ifdef ENODEV + case ENODEV: return DRFLAC_DOES_NOT_EXIST; + #endif + #ifdef ENOTDIR + case ENOTDIR: return DRFLAC_NOT_DIRECTORY; + #endif + #ifdef EISDIR + case EISDIR: return DRFLAC_IS_DIRECTORY; + #endif + #ifdef EINVAL + case EINVAL: return DRFLAC_INVALID_ARGS; + #endif + #ifdef ENFILE + case ENFILE: return DRFLAC_TOO_MANY_OPEN_FILES; + #endif + #ifdef EMFILE + case EMFILE: return DRFLAC_TOO_MANY_OPEN_FILES; + #endif + #ifdef ENOTTY + case ENOTTY: return DRFLAC_INVALID_OPERATION; + #endif + #ifdef ETXTBSY + case ETXTBSY: return DRFLAC_BUSY; + #endif + #ifdef EFBIG + case EFBIG: return DRFLAC_TOO_BIG; + #endif + #ifdef ENOSPC + case ENOSPC: return DRFLAC_NO_SPACE; + #endif + #ifdef ESPIPE + case ESPIPE: return DRFLAC_BAD_SEEK; + #endif + #ifdef EROFS + case EROFS: return DRFLAC_ACCESS_DENIED; + #endif + #ifdef EMLINK + case EMLINK: return DRFLAC_TOO_MANY_LINKS; + #endif + #ifdef EPIPE + case EPIPE: return DRFLAC_BAD_PIPE; + #endif + #ifdef EDOM + case EDOM: return DRFLAC_OUT_OF_RANGE; + #endif + #ifdef ERANGE + case ERANGE: return DRFLAC_OUT_OF_RANGE; + #endif + #ifdef EDEADLK + case EDEADLK: return DRFLAC_DEADLOCK; + #endif + #ifdef ENAMETOOLONG + case ENAMETOOLONG: return DRFLAC_PATH_TOO_LONG; + #endif + #ifdef ENOLCK + case ENOLCK: return DRFLAC_ERROR; + #endif + #ifdef ENOSYS + case ENOSYS: return DRFLAC_NOT_IMPLEMENTED; + #endif + #if defined(ENOTEMPTY) && ENOTEMPTY != EEXIST /* In AIX, ENOTEMPTY and EEXIST use the same value. */ + case ENOTEMPTY: return DRFLAC_DIRECTORY_NOT_EMPTY; + #endif + #ifdef ELOOP + case ELOOP: return DRFLAC_TOO_MANY_LINKS; + #endif + #ifdef ENOMSG + case ENOMSG: return DRFLAC_NO_MESSAGE; + #endif + #ifdef EIDRM + case EIDRM: return DRFLAC_ERROR; + #endif + #ifdef ECHRNG + case ECHRNG: return DRFLAC_ERROR; + #endif + #ifdef EL2NSYNC + case EL2NSYNC: return DRFLAC_ERROR; + #endif + #ifdef EL3HLT + case EL3HLT: return DRFLAC_ERROR; + #endif + #ifdef EL3RST + case EL3RST: return DRFLAC_ERROR; + #endif + #ifdef ELNRNG + case ELNRNG: return DRFLAC_OUT_OF_RANGE; + #endif + #ifdef EUNATCH + case EUNATCH: return DRFLAC_ERROR; + #endif + #ifdef ENOCSI + case ENOCSI: return DRFLAC_ERROR; + #endif + #ifdef EL2HLT + case EL2HLT: return DRFLAC_ERROR; + #endif + #ifdef EBADE + case EBADE: return DRFLAC_ERROR; + #endif + #ifdef EBADR + case EBADR: return DRFLAC_ERROR; + #endif + #ifdef EXFULL + case EXFULL: return DRFLAC_ERROR; + #endif + #ifdef ENOANO + case ENOANO: return DRFLAC_ERROR; + #endif + #ifdef EBADRQC + case EBADRQC: return DRFLAC_ERROR; + #endif + #ifdef EBADSLT + case EBADSLT: return DRFLAC_ERROR; + #endif + #ifdef EBFONT + case EBFONT: return DRFLAC_INVALID_FILE; + #endif + #ifdef ENOSTR + case ENOSTR: return DRFLAC_ERROR; + #endif + #ifdef ENODATA + case ENODATA: return DRFLAC_NO_DATA_AVAILABLE; + #endif + #ifdef ETIME + case ETIME: return DRFLAC_TIMEOUT; + #endif + #ifdef ENOSR + case ENOSR: return DRFLAC_NO_DATA_AVAILABLE; + #endif + #ifdef ENONET + case ENONET: return DRFLAC_NO_NETWORK; + #endif + #ifdef ENOPKG + case ENOPKG: return DRFLAC_ERROR; + #endif + #ifdef EREMOTE + case EREMOTE: return DRFLAC_ERROR; + #endif + #ifdef ENOLINK + case ENOLINK: return DRFLAC_ERROR; + #endif + #ifdef EADV + case EADV: return DRFLAC_ERROR; + #endif + #ifdef ESRMNT + case ESRMNT: return DRFLAC_ERROR; + #endif + #ifdef ECOMM + case ECOMM: return DRFLAC_ERROR; + #endif + #ifdef EPROTO + case EPROTO: return DRFLAC_ERROR; + #endif + #ifdef EMULTIHOP + case EMULTIHOP: return DRFLAC_ERROR; + #endif + #ifdef EDOTDOT + case EDOTDOT: return DRFLAC_ERROR; + #endif + #ifdef EBADMSG + case EBADMSG: return DRFLAC_BAD_MESSAGE; + #endif + #ifdef EOVERFLOW + case EOVERFLOW: return DRFLAC_TOO_BIG; + #endif + #ifdef ENOTUNIQ + case ENOTUNIQ: return DRFLAC_NOT_UNIQUE; + #endif + #ifdef EBADFD + case EBADFD: return DRFLAC_ERROR; + #endif + #ifdef EREMCHG + case EREMCHG: return DRFLAC_ERROR; + #endif + #ifdef ELIBACC + case ELIBACC: return DRFLAC_ACCESS_DENIED; + #endif + #ifdef ELIBBAD + case ELIBBAD: return DRFLAC_INVALID_FILE; + #endif + #ifdef ELIBSCN + case ELIBSCN: return DRFLAC_INVALID_FILE; + #endif + #ifdef ELIBMAX + case ELIBMAX: return DRFLAC_ERROR; + #endif + #ifdef ELIBEXEC + case ELIBEXEC: return DRFLAC_ERROR; + #endif + #ifdef EILSEQ + case EILSEQ: return DRFLAC_INVALID_DATA; + #endif + #ifdef ERESTART + case ERESTART: return DRFLAC_ERROR; + #endif + #ifdef ESTRPIPE + case ESTRPIPE: return DRFLAC_ERROR; + #endif + #ifdef EUSERS + case EUSERS: return DRFLAC_ERROR; + #endif + #ifdef ENOTSOCK + case ENOTSOCK: return DRFLAC_NOT_SOCKET; + #endif + #ifdef EDESTADDRREQ + case EDESTADDRREQ: return DRFLAC_NO_ADDRESS; + #endif + #ifdef EMSGSIZE + case EMSGSIZE: return DRFLAC_TOO_BIG; + #endif + #ifdef EPROTOTYPE + case EPROTOTYPE: return DRFLAC_BAD_PROTOCOL; + #endif + #ifdef ENOPROTOOPT + case ENOPROTOOPT: return DRFLAC_PROTOCOL_UNAVAILABLE; + #endif + #ifdef EPROTONOSUPPORT + case EPROTONOSUPPORT: return DRFLAC_PROTOCOL_NOT_SUPPORTED; + #endif + #ifdef ESOCKTNOSUPPORT + case ESOCKTNOSUPPORT: return DRFLAC_SOCKET_NOT_SUPPORTED; + #endif + #ifdef EOPNOTSUPP + case EOPNOTSUPP: return DRFLAC_INVALID_OPERATION; + #endif + #ifdef EPFNOSUPPORT + case EPFNOSUPPORT: return DRFLAC_PROTOCOL_FAMILY_NOT_SUPPORTED; + #endif + #ifdef EAFNOSUPPORT + case EAFNOSUPPORT: return DRFLAC_ADDRESS_FAMILY_NOT_SUPPORTED; + #endif + #ifdef EADDRINUSE + case EADDRINUSE: return DRFLAC_ALREADY_IN_USE; + #endif + #ifdef EADDRNOTAVAIL + case EADDRNOTAVAIL: return DRFLAC_ERROR; + #endif + #ifdef ENETDOWN + case ENETDOWN: return DRFLAC_NO_NETWORK; + #endif + #ifdef ENETUNREACH + case ENETUNREACH: return DRFLAC_NO_NETWORK; + #endif + #ifdef ENETRESET + case ENETRESET: return DRFLAC_NO_NETWORK; + #endif + #ifdef ECONNABORTED + case ECONNABORTED: return DRFLAC_NO_NETWORK; + #endif + #ifdef ECONNRESET + case ECONNRESET: return DRFLAC_CONNECTION_RESET; + #endif + #ifdef ENOBUFS + case ENOBUFS: return DRFLAC_NO_SPACE; + #endif + #ifdef EISCONN + case EISCONN: return DRFLAC_ALREADY_CONNECTED; + #endif + #ifdef ENOTCONN + case ENOTCONN: return DRFLAC_NOT_CONNECTED; + #endif + #ifdef ESHUTDOWN + case ESHUTDOWN: return DRFLAC_ERROR; + #endif + #ifdef ETOOMANYREFS + case ETOOMANYREFS: return DRFLAC_ERROR; + #endif + #ifdef ETIMEDOUT + case ETIMEDOUT: return DRFLAC_TIMEOUT; + #endif + #ifdef ECONNREFUSED + case ECONNREFUSED: return DRFLAC_CONNECTION_REFUSED; + #endif + #ifdef EHOSTDOWN + case EHOSTDOWN: return DRFLAC_NO_HOST; + #endif + #ifdef EHOSTUNREACH + case EHOSTUNREACH: return DRFLAC_NO_HOST; + #endif + #ifdef EALREADY + case EALREADY: return DRFLAC_IN_PROGRESS; + #endif + #ifdef EINPROGRESS + case EINPROGRESS: return DRFLAC_IN_PROGRESS; + #endif + #ifdef ESTALE + case ESTALE: return DRFLAC_INVALID_FILE; + #endif + #ifdef EUCLEAN + case EUCLEAN: return DRFLAC_ERROR; + #endif + #ifdef ENOTNAM + case ENOTNAM: return DRFLAC_ERROR; + #endif + #ifdef ENAVAIL + case ENAVAIL: return DRFLAC_ERROR; + #endif + #ifdef EISNAM + case EISNAM: return DRFLAC_ERROR; + #endif + #ifdef EREMOTEIO + case EREMOTEIO: return DRFLAC_IO_ERROR; + #endif + #ifdef EDQUOT + case EDQUOT: return DRFLAC_NO_SPACE; + #endif + #ifdef ENOMEDIUM + case ENOMEDIUM: return DRFLAC_DOES_NOT_EXIST; + #endif + #ifdef EMEDIUMTYPE + case EMEDIUMTYPE: return DRFLAC_ERROR; + #endif + #ifdef ECANCELED + case ECANCELED: return DRFLAC_CANCELLED; + #endif + #ifdef ENOKEY + case ENOKEY: return DRFLAC_ERROR; + #endif + #ifdef EKEYEXPIRED + case EKEYEXPIRED: return DRFLAC_ERROR; + #endif + #ifdef EKEYREVOKED + case EKEYREVOKED: return DRFLAC_ERROR; + #endif + #ifdef EKEYREJECTED + case EKEYREJECTED: return DRFLAC_ERROR; + #endif + #ifdef EOWNERDEAD + case EOWNERDEAD: return DRFLAC_ERROR; + #endif + #ifdef ENOTRECOVERABLE + case ENOTRECOVERABLE: return DRFLAC_ERROR; + #endif + #ifdef ERFKILL + case ERFKILL: return DRFLAC_ERROR; + #endif + #ifdef EHWPOISON + case EHWPOISON: return DRFLAC_ERROR; + #endif + default: return DRFLAC_ERROR; + } +} +/* End Errno */ + +/* fopen */ +static drflac_result drflac_fopen(FILE** ppFile, const char* pFilePath, const char* pOpenMode) +{ +#if defined(_MSC_VER) && _MSC_VER >= 1400 + errno_t err; +#endif + + if (ppFile != NULL) { + *ppFile = NULL; /* Safety. */ + } + + if (pFilePath == NULL || pOpenMode == NULL || ppFile == NULL) { + return DRFLAC_INVALID_ARGS; + } + +#if defined(_MSC_VER) && _MSC_VER >= 1400 + err = fopen_s(ppFile, pFilePath, pOpenMode); + if (err != 0) { + return drflac_result_from_errno(err); + } +#else +#if defined(_WIN32) || defined(__APPLE__) + *ppFile = fopen(pFilePath, pOpenMode); +#else + #if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS == 64 && defined(_LARGEFILE64_SOURCE) + *ppFile = fopen64(pFilePath, pOpenMode); + #else + *ppFile = fopen(pFilePath, pOpenMode); + #endif +#endif + if (*ppFile == NULL) { + drflac_result result = drflac_result_from_errno(errno); + if (result == DRFLAC_SUCCESS) { + result = DRFLAC_ERROR; /* Just a safety check to make sure we never ever return success when pFile == NULL. */ + } + + return result; + } +#endif + + return DRFLAC_SUCCESS; +} + +/* +_wfopen() isn't always available in all compilation environments. + + * Windows only. + * MSVC seems to support it universally as far back as VC6 from what I can tell (haven't checked further back). + * MinGW-64 (both 32- and 64-bit) seems to support it. + * MinGW wraps it in !defined(__STRICT_ANSI__). + * OpenWatcom wraps it in !defined(_NO_EXT_KEYS). + +This can be reviewed as compatibility issues arise. The preference is to use _wfopen_s() and _wfopen() as opposed to the wcsrtombs() +fallback, so if you notice your compiler not detecting this properly I'm happy to look at adding support. +*/ +#if defined(_WIN32) + #if defined(_MSC_VER) || defined(__MINGW64__) || (!defined(__STRICT_ANSI__) && !defined(_NO_EXT_KEYS)) + #define DRFLAC_HAS_WFOPEN + #endif +#endif + +#ifndef DR_FLAC_NO_WCHAR +static drflac_result drflac_wfopen(FILE** ppFile, const wchar_t* pFilePath, const wchar_t* pOpenMode, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + if (ppFile != NULL) { + *ppFile = NULL; /* Safety. */ + } + + if (pFilePath == NULL || pOpenMode == NULL || ppFile == NULL) { + return DRFLAC_INVALID_ARGS; + } + +#if defined(DRFLAC_HAS_WFOPEN) + { + /* Use _wfopen() on Windows. */ + #if defined(_MSC_VER) && _MSC_VER >= 1400 + errno_t err = _wfopen_s(ppFile, pFilePath, pOpenMode); + if (err != 0) { + return drflac_result_from_errno(err); + } + #else + *ppFile = _wfopen(pFilePath, pOpenMode); + if (*ppFile == NULL) { + return drflac_result_from_errno(errno); + } + #endif + (void)pAllocationCallbacks; + } +#else + /* + Use fopen() on anything other than Windows. Requires a conversion. This is annoying because + fopen() is locale specific. The only real way I can think of to do this is with wcsrtombs(). Note + that wcstombs() is apparently not thread-safe because it uses a static global mbstate_t object for + maintaining state. I've checked this with -std=c89 and it works, but if somebody get's a compiler + error I'll look into improving compatibility. + */ + + /* + Some compilers don't support wchar_t or wcsrtombs() which we're using below. In this case we just + need to abort with an error. If you encounter a compiler lacking such support, add it to this list + and submit a bug report and it'll be added to the library upstream. + */ + #if defined(__DJGPP__) + { + /* Nothing to do here. This will fall through to the error check below. */ + } + #else + { + mbstate_t mbs; + size_t lenMB; + const wchar_t* pFilePathTemp = pFilePath; + char* pFilePathMB = NULL; + char pOpenModeMB[32] = {0}; + + /* Get the length first. */ + DRFLAC_ZERO_OBJECT(&mbs); + lenMB = wcsrtombs(NULL, &pFilePathTemp, 0, &mbs); + if (lenMB == (size_t)-1) { + return drflac_result_from_errno(errno); + } + + pFilePathMB = (char*)drflac__malloc_from_callbacks(lenMB + 1, pAllocationCallbacks); + if (pFilePathMB == NULL) { + return DRFLAC_OUT_OF_MEMORY; + } + + pFilePathTemp = pFilePath; + DRFLAC_ZERO_OBJECT(&mbs); + wcsrtombs(pFilePathMB, &pFilePathTemp, lenMB + 1, &mbs); + + /* The open mode should always consist of ASCII characters so we should be able to do a trivial conversion. */ + { + size_t i = 0; + for (;;) { + if (pOpenMode[i] == 0) { + pOpenModeMB[i] = '\0'; + break; + } + + pOpenModeMB[i] = (char)pOpenMode[i]; + i += 1; + } + } + + *ppFile = fopen(pFilePathMB, pOpenModeMB); + + drflac__free_from_callbacks(pFilePathMB, pAllocationCallbacks); + } + #endif + + if (*ppFile == NULL) { + return DRFLAC_ERROR; + } +#endif + + return DRFLAC_SUCCESS; +} +#endif +/* End fopen */ + +static size_t drflac__on_read_stdio(void* pUserData, void* bufferOut, size_t bytesToRead) +{ + return fread(bufferOut, 1, bytesToRead, (FILE*)pUserData); +} + +static drflac_bool32 drflac__on_seek_stdio(void* pUserData, int offset, drflac_seek_origin origin) +{ + int whence = SEEK_SET; + if (origin == DRFLAC_SEEK_CUR) { + whence = SEEK_CUR; + } else if (origin == DRFLAC_SEEK_END) { + whence = SEEK_END; + } + + return fseek((FILE*)pUserData, offset, whence) == 0; +} + +static drflac_bool32 drflac__on_tell_stdio(void* pUserData, drflac_int64* pCursor) +{ + FILE* pFileStdio = (FILE*)pUserData; + drflac_int64 result; + + /* These were all validated at a higher level. */ + DRFLAC_ASSERT(pFileStdio != NULL); + DRFLAC_ASSERT(pCursor != NULL); + +#if defined(_WIN32) && !defined(NXDK) + #if defined(_MSC_VER) && _MSC_VER > 1200 + result = _ftelli64(pFileStdio); + #else + result = ftell(pFileStdio); + #endif +#else + result = ftell(pFileStdio); +#endif + + *pCursor = result; + + return DRFLAC_TRUE; +} + + + +DRFLAC_API drflac* drflac_open_file(const char* pFileName, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + drflac* pFlac; + FILE* pFile; + + if (drflac_fopen(&pFile, pFileName, "rb") != DRFLAC_SUCCESS) { + return NULL; + } + + pFlac = drflac_open(drflac__on_read_stdio, drflac__on_seek_stdio, drflac__on_tell_stdio, (void*)pFile, pAllocationCallbacks); + if (pFlac == NULL) { + fclose(pFile); + return NULL; + } + + return pFlac; +} + +#ifndef DR_FLAC_NO_WCHAR +DRFLAC_API drflac* drflac_open_file_w(const wchar_t* pFileName, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + drflac* pFlac; + FILE* pFile; + + if (drflac_wfopen(&pFile, pFileName, L"rb", pAllocationCallbacks) != DRFLAC_SUCCESS) { + return NULL; + } + + pFlac = drflac_open(drflac__on_read_stdio, drflac__on_seek_stdio, drflac__on_tell_stdio, (void*)pFile, pAllocationCallbacks); + if (pFlac == NULL) { + fclose(pFile); + return NULL; + } + + return pFlac; +} +#endif + +DRFLAC_API drflac* drflac_open_file_with_metadata(const char* pFileName, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + drflac* pFlac; + FILE* pFile; + + if (drflac_fopen(&pFile, pFileName, "rb") != DRFLAC_SUCCESS) { + return NULL; + } + + pFlac = drflac_open_with_metadata_private(drflac__on_read_stdio, drflac__on_seek_stdio, drflac__on_tell_stdio, onMeta, drflac_container_unknown, (void*)pFile, pUserData, pAllocationCallbacks); + if (pFlac == NULL) { + fclose(pFile); + return pFlac; + } + + return pFlac; +} + +#ifndef DR_FLAC_NO_WCHAR +DRFLAC_API drflac* drflac_open_file_with_metadata_w(const wchar_t* pFileName, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + drflac* pFlac; + FILE* pFile; + + if (drflac_wfopen(&pFile, pFileName, L"rb", pAllocationCallbacks) != DRFLAC_SUCCESS) { + return NULL; + } + + pFlac = drflac_open_with_metadata_private(drflac__on_read_stdio, drflac__on_seek_stdio, drflac__on_tell_stdio, onMeta, drflac_container_unknown, (void*)pFile, pUserData, pAllocationCallbacks); + if (pFlac == NULL) { + fclose(pFile); + return pFlac; + } + + return pFlac; +} +#endif +#endif /* DR_FLAC_NO_STDIO */ + +static size_t drflac__on_read_memory(void* pUserData, void* bufferOut, size_t bytesToRead) +{ + drflac__memory_stream* memoryStream = (drflac__memory_stream*)pUserData; + size_t bytesRemaining; + + DRFLAC_ASSERT(memoryStream != NULL); + DRFLAC_ASSERT(memoryStream->dataSize >= memoryStream->currentReadPos); + + bytesRemaining = memoryStream->dataSize - memoryStream->currentReadPos; + if (bytesToRead > bytesRemaining) { + bytesToRead = bytesRemaining; + } + + if (bytesToRead > 0) { + DRFLAC_COPY_MEMORY(bufferOut, memoryStream->data + memoryStream->currentReadPos, bytesToRead); + memoryStream->currentReadPos += bytesToRead; + } + + return bytesToRead; +} + +static drflac_bool32 drflac__on_seek_memory(void* pUserData, int offset, drflac_seek_origin origin) +{ + drflac__memory_stream* memoryStream = (drflac__memory_stream*)pUserData; + drflac_int64 newCursor; + + DRFLAC_ASSERT(memoryStream != NULL); + + if (origin == DRFLAC_SEEK_SET) { + newCursor = 0; + } else if (origin == DRFLAC_SEEK_CUR) { + newCursor = (drflac_int64)memoryStream->currentReadPos; + } else if (origin == DRFLAC_SEEK_END) { + newCursor = (drflac_int64)memoryStream->dataSize; + } else { + DRFLAC_ASSERT(!"Invalid seek origin"); + return DRFLAC_FALSE; + } + + newCursor += offset; + + if (newCursor < 0) { + return DRFLAC_FALSE; /* Trying to seek prior to the start of the buffer. */ + } + if ((size_t)newCursor > memoryStream->dataSize) { + return DRFLAC_FALSE; /* Trying to seek beyond the end of the buffer. */ + } + + memoryStream->currentReadPos = (size_t)newCursor; + + return DRFLAC_TRUE; +} + +static drflac_bool32 drflac__on_tell_memory(void* pUserData, drflac_int64* pCursor) +{ + drflac__memory_stream* memoryStream = (drflac__memory_stream*)pUserData; + + DRFLAC_ASSERT(memoryStream != NULL); + DRFLAC_ASSERT(pCursor != NULL); + + *pCursor = (drflac_int64)memoryStream->currentReadPos; + return DRFLAC_TRUE; +} + +DRFLAC_API drflac* drflac_open_memory(const void* pData, size_t dataSize, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + drflac__memory_stream memoryStream; + drflac* pFlac; + + memoryStream.data = (const drflac_uint8*)pData; + memoryStream.dataSize = dataSize; + memoryStream.currentReadPos = 0; + pFlac = drflac_open(drflac__on_read_memory, drflac__on_seek_memory, drflac__on_tell_memory, &memoryStream, pAllocationCallbacks); + if (pFlac == NULL) { + return NULL; + } + + pFlac->memoryStream = memoryStream; + + /* This is an awful hack... */ +#ifndef DR_FLAC_NO_OGG + if (pFlac->container == drflac_container_ogg) + { + drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs; + oggbs->pUserData = &pFlac->memoryStream; + } + else +#endif + { + pFlac->bs.pUserData = &pFlac->memoryStream; + } + + return pFlac; +} + +DRFLAC_API drflac* drflac_open_memory_with_metadata(const void* pData, size_t dataSize, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + drflac__memory_stream memoryStream; + drflac* pFlac; + + memoryStream.data = (const drflac_uint8*)pData; + memoryStream.dataSize = dataSize; + memoryStream.currentReadPos = 0; + pFlac = drflac_open_with_metadata_private(drflac__on_read_memory, drflac__on_seek_memory, drflac__on_tell_memory, onMeta, drflac_container_unknown, &memoryStream, pUserData, pAllocationCallbacks); + if (pFlac == NULL) { + return NULL; + } + + pFlac->memoryStream = memoryStream; + + /* This is an awful hack... */ +#ifndef DR_FLAC_NO_OGG + if (pFlac->container == drflac_container_ogg) + { + drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs; + oggbs->pUserData = &pFlac->memoryStream; + } + else +#endif + { + pFlac->bs.pUserData = &pFlac->memoryStream; + } + + return pFlac; +} + + + +DRFLAC_API drflac* drflac_open(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_tell_proc onTell, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + return drflac_open_with_metadata_private(onRead, onSeek, onTell, NULL, drflac_container_unknown, pUserData, pUserData, pAllocationCallbacks); +} +DRFLAC_API drflac* drflac_open_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_tell_proc onTell, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + return drflac_open_with_metadata_private(onRead, onSeek, onTell, NULL, container, pUserData, pUserData, pAllocationCallbacks); +} + +DRFLAC_API drflac* drflac_open_with_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_tell_proc onTell, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + return drflac_open_with_metadata_private(onRead, onSeek, onTell, onMeta, drflac_container_unknown, pUserData, pUserData, pAllocationCallbacks); +} +DRFLAC_API drflac* drflac_open_with_metadata_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_tell_proc onTell, drflac_meta_proc onMeta, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + return drflac_open_with_metadata_private(onRead, onSeek, onTell, onMeta, container, pUserData, pUserData, pAllocationCallbacks); +} + +DRFLAC_API void drflac_close(drflac* pFlac) +{ + if (pFlac == NULL) { + return; + } + +#ifndef DR_FLAC_NO_STDIO + /* + If we opened the file with drflac_open_file() we will want to close the file handle. We can know whether or not drflac_open_file() + was used by looking at the callbacks. + */ + if (pFlac->bs.onRead == drflac__on_read_stdio) { + fclose((FILE*)pFlac->bs.pUserData); + } + +#ifndef DR_FLAC_NO_OGG + /* Need to clean up Ogg streams a bit differently due to the way the bit streaming is chained. */ + if (pFlac->container == drflac_container_ogg) { + drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs; + DRFLAC_ASSERT(pFlac->bs.onRead == drflac__on_read_ogg); + + if (oggbs->onRead == drflac__on_read_stdio) { + fclose((FILE*)oggbs->pUserData); + } + } +#endif +#endif + + drflac__free_from_callbacks(pFlac, &pFlac->allocationCallbacks); +} + + +#if 0 +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + drflac_uint64 i; + for (i = 0; i < frameCount; ++i) { + drflac_uint32 left = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + drflac_uint32 side = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); + drflac_uint32 right = left - side; + + pOutputSamples[i*2+0] = (drflac_int32)left; + pOutputSamples[i*2+1] = (drflac_int32)right; + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + for (i = 0; i < frameCount4; ++i) { + drflac_uint32 left0 = pInputSamples0U32[i*4+0] << shift0; + drflac_uint32 left1 = pInputSamples0U32[i*4+1] << shift0; + drflac_uint32 left2 = pInputSamples0U32[i*4+2] << shift0; + drflac_uint32 left3 = pInputSamples0U32[i*4+3] << shift0; + + drflac_uint32 side0 = pInputSamples1U32[i*4+0] << shift1; + drflac_uint32 side1 = pInputSamples1U32[i*4+1] << shift1; + drflac_uint32 side2 = pInputSamples1U32[i*4+2] << shift1; + drflac_uint32 side3 = pInputSamples1U32[i*4+3] << shift1; + + drflac_uint32 right0 = left0 - side0; + drflac_uint32 right1 = left1 - side1; + drflac_uint32 right2 = left2 - side2; + drflac_uint32 right3 = left3 - side3; + + pOutputSamples[i*8+0] = (drflac_int32)left0; + pOutputSamples[i*8+1] = (drflac_int32)right0; + pOutputSamples[i*8+2] = (drflac_int32)left1; + pOutputSamples[i*8+3] = (drflac_int32)right1; + pOutputSamples[i*8+4] = (drflac_int32)left2; + pOutputSamples[i*8+5] = (drflac_int32)right2; + pOutputSamples[i*8+6] = (drflac_int32)left3; + pOutputSamples[i*8+7] = (drflac_int32)right3; + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 left = pInputSamples0U32[i] << shift0; + drflac_uint32 side = pInputSamples1U32[i] << shift1; + drflac_uint32 right = left - side; + + pOutputSamples[i*2+0] = (drflac_int32)left; + pOutputSamples[i*2+1] = (drflac_int32)right; + } +} + +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + for (i = 0; i < frameCount4; ++i) { + __m128i left = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0); + __m128i side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1); + __m128i right = _mm_sub_epi32(left, side); + + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right)); + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 left = pInputSamples0U32[i] << shift0; + drflac_uint32 side = pInputSamples1U32[i] << shift1; + drflac_uint32 right = left - side; + + pOutputSamples[i*2+0] = (drflac_int32)left; + pOutputSamples[i*2+1] = (drflac_int32)right; + } +} +#endif + +#if defined(DRFLAC_SUPPORT_NEON) +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + int32x4_t shift0_4; + int32x4_t shift1_4; + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + shift0_4 = vdupq_n_s32(shift0); + shift1_4 = vdupq_n_s32(shift1); + + for (i = 0; i < frameCount4; ++i) { + uint32x4_t left; + uint32x4_t side; + uint32x4_t right; + + left = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4); + side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4); + right = vsubq_u32(left, side); + + drflac__vst2q_u32((drflac_uint32*)pOutputSamples + i*8, vzipq_u32(left, right)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 left = pInputSamples0U32[i] << shift0; + drflac_uint32 side = pInputSamples1U32[i] << shift1; + drflac_uint32 right = left - side; + + pOutputSamples[i*2+0] = (drflac_int32)left; + pOutputSamples[i*2+1] = (drflac_int32)right; + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ +#if defined(DRFLAC_SUPPORT_SSE2) + if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s32__decode_left_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#elif defined(DRFLAC_SUPPORT_NEON) + if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s32__decode_left_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#endif + { + /* Scalar fallback. */ +#if 0 + drflac_read_pcm_frames_s32__decode_left_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#else + drflac_read_pcm_frames_s32__decode_left_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#endif + } +} + + +#if 0 +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + drflac_uint64 i; + for (i = 0; i < frameCount; ++i) { + drflac_uint32 side = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + drflac_uint32 right = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); + drflac_uint32 left = right + side; + + pOutputSamples[i*2+0] = (drflac_int32)left; + pOutputSamples[i*2+1] = (drflac_int32)right; + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + for (i = 0; i < frameCount4; ++i) { + drflac_uint32 side0 = pInputSamples0U32[i*4+0] << shift0; + drflac_uint32 side1 = pInputSamples0U32[i*4+1] << shift0; + drflac_uint32 side2 = pInputSamples0U32[i*4+2] << shift0; + drflac_uint32 side3 = pInputSamples0U32[i*4+3] << shift0; + + drflac_uint32 right0 = pInputSamples1U32[i*4+0] << shift1; + drflac_uint32 right1 = pInputSamples1U32[i*4+1] << shift1; + drflac_uint32 right2 = pInputSamples1U32[i*4+2] << shift1; + drflac_uint32 right3 = pInputSamples1U32[i*4+3] << shift1; + + drflac_uint32 left0 = right0 + side0; + drflac_uint32 left1 = right1 + side1; + drflac_uint32 left2 = right2 + side2; + drflac_uint32 left3 = right3 + side3; + + pOutputSamples[i*8+0] = (drflac_int32)left0; + pOutputSamples[i*8+1] = (drflac_int32)right0; + pOutputSamples[i*8+2] = (drflac_int32)left1; + pOutputSamples[i*8+3] = (drflac_int32)right1; + pOutputSamples[i*8+4] = (drflac_int32)left2; + pOutputSamples[i*8+5] = (drflac_int32)right2; + pOutputSamples[i*8+6] = (drflac_int32)left3; + pOutputSamples[i*8+7] = (drflac_int32)right3; + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 side = pInputSamples0U32[i] << shift0; + drflac_uint32 right = pInputSamples1U32[i] << shift1; + drflac_uint32 left = right + side; + + pOutputSamples[i*2+0] = (drflac_int32)left; + pOutputSamples[i*2+1] = (drflac_int32)right; + } +} + +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + for (i = 0; i < frameCount4; ++i) { + __m128i side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0); + __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1); + __m128i left = _mm_add_epi32(right, side); + + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right)); + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 side = pInputSamples0U32[i] << shift0; + drflac_uint32 right = pInputSamples1U32[i] << shift1; + drflac_uint32 left = right + side; + + pOutputSamples[i*2+0] = (drflac_int32)left; + pOutputSamples[i*2+1] = (drflac_int32)right; + } +} +#endif + +#if defined(DRFLAC_SUPPORT_NEON) +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + int32x4_t shift0_4; + int32x4_t shift1_4; + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + shift0_4 = vdupq_n_s32(shift0); + shift1_4 = vdupq_n_s32(shift1); + + for (i = 0; i < frameCount4; ++i) { + uint32x4_t side; + uint32x4_t right; + uint32x4_t left; + + side = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4); + right = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4); + left = vaddq_u32(right, side); + + drflac__vst2q_u32((drflac_uint32*)pOutputSamples + i*8, vzipq_u32(left, right)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 side = pInputSamples0U32[i] << shift0; + drflac_uint32 right = pInputSamples1U32[i] << shift1; + drflac_uint32 left = right + side; + + pOutputSamples[i*2+0] = (drflac_int32)left; + pOutputSamples[i*2+1] = (drflac_int32)right; + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ +#if defined(DRFLAC_SUPPORT_SSE2) + if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s32__decode_right_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#elif defined(DRFLAC_SUPPORT_NEON) + if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s32__decode_right_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#endif + { + /* Scalar fallback. */ +#if 0 + drflac_read_pcm_frames_s32__decode_right_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#else + drflac_read_pcm_frames_s32__decode_right_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#endif + } +} + + +#if 0 +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + for (drflac_uint64 i = 0; i < frameCount; ++i) { + drflac_uint32 mid = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid = (mid << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid + side) >> 1) << unusedBitsPerSample); + pOutputSamples[i*2+1] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid - side) >> 1) << unusedBitsPerSample); + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_int32 shift = unusedBitsPerSample; + + if (shift > 0) { + shift -= 1; + for (i = 0; i < frameCount4; ++i) { + drflac_uint32 temp0L; + drflac_uint32 temp1L; + drflac_uint32 temp2L; + drflac_uint32 temp3L; + drflac_uint32 temp0R; + drflac_uint32 temp1R; + drflac_uint32 temp2R; + drflac_uint32 temp3R; + + drflac_uint32 mid0 = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 mid1 = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 mid2 = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 mid3 = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + + drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid0 = (mid0 << 1) | (side0 & 0x01); + mid1 = (mid1 << 1) | (side1 & 0x01); + mid2 = (mid2 << 1) | (side2 & 0x01); + mid3 = (mid3 << 1) | (side3 & 0x01); + + temp0L = (mid0 + side0) << shift; + temp1L = (mid1 + side1) << shift; + temp2L = (mid2 + side2) << shift; + temp3L = (mid3 + side3) << shift; + + temp0R = (mid0 - side0) << shift; + temp1R = (mid1 - side1) << shift; + temp2R = (mid2 - side2) << shift; + temp3R = (mid3 - side3) << shift; + + pOutputSamples[i*8+0] = (drflac_int32)temp0L; + pOutputSamples[i*8+1] = (drflac_int32)temp0R; + pOutputSamples[i*8+2] = (drflac_int32)temp1L; + pOutputSamples[i*8+3] = (drflac_int32)temp1R; + pOutputSamples[i*8+4] = (drflac_int32)temp2L; + pOutputSamples[i*8+5] = (drflac_int32)temp2R; + pOutputSamples[i*8+6] = (drflac_int32)temp3L; + pOutputSamples[i*8+7] = (drflac_int32)temp3R; + } + } else { + for (i = 0; i < frameCount4; ++i) { + drflac_uint32 temp0L; + drflac_uint32 temp1L; + drflac_uint32 temp2L; + drflac_uint32 temp3L; + drflac_uint32 temp0R; + drflac_uint32 temp1R; + drflac_uint32 temp2R; + drflac_uint32 temp3R; + + drflac_uint32 mid0 = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 mid1 = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 mid2 = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 mid3 = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + + drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid0 = (mid0 << 1) | (side0 & 0x01); + mid1 = (mid1 << 1) | (side1 & 0x01); + mid2 = (mid2 << 1) | (side2 & 0x01); + mid3 = (mid3 << 1) | (side3 & 0x01); + + temp0L = (drflac_uint32)((drflac_int32)(mid0 + side0) >> 1); + temp1L = (drflac_uint32)((drflac_int32)(mid1 + side1) >> 1); + temp2L = (drflac_uint32)((drflac_int32)(mid2 + side2) >> 1); + temp3L = (drflac_uint32)((drflac_int32)(mid3 + side3) >> 1); + + temp0R = (drflac_uint32)((drflac_int32)(mid0 - side0) >> 1); + temp1R = (drflac_uint32)((drflac_int32)(mid1 - side1) >> 1); + temp2R = (drflac_uint32)((drflac_int32)(mid2 - side2) >> 1); + temp3R = (drflac_uint32)((drflac_int32)(mid3 - side3) >> 1); + + pOutputSamples[i*8+0] = (drflac_int32)temp0L; + pOutputSamples[i*8+1] = (drflac_int32)temp0R; + pOutputSamples[i*8+2] = (drflac_int32)temp1L; + pOutputSamples[i*8+3] = (drflac_int32)temp1R; + pOutputSamples[i*8+4] = (drflac_int32)temp2L; + pOutputSamples[i*8+5] = (drflac_int32)temp2R; + pOutputSamples[i*8+6] = (drflac_int32)temp3L; + pOutputSamples[i*8+7] = (drflac_int32)temp3R; + } + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 mid = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid = (mid << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid + side) >> 1) << unusedBitsPerSample); + pOutputSamples[i*2+1] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid - side) >> 1) << unusedBitsPerSample); + } +} + +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_int32 shift = unusedBitsPerSample; + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + if (shift == 0) { + for (i = 0; i < frameCount4; ++i) { + __m128i mid; + __m128i side; + __m128i left; + __m128i right; + + mid = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); + + mid = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01))); + + left = _mm_srai_epi32(_mm_add_epi32(mid, side), 1); + right = _mm_srai_epi32(_mm_sub_epi32(mid, side), 1); + + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right)); + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 mid = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid = (mid << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (drflac_int32)(mid + side) >> 1; + pOutputSamples[i*2+1] = (drflac_int32)(mid - side) >> 1; + } + } else { + shift -= 1; + for (i = 0; i < frameCount4; ++i) { + __m128i mid; + __m128i side; + __m128i left; + __m128i right; + + mid = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); + + mid = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01))); + + left = _mm_slli_epi32(_mm_add_epi32(mid, side), shift); + right = _mm_slli_epi32(_mm_sub_epi32(mid, side), shift); + + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right)); + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 mid = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid = (mid << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (drflac_int32)((mid + side) << shift); + pOutputSamples[i*2+1] = (drflac_int32)((mid - side) << shift); + } + } +} +#endif + +#if defined(DRFLAC_SUPPORT_NEON) +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_int32 shift = unusedBitsPerSample; + int32x4_t wbpsShift0_4; /* wbps = Wasted Bits Per Sample */ + int32x4_t wbpsShift1_4; /* wbps = Wasted Bits Per Sample */ + uint32x4_t one4; + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + wbpsShift0_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + wbpsShift1_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); + one4 = vdupq_n_u32(1); + + if (shift == 0) { + for (i = 0; i < frameCount4; ++i) { + uint32x4_t mid; + uint32x4_t side; + int32x4_t left; + int32x4_t right; + + mid = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbpsShift0_4); + side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbpsShift1_4); + + mid = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, one4)); + + left = vshrq_n_s32(vreinterpretq_s32_u32(vaddq_u32(mid, side)), 1); + right = vshrq_n_s32(vreinterpretq_s32_u32(vsubq_u32(mid, side)), 1); + + drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 mid = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid = (mid << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (drflac_int32)(mid + side) >> 1; + pOutputSamples[i*2+1] = (drflac_int32)(mid - side) >> 1; + } + } else { + int32x4_t shift4; + + shift -= 1; + shift4 = vdupq_n_s32(shift); + + for (i = 0; i < frameCount4; ++i) { + uint32x4_t mid; + uint32x4_t side; + int32x4_t left; + int32x4_t right; + + mid = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbpsShift0_4); + side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbpsShift1_4); + + mid = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, one4)); + + left = vreinterpretq_s32_u32(vshlq_u32(vaddq_u32(mid, side), shift4)); + right = vreinterpretq_s32_u32(vshlq_u32(vsubq_u32(mid, side), shift4)); + + drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 mid = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid = (mid << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (drflac_int32)((mid + side) << shift); + pOutputSamples[i*2+1] = (drflac_int32)((mid - side) << shift); + } + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ +#if defined(DRFLAC_SUPPORT_SSE2) + if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s32__decode_mid_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#elif defined(DRFLAC_SUPPORT_NEON) + if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s32__decode_mid_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#endif + { + /* Scalar fallback. */ +#if 0 + drflac_read_pcm_frames_s32__decode_mid_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#else + drflac_read_pcm_frames_s32__decode_mid_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#endif + } +} + + +#if 0 +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + for (drflac_uint64 i = 0; i < frameCount; ++i) { + pOutputSamples[i*2+0] = (drflac_int32)((drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample)); + pOutputSamples[i*2+1] = (drflac_int32)((drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample)); + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + for (i = 0; i < frameCount4; ++i) { + drflac_uint32 tempL0 = pInputSamples0U32[i*4+0] << shift0; + drflac_uint32 tempL1 = pInputSamples0U32[i*4+1] << shift0; + drflac_uint32 tempL2 = pInputSamples0U32[i*4+2] << shift0; + drflac_uint32 tempL3 = pInputSamples0U32[i*4+3] << shift0; + + drflac_uint32 tempR0 = pInputSamples1U32[i*4+0] << shift1; + drflac_uint32 tempR1 = pInputSamples1U32[i*4+1] << shift1; + drflac_uint32 tempR2 = pInputSamples1U32[i*4+2] << shift1; + drflac_uint32 tempR3 = pInputSamples1U32[i*4+3] << shift1; + + pOutputSamples[i*8+0] = (drflac_int32)tempL0; + pOutputSamples[i*8+1] = (drflac_int32)tempR0; + pOutputSamples[i*8+2] = (drflac_int32)tempL1; + pOutputSamples[i*8+3] = (drflac_int32)tempR1; + pOutputSamples[i*8+4] = (drflac_int32)tempL2; + pOutputSamples[i*8+5] = (drflac_int32)tempR2; + pOutputSamples[i*8+6] = (drflac_int32)tempL3; + pOutputSamples[i*8+7] = (drflac_int32)tempR3; + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0); + pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1); + } +} + +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + for (i = 0; i < frameCount4; ++i) { + __m128i left = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0); + __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1); + + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right)); + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0); + pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1); + } +} +#endif + +#if defined(DRFLAC_SUPPORT_NEON) +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + int32x4_t shift4_0 = vdupq_n_s32(shift0); + int32x4_t shift4_1 = vdupq_n_s32(shift1); + + for (i = 0; i < frameCount4; ++i) { + int32x4_t left; + int32x4_t right; + + left = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift4_0)); + right = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift4_1)); + + drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0); + pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1); + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ +#if defined(DRFLAC_SUPPORT_SSE2) + if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s32__decode_independent_stereo__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#elif defined(DRFLAC_SUPPORT_NEON) + if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s32__decode_independent_stereo__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#endif + { + /* Scalar fallback. */ +#if 0 + drflac_read_pcm_frames_s32__decode_independent_stereo__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#else + drflac_read_pcm_frames_s32__decode_independent_stereo__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#endif + } +} + + +DRFLAC_API drflac_uint64 drflac_read_pcm_frames_s32(drflac* pFlac, drflac_uint64 framesToRead, drflac_int32* pBufferOut) +{ + drflac_uint64 framesRead; + drflac_uint32 unusedBitsPerSample; + + if (pFlac == NULL || framesToRead == 0) { + return 0; + } + + if (pBufferOut == NULL) { + return drflac__seek_forward_by_pcm_frames(pFlac, framesToRead); + } + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 32); + unusedBitsPerSample = 32 - pFlac->bitsPerSample; + + framesRead = 0; + while (framesToRead > 0) { + /* If we've run out of samples in this frame, go to the next. */ + if (pFlac->currentFLACFrame.pcmFramesRemaining == 0) { + if (!drflac__read_and_decode_next_flac_frame(pFlac)) { + break; /* Couldn't read the next frame, so just break from the loop and return. */ + } + } else { + unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment); + drflac_uint64 iFirstPCMFrame = pFlac->currentFLACFrame.header.blockSizeInPCMFrames - pFlac->currentFLACFrame.pcmFramesRemaining; + drflac_uint64 frameCountThisIteration = framesToRead; + + if (frameCountThisIteration > pFlac->currentFLACFrame.pcmFramesRemaining) { + frameCountThisIteration = pFlac->currentFLACFrame.pcmFramesRemaining; + } + + if (channelCount == 2) { + const drflac_int32* pDecodedSamples0 = pFlac->currentFLACFrame.subframes[0].pSamplesS32 + iFirstPCMFrame; + const drflac_int32* pDecodedSamples1 = pFlac->currentFLACFrame.subframes[1].pSamplesS32 + iFirstPCMFrame; + + switch (pFlac->currentFLACFrame.header.channelAssignment) + { + case DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE: + { + drflac_read_pcm_frames_s32__decode_left_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); + } break; + + case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE: + { + drflac_read_pcm_frames_s32__decode_right_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); + } break; + + case DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE: + { + drflac_read_pcm_frames_s32__decode_mid_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); + } break; + + case DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT: + default: + { + drflac_read_pcm_frames_s32__decode_independent_stereo(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); + } break; + } + } else { + /* Generic interleaving. */ + drflac_uint64 i; + for (i = 0; i < frameCountThisIteration; ++i) { + unsigned int j; + for (j = 0; j < channelCount; ++j) { + pBufferOut[(i*channelCount)+j] = (drflac_int32)((drflac_uint32)(pFlac->currentFLACFrame.subframes[j].pSamplesS32[iFirstPCMFrame + i]) << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[j].wastedBitsPerSample)); + } + } + } + + framesRead += frameCountThisIteration; + pBufferOut += frameCountThisIteration * channelCount; + framesToRead -= frameCountThisIteration; + pFlac->currentPCMFrame += frameCountThisIteration; + pFlac->currentFLACFrame.pcmFramesRemaining -= (drflac_uint32)frameCountThisIteration; + } + } + + return framesRead; +} + + +#if 0 +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ + drflac_uint64 i; + for (i = 0; i < frameCount; ++i) { + drflac_uint32 left = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + drflac_uint32 side = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); + drflac_uint32 right = left - side; + + left >>= 16; + right >>= 16; + + pOutputSamples[i*2+0] = (drflac_int16)left; + pOutputSamples[i*2+1] = (drflac_int16)right; + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + for (i = 0; i < frameCount4; ++i) { + drflac_uint32 left0 = pInputSamples0U32[i*4+0] << shift0; + drflac_uint32 left1 = pInputSamples0U32[i*4+1] << shift0; + drflac_uint32 left2 = pInputSamples0U32[i*4+2] << shift0; + drflac_uint32 left3 = pInputSamples0U32[i*4+3] << shift0; + + drflac_uint32 side0 = pInputSamples1U32[i*4+0] << shift1; + drflac_uint32 side1 = pInputSamples1U32[i*4+1] << shift1; + drflac_uint32 side2 = pInputSamples1U32[i*4+2] << shift1; + drflac_uint32 side3 = pInputSamples1U32[i*4+3] << shift1; + + drflac_uint32 right0 = left0 - side0; + drflac_uint32 right1 = left1 - side1; + drflac_uint32 right2 = left2 - side2; + drflac_uint32 right3 = left3 - side3; + + left0 >>= 16; + left1 >>= 16; + left2 >>= 16; + left3 >>= 16; + + right0 >>= 16; + right1 >>= 16; + right2 >>= 16; + right3 >>= 16; + + pOutputSamples[i*8+0] = (drflac_int16)left0; + pOutputSamples[i*8+1] = (drflac_int16)right0; + pOutputSamples[i*8+2] = (drflac_int16)left1; + pOutputSamples[i*8+3] = (drflac_int16)right1; + pOutputSamples[i*8+4] = (drflac_int16)left2; + pOutputSamples[i*8+5] = (drflac_int16)right2; + pOutputSamples[i*8+6] = (drflac_int16)left3; + pOutputSamples[i*8+7] = (drflac_int16)right3; + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 left = pInputSamples0U32[i] << shift0; + drflac_uint32 side = pInputSamples1U32[i] << shift1; + drflac_uint32 right = left - side; + + left >>= 16; + right >>= 16; + + pOutputSamples[i*2+0] = (drflac_int16)left; + pOutputSamples[i*2+1] = (drflac_int16)right; + } +} + +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + for (i = 0; i < frameCount4; ++i) { + __m128i left = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0); + __m128i side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1); + __m128i right = _mm_sub_epi32(left, side); + + left = _mm_srai_epi32(left, 16); + right = _mm_srai_epi32(right, 16); + + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 left = pInputSamples0U32[i] << shift0; + drflac_uint32 side = pInputSamples1U32[i] << shift1; + drflac_uint32 right = left - side; + + left >>= 16; + right >>= 16; + + pOutputSamples[i*2+0] = (drflac_int16)left; + pOutputSamples[i*2+1] = (drflac_int16)right; + } +} +#endif + +#if defined(DRFLAC_SUPPORT_NEON) +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + int32x4_t shift0_4; + int32x4_t shift1_4; + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + shift0_4 = vdupq_n_s32(shift0); + shift1_4 = vdupq_n_s32(shift1); + + for (i = 0; i < frameCount4; ++i) { + uint32x4_t left; + uint32x4_t side; + uint32x4_t right; + + left = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4); + side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4); + right = vsubq_u32(left, side); + + left = vshrq_n_u32(left, 16); + right = vshrq_n_u32(right, 16); + + drflac__vst2q_u16((drflac_uint16*)pOutputSamples + i*8, vzip_u16(vmovn_u32(left), vmovn_u32(right))); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 left = pInputSamples0U32[i] << shift0; + drflac_uint32 side = pInputSamples1U32[i] << shift1; + drflac_uint32 right = left - side; + + left >>= 16; + right >>= 16; + + pOutputSamples[i*2+0] = (drflac_int16)left; + pOutputSamples[i*2+1] = (drflac_int16)right; + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ +#if defined(DRFLAC_SUPPORT_SSE2) + if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s16__decode_left_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#elif defined(DRFLAC_SUPPORT_NEON) + if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s16__decode_left_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#endif + { + /* Scalar fallback. */ +#if 0 + drflac_read_pcm_frames_s16__decode_left_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#else + drflac_read_pcm_frames_s16__decode_left_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#endif + } +} + + +#if 0 +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ + drflac_uint64 i; + for (i = 0; i < frameCount; ++i) { + drflac_uint32 side = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + drflac_uint32 right = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); + drflac_uint32 left = right + side; + + left >>= 16; + right >>= 16; + + pOutputSamples[i*2+0] = (drflac_int16)left; + pOutputSamples[i*2+1] = (drflac_int16)right; + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + for (i = 0; i < frameCount4; ++i) { + drflac_uint32 side0 = pInputSamples0U32[i*4+0] << shift0; + drflac_uint32 side1 = pInputSamples0U32[i*4+1] << shift0; + drflac_uint32 side2 = pInputSamples0U32[i*4+2] << shift0; + drflac_uint32 side3 = pInputSamples0U32[i*4+3] << shift0; + + drflac_uint32 right0 = pInputSamples1U32[i*4+0] << shift1; + drflac_uint32 right1 = pInputSamples1U32[i*4+1] << shift1; + drflac_uint32 right2 = pInputSamples1U32[i*4+2] << shift1; + drflac_uint32 right3 = pInputSamples1U32[i*4+3] << shift1; + + drflac_uint32 left0 = right0 + side0; + drflac_uint32 left1 = right1 + side1; + drflac_uint32 left2 = right2 + side2; + drflac_uint32 left3 = right3 + side3; + + left0 >>= 16; + left1 >>= 16; + left2 >>= 16; + left3 >>= 16; + + right0 >>= 16; + right1 >>= 16; + right2 >>= 16; + right3 >>= 16; + + pOutputSamples[i*8+0] = (drflac_int16)left0; + pOutputSamples[i*8+1] = (drflac_int16)right0; + pOutputSamples[i*8+2] = (drflac_int16)left1; + pOutputSamples[i*8+3] = (drflac_int16)right1; + pOutputSamples[i*8+4] = (drflac_int16)left2; + pOutputSamples[i*8+5] = (drflac_int16)right2; + pOutputSamples[i*8+6] = (drflac_int16)left3; + pOutputSamples[i*8+7] = (drflac_int16)right3; + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 side = pInputSamples0U32[i] << shift0; + drflac_uint32 right = pInputSamples1U32[i] << shift1; + drflac_uint32 left = right + side; + + left >>= 16; + right >>= 16; + + pOutputSamples[i*2+0] = (drflac_int16)left; + pOutputSamples[i*2+1] = (drflac_int16)right; + } +} + +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + for (i = 0; i < frameCount4; ++i) { + __m128i side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0); + __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1); + __m128i left = _mm_add_epi32(right, side); + + left = _mm_srai_epi32(left, 16); + right = _mm_srai_epi32(right, 16); + + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 side = pInputSamples0U32[i] << shift0; + drflac_uint32 right = pInputSamples1U32[i] << shift1; + drflac_uint32 left = right + side; + + left >>= 16; + right >>= 16; + + pOutputSamples[i*2+0] = (drflac_int16)left; + pOutputSamples[i*2+1] = (drflac_int16)right; + } +} +#endif + +#if defined(DRFLAC_SUPPORT_NEON) +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + int32x4_t shift0_4; + int32x4_t shift1_4; + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + shift0_4 = vdupq_n_s32(shift0); + shift1_4 = vdupq_n_s32(shift1); + + for (i = 0; i < frameCount4; ++i) { + uint32x4_t side; + uint32x4_t right; + uint32x4_t left; + + side = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4); + right = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4); + left = vaddq_u32(right, side); + + left = vshrq_n_u32(left, 16); + right = vshrq_n_u32(right, 16); + + drflac__vst2q_u16((drflac_uint16*)pOutputSamples + i*8, vzip_u16(vmovn_u32(left), vmovn_u32(right))); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 side = pInputSamples0U32[i] << shift0; + drflac_uint32 right = pInputSamples1U32[i] << shift1; + drflac_uint32 left = right + side; + + left >>= 16; + right >>= 16; + + pOutputSamples[i*2+0] = (drflac_int16)left; + pOutputSamples[i*2+1] = (drflac_int16)right; + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ +#if defined(DRFLAC_SUPPORT_SSE2) + if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s16__decode_right_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#elif defined(DRFLAC_SUPPORT_NEON) + if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s16__decode_right_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#endif + { + /* Scalar fallback. */ +#if 0 + drflac_read_pcm_frames_s16__decode_right_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#else + drflac_read_pcm_frames_s16__decode_right_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#endif + } +} + + +#if 0 +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ + for (drflac_uint64 i = 0; i < frameCount; ++i) { + drflac_uint32 mid = (drflac_uint32)pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 side = (drflac_uint32)pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid = (mid << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (drflac_int16)(((drflac_uint32)((drflac_int32)(mid + side) >> 1) << unusedBitsPerSample) >> 16); + pOutputSamples[i*2+1] = (drflac_int16)(((drflac_uint32)((drflac_int32)(mid - side) >> 1) << unusedBitsPerSample) >> 16); + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift = unusedBitsPerSample; + + if (shift > 0) { + shift -= 1; + for (i = 0; i < frameCount4; ++i) { + drflac_uint32 temp0L; + drflac_uint32 temp1L; + drflac_uint32 temp2L; + drflac_uint32 temp3L; + drflac_uint32 temp0R; + drflac_uint32 temp1R; + drflac_uint32 temp2R; + drflac_uint32 temp3R; + + drflac_uint32 mid0 = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 mid1 = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 mid2 = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 mid3 = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + + drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid0 = (mid0 << 1) | (side0 & 0x01); + mid1 = (mid1 << 1) | (side1 & 0x01); + mid2 = (mid2 << 1) | (side2 & 0x01); + mid3 = (mid3 << 1) | (side3 & 0x01); + + temp0L = (mid0 + side0) << shift; + temp1L = (mid1 + side1) << shift; + temp2L = (mid2 + side2) << shift; + temp3L = (mid3 + side3) << shift; + + temp0R = (mid0 - side0) << shift; + temp1R = (mid1 - side1) << shift; + temp2R = (mid2 - side2) << shift; + temp3R = (mid3 - side3) << shift; + + temp0L >>= 16; + temp1L >>= 16; + temp2L >>= 16; + temp3L >>= 16; + + temp0R >>= 16; + temp1R >>= 16; + temp2R >>= 16; + temp3R >>= 16; + + pOutputSamples[i*8+0] = (drflac_int16)temp0L; + pOutputSamples[i*8+1] = (drflac_int16)temp0R; + pOutputSamples[i*8+2] = (drflac_int16)temp1L; + pOutputSamples[i*8+3] = (drflac_int16)temp1R; + pOutputSamples[i*8+4] = (drflac_int16)temp2L; + pOutputSamples[i*8+5] = (drflac_int16)temp2R; + pOutputSamples[i*8+6] = (drflac_int16)temp3L; + pOutputSamples[i*8+7] = (drflac_int16)temp3R; + } + } else { + for (i = 0; i < frameCount4; ++i) { + drflac_uint32 temp0L; + drflac_uint32 temp1L; + drflac_uint32 temp2L; + drflac_uint32 temp3L; + drflac_uint32 temp0R; + drflac_uint32 temp1R; + drflac_uint32 temp2R; + drflac_uint32 temp3R; + + drflac_uint32 mid0 = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 mid1 = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 mid2 = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 mid3 = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + + drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid0 = (mid0 << 1) | (side0 & 0x01); + mid1 = (mid1 << 1) | (side1 & 0x01); + mid2 = (mid2 << 1) | (side2 & 0x01); + mid3 = (mid3 << 1) | (side3 & 0x01); + + temp0L = ((drflac_int32)(mid0 + side0) >> 1); + temp1L = ((drflac_int32)(mid1 + side1) >> 1); + temp2L = ((drflac_int32)(mid2 + side2) >> 1); + temp3L = ((drflac_int32)(mid3 + side3) >> 1); + + temp0R = ((drflac_int32)(mid0 - side0) >> 1); + temp1R = ((drflac_int32)(mid1 - side1) >> 1); + temp2R = ((drflac_int32)(mid2 - side2) >> 1); + temp3R = ((drflac_int32)(mid3 - side3) >> 1); + + temp0L >>= 16; + temp1L >>= 16; + temp2L >>= 16; + temp3L >>= 16; + + temp0R >>= 16; + temp1R >>= 16; + temp2R >>= 16; + temp3R >>= 16; + + pOutputSamples[i*8+0] = (drflac_int16)temp0L; + pOutputSamples[i*8+1] = (drflac_int16)temp0R; + pOutputSamples[i*8+2] = (drflac_int16)temp1L; + pOutputSamples[i*8+3] = (drflac_int16)temp1R; + pOutputSamples[i*8+4] = (drflac_int16)temp2L; + pOutputSamples[i*8+5] = (drflac_int16)temp2R; + pOutputSamples[i*8+6] = (drflac_int16)temp3L; + pOutputSamples[i*8+7] = (drflac_int16)temp3R; + } + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 mid = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid = (mid << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (drflac_int16)(((drflac_uint32)((drflac_int32)(mid + side) >> 1) << unusedBitsPerSample) >> 16); + pOutputSamples[i*2+1] = (drflac_int16)(((drflac_uint32)((drflac_int32)(mid - side) >> 1) << unusedBitsPerSample) >> 16); + } +} + +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift = unusedBitsPerSample; + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + if (shift == 0) { + for (i = 0; i < frameCount4; ++i) { + __m128i mid; + __m128i side; + __m128i left; + __m128i right; + + mid = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); + + mid = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01))); + + left = _mm_srai_epi32(_mm_add_epi32(mid, side), 1); + right = _mm_srai_epi32(_mm_sub_epi32(mid, side), 1); + + left = _mm_srai_epi32(left, 16); + right = _mm_srai_epi32(right, 16); + + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 mid = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid = (mid << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (drflac_int16)(((drflac_int32)(mid + side) >> 1) >> 16); + pOutputSamples[i*2+1] = (drflac_int16)(((drflac_int32)(mid - side) >> 1) >> 16); + } + } else { + shift -= 1; + for (i = 0; i < frameCount4; ++i) { + __m128i mid; + __m128i side; + __m128i left; + __m128i right; + + mid = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); + + mid = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01))); + + left = _mm_slli_epi32(_mm_add_epi32(mid, side), shift); + right = _mm_slli_epi32(_mm_sub_epi32(mid, side), shift); + + left = _mm_srai_epi32(left, 16); + right = _mm_srai_epi32(right, 16); + + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 mid = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid = (mid << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (drflac_int16)(((mid + side) << shift) >> 16); + pOutputSamples[i*2+1] = (drflac_int16)(((mid - side) << shift) >> 16); + } + } +} +#endif + +#if defined(DRFLAC_SUPPORT_NEON) +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift = unusedBitsPerSample; + int32x4_t wbpsShift0_4; /* wbps = Wasted Bits Per Sample */ + int32x4_t wbpsShift1_4; /* wbps = Wasted Bits Per Sample */ + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + wbpsShift0_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + wbpsShift1_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); + + if (shift == 0) { + for (i = 0; i < frameCount4; ++i) { + uint32x4_t mid; + uint32x4_t side; + int32x4_t left; + int32x4_t right; + + mid = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbpsShift0_4); + side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbpsShift1_4); + + mid = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, vdupq_n_u32(1))); + + left = vshrq_n_s32(vreinterpretq_s32_u32(vaddq_u32(mid, side)), 1); + right = vshrq_n_s32(vreinterpretq_s32_u32(vsubq_u32(mid, side)), 1); + + left = vshrq_n_s32(left, 16); + right = vshrq_n_s32(right, 16); + + drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right))); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 mid = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid = (mid << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (drflac_int16)(((drflac_int32)(mid + side) >> 1) >> 16); + pOutputSamples[i*2+1] = (drflac_int16)(((drflac_int32)(mid - side) >> 1) >> 16); + } + } else { + int32x4_t shift4; + + shift -= 1; + shift4 = vdupq_n_s32(shift); + + for (i = 0; i < frameCount4; ++i) { + uint32x4_t mid; + uint32x4_t side; + int32x4_t left; + int32x4_t right; + + mid = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbpsShift0_4); + side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbpsShift1_4); + + mid = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, vdupq_n_u32(1))); + + left = vreinterpretq_s32_u32(vshlq_u32(vaddq_u32(mid, side), shift4)); + right = vreinterpretq_s32_u32(vshlq_u32(vsubq_u32(mid, side), shift4)); + + left = vshrq_n_s32(left, 16); + right = vshrq_n_s32(right, 16); + + drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right))); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 mid = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid = (mid << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (drflac_int16)(((mid + side) << shift) >> 16); + pOutputSamples[i*2+1] = (drflac_int16)(((mid - side) << shift) >> 16); + } + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ +#if defined(DRFLAC_SUPPORT_SSE2) + if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s16__decode_mid_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#elif defined(DRFLAC_SUPPORT_NEON) + if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s16__decode_mid_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#endif + { + /* Scalar fallback. */ +#if 0 + drflac_read_pcm_frames_s16__decode_mid_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#else + drflac_read_pcm_frames_s16__decode_mid_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#endif + } +} + + +#if 0 +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ + for (drflac_uint64 i = 0; i < frameCount; ++i) { + pOutputSamples[i*2+0] = (drflac_int16)((drflac_int32)((drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample)) >> 16); + pOutputSamples[i*2+1] = (drflac_int16)((drflac_int32)((drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample)) >> 16); + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + for (i = 0; i < frameCount4; ++i) { + drflac_uint32 tempL0 = pInputSamples0U32[i*4+0] << shift0; + drflac_uint32 tempL1 = pInputSamples0U32[i*4+1] << shift0; + drflac_uint32 tempL2 = pInputSamples0U32[i*4+2] << shift0; + drflac_uint32 tempL3 = pInputSamples0U32[i*4+3] << shift0; + + drflac_uint32 tempR0 = pInputSamples1U32[i*4+0] << shift1; + drflac_uint32 tempR1 = pInputSamples1U32[i*4+1] << shift1; + drflac_uint32 tempR2 = pInputSamples1U32[i*4+2] << shift1; + drflac_uint32 tempR3 = pInputSamples1U32[i*4+3] << shift1; + + tempL0 >>= 16; + tempL1 >>= 16; + tempL2 >>= 16; + tempL3 >>= 16; + + tempR0 >>= 16; + tempR1 >>= 16; + tempR2 >>= 16; + tempR3 >>= 16; + + pOutputSamples[i*8+0] = (drflac_int16)tempL0; + pOutputSamples[i*8+1] = (drflac_int16)tempR0; + pOutputSamples[i*8+2] = (drflac_int16)tempL1; + pOutputSamples[i*8+3] = (drflac_int16)tempR1; + pOutputSamples[i*8+4] = (drflac_int16)tempL2; + pOutputSamples[i*8+5] = (drflac_int16)tempR2; + pOutputSamples[i*8+6] = (drflac_int16)tempL3; + pOutputSamples[i*8+7] = (drflac_int16)tempR3; + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + pOutputSamples[i*2+0] = (drflac_int16)((pInputSamples0U32[i] << shift0) >> 16); + pOutputSamples[i*2+1] = (drflac_int16)((pInputSamples1U32[i] << shift1) >> 16); + } +} + +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + for (i = 0; i < frameCount4; ++i) { + __m128i left = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0); + __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1); + + left = _mm_srai_epi32(left, 16); + right = _mm_srai_epi32(right, 16); + + /* At this point we have results. We can now pack and interleave these into a single __m128i object and then store the in the output buffer. */ + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + pOutputSamples[i*2+0] = (drflac_int16)((pInputSamples0U32[i] << shift0) >> 16); + pOutputSamples[i*2+1] = (drflac_int16)((pInputSamples1U32[i] << shift1) >> 16); + } +} +#endif + +#if defined(DRFLAC_SUPPORT_NEON) +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + int32x4_t shift0_4 = vdupq_n_s32(shift0); + int32x4_t shift1_4 = vdupq_n_s32(shift1); + + for (i = 0; i < frameCount4; ++i) { + int32x4_t left; + int32x4_t right; + + left = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4)); + right = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4)); + + left = vshrq_n_s32(left, 16); + right = vshrq_n_s32(right, 16); + + drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right))); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + pOutputSamples[i*2+0] = (drflac_int16)((pInputSamples0U32[i] << shift0) >> 16); + pOutputSamples[i*2+1] = (drflac_int16)((pInputSamples1U32[i] << shift1) >> 16); + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ +#if defined(DRFLAC_SUPPORT_SSE2) + if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s16__decode_independent_stereo__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#elif defined(DRFLAC_SUPPORT_NEON) + if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s16__decode_independent_stereo__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#endif + { + /* Scalar fallback. */ +#if 0 + drflac_read_pcm_frames_s16__decode_independent_stereo__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#else + drflac_read_pcm_frames_s16__decode_independent_stereo__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#endif + } +} + +DRFLAC_API drflac_uint64 drflac_read_pcm_frames_s16(drflac* pFlac, drflac_uint64 framesToRead, drflac_int16* pBufferOut) +{ + drflac_uint64 framesRead; + drflac_uint32 unusedBitsPerSample; + + if (pFlac == NULL || framesToRead == 0) { + return 0; + } + + if (pBufferOut == NULL) { + return drflac__seek_forward_by_pcm_frames(pFlac, framesToRead); + } + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 32); + unusedBitsPerSample = 32 - pFlac->bitsPerSample; + + framesRead = 0; + while (framesToRead > 0) { + /* If we've run out of samples in this frame, go to the next. */ + if (pFlac->currentFLACFrame.pcmFramesRemaining == 0) { + if (!drflac__read_and_decode_next_flac_frame(pFlac)) { + break; /* Couldn't read the next frame, so just break from the loop and return. */ + } + } else { + unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment); + drflac_uint64 iFirstPCMFrame = pFlac->currentFLACFrame.header.blockSizeInPCMFrames - pFlac->currentFLACFrame.pcmFramesRemaining; + drflac_uint64 frameCountThisIteration = framesToRead; + + if (frameCountThisIteration > pFlac->currentFLACFrame.pcmFramesRemaining) { + frameCountThisIteration = pFlac->currentFLACFrame.pcmFramesRemaining; + } + + if (channelCount == 2) { + const drflac_int32* pDecodedSamples0 = pFlac->currentFLACFrame.subframes[0].pSamplesS32 + iFirstPCMFrame; + const drflac_int32* pDecodedSamples1 = pFlac->currentFLACFrame.subframes[1].pSamplesS32 + iFirstPCMFrame; + + switch (pFlac->currentFLACFrame.header.channelAssignment) + { + case DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE: + { + drflac_read_pcm_frames_s16__decode_left_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); + } break; + + case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE: + { + drflac_read_pcm_frames_s16__decode_right_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); + } break; + + case DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE: + { + drflac_read_pcm_frames_s16__decode_mid_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); + } break; + + case DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT: + default: + { + drflac_read_pcm_frames_s16__decode_independent_stereo(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); + } break; + } + } else { + /* Generic interleaving. */ + drflac_uint64 i; + for (i = 0; i < frameCountThisIteration; ++i) { + unsigned int j; + for (j = 0; j < channelCount; ++j) { + drflac_int32 sampleS32 = (drflac_int32)((drflac_uint32)(pFlac->currentFLACFrame.subframes[j].pSamplesS32[iFirstPCMFrame + i]) << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[j].wastedBitsPerSample)); + pBufferOut[(i*channelCount)+j] = (drflac_int16)(sampleS32 >> 16); + } + } + } + + framesRead += frameCountThisIteration; + pBufferOut += frameCountThisIteration * channelCount; + framesToRead -= frameCountThisIteration; + pFlac->currentPCMFrame += frameCountThisIteration; + pFlac->currentFLACFrame.pcmFramesRemaining -= (drflac_uint32)frameCountThisIteration; + } + } + + return framesRead; +} + + +#if 0 +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_uint64 i; + for (i = 0; i < frameCount; ++i) { + drflac_uint32 left = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + drflac_uint32 side = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); + drflac_uint32 right = left - side; + + pOutputSamples[i*2+0] = (float)((drflac_int32)left / 2147483648.0); + pOutputSamples[i*2+1] = (float)((drflac_int32)right / 2147483648.0); + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + float factor = 1 / 2147483648.0; + + for (i = 0; i < frameCount4; ++i) { + drflac_uint32 left0 = pInputSamples0U32[i*4+0] << shift0; + drflac_uint32 left1 = pInputSamples0U32[i*4+1] << shift0; + drflac_uint32 left2 = pInputSamples0U32[i*4+2] << shift0; + drflac_uint32 left3 = pInputSamples0U32[i*4+3] << shift0; + + drflac_uint32 side0 = pInputSamples1U32[i*4+0] << shift1; + drflac_uint32 side1 = pInputSamples1U32[i*4+1] << shift1; + drflac_uint32 side2 = pInputSamples1U32[i*4+2] << shift1; + drflac_uint32 side3 = pInputSamples1U32[i*4+3] << shift1; + + drflac_uint32 right0 = left0 - side0; + drflac_uint32 right1 = left1 - side1; + drflac_uint32 right2 = left2 - side2; + drflac_uint32 right3 = left3 - side3; + + pOutputSamples[i*8+0] = (drflac_int32)left0 * factor; + pOutputSamples[i*8+1] = (drflac_int32)right0 * factor; + pOutputSamples[i*8+2] = (drflac_int32)left1 * factor; + pOutputSamples[i*8+3] = (drflac_int32)right1 * factor; + pOutputSamples[i*8+4] = (drflac_int32)left2 * factor; + pOutputSamples[i*8+5] = (drflac_int32)right2 * factor; + pOutputSamples[i*8+6] = (drflac_int32)left3 * factor; + pOutputSamples[i*8+7] = (drflac_int32)right3 * factor; + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 left = pInputSamples0U32[i] << shift0; + drflac_uint32 side = pInputSamples1U32[i] << shift1; + drflac_uint32 right = left - side; + + pOutputSamples[i*2+0] = (drflac_int32)left * factor; + pOutputSamples[i*2+1] = (drflac_int32)right * factor; + } +} + +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8; + drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8; + __m128 factor; + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + factor = _mm_set1_ps(1.0f / 8388608.0f); + + for (i = 0; i < frameCount4; ++i) { + __m128i left = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0); + __m128i side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1); + __m128i right = _mm_sub_epi32(left, side); + __m128 leftf = _mm_mul_ps(_mm_cvtepi32_ps(left), factor); + __m128 rightf = _mm_mul_ps(_mm_cvtepi32_ps(right), factor); + + _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf)); + _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 left = pInputSamples0U32[i] << shift0; + drflac_uint32 side = pInputSamples1U32[i] << shift1; + drflac_uint32 right = left - side; + + pOutputSamples[i*2+0] = (drflac_int32)left / 8388608.0f; + pOutputSamples[i*2+1] = (drflac_int32)right / 8388608.0f; + } +} +#endif + +#if defined(DRFLAC_SUPPORT_NEON) +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8; + drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8; + float32x4_t factor4; + int32x4_t shift0_4; + int32x4_t shift1_4; + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + factor4 = vdupq_n_f32(1.0f / 8388608.0f); + shift0_4 = vdupq_n_s32(shift0); + shift1_4 = vdupq_n_s32(shift1); + + for (i = 0; i < frameCount4; ++i) { + uint32x4_t left; + uint32x4_t side; + uint32x4_t right; + float32x4_t leftf; + float32x4_t rightf; + + left = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4); + side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4); + right = vsubq_u32(left, side); + leftf = vmulq_f32(vcvtq_f32_s32(vreinterpretq_s32_u32(left)), factor4); + rightf = vmulq_f32(vcvtq_f32_s32(vreinterpretq_s32_u32(right)), factor4); + + drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 left = pInputSamples0U32[i] << shift0; + drflac_uint32 side = pInputSamples1U32[i] << shift1; + drflac_uint32 right = left - side; + + pOutputSamples[i*2+0] = (drflac_int32)left / 8388608.0f; + pOutputSamples[i*2+1] = (drflac_int32)right / 8388608.0f; + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ +#if defined(DRFLAC_SUPPORT_SSE2) + if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_f32__decode_left_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#elif defined(DRFLAC_SUPPORT_NEON) + if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_f32__decode_left_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#endif + { + /* Scalar fallback. */ +#if 0 + drflac_read_pcm_frames_f32__decode_left_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#else + drflac_read_pcm_frames_f32__decode_left_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#endif + } +} + + +#if 0 +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_uint64 i; + for (i = 0; i < frameCount; ++i) { + drflac_uint32 side = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + drflac_uint32 right = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); + drflac_uint32 left = right + side; + + pOutputSamples[i*2+0] = (float)((drflac_int32)left / 2147483648.0); + pOutputSamples[i*2+1] = (float)((drflac_int32)right / 2147483648.0); + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + float factor = 1 / 2147483648.0; + + for (i = 0; i < frameCount4; ++i) { + drflac_uint32 side0 = pInputSamples0U32[i*4+0] << shift0; + drflac_uint32 side1 = pInputSamples0U32[i*4+1] << shift0; + drflac_uint32 side2 = pInputSamples0U32[i*4+2] << shift0; + drflac_uint32 side3 = pInputSamples0U32[i*4+3] << shift0; + + drflac_uint32 right0 = pInputSamples1U32[i*4+0] << shift1; + drflac_uint32 right1 = pInputSamples1U32[i*4+1] << shift1; + drflac_uint32 right2 = pInputSamples1U32[i*4+2] << shift1; + drflac_uint32 right3 = pInputSamples1U32[i*4+3] << shift1; + + drflac_uint32 left0 = right0 + side0; + drflac_uint32 left1 = right1 + side1; + drflac_uint32 left2 = right2 + side2; + drflac_uint32 left3 = right3 + side3; + + pOutputSamples[i*8+0] = (drflac_int32)left0 * factor; + pOutputSamples[i*8+1] = (drflac_int32)right0 * factor; + pOutputSamples[i*8+2] = (drflac_int32)left1 * factor; + pOutputSamples[i*8+3] = (drflac_int32)right1 * factor; + pOutputSamples[i*8+4] = (drflac_int32)left2 * factor; + pOutputSamples[i*8+5] = (drflac_int32)right2 * factor; + pOutputSamples[i*8+6] = (drflac_int32)left3 * factor; + pOutputSamples[i*8+7] = (drflac_int32)right3 * factor; + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 side = pInputSamples0U32[i] << shift0; + drflac_uint32 right = pInputSamples1U32[i] << shift1; + drflac_uint32 left = right + side; + + pOutputSamples[i*2+0] = (drflac_int32)left * factor; + pOutputSamples[i*2+1] = (drflac_int32)right * factor; + } +} + +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8; + drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8; + __m128 factor; + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + factor = _mm_set1_ps(1.0f / 8388608.0f); + + for (i = 0; i < frameCount4; ++i) { + __m128i side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0); + __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1); + __m128i left = _mm_add_epi32(right, side); + __m128 leftf = _mm_mul_ps(_mm_cvtepi32_ps(left), factor); + __m128 rightf = _mm_mul_ps(_mm_cvtepi32_ps(right), factor); + + _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf)); + _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 side = pInputSamples0U32[i] << shift0; + drflac_uint32 right = pInputSamples1U32[i] << shift1; + drflac_uint32 left = right + side; + + pOutputSamples[i*2+0] = (drflac_int32)left / 8388608.0f; + pOutputSamples[i*2+1] = (drflac_int32)right / 8388608.0f; + } +} +#endif + +#if defined(DRFLAC_SUPPORT_NEON) +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8; + drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8; + float32x4_t factor4; + int32x4_t shift0_4; + int32x4_t shift1_4; + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + factor4 = vdupq_n_f32(1.0f / 8388608.0f); + shift0_4 = vdupq_n_s32(shift0); + shift1_4 = vdupq_n_s32(shift1); + + for (i = 0; i < frameCount4; ++i) { + uint32x4_t side; + uint32x4_t right; + uint32x4_t left; + float32x4_t leftf; + float32x4_t rightf; + + side = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4); + right = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4); + left = vaddq_u32(right, side); + leftf = vmulq_f32(vcvtq_f32_s32(vreinterpretq_s32_u32(left)), factor4); + rightf = vmulq_f32(vcvtq_f32_s32(vreinterpretq_s32_u32(right)), factor4); + + drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 side = pInputSamples0U32[i] << shift0; + drflac_uint32 right = pInputSamples1U32[i] << shift1; + drflac_uint32 left = right + side; + + pOutputSamples[i*2+0] = (drflac_int32)left / 8388608.0f; + pOutputSamples[i*2+1] = (drflac_int32)right / 8388608.0f; + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ +#if defined(DRFLAC_SUPPORT_SSE2) + if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_f32__decode_right_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#elif defined(DRFLAC_SUPPORT_NEON) + if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_f32__decode_right_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#endif + { + /* Scalar fallback. */ +#if 0 + drflac_read_pcm_frames_f32__decode_right_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#else + drflac_read_pcm_frames_f32__decode_right_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#endif + } +} + + +#if 0 +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + for (drflac_uint64 i = 0; i < frameCount; ++i) { + drflac_uint32 mid = (drflac_uint32)pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 side = (drflac_uint32)pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid = (mid << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (float)((((drflac_int32)(mid + side) >> 1) << (unusedBitsPerSample)) / 2147483648.0); + pOutputSamples[i*2+1] = (float)((((drflac_int32)(mid - side) >> 1) << (unusedBitsPerSample)) / 2147483648.0); + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift = unusedBitsPerSample; + float factor = 1 / 2147483648.0; + + if (shift > 0) { + shift -= 1; + for (i = 0; i < frameCount4; ++i) { + drflac_uint32 temp0L; + drflac_uint32 temp1L; + drflac_uint32 temp2L; + drflac_uint32 temp3L; + drflac_uint32 temp0R; + drflac_uint32 temp1R; + drflac_uint32 temp2R; + drflac_uint32 temp3R; + + drflac_uint32 mid0 = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 mid1 = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 mid2 = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 mid3 = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + + drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid0 = (mid0 << 1) | (side0 & 0x01); + mid1 = (mid1 << 1) | (side1 & 0x01); + mid2 = (mid2 << 1) | (side2 & 0x01); + mid3 = (mid3 << 1) | (side3 & 0x01); + + temp0L = (mid0 + side0) << shift; + temp1L = (mid1 + side1) << shift; + temp2L = (mid2 + side2) << shift; + temp3L = (mid3 + side3) << shift; + + temp0R = (mid0 - side0) << shift; + temp1R = (mid1 - side1) << shift; + temp2R = (mid2 - side2) << shift; + temp3R = (mid3 - side3) << shift; + + pOutputSamples[i*8+0] = (drflac_int32)temp0L * factor; + pOutputSamples[i*8+1] = (drflac_int32)temp0R * factor; + pOutputSamples[i*8+2] = (drflac_int32)temp1L * factor; + pOutputSamples[i*8+3] = (drflac_int32)temp1R * factor; + pOutputSamples[i*8+4] = (drflac_int32)temp2L * factor; + pOutputSamples[i*8+5] = (drflac_int32)temp2R * factor; + pOutputSamples[i*8+6] = (drflac_int32)temp3L * factor; + pOutputSamples[i*8+7] = (drflac_int32)temp3R * factor; + } + } else { + for (i = 0; i < frameCount4; ++i) { + drflac_uint32 temp0L; + drflac_uint32 temp1L; + drflac_uint32 temp2L; + drflac_uint32 temp3L; + drflac_uint32 temp0R; + drflac_uint32 temp1R; + drflac_uint32 temp2R; + drflac_uint32 temp3R; + + drflac_uint32 mid0 = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 mid1 = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 mid2 = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 mid3 = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + + drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid0 = (mid0 << 1) | (side0 & 0x01); + mid1 = (mid1 << 1) | (side1 & 0x01); + mid2 = (mid2 << 1) | (side2 & 0x01); + mid3 = (mid3 << 1) | (side3 & 0x01); + + temp0L = (drflac_uint32)((drflac_int32)(mid0 + side0) >> 1); + temp1L = (drflac_uint32)((drflac_int32)(mid1 + side1) >> 1); + temp2L = (drflac_uint32)((drflac_int32)(mid2 + side2) >> 1); + temp3L = (drflac_uint32)((drflac_int32)(mid3 + side3) >> 1); + + temp0R = (drflac_uint32)((drflac_int32)(mid0 - side0) >> 1); + temp1R = (drflac_uint32)((drflac_int32)(mid1 - side1) >> 1); + temp2R = (drflac_uint32)((drflac_int32)(mid2 - side2) >> 1); + temp3R = (drflac_uint32)((drflac_int32)(mid3 - side3) >> 1); + + pOutputSamples[i*8+0] = (drflac_int32)temp0L * factor; + pOutputSamples[i*8+1] = (drflac_int32)temp0R * factor; + pOutputSamples[i*8+2] = (drflac_int32)temp1L * factor; + pOutputSamples[i*8+3] = (drflac_int32)temp1R * factor; + pOutputSamples[i*8+4] = (drflac_int32)temp2L * factor; + pOutputSamples[i*8+5] = (drflac_int32)temp2R * factor; + pOutputSamples[i*8+6] = (drflac_int32)temp3L * factor; + pOutputSamples[i*8+7] = (drflac_int32)temp3R * factor; + } + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 mid = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid = (mid << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid + side) >> 1) << unusedBitsPerSample) * factor; + pOutputSamples[i*2+1] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid - side) >> 1) << unusedBitsPerSample) * factor; + } +} + +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift = unusedBitsPerSample - 8; + float factor; + __m128 factor128; + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + factor = 1.0f / 8388608.0f; + factor128 = _mm_set1_ps(factor); + + if (shift == 0) { + for (i = 0; i < frameCount4; ++i) { + __m128i mid; + __m128i side; + __m128i tempL; + __m128i tempR; + __m128 leftf; + __m128 rightf; + + mid = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); + + mid = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01))); + + tempL = _mm_srai_epi32(_mm_add_epi32(mid, side), 1); + tempR = _mm_srai_epi32(_mm_sub_epi32(mid, side), 1); + + leftf = _mm_mul_ps(_mm_cvtepi32_ps(tempL), factor128); + rightf = _mm_mul_ps(_mm_cvtepi32_ps(tempR), factor128); + + _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf)); + _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 mid = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid = (mid << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = ((drflac_int32)(mid + side) >> 1) * factor; + pOutputSamples[i*2+1] = ((drflac_int32)(mid - side) >> 1) * factor; + } + } else { + shift -= 1; + for (i = 0; i < frameCount4; ++i) { + __m128i mid; + __m128i side; + __m128i tempL; + __m128i tempR; + __m128 leftf; + __m128 rightf; + + mid = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); + + mid = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01))); + + tempL = _mm_slli_epi32(_mm_add_epi32(mid, side), shift); + tempR = _mm_slli_epi32(_mm_sub_epi32(mid, side), shift); + + leftf = _mm_mul_ps(_mm_cvtepi32_ps(tempL), factor128); + rightf = _mm_mul_ps(_mm_cvtepi32_ps(tempR), factor128); + + _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf)); + _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 mid = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid = (mid << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (drflac_int32)((mid + side) << shift) * factor; + pOutputSamples[i*2+1] = (drflac_int32)((mid - side) << shift) * factor; + } + } +} +#endif + +#if defined(DRFLAC_SUPPORT_NEON) +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift = unusedBitsPerSample - 8; + float factor; + float32x4_t factor4; + int32x4_t shift4; + int32x4_t wbps0_4; /* Wasted Bits Per Sample */ + int32x4_t wbps1_4; /* Wasted Bits Per Sample */ + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + factor = 1.0f / 8388608.0f; + factor4 = vdupq_n_f32(factor); + wbps0_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + wbps1_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); + + if (shift == 0) { + for (i = 0; i < frameCount4; ++i) { + int32x4_t lefti; + int32x4_t righti; + float32x4_t leftf; + float32x4_t rightf; + + uint32x4_t mid = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbps0_4); + uint32x4_t side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbps1_4); + + mid = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, vdupq_n_u32(1))); + + lefti = vshrq_n_s32(vreinterpretq_s32_u32(vaddq_u32(mid, side)), 1); + righti = vshrq_n_s32(vreinterpretq_s32_u32(vsubq_u32(mid, side)), 1); + + leftf = vmulq_f32(vcvtq_f32_s32(lefti), factor4); + rightf = vmulq_f32(vcvtq_f32_s32(righti), factor4); + + drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 mid = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid = (mid << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = ((drflac_int32)(mid + side) >> 1) * factor; + pOutputSamples[i*2+1] = ((drflac_int32)(mid - side) >> 1) * factor; + } + } else { + shift -= 1; + shift4 = vdupq_n_s32(shift); + for (i = 0; i < frameCount4; ++i) { + uint32x4_t mid; + uint32x4_t side; + int32x4_t lefti; + int32x4_t righti; + float32x4_t leftf; + float32x4_t rightf; + + mid = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbps0_4); + side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbps1_4); + + mid = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, vdupq_n_u32(1))); + + lefti = vreinterpretq_s32_u32(vshlq_u32(vaddq_u32(mid, side), shift4)); + righti = vreinterpretq_s32_u32(vshlq_u32(vsubq_u32(mid, side), shift4)); + + leftf = vmulq_f32(vcvtq_f32_s32(lefti), factor4); + rightf = vmulq_f32(vcvtq_f32_s32(righti), factor4); + + drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 mid = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid = (mid << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (drflac_int32)((mid + side) << shift) * factor; + pOutputSamples[i*2+1] = (drflac_int32)((mid - side) << shift) * factor; + } + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ +#if defined(DRFLAC_SUPPORT_SSE2) + if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_f32__decode_mid_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#elif defined(DRFLAC_SUPPORT_NEON) + if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_f32__decode_mid_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#endif + { + /* Scalar fallback. */ +#if 0 + drflac_read_pcm_frames_f32__decode_mid_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#else + drflac_read_pcm_frames_f32__decode_mid_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#endif + } +} + +#if 0 +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + for (drflac_uint64 i = 0; i < frameCount; ++i) { + pOutputSamples[i*2+0] = (float)((drflac_int32)((drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample)) / 2147483648.0); + pOutputSamples[i*2+1] = (float)((drflac_int32)((drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample)) / 2147483648.0); + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + float factor = 1 / 2147483648.0; + + for (i = 0; i < frameCount4; ++i) { + drflac_uint32 tempL0 = pInputSamples0U32[i*4+0] << shift0; + drflac_uint32 tempL1 = pInputSamples0U32[i*4+1] << shift0; + drflac_uint32 tempL2 = pInputSamples0U32[i*4+2] << shift0; + drflac_uint32 tempL3 = pInputSamples0U32[i*4+3] << shift0; + + drflac_uint32 tempR0 = pInputSamples1U32[i*4+0] << shift1; + drflac_uint32 tempR1 = pInputSamples1U32[i*4+1] << shift1; + drflac_uint32 tempR2 = pInputSamples1U32[i*4+2] << shift1; + drflac_uint32 tempR3 = pInputSamples1U32[i*4+3] << shift1; + + pOutputSamples[i*8+0] = (drflac_int32)tempL0 * factor; + pOutputSamples[i*8+1] = (drflac_int32)tempR0 * factor; + pOutputSamples[i*8+2] = (drflac_int32)tempL1 * factor; + pOutputSamples[i*8+3] = (drflac_int32)tempR1 * factor; + pOutputSamples[i*8+4] = (drflac_int32)tempL2 * factor; + pOutputSamples[i*8+5] = (drflac_int32)tempR2 * factor; + pOutputSamples[i*8+6] = (drflac_int32)tempL3 * factor; + pOutputSamples[i*8+7] = (drflac_int32)tempR3 * factor; + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0) * factor; + pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1) * factor; + } +} + +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8; + drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8; + + float factor = 1.0f / 8388608.0f; + __m128 factor128 = _mm_set1_ps(factor); + + for (i = 0; i < frameCount4; ++i) { + __m128i lefti; + __m128i righti; + __m128 leftf; + __m128 rightf; + + lefti = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0); + righti = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1); + + leftf = _mm_mul_ps(_mm_cvtepi32_ps(lefti), factor128); + rightf = _mm_mul_ps(_mm_cvtepi32_ps(righti), factor128); + + _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf)); + _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0) * factor; + pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1) * factor; + } +} +#endif + +#if defined(DRFLAC_SUPPORT_NEON) +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8; + drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8; + + float factor = 1.0f / 8388608.0f; + float32x4_t factor4 = vdupq_n_f32(factor); + int32x4_t shift0_4 = vdupq_n_s32(shift0); + int32x4_t shift1_4 = vdupq_n_s32(shift1); + + for (i = 0; i < frameCount4; ++i) { + int32x4_t lefti; + int32x4_t righti; + float32x4_t leftf; + float32x4_t rightf; + + lefti = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4)); + righti = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4)); + + leftf = vmulq_f32(vcvtq_f32_s32(lefti), factor4); + rightf = vmulq_f32(vcvtq_f32_s32(righti), factor4); + + drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0) * factor; + pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1) * factor; + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ +#if defined(DRFLAC_SUPPORT_SSE2) + if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_f32__decode_independent_stereo__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#elif defined(DRFLAC_SUPPORT_NEON) + if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_f32__decode_independent_stereo__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#endif + { + /* Scalar fallback. */ +#if 0 + drflac_read_pcm_frames_f32__decode_independent_stereo__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#else + drflac_read_pcm_frames_f32__decode_independent_stereo__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#endif + } +} + +DRFLAC_API drflac_uint64 drflac_read_pcm_frames_f32(drflac* pFlac, drflac_uint64 framesToRead, float* pBufferOut) +{ + drflac_uint64 framesRead; + drflac_uint32 unusedBitsPerSample; + + if (pFlac == NULL || framesToRead == 0) { + return 0; + } + + if (pBufferOut == NULL) { + return drflac__seek_forward_by_pcm_frames(pFlac, framesToRead); + } + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 32); + unusedBitsPerSample = 32 - pFlac->bitsPerSample; + + framesRead = 0; + while (framesToRead > 0) { + /* If we've run out of samples in this frame, go to the next. */ + if (pFlac->currentFLACFrame.pcmFramesRemaining == 0) { + if (!drflac__read_and_decode_next_flac_frame(pFlac)) { + break; /* Couldn't read the next frame, so just break from the loop and return. */ + } + } else { + unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment); + drflac_uint64 iFirstPCMFrame = pFlac->currentFLACFrame.header.blockSizeInPCMFrames - pFlac->currentFLACFrame.pcmFramesRemaining; + drflac_uint64 frameCountThisIteration = framesToRead; + + if (frameCountThisIteration > pFlac->currentFLACFrame.pcmFramesRemaining) { + frameCountThisIteration = pFlac->currentFLACFrame.pcmFramesRemaining; + } + + if (channelCount == 2) { + const drflac_int32* pDecodedSamples0 = pFlac->currentFLACFrame.subframes[0].pSamplesS32 + iFirstPCMFrame; + const drflac_int32* pDecodedSamples1 = pFlac->currentFLACFrame.subframes[1].pSamplesS32 + iFirstPCMFrame; + + switch (pFlac->currentFLACFrame.header.channelAssignment) + { + case DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE: + { + drflac_read_pcm_frames_f32__decode_left_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); + } break; + + case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE: + { + drflac_read_pcm_frames_f32__decode_right_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); + } break; + + case DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE: + { + drflac_read_pcm_frames_f32__decode_mid_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); + } break; + + case DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT: + default: + { + drflac_read_pcm_frames_f32__decode_independent_stereo(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); + } break; + } + } else { + /* Generic interleaving. */ + drflac_uint64 i; + for (i = 0; i < frameCountThisIteration; ++i) { + unsigned int j; + for (j = 0; j < channelCount; ++j) { + drflac_int32 sampleS32 = (drflac_int32)((drflac_uint32)(pFlac->currentFLACFrame.subframes[j].pSamplesS32[iFirstPCMFrame + i]) << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[j].wastedBitsPerSample)); + pBufferOut[(i*channelCount)+j] = (float)(sampleS32 / 2147483648.0); + } + } + } + + framesRead += frameCountThisIteration; + pBufferOut += frameCountThisIteration * channelCount; + framesToRead -= frameCountThisIteration; + pFlac->currentPCMFrame += frameCountThisIteration; + pFlac->currentFLACFrame.pcmFramesRemaining -= (unsigned int)frameCountThisIteration; + } + } + + return framesRead; +} + + +DRFLAC_API drflac_bool32 drflac_seek_to_pcm_frame(drflac* pFlac, drflac_uint64 pcmFrameIndex) +{ + if (pFlac == NULL) { + return DRFLAC_FALSE; + } + + /* Don't do anything if we're already on the seek point. */ + if (pFlac->currentPCMFrame == pcmFrameIndex) { + return DRFLAC_TRUE; + } + + /* + If we don't know where the first frame begins then we can't seek. This will happen when the STREAMINFO block was not present + when the decoder was opened. + */ + if (pFlac->firstFLACFramePosInBytes == 0) { + return DRFLAC_FALSE; + } + + if (pcmFrameIndex == 0) { + pFlac->currentPCMFrame = 0; + return drflac__seek_to_first_frame(pFlac); + } else { + drflac_bool32 wasSuccessful = DRFLAC_FALSE; + drflac_uint64 originalPCMFrame = pFlac->currentPCMFrame; + + /* Clamp the sample to the end. */ + if (pcmFrameIndex > pFlac->totalPCMFrameCount) { + pcmFrameIndex = pFlac->totalPCMFrameCount; + } + + /* If the target sample and the current sample are in the same frame we just move the position forward. */ + if (pcmFrameIndex > pFlac->currentPCMFrame) { + /* Forward. */ + drflac_uint32 offset = (drflac_uint32)(pcmFrameIndex - pFlac->currentPCMFrame); + if (pFlac->currentFLACFrame.pcmFramesRemaining > offset) { + pFlac->currentFLACFrame.pcmFramesRemaining -= offset; + pFlac->currentPCMFrame = pcmFrameIndex; + return DRFLAC_TRUE; + } + } else { + /* Backward. */ + drflac_uint32 offsetAbs = (drflac_uint32)(pFlac->currentPCMFrame - pcmFrameIndex); + drflac_uint32 currentFLACFramePCMFrameCount = pFlac->currentFLACFrame.header.blockSizeInPCMFrames; + drflac_uint32 currentFLACFramePCMFramesConsumed = currentFLACFramePCMFrameCount - pFlac->currentFLACFrame.pcmFramesRemaining; + if (currentFLACFramePCMFramesConsumed > offsetAbs) { + pFlac->currentFLACFrame.pcmFramesRemaining += offsetAbs; + pFlac->currentPCMFrame = pcmFrameIndex; + return DRFLAC_TRUE; + } + } + + /* + Different techniques depending on encapsulation. Using the native FLAC seektable with Ogg encapsulation is a bit awkward so + we'll instead use Ogg's natural seeking facility. + */ +#ifndef DR_FLAC_NO_OGG + if (pFlac->container == drflac_container_ogg) + { + wasSuccessful = drflac_ogg__seek_to_pcm_frame(pFlac, pcmFrameIndex); + } + else +#endif + { + /* First try seeking via the seek table. If this fails, fall back to a brute force seek which is much slower. */ + if (/*!wasSuccessful && */!pFlac->_noSeekTableSeek) { + wasSuccessful = drflac__seek_to_pcm_frame__seek_table(pFlac, pcmFrameIndex); + } + +#if !defined(DR_FLAC_NO_CRC) + /* Fall back to binary search if seek table seeking fails. This requires the length of the stream to be known. */ + if (!wasSuccessful && !pFlac->_noBinarySearchSeek && pFlac->totalPCMFrameCount > 0) { + wasSuccessful = drflac__seek_to_pcm_frame__binary_search(pFlac, pcmFrameIndex); + } +#endif + + /* Fall back to brute force if all else fails. */ + if (!wasSuccessful && !pFlac->_noBruteForceSeek) { + wasSuccessful = drflac__seek_to_pcm_frame__brute_force(pFlac, pcmFrameIndex); + } + } + + if (wasSuccessful) { + pFlac->currentPCMFrame = pcmFrameIndex; + } else { + /* Seek failed. Try putting the decoder back to it's original state. */ + if (drflac_seek_to_pcm_frame(pFlac, originalPCMFrame) == DRFLAC_FALSE) { + /* Failed to seek back to the original PCM frame. Fall back to 0. */ + drflac_seek_to_pcm_frame(pFlac, 0); + } + } + + return wasSuccessful; + } +} + + + +/* High Level APIs */ + +/* SIZE_MAX */ +#if defined(SIZE_MAX) + #define DRFLAC_SIZE_MAX SIZE_MAX +#else + #if defined(DRFLAC_64BIT) + #define DRFLAC_SIZE_MAX ((drflac_uint64)0xFFFFFFFFFFFFFFFF) + #else + #define DRFLAC_SIZE_MAX 0xFFFFFFFF + #endif +#endif +/* End SIZE_MAX */ + + +/* Using a macro as the definition of the drflac__full_decode_and_close_*() API family. Sue me. */ +#define DRFLAC_DEFINE_FULL_READ_AND_CLOSE(extension, type) \ +static type* drflac__full_read_and_close_ ## extension (drflac* pFlac, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut)\ +{ \ + type* pSampleData = NULL; \ + drflac_uint64 totalPCMFrameCount; \ + \ + DRFLAC_ASSERT(pFlac != NULL); \ + \ + totalPCMFrameCount = pFlac->totalPCMFrameCount; \ + \ + if (totalPCMFrameCount == 0) { \ + type buffer[4096]; \ + drflac_uint64 pcmFramesRead; \ + size_t sampleDataBufferSize = sizeof(buffer); \ + \ + pSampleData = (type*)drflac__malloc_from_callbacks(sampleDataBufferSize, &pFlac->allocationCallbacks); \ + if (pSampleData == NULL) { \ + goto on_error; \ + } \ + \ + while ((pcmFramesRead = (drflac_uint64)drflac_read_pcm_frames_##extension(pFlac, sizeof(buffer)/sizeof(buffer[0])/pFlac->channels, buffer)) > 0) { \ + if (((totalPCMFrameCount + pcmFramesRead) * pFlac->channels * sizeof(type)) > sampleDataBufferSize) { \ + type* pNewSampleData; \ + size_t newSampleDataBufferSize; \ + \ + newSampleDataBufferSize = sampleDataBufferSize * 2; \ + pNewSampleData = (type*)drflac__realloc_from_callbacks(pSampleData, newSampleDataBufferSize, sampleDataBufferSize, &pFlac->allocationCallbacks); \ + if (pNewSampleData == NULL) { \ + drflac__free_from_callbacks(pSampleData, &pFlac->allocationCallbacks); \ + goto on_error; \ + } \ + \ + sampleDataBufferSize = newSampleDataBufferSize; \ + pSampleData = pNewSampleData; \ + } \ + \ + DRFLAC_COPY_MEMORY(pSampleData + (totalPCMFrameCount*pFlac->channels), buffer, (size_t)(pcmFramesRead*pFlac->channels*sizeof(type))); \ + totalPCMFrameCount += pcmFramesRead; \ + } \ + \ + /* At this point everything should be decoded, but we just want to fill the unused part buffer with silence - need to \ + protect those ears from random noise! */ \ + DRFLAC_ZERO_MEMORY(pSampleData + (totalPCMFrameCount*pFlac->channels), (size_t)(sampleDataBufferSize - totalPCMFrameCount*pFlac->channels*sizeof(type))); \ + } else { \ + drflac_uint64 dataSize = totalPCMFrameCount*pFlac->channels*sizeof(type); \ + if (dataSize > (drflac_uint64)DRFLAC_SIZE_MAX) { \ + goto on_error; /* The decoded data is too big. */ \ + } \ + \ + pSampleData = (type*)drflac__malloc_from_callbacks((size_t)dataSize, &pFlac->allocationCallbacks); /* <-- Safe cast as per the check above. */ \ + if (pSampleData == NULL) { \ + goto on_error; \ + } \ + \ + totalPCMFrameCount = drflac_read_pcm_frames_##extension(pFlac, pFlac->totalPCMFrameCount, pSampleData); \ + } \ + \ + if (sampleRateOut) *sampleRateOut = pFlac->sampleRate; \ + if (channelsOut) *channelsOut = pFlac->channels; \ + if (totalPCMFrameCountOut) *totalPCMFrameCountOut = totalPCMFrameCount; \ + \ + drflac_close(pFlac); \ + return pSampleData; \ + \ +on_error: \ + drflac_close(pFlac); \ + return NULL; \ +} + +DRFLAC_DEFINE_FULL_READ_AND_CLOSE(s32, drflac_int32) +DRFLAC_DEFINE_FULL_READ_AND_CLOSE(s16, drflac_int16) +DRFLAC_DEFINE_FULL_READ_AND_CLOSE(f32, float) + +DRFLAC_API drflac_int32* drflac_open_and_read_pcm_frames_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_tell_proc onTell, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + drflac* pFlac; + + if (channelsOut) { + *channelsOut = 0; + } + if (sampleRateOut) { + *sampleRateOut = 0; + } + if (totalPCMFrameCountOut) { + *totalPCMFrameCountOut = 0; + } + + pFlac = drflac_open(onRead, onSeek, onTell, pUserData, pAllocationCallbacks); + if (pFlac == NULL) { + return NULL; + } + + return drflac__full_read_and_close_s32(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut); +} + +DRFLAC_API drflac_int16* drflac_open_and_read_pcm_frames_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_tell_proc onTell, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + drflac* pFlac; + + if (channelsOut) { + *channelsOut = 0; + } + if (sampleRateOut) { + *sampleRateOut = 0; + } + if (totalPCMFrameCountOut) { + *totalPCMFrameCountOut = 0; + } + + pFlac = drflac_open(onRead, onSeek, onTell, pUserData, pAllocationCallbacks); + if (pFlac == NULL) { + return NULL; + } + + return drflac__full_read_and_close_s16(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut); +} + +DRFLAC_API float* drflac_open_and_read_pcm_frames_f32(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_tell_proc onTell, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + drflac* pFlac; + + if (channelsOut) { + *channelsOut = 0; + } + if (sampleRateOut) { + *sampleRateOut = 0; + } + if (totalPCMFrameCountOut) { + *totalPCMFrameCountOut = 0; + } + + pFlac = drflac_open(onRead, onSeek, onTell, pUserData, pAllocationCallbacks); + if (pFlac == NULL) { + return NULL; + } + + return drflac__full_read_and_close_f32(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut); +} + +#ifndef DR_FLAC_NO_STDIO +DRFLAC_API drflac_int32* drflac_open_file_and_read_pcm_frames_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + drflac* pFlac; + + if (sampleRate) { + *sampleRate = 0; + } + if (channels) { + *channels = 0; + } + if (totalPCMFrameCount) { + *totalPCMFrameCount = 0; + } + + pFlac = drflac_open_file(filename, pAllocationCallbacks); + if (pFlac == NULL) { + return NULL; + } + + return drflac__full_read_and_close_s32(pFlac, channels, sampleRate, totalPCMFrameCount); +} + +DRFLAC_API drflac_int16* drflac_open_file_and_read_pcm_frames_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + drflac* pFlac; + + if (sampleRate) { + *sampleRate = 0; + } + if (channels) { + *channels = 0; + } + if (totalPCMFrameCount) { + *totalPCMFrameCount = 0; + } + + pFlac = drflac_open_file(filename, pAllocationCallbacks); + if (pFlac == NULL) { + return NULL; + } + + return drflac__full_read_and_close_s16(pFlac, channels, sampleRate, totalPCMFrameCount); +} + +DRFLAC_API float* drflac_open_file_and_read_pcm_frames_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + drflac* pFlac; + + if (sampleRate) { + *sampleRate = 0; + } + if (channels) { + *channels = 0; + } + if (totalPCMFrameCount) { + *totalPCMFrameCount = 0; + } + + pFlac = drflac_open_file(filename, pAllocationCallbacks); + if (pFlac == NULL) { + return NULL; + } + + return drflac__full_read_and_close_f32(pFlac, channels, sampleRate, totalPCMFrameCount); +} +#endif + +DRFLAC_API drflac_int32* drflac_open_memory_and_read_pcm_frames_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + drflac* pFlac; + + if (sampleRate) { + *sampleRate = 0; + } + if (channels) { + *channels = 0; + } + if (totalPCMFrameCount) { + *totalPCMFrameCount = 0; + } + + pFlac = drflac_open_memory(data, dataSize, pAllocationCallbacks); + if (pFlac == NULL) { + return NULL; + } + + return drflac__full_read_and_close_s32(pFlac, channels, sampleRate, totalPCMFrameCount); +} + +DRFLAC_API drflac_int16* drflac_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + drflac* pFlac; + + if (sampleRate) { + *sampleRate = 0; + } + if (channels) { + *channels = 0; + } + if (totalPCMFrameCount) { + *totalPCMFrameCount = 0; + } + + pFlac = drflac_open_memory(data, dataSize, pAllocationCallbacks); + if (pFlac == NULL) { + return NULL; + } + + return drflac__full_read_and_close_s16(pFlac, channels, sampleRate, totalPCMFrameCount); +} + +DRFLAC_API float* drflac_open_memory_and_read_pcm_frames_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + drflac* pFlac; + + if (sampleRate) { + *sampleRate = 0; + } + if (channels) { + *channels = 0; + } + if (totalPCMFrameCount) { + *totalPCMFrameCount = 0; + } + + pFlac = drflac_open_memory(data, dataSize, pAllocationCallbacks); + if (pFlac == NULL) { + return NULL; + } + + return drflac__full_read_and_close_f32(pFlac, channels, sampleRate, totalPCMFrameCount); +} + + +DRFLAC_API void drflac_free(void* p, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + if (pAllocationCallbacks != NULL) { + drflac__free_from_callbacks(p, pAllocationCallbacks); + } else { + drflac__free_default(p, NULL); + } +} + + + + +DRFLAC_API void drflac_init_vorbis_comment_iterator(drflac_vorbis_comment_iterator* pIter, drflac_uint32 commentCount, const void* pComments) +{ + if (pIter == NULL) { + return; + } + + pIter->countRemaining = commentCount; + pIter->pRunningData = (const char*)pComments; +} + +DRFLAC_API const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator* pIter, drflac_uint32* pCommentLengthOut) +{ + drflac_int32 length; + const char* pComment; + + /* Safety. */ + if (pCommentLengthOut) { + *pCommentLengthOut = 0; + } + + if (pIter == NULL || pIter->countRemaining == 0 || pIter->pRunningData == NULL) { + return NULL; + } + + length = drflac__le2host_32_ptr_unaligned(pIter->pRunningData); + pIter->pRunningData += 4; + + pComment = pIter->pRunningData; + pIter->pRunningData += length; + pIter->countRemaining -= 1; + + if (pCommentLengthOut) { + *pCommentLengthOut = length; + } + + return pComment; +} + + + + +DRFLAC_API void drflac_init_cuesheet_track_iterator(drflac_cuesheet_track_iterator* pIter, drflac_uint32 trackCount, const void* pTrackData) +{ + if (pIter == NULL) { + return; + } + + pIter->countRemaining = trackCount; + pIter->pRunningData = (const char*)pTrackData; +} + +DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterator* pIter, drflac_cuesheet_track* pCuesheetTrack) +{ + drflac_cuesheet_track cuesheetTrack; + const char* pRunningData; + drflac_uint64 offsetHi; + drflac_uint64 offsetLo; + + if (pIter == NULL || pIter->countRemaining == 0 || pIter->pRunningData == NULL) { + return DRFLAC_FALSE; + } + + pRunningData = pIter->pRunningData; + + offsetHi = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; + offsetLo = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; + cuesheetTrack.offset = offsetLo | (offsetHi << 32); + cuesheetTrack.trackNumber = pRunningData[0]; pRunningData += 1; + DRFLAC_COPY_MEMORY(cuesheetTrack.ISRC, pRunningData, sizeof(cuesheetTrack.ISRC)); pRunningData += 12; + cuesheetTrack.isAudio = (pRunningData[0] & 0x80) != 0; + cuesheetTrack.preEmphasis = (pRunningData[0] & 0x40) != 0; pRunningData += 14; + cuesheetTrack.indexCount = pRunningData[0]; pRunningData += 1; + cuesheetTrack.pIndexPoints = (const drflac_cuesheet_track_index*)pRunningData; pRunningData += cuesheetTrack.indexCount * sizeof(drflac_cuesheet_track_index); + + pIter->pRunningData = pRunningData; + pIter->countRemaining -= 1; + + if (pCuesheetTrack) { + *pCuesheetTrack = cuesheetTrack; + } + + return DRFLAC_TRUE; +} + +#if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))) + #pragma GCC diagnostic pop +#endif +#endif /* dr_flac_c */ +#endif /* DR_FLAC_IMPLEMENTATION */ + + +/* +REVISION HISTORY +================ +v0.13.2 - TBD + - Improve robustness of the parsing of picture metadata to improve support for memory constrained embedded devices. + - Fix a warning about an assigned by unused variable. + +v0.13.1 - 2025-09-10 + - Fix an error with the NXDK build. + +v0.13.0 - 2025-07-23 + - API CHANGE: Seek origin enums have been renamed to match the naming convention used by other dr_libs libraries: + - drflac_seek_origin_start -> DRFLAC_SEEK_SET + - drflac_seek_origin_current -> DRFLAC_SEEK_CUR + - DRFLAC_SEEK_END (new) + - API CHANGE: A new seek origin has been added to allow seeking from the end of the file. If you implement your own `onSeek` callback, you should now detect and handle `DRFLAC_SEEK_END`. If seeking to the end is not supported, return `DRFLAC_FALSE`. If you only use `*_open_file()` or `*_open_memory()`, you need not change anything. + - API CHANGE: An `onTell` callback has been added to the following functions: + - drflac_open() + - drflac_open_relaxed() + - drflac_open_with_metadata() + - drflac_open_with_metadata_relaxed() + - drflac_open_and_read_pcm_frames_s32() + - drflac_open_and_read_pcm_frames_s16() + - drflac_open_and_read_pcm_frames_f32() + - Fix compilation for AIX OS. + +v0.12.43 - 2024-12-17 + - Fix a possible buffer overflow during decoding. + - Improve detection of ARM64EC + +v0.12.42 - 2023-11-02 + - Fix build for ARMv6-M. + - Fix a compilation warning with GCC. + +v0.12.41 - 2023-06-17 + - Fix an incorrect date in revision history. No functional change. + +v0.12.40 - 2023-05-22 + - Minor code restructure. No functional change. + +v0.12.39 - 2022-09-17 + - Fix compilation with DJGPP. + - Fix compilation error with Visual Studio 2019 and the ARM build. + - Fix an error with SSE 4.1 detection. + - Add support for disabling wchar_t with DR_WAV_NO_WCHAR. + - Improve compatibility with compilers which lack support for explicit struct packing. + - Improve compatibility with low-end and embedded hardware by reducing the amount of stack + allocation when loading an Ogg encapsulated file. + +v0.12.38 - 2022-04-10 + - Fix compilation error on older versions of GCC. + +v0.12.37 - 2022-02-12 + - Improve ARM detection. + +v0.12.36 - 2022-02-07 + - Fix a compilation error with the ARM build. + +v0.12.35 - 2022-02-06 + - Fix a bug due to underestimating the amount of precision required for the prediction stage. + - Fix some bugs found from fuzz testing. + +v0.12.34 - 2022-01-07 + - Fix some misalignment bugs when reading metadata. + +v0.12.33 - 2021-12-22 + - Fix a bug with seeking when the seek table does not start at PCM frame 0. + +v0.12.32 - 2021-12-11 + - Fix a warning with Clang. + +v0.12.31 - 2021-08-16 + - Silence some warnings. + +v0.12.30 - 2021-07-31 + - Fix platform detection for ARM64. + +v0.12.29 - 2021-04-02 + - Fix a bug where the running PCM frame index is set to an invalid value when over-seeking. + - Fix a decoding error due to an incorrect validation check. + +v0.12.28 - 2021-02-21 + - Fix a warning due to referencing _MSC_VER when it is undefined. + +v0.12.27 - 2021-01-31 + - Fix a static analysis warning. + +v0.12.26 - 2021-01-17 + - Fix a compilation warning due to _BSD_SOURCE being deprecated. + +v0.12.25 - 2020-12-26 + - Update documentation. + +v0.12.24 - 2020-11-29 + - Fix ARM64/NEON detection when compiling with MSVC. + +v0.12.23 - 2020-11-21 + - Fix compilation with OpenWatcom. + +v0.12.22 - 2020-11-01 + - Fix an error with the previous release. + +v0.12.21 - 2020-11-01 + - Fix a possible deadlock when seeking. + - Improve compiler support for older versions of GCC. + +v0.12.20 - 2020-09-08 + - Fix a compilation error on older compilers. + +v0.12.19 - 2020-08-30 + - Fix a bug due to an undefined 32-bit shift. + +v0.12.18 - 2020-08-14 + - Fix a crash when compiling with clang-cl. + +v0.12.17 - 2020-08-02 + - Simplify sized types. + +v0.12.16 - 2020-07-25 + - Fix a compilation warning. + +v0.12.15 - 2020-07-06 + - Check for negative LPC shifts and return an error. + +v0.12.14 - 2020-06-23 + - Add include guard for the implementation section. + +v0.12.13 - 2020-05-16 + - Add compile-time and run-time version querying. + - DRFLAC_VERSION_MINOR + - DRFLAC_VERSION_MAJOR + - DRFLAC_VERSION_REVISION + - DRFLAC_VERSION_STRING + - drflac_version() + - drflac_version_string() + +v0.12.12 - 2020-04-30 + - Fix compilation errors with VC6. + +v0.12.11 - 2020-04-19 + - Fix some pedantic warnings. + - Fix some undefined behaviour warnings. + +v0.12.10 - 2020-04-10 + - Fix some bugs when trying to seek with an invalid seek table. + +v0.12.9 - 2020-04-05 + - Fix warnings. + +v0.12.8 - 2020-04-04 + - Add drflac_open_file_w() and drflac_open_file_with_metadata_w(). + - Fix some static analysis warnings. + - Minor documentation updates. + +v0.12.7 - 2020-03-14 + - Fix compilation errors with VC6. + +v0.12.6 - 2020-03-07 + - Fix compilation error with Visual Studio .NET 2003. + +v0.12.5 - 2020-01-30 + - Silence some static analysis warnings. + +v0.12.4 - 2020-01-29 + - Silence some static analysis warnings. + +v0.12.3 - 2019-12-02 + - Fix some warnings when compiling with GCC and the -Og flag. + - Fix a crash in out-of-memory situations. + - Fix potential integer overflow bug. + - Fix some static analysis warnings. + - Fix a possible crash when using custom memory allocators without a custom realloc() implementation. + - Fix a bug with binary search seeking where the bits per sample is not a multiple of 8. + +v0.12.2 - 2019-10-07 + - Internal code clean up. + +v0.12.1 - 2019-09-29 + - Fix some Clang Static Analyzer warnings. + - Fix an unused variable warning. + +v0.12.0 - 2019-09-23 + - API CHANGE: Add support for user defined memory allocation routines. This system allows the program to specify their own memory allocation + routines with a user data pointer for client-specific contextual data. This adds an extra parameter to the end of the following APIs: + - drflac_open() + - drflac_open_relaxed() + - drflac_open_with_metadata() + - drflac_open_with_metadata_relaxed() + - drflac_open_file() + - drflac_open_file_with_metadata() + - drflac_open_memory() + - drflac_open_memory_with_metadata() + - drflac_open_and_read_pcm_frames_s32() + - drflac_open_and_read_pcm_frames_s16() + - drflac_open_and_read_pcm_frames_f32() + - drflac_open_file_and_read_pcm_frames_s32() + - drflac_open_file_and_read_pcm_frames_s16() + - drflac_open_file_and_read_pcm_frames_f32() + - drflac_open_memory_and_read_pcm_frames_s32() + - drflac_open_memory_and_read_pcm_frames_s16() + - drflac_open_memory_and_read_pcm_frames_f32() + Set this extra parameter to NULL to use defaults which is the same as the previous behaviour. Setting this NULL will use + DRFLAC_MALLOC, DRFLAC_REALLOC and DRFLAC_FREE. + - Remove deprecated APIs: + - drflac_read_s32() + - drflac_read_s16() + - drflac_read_f32() + - drflac_seek_to_sample() + - drflac_open_and_decode_s32() + - drflac_open_and_decode_s16() + - drflac_open_and_decode_f32() + - drflac_open_and_decode_file_s32() + - drflac_open_and_decode_file_s16() + - drflac_open_and_decode_file_f32() + - drflac_open_and_decode_memory_s32() + - drflac_open_and_decode_memory_s16() + - drflac_open_and_decode_memory_f32() + - Remove drflac.totalSampleCount which is now replaced with drflac.totalPCMFrameCount. You can emulate drflac.totalSampleCount + by doing pFlac->totalPCMFrameCount*pFlac->channels. + - Rename drflac.currentFrame to drflac.currentFLACFrame to remove ambiguity with PCM frames. + - Fix errors when seeking to the end of a stream. + - Optimizations to seeking. + - SSE improvements and optimizations. + - ARM NEON optimizations. + - Optimizations to drflac_read_pcm_frames_s16(). + - Optimizations to drflac_read_pcm_frames_s32(). + +v0.11.10 - 2019-06-26 + - Fix a compiler error. + +v0.11.9 - 2019-06-16 + - Silence some ThreadSanitizer warnings. + +v0.11.8 - 2019-05-21 + - Fix warnings. + +v0.11.7 - 2019-05-06 + - C89 fixes. + +v0.11.6 - 2019-05-05 + - Add support for C89. + - Fix a compiler warning when CRC is disabled. + - Change license to choice of public domain or MIT-0. + +v0.11.5 - 2019-04-19 + - Fix a compiler error with GCC. + +v0.11.4 - 2019-04-17 + - Fix some warnings with GCC when compiling with -std=c99. + +v0.11.3 - 2019-04-07 + - Silence warnings with GCC. + +v0.11.2 - 2019-03-10 + - Fix a warning. + +v0.11.1 - 2019-02-17 + - Fix a potential bug with seeking. + +v0.11.0 - 2018-12-16 + - API CHANGE: Deprecated drflac_read_s32(), drflac_read_s16() and drflac_read_f32() and replaced them with + drflac_read_pcm_frames_s32(), drflac_read_pcm_frames_s16() and drflac_read_pcm_frames_f32(). The new APIs take + and return PCM frame counts instead of sample counts. To upgrade you will need to change the input count by + dividing it by the channel count, and then do the same with the return value. + - API_CHANGE: Deprecated drflac_seek_to_sample() and replaced with drflac_seek_to_pcm_frame(). Same rules as + the changes to drflac_read_*() apply. + - API CHANGE: Deprecated drflac_open_and_decode_*() and replaced with drflac_open_*_and_read_*(). Same rules as + the changes to drflac_read_*() apply. + - Optimizations. + +v0.10.0 - 2018-09-11 + - Remove the DR_FLAC_NO_WIN32_IO option and the Win32 file IO functionality. If you need to use Win32 file IO you + need to do it yourself via the callback API. + - Fix the clang build. + - Fix undefined behavior. + - Fix errors with CUESHEET metdata blocks. + - Add an API for iterating over each cuesheet track in the CUESHEET metadata block. This works the same way as the + Vorbis comment API. + - Other miscellaneous bug fixes, mostly relating to invalid FLAC streams. + - Minor optimizations. + +v0.9.11 - 2018-08-29 + - Fix a bug with sample reconstruction. + +v0.9.10 - 2018-08-07 + - Improve 64-bit detection. + +v0.9.9 - 2018-08-05 + - Fix C++ build on older versions of GCC. + +v0.9.8 - 2018-07-24 + - Fix compilation errors. + +v0.9.7 - 2018-07-05 + - Fix a warning. + +v0.9.6 - 2018-06-29 + - Fix some typos. + +v0.9.5 - 2018-06-23 + - Fix some warnings. + +v0.9.4 - 2018-06-14 + - Optimizations to seeking. + - Clean up. + +v0.9.3 - 2018-05-22 + - Bug fix. + +v0.9.2 - 2018-05-12 + - Fix a compilation error due to a missing break statement. + +v0.9.1 - 2018-04-29 + - Fix compilation error with Clang. + +v0.9 - 2018-04-24 + - Fix Clang build. + - Start using major.minor.revision versioning. + +v0.8g - 2018-04-19 + - Fix build on non-x86/x64 architectures. + +v0.8f - 2018-02-02 + - Stop pretending to support changing rate/channels mid stream. + +v0.8e - 2018-02-01 + - Fix a crash when the block size of a frame is larger than the maximum block size defined by the FLAC stream. + - Fix a crash the the Rice partition order is invalid. + +v0.8d - 2017-09-22 + - Add support for decoding streams with ID3 tags. ID3 tags are just skipped. + +v0.8c - 2017-09-07 + - Fix warning on non-x86/x64 architectures. + +v0.8b - 2017-08-19 + - Fix build on non-x86/x64 architectures. + +v0.8a - 2017-08-13 + - A small optimization for the Clang build. + +v0.8 - 2017-08-12 + - API CHANGE: Rename dr_* types to drflac_*. + - Optimizations. This brings dr_flac back to about the same class of efficiency as the reference implementation. + - Add support for custom implementations of malloc(), realloc(), etc. + - Add CRC checking to Ogg encapsulated streams. + - Fix VC++ 6 build. This is only for the C++ compiler. The C compiler is not currently supported. + - Bug fixes. + +v0.7 - 2017-07-23 + - Add support for opening a stream without a header block. To do this, use drflac_open_relaxed() / drflac_open_with_metadata_relaxed(). + +v0.6 - 2017-07-22 + - Add support for recovering from invalid frames. With this change, dr_flac will simply skip over invalid frames as if they + never existed. Frames are checked against their sync code, the CRC-8 of the frame header and the CRC-16 of the whole frame. + +v0.5 - 2017-07-16 + - Fix typos. + - Change drflac_bool* types to unsigned. + - Add CRC checking. This makes dr_flac slower, but can be disabled with #define DR_FLAC_NO_CRC. + +v0.4f - 2017-03-10 + - Fix a couple of bugs with the bitstreaming code. + +v0.4e - 2017-02-17 + - Fix some warnings. + +v0.4d - 2016-12-26 + - Add support for 32-bit floating-point PCM decoding. + - Use drflac_int* and drflac_uint* sized types to improve compiler support. + - Minor improvements to documentation. + +v0.4c - 2016-12-26 + - Add support for signed 16-bit integer PCM decoding. + +v0.4b - 2016-10-23 + - A minor change to drflac_bool8 and drflac_bool32 types. + +v0.4a - 2016-10-11 + - Rename drBool32 to drflac_bool32 for styling consistency. + +v0.4 - 2016-09-29 + - API/ABI CHANGE: Use fixed size 32-bit booleans instead of the built-in bool type. + - API CHANGE: Rename drflac_open_and_decode*() to drflac_open_and_decode*_s32(). + - API CHANGE: Swap the order of "channels" and "sampleRate" parameters in drflac_open_and_decode*(). Rationale for this is to + keep it consistent with drflac_audio. + +v0.3f - 2016-09-21 + - Fix a warning with GCC. + +v0.3e - 2016-09-18 + - Fixed a bug where GCC 4.3+ was not getting properly identified. + - Fixed a few typos. + - Changed date formats to ISO 8601 (YYYY-MM-DD). + +v0.3d - 2016-06-11 + - Minor clean up. + +v0.3c - 2016-05-28 + - Fixed compilation error. + +v0.3b - 2016-05-16 + - Fixed Linux/GCC build. + - Updated documentation. + +v0.3a - 2016-05-15 + - Minor fixes to documentation. + +v0.3 - 2016-05-11 + - Optimizations. Now at about parity with the reference implementation on 32-bit builds. + - Lots of clean up. + +v0.2b - 2016-05-10 + - Bug fixes. + +v0.2a - 2016-05-10 + - Made drflac_open_and_decode() more robust. + - Removed an unused debugging variable + +v0.2 - 2016-05-09 + - Added support for Ogg encapsulation. + - API CHANGE. Have the onSeek callback take a third argument which specifies whether or not the seek + should be relative to the start or the current position. Also changes the seeking rules such that + seeking offsets will never be negative. + - Have drflac_open_and_decode() fail gracefully if the stream has an unknown total sample count. + +v0.1b - 2016-05-07 + - Properly close the file handle in drflac_open_file() and family when the decoder fails to initialize. + - Removed a stale comment. + +v0.1a - 2016-05-05 + - Minor formatting changes. + - Fixed a warning on the GCC build. + +v0.1 - 2016-05-03 + - Initial versioned release. +*/ + +/* +This software is available as a choice of the following licenses. Choose +whichever you prefer. + +=============================================================================== +ALTERNATIVE 1 - Public Domain (www.unlicense.org) +=============================================================================== +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. + +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to + +=============================================================================== +ALTERNATIVE 2 - MIT No Attribution +=============================================================================== +Copyright 2023 David Reid + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ diff --git a/src/lib/dr_mp3.h b/src/lib/dr_mp3.h new file mode 100644 index 0000000..e3bae7a --- /dev/null +++ b/src/lib/dr_mp3.h @@ -0,0 +1,5369 @@ +/* +MP3 audio decoder. Choice of public domain or MIT-0. See license statements at the end of this file. +dr_mp3 - v0.7.2 - TBD + +David Reid - mackron@gmail.com + +GitHub: https://github.com/mackron/dr_libs + +Based on minimp3 (https://github.com/lieff/minimp3) which is where the real work was done. See the bottom of this file for differences between minimp3 and dr_mp3. +*/ + +/* +Introduction +============= +dr_mp3 is a single file library. To use it, do something like the following in one .c file. + + ```c + #define DR_MP3_IMPLEMENTATION + #include "dr_mp3.h" + ``` + +You can then #include this file in other parts of the program as you would with any other header file. To decode audio data, do something like the following: + + ```c + drmp3 mp3; + if (!drmp3_init_file(&mp3, "MySong.mp3", NULL)) { + // Failed to open file + } + + ... + + drmp3_uint64 framesRead = drmp3_read_pcm_frames_f32(pMP3, framesToRead, pFrames); + ``` + +The drmp3 object is transparent so you can get access to the channel count and sample rate like so: + + ``` + drmp3_uint32 channels = mp3.channels; + drmp3_uint32 sampleRate = mp3.sampleRate; + ``` + +The example above initializes a decoder from a file, but you can also initialize it from a block of memory and read and seek callbacks with +`drmp3_init_memory()` and `drmp3_init()` respectively. + +You do not need to do any annoying memory management when reading PCM frames - this is all managed internally. You can request any number of PCM frames in each +call to `drmp3_read_pcm_frames_f32()` and it will return as many PCM frames as it can, up to the requested amount. + +You can also decode an entire file in one go with `drmp3_open_and_read_pcm_frames_f32()`, `drmp3_open_memory_and_read_pcm_frames_f32()` and +`drmp3_open_file_and_read_pcm_frames_f32()`. + + +Build Options +============= +#define these options before including this file. + +#define DR_MP3_NO_STDIO + Disable drmp3_init_file(), etc. + +#define DR_MP3_NO_SIMD + Disable SIMD optimizations. +*/ + +#ifndef dr_mp3_h +#define dr_mp3_h + +#ifdef __cplusplus +extern "C" { +#endif + +#define DRMP3_STRINGIFY(x) #x +#define DRMP3_XSTRINGIFY(x) DRMP3_STRINGIFY(x) + +#define DRMP3_VERSION_MAJOR 0 +#define DRMP3_VERSION_MINOR 7 +#define DRMP3_VERSION_REVISION 2 +#define DRMP3_VERSION_STRING DRMP3_XSTRINGIFY(DRMP3_VERSION_MAJOR) "." DRMP3_XSTRINGIFY(DRMP3_VERSION_MINOR) "." DRMP3_XSTRINGIFY(DRMP3_VERSION_REVISION) + +#include /* For size_t. */ + +/* Sized Types */ +typedef signed char drmp3_int8; +typedef unsigned char drmp3_uint8; +typedef signed short drmp3_int16; +typedef unsigned short drmp3_uint16; +typedef signed int drmp3_int32; +typedef unsigned int drmp3_uint32; +#if defined(_MSC_VER) && !defined(__clang__) + typedef signed __int64 drmp3_int64; + typedef unsigned __int64 drmp3_uint64; +#else + #if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))) + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wlong-long" + #if defined(__clang__) + #pragma GCC diagnostic ignored "-Wc++11-long-long" + #endif + #endif + typedef signed long long drmp3_int64; + typedef unsigned long long drmp3_uint64; + #if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))) + #pragma GCC diagnostic pop + #endif +#endif +#if defined(__LP64__) || defined(_WIN64) || (defined(__x86_64__) && !defined(__ILP32__)) || defined(_M_X64) || defined(__ia64) || defined (_M_IA64) || defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) || defined(__powerpc64__) + typedef drmp3_uint64 drmp3_uintptr; +#else + typedef drmp3_uint32 drmp3_uintptr; +#endif +typedef drmp3_uint8 drmp3_bool8; +typedef drmp3_uint32 drmp3_bool32; +#define DRMP3_TRUE 1 +#define DRMP3_FALSE 0 + +/* Weird shifting syntax is for VC6 compatibility. */ +#define DRMP3_UINT64_MAX (((drmp3_uint64)0xFFFFFFFF << 32) | (drmp3_uint64)0xFFFFFFFF) +/* End Sized Types */ + +/* Decorations */ +#if !defined(DRMP3_API) + #if defined(DRMP3_DLL) + #if defined(_WIN32) + #define DRMP3_DLL_IMPORT __declspec(dllimport) + #define DRMP3_DLL_EXPORT __declspec(dllexport) + #define DRMP3_DLL_PRIVATE static + #else + #if defined(__GNUC__) && __GNUC__ >= 4 + #define DRMP3_DLL_IMPORT __attribute__((visibility("default"))) + #define DRMP3_DLL_EXPORT __attribute__((visibility("default"))) + #define DRMP3_DLL_PRIVATE __attribute__((visibility("hidden"))) + #else + #define DRMP3_DLL_IMPORT + #define DRMP3_DLL_EXPORT + #define DRMP3_DLL_PRIVATE static + #endif + #endif + + #if defined(DR_MP3_IMPLEMENTATION) + #define DRMP3_API DRMP3_DLL_EXPORT + #else + #define DRMP3_API DRMP3_DLL_IMPORT + #endif + #define DRMP3_PRIVATE DRMP3_DLL_PRIVATE + #else + #define DRMP3_API extern + #define DRMP3_PRIVATE static + #endif +#endif +/* End Decorations */ + +/* Result Codes */ +typedef drmp3_int32 drmp3_result; +#define DRMP3_SUCCESS 0 +#define DRMP3_ERROR -1 /* A generic error. */ +#define DRMP3_INVALID_ARGS -2 +#define DRMP3_INVALID_OPERATION -3 +#define DRMP3_OUT_OF_MEMORY -4 +#define DRMP3_OUT_OF_RANGE -5 +#define DRMP3_ACCESS_DENIED -6 +#define DRMP3_DOES_NOT_EXIST -7 +#define DRMP3_ALREADY_EXISTS -8 +#define DRMP3_TOO_MANY_OPEN_FILES -9 +#define DRMP3_INVALID_FILE -10 +#define DRMP3_TOO_BIG -11 +#define DRMP3_PATH_TOO_LONG -12 +#define DRMP3_NAME_TOO_LONG -13 +#define DRMP3_NOT_DIRECTORY -14 +#define DRMP3_IS_DIRECTORY -15 +#define DRMP3_DIRECTORY_NOT_EMPTY -16 +#define DRMP3_END_OF_FILE -17 +#define DRMP3_NO_SPACE -18 +#define DRMP3_BUSY -19 +#define DRMP3_IO_ERROR -20 +#define DRMP3_INTERRUPT -21 +#define DRMP3_UNAVAILABLE -22 +#define DRMP3_ALREADY_IN_USE -23 +#define DRMP3_BAD_ADDRESS -24 +#define DRMP3_BAD_SEEK -25 +#define DRMP3_BAD_PIPE -26 +#define DRMP3_DEADLOCK -27 +#define DRMP3_TOO_MANY_LINKS -28 +#define DRMP3_NOT_IMPLEMENTED -29 +#define DRMP3_NO_MESSAGE -30 +#define DRMP3_BAD_MESSAGE -31 +#define DRMP3_NO_DATA_AVAILABLE -32 +#define DRMP3_INVALID_DATA -33 +#define DRMP3_TIMEOUT -34 +#define DRMP3_NO_NETWORK -35 +#define DRMP3_NOT_UNIQUE -36 +#define DRMP3_NOT_SOCKET -37 +#define DRMP3_NO_ADDRESS -38 +#define DRMP3_BAD_PROTOCOL -39 +#define DRMP3_PROTOCOL_UNAVAILABLE -40 +#define DRMP3_PROTOCOL_NOT_SUPPORTED -41 +#define DRMP3_PROTOCOL_FAMILY_NOT_SUPPORTED -42 +#define DRMP3_ADDRESS_FAMILY_NOT_SUPPORTED -43 +#define DRMP3_SOCKET_NOT_SUPPORTED -44 +#define DRMP3_CONNECTION_RESET -45 +#define DRMP3_ALREADY_CONNECTED -46 +#define DRMP3_NOT_CONNECTED -47 +#define DRMP3_CONNECTION_REFUSED -48 +#define DRMP3_NO_HOST -49 +#define DRMP3_IN_PROGRESS -50 +#define DRMP3_CANCELLED -51 +#define DRMP3_MEMORY_ALREADY_MAPPED -52 +#define DRMP3_AT_END -53 +/* End Result Codes */ + +#define DRMP3_MAX_PCM_FRAMES_PER_MP3_FRAME 1152 +#define DRMP3_MAX_SAMPLES_PER_FRAME (DRMP3_MAX_PCM_FRAMES_PER_MP3_FRAME*2) + +/* Inline */ +#ifdef _MSC_VER + #define DRMP3_INLINE __forceinline +#elif defined(__GNUC__) + /* + I've had a bug report where GCC is emitting warnings about functions possibly not being inlineable. This warning happens when + the __attribute__((always_inline)) attribute is defined without an "inline" statement. I think therefore there must be some + case where "__inline__" is not always defined, thus the compiler emitting these warnings. When using -std=c89 or -ansi on the + command line, we cannot use the "inline" keyword and instead need to use "__inline__". In an attempt to work around this issue + I am using "__inline__" only when we're compiling in strict ANSI mode. + */ + #if defined(__STRICT_ANSI__) + #define DRMP3_GNUC_INLINE_HINT __inline__ + #else + #define DRMP3_GNUC_INLINE_HINT inline + #endif + + #if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 2)) || defined(__clang__) + #define DRMP3_INLINE DRMP3_GNUC_INLINE_HINT __attribute__((always_inline)) + #else + #define DRMP3_INLINE DRMP3_GNUC_INLINE_HINT + #endif +#elif defined(__WATCOMC__) + #define DRMP3_INLINE __inline +#else + #define DRMP3_INLINE +#endif +/* End Inline */ + + +DRMP3_API void drmp3_version(drmp3_uint32* pMajor, drmp3_uint32* pMinor, drmp3_uint32* pRevision); +DRMP3_API const char* drmp3_version_string(void); + + +/* Allocation Callbacks */ +typedef struct +{ + void* pUserData; + void* (* onMalloc)(size_t sz, void* pUserData); + void* (* onRealloc)(void* p, size_t sz, void* pUserData); + void (* onFree)(void* p, void* pUserData); +} drmp3_allocation_callbacks; +/* End Allocation Callbacks */ + + +/* +Low Level Push API +================== +*/ +#define DRMP3_MAX_BITRESERVOIR_BYTES 511 +#define DRMP3_MAX_FREE_FORMAT_FRAME_SIZE 2304 /* more than ISO spec's */ +#define DRMP3_MAX_L3_FRAME_PAYLOAD_BYTES DRMP3_MAX_FREE_FORMAT_FRAME_SIZE /* MUST be >= 320000/8/32000*1152 = 1440 */ + +typedef struct +{ + int frame_bytes, channels, sample_rate, layer, bitrate_kbps; +} drmp3dec_frame_info; + +typedef struct +{ + const drmp3_uint8 *buf; + int pos, limit; +} drmp3_bs; + +typedef struct +{ + const drmp3_uint8 *sfbtab; + drmp3_uint16 part_23_length, big_values, scalefac_compress; + drmp3_uint8 global_gain, block_type, mixed_block_flag, n_long_sfb, n_short_sfb; + drmp3_uint8 table_select[3], region_count[3], subblock_gain[3]; + drmp3_uint8 preflag, scalefac_scale, count1_table, scfsi; +} drmp3_L3_gr_info; + +typedef struct +{ + drmp3_bs bs; + drmp3_uint8 maindata[DRMP3_MAX_BITRESERVOIR_BYTES + DRMP3_MAX_L3_FRAME_PAYLOAD_BYTES]; + drmp3_L3_gr_info gr_info[4]; + float grbuf[2][576], scf[40], syn[18 + 15][2*32]; + drmp3_uint8 ist_pos[2][39]; +} drmp3dec_scratch; + +typedef struct +{ + float mdct_overlap[2][9*32], qmf_state[15*2*32]; + int reserv, free_format_bytes; + drmp3_uint8 header[4], reserv_buf[511]; + drmp3dec_scratch scratch; +} drmp3dec; + +/* Initializes a low level decoder. */ +DRMP3_API void drmp3dec_init(drmp3dec *dec); + +/* Reads a frame from a low level decoder. */ +DRMP3_API int drmp3dec_decode_frame(drmp3dec *dec, const drmp3_uint8 *mp3, int mp3_bytes, void *pcm, drmp3dec_frame_info *info); + +/* Helper for converting between f32 and s16. */ +DRMP3_API void drmp3dec_f32_to_s16(const float *in, drmp3_int16 *out, size_t num_samples); + + + +/* +Main API (Pull API) +=================== +*/ +typedef enum +{ + DRMP3_SEEK_SET, + DRMP3_SEEK_CUR, + DRMP3_SEEK_END +} drmp3_seek_origin; + +typedef struct +{ + drmp3_uint64 seekPosInBytes; /* Points to the first byte of an MP3 frame. */ + drmp3_uint64 pcmFrameIndex; /* The index of the PCM frame this seek point targets. */ + drmp3_uint16 mp3FramesToDiscard; /* The number of whole MP3 frames to be discarded before pcmFramesToDiscard. */ + drmp3_uint16 pcmFramesToDiscard; /* The number of leading samples to read and discard. These are discarded after mp3FramesToDiscard. */ +} drmp3_seek_point; + +typedef enum +{ + DRMP3_METADATA_TYPE_ID3V1, + DRMP3_METADATA_TYPE_ID3V2, + DRMP3_METADATA_TYPE_APE, + DRMP3_METADATA_TYPE_XING, + DRMP3_METADATA_TYPE_VBRI +} drmp3_metadata_type; + +typedef struct +{ + drmp3_metadata_type type; + const void* pRawData; /* A pointer to the raw data. */ + size_t rawDataSize; +} drmp3_metadata; + + +/* +Callback for when data is read. Return value is the number of bytes actually read. + +pUserData [in] The user data that was passed to drmp3_init(), and family. +pBufferOut [out] The output buffer. +bytesToRead [in] The number of bytes to read. + +Returns the number of bytes actually read. + +A return value of less than bytesToRead indicates the end of the stream. Do _not_ return from this callback until +either the entire bytesToRead is filled or you have reached the end of the stream. +*/ +typedef size_t (* drmp3_read_proc)(void* pUserData, void* pBufferOut, size_t bytesToRead); + +/* +Callback for when data needs to be seeked. + +pUserData [in] The user data that was passed to drmp3_init(), and family. +offset [in] The number of bytes to move, relative to the origin. Can be negative. +origin [in] The origin of the seek. + +Returns whether or not the seek was successful. +*/ +typedef drmp3_bool32 (* drmp3_seek_proc)(void* pUserData, int offset, drmp3_seek_origin origin); + +/* +Callback for retrieving the current cursor position. + +pUserData [in] The user data that was passed to drmp3_init(), and family. +pCursor [out] The cursor position in bytes from the start of the stream. + +Returns whether or not the cursor position was successfully retrieved. +*/ +typedef drmp3_bool32 (* drmp3_tell_proc)(void* pUserData, drmp3_int64* pCursor); + + +/* +Callback for when metadata is read. + +Only the raw data is provided. The client is responsible for parsing the contents of the data themsevles. +*/ +typedef void (* drmp3_meta_proc)(void* pUserData, const drmp3_metadata* pMetadata); + + +typedef struct +{ + drmp3_uint32 channels; + drmp3_uint32 sampleRate; +} drmp3_config; + +typedef struct +{ + drmp3dec decoder; + drmp3_uint32 channels; + drmp3_uint32 sampleRate; + drmp3_read_proc onRead; + drmp3_seek_proc onSeek; + drmp3_meta_proc onMeta; + void* pUserData; + void* pUserDataMeta; + drmp3_allocation_callbacks allocationCallbacks; + drmp3_uint32 mp3FrameChannels; /* The number of channels in the currently loaded MP3 frame. Internal use only. */ + drmp3_uint32 mp3FrameSampleRate; /* The sample rate of the currently loaded MP3 frame. Internal use only. */ + drmp3_uint32 pcmFramesConsumedInMP3Frame; + drmp3_uint32 pcmFramesRemainingInMP3Frame; + drmp3_uint8 pcmFrames[sizeof(float)*DRMP3_MAX_SAMPLES_PER_FRAME]; /* <-- Multipled by sizeof(float) to ensure there's enough room for DR_MP3_FLOAT_OUTPUT. */ + drmp3_uint64 currentPCMFrame; /* The current PCM frame, globally. */ + drmp3_uint64 streamCursor; /* The current byte the decoder is sitting on in the raw stream. */ + drmp3_uint64 streamLength; /* The length of the stream in bytes. dr_mp3 will not read beyond this. If a ID3v1 or APE tag is present, this will be set to the first byte of the tag. */ + drmp3_uint64 streamStartOffset; /* The offset of the start of the MP3 data. This is used for skipping ID3v2 and VBR tags. */ + drmp3_seek_point* pSeekPoints; /* NULL by default. Set with drmp3_bind_seek_table(). Memory is owned by the client. dr_mp3 will never attempt to free this pointer. */ + drmp3_uint32 seekPointCount; /* The number of items in pSeekPoints. When set to 0 assumes to no seek table. Defaults to zero. */ + drmp3_uint32 delayInPCMFrames; + drmp3_uint32 paddingInPCMFrames; + drmp3_uint64 totalPCMFrameCount; /* Set to DRMP3_UINT64_MAX if the length is unknown. Includes delay and padding. */ + drmp3_bool32 isVBR; + drmp3_bool32 isCBR; + size_t dataSize; + size_t dataCapacity; + size_t dataConsumed; + drmp3_uint8* pData; + drmp3_bool32 atEnd; + struct + { + const drmp3_uint8* pData; + size_t dataSize; + size_t currentReadPos; + } memory; /* Only used for decoders that were opened against a block of memory. */ +} drmp3; + +/* +Initializes an MP3 decoder. + +onRead [in] The function to call when data needs to be read from the client. +onSeek [in] The function to call when the read position of the client data needs to move. +onTell [in] The function to call when the read position of the client data needs to be retrieved. +pUserData [in, optional] A pointer to application defined data that will be passed to onRead and onSeek. + +Returns true if successful; false otherwise. + +Close the loader with drmp3_uninit(). + +See also: drmp3_init_file(), drmp3_init_memory(), drmp3_uninit() +*/ +DRMP3_API drmp3_bool32 drmp3_init(drmp3* pMP3, drmp3_read_proc onRead, drmp3_seek_proc onSeek, drmp3_tell_proc onTell, drmp3_meta_proc onMeta, void* pUserData, const drmp3_allocation_callbacks* pAllocationCallbacks); + +/* +Initializes an MP3 decoder from a block of memory. + +This does not create a copy of the data. It is up to the application to ensure the buffer remains valid for +the lifetime of the drmp3 object. + +The buffer should contain the contents of the entire MP3 file. +*/ +DRMP3_API drmp3_bool32 drmp3_init_memory_with_metadata(drmp3* pMP3, const void* pData, size_t dataSize, drmp3_meta_proc onMeta, void* pUserDataMeta, const drmp3_allocation_callbacks* pAllocationCallbacks); +DRMP3_API drmp3_bool32 drmp3_init_memory(drmp3* pMP3, const void* pData, size_t dataSize, const drmp3_allocation_callbacks* pAllocationCallbacks); + +#ifndef DR_MP3_NO_STDIO +/* +Initializes an MP3 decoder from a file. + +This holds the internal FILE object until drmp3_uninit() is called. Keep this in mind if you're caching drmp3 +objects because the operating system may restrict the number of file handles an application can have open at +any given time. +*/ +DRMP3_API drmp3_bool32 drmp3_init_file_with_metadata(drmp3* pMP3, const char* pFilePath, drmp3_meta_proc onMeta, void* pUserDataMeta, const drmp3_allocation_callbacks* pAllocationCallbacks); +DRMP3_API drmp3_bool32 drmp3_init_file_with_metadata_w(drmp3* pMP3, const wchar_t* pFilePath, drmp3_meta_proc onMeta, void* pUserDataMeta, const drmp3_allocation_callbacks* pAllocationCallbacks); + +DRMP3_API drmp3_bool32 drmp3_init_file(drmp3* pMP3, const char* pFilePath, const drmp3_allocation_callbacks* pAllocationCallbacks); +DRMP3_API drmp3_bool32 drmp3_init_file_w(drmp3* pMP3, const wchar_t* pFilePath, const drmp3_allocation_callbacks* pAllocationCallbacks); +#endif + +/* +Uninitializes an MP3 decoder. +*/ +DRMP3_API void drmp3_uninit(drmp3* pMP3); + +/* +Reads PCM frames as interleaved 32-bit IEEE floating point PCM. + +Note that framesToRead specifies the number of PCM frames to read, _not_ the number of MP3 frames. +*/ +DRMP3_API drmp3_uint64 drmp3_read_pcm_frames_f32(drmp3* pMP3, drmp3_uint64 framesToRead, float* pBufferOut); + +/* +Reads PCM frames as interleaved signed 16-bit integer PCM. + +Note that framesToRead specifies the number of PCM frames to read, _not_ the number of MP3 frames. +*/ +DRMP3_API drmp3_uint64 drmp3_read_pcm_frames_s16(drmp3* pMP3, drmp3_uint64 framesToRead, drmp3_int16* pBufferOut); + +/* +Seeks to a specific frame. + +Note that this is _not_ an MP3 frame, but rather a PCM frame. +*/ +DRMP3_API drmp3_bool32 drmp3_seek_to_pcm_frame(drmp3* pMP3, drmp3_uint64 frameIndex); + +/* +Calculates the total number of PCM frames in the MP3 stream. Cannot be used for infinite streams such as internet +radio. Runs in linear time. Returns 0 on error. +*/ +DRMP3_API drmp3_uint64 drmp3_get_pcm_frame_count(drmp3* pMP3); + +/* +Calculates the total number of MP3 frames in the MP3 stream. Cannot be used for infinite streams such as internet +radio. Runs in linear time. Returns 0 on error. +*/ +DRMP3_API drmp3_uint64 drmp3_get_mp3_frame_count(drmp3* pMP3); + +/* +Calculates the total number of MP3 and PCM frames in the MP3 stream. Cannot be used for infinite streams such as internet +radio. Runs in linear time. Returns 0 on error. + +This is equivalent to calling drmp3_get_mp3_frame_count() and drmp3_get_pcm_frame_count() except that it's more efficient. +*/ +DRMP3_API drmp3_bool32 drmp3_get_mp3_and_pcm_frame_count(drmp3* pMP3, drmp3_uint64* pMP3FrameCount, drmp3_uint64* pPCMFrameCount); + +/* +Calculates the seekpoints based on PCM frames. This is slow. + +pSeekpoint count is a pointer to a uint32 containing the seekpoint count. On input it contains the desired count. +On output it contains the actual count. The reason for this design is that the client may request too many +seekpoints, in which case dr_mp3 will return a corrected count. + +Note that seektable seeking is not quite sample exact when the MP3 stream contains inconsistent sample rates. +*/ +DRMP3_API drmp3_bool32 drmp3_calculate_seek_points(drmp3* pMP3, drmp3_uint32* pSeekPointCount, drmp3_seek_point* pSeekPoints); + +/* +Binds a seek table to the decoder. + +This does _not_ make a copy of pSeekPoints - it only references it. It is up to the application to ensure this +remains valid while it is bound to the decoder. + +Use drmp3_calculate_seek_points() to calculate the seek points. +*/ +DRMP3_API drmp3_bool32 drmp3_bind_seek_table(drmp3* pMP3, drmp3_uint32 seekPointCount, drmp3_seek_point* pSeekPoints); + + +/* +Opens an decodes an entire MP3 stream as a single operation. + +On output pConfig will receive the channel count and sample rate of the stream. + +Free the returned pointer with drmp3_free(). +*/ +DRMP3_API float* drmp3_open_and_read_pcm_frames_f32(drmp3_read_proc onRead, drmp3_seek_proc onSeek, drmp3_tell_proc onTell, void* pUserData, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks); +DRMP3_API drmp3_int16* drmp3_open_and_read_pcm_frames_s16(drmp3_read_proc onRead, drmp3_seek_proc onSeek, drmp3_tell_proc onTell, void* pUserData, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks); + +DRMP3_API float* drmp3_open_memory_and_read_pcm_frames_f32(const void* pData, size_t dataSize, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks); +DRMP3_API drmp3_int16* drmp3_open_memory_and_read_pcm_frames_s16(const void* pData, size_t dataSize, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks); + +#ifndef DR_MP3_NO_STDIO +DRMP3_API float* drmp3_open_file_and_read_pcm_frames_f32(const char* filePath, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks); +DRMP3_API drmp3_int16* drmp3_open_file_and_read_pcm_frames_s16(const char* filePath, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks); +#endif + +/* +Allocates a block of memory on the heap. +*/ +DRMP3_API void* drmp3_malloc(size_t sz, const drmp3_allocation_callbacks* pAllocationCallbacks); + +/* +Frees any memory that was allocated by a public drmp3 API. +*/ +DRMP3_API void drmp3_free(void* p, const drmp3_allocation_callbacks* pAllocationCallbacks); + +#ifdef __cplusplus +} +#endif +#endif /* dr_mp3_h */ + + +/************************************************************************************************************************************************************ + ************************************************************************************************************************************************************ + + IMPLEMENTATION + + ************************************************************************************************************************************************************ + ************************************************************************************************************************************************************/ +#if defined(DR_MP3_IMPLEMENTATION) +#ifndef dr_mp3_c +#define dr_mp3_c + +#include +#include +#include /* For INT_MAX */ + +DRMP3_API void drmp3_version(drmp3_uint32* pMajor, drmp3_uint32* pMinor, drmp3_uint32* pRevision) +{ + if (pMajor) { + *pMajor = DRMP3_VERSION_MAJOR; + } + + if (pMinor) { + *pMinor = DRMP3_VERSION_MINOR; + } + + if (pRevision) { + *pRevision = DRMP3_VERSION_REVISION; + } +} + +DRMP3_API const char* drmp3_version_string(void) +{ + return DRMP3_VERSION_STRING; +} + +/* Disable SIMD when compiling with TCC for now. */ +#if defined(__TINYC__) +#define DR_MP3_NO_SIMD +#endif + +#define DRMP3_OFFSET_PTR(p, offset) ((void*)((drmp3_uint8*)(p) + (offset))) + +#ifndef DRMP3_MAX_FRAME_SYNC_MATCHES +#define DRMP3_MAX_FRAME_SYNC_MATCHES 10 +#endif + +#define DRMP3_SHORT_BLOCK_TYPE 2 +#define DRMP3_STOP_BLOCK_TYPE 3 +#define DRMP3_MODE_MONO 3 +#define DRMP3_MODE_JOINT_STEREO 1 +#define DRMP3_HDR_SIZE 4 +#define DRMP3_HDR_IS_MONO(h) (((h[3]) & 0xC0) == 0xC0) +#define DRMP3_HDR_IS_MS_STEREO(h) (((h[3]) & 0xE0) == 0x60) +#define DRMP3_HDR_IS_FREE_FORMAT(h) (((h[2]) & 0xF0) == 0) +#define DRMP3_HDR_IS_CRC(h) (!((h[1]) & 1)) +#define DRMP3_HDR_TEST_PADDING(h) ((h[2]) & 0x2) +#define DRMP3_HDR_TEST_MPEG1(h) ((h[1]) & 0x8) +#define DRMP3_HDR_TEST_NOT_MPEG25(h) ((h[1]) & 0x10) +#define DRMP3_HDR_TEST_I_STEREO(h) ((h[3]) & 0x10) +#define DRMP3_HDR_TEST_MS_STEREO(h) ((h[3]) & 0x20) +#define DRMP3_HDR_GET_STEREO_MODE(h) (((h[3]) >> 6) & 3) +#define DRMP3_HDR_GET_STEREO_MODE_EXT(h) (((h[3]) >> 4) & 3) +#define DRMP3_HDR_GET_LAYER(h) (((h[1]) >> 1) & 3) +#define DRMP3_HDR_GET_BITRATE(h) ((h[2]) >> 4) +#define DRMP3_HDR_GET_SAMPLE_RATE(h) (((h[2]) >> 2) & 3) +#define DRMP3_HDR_GET_MY_SAMPLE_RATE(h) (DRMP3_HDR_GET_SAMPLE_RATE(h) + (((h[1] >> 3) & 1) + ((h[1] >> 4) & 1))*3) +#define DRMP3_HDR_IS_FRAME_576(h) ((h[1] & 14) == 2) +#define DRMP3_HDR_IS_LAYER_1(h) ((h[1] & 6) == 6) + +#define DRMP3_BITS_DEQUANTIZER_OUT -1 +#define DRMP3_MAX_SCF (255 + DRMP3_BITS_DEQUANTIZER_OUT*4 - 210) +#define DRMP3_MAX_SCFI ((DRMP3_MAX_SCF + 3) & ~3) + +#define DRMP3_MIN(a, b) ((a) > (b) ? (b) : (a)) +#define DRMP3_MAX(a, b) ((a) < (b) ? (b) : (a)) + +#if !defined(DR_MP3_NO_SIMD) + +#if !defined(DR_MP3_ONLY_SIMD) && (defined(_M_X64) || defined(__x86_64__) || defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)) +/* x64 always have SSE2, arm64 always have neon, no need for generic code */ +#define DR_MP3_ONLY_SIMD +#endif + +#if ((defined(_MSC_VER) && _MSC_VER >= 1400) && defined(_M_X64)) || ((defined(__i386) || defined(_M_IX86) || defined(__i386__) || defined(__x86_64__)) && ((defined(_M_IX86_FP) && _M_IX86_FP == 2) || defined(__SSE2__))) +#if defined(_MSC_VER) +#include +#endif +#include +#define DRMP3_HAVE_SSE 1 +#define DRMP3_HAVE_SIMD 1 +#define DRMP3_VSTORE _mm_storeu_ps +#define DRMP3_VLD _mm_loadu_ps +#define DRMP3_VSET _mm_set1_ps +#define DRMP3_VADD _mm_add_ps +#define DRMP3_VSUB _mm_sub_ps +#define DRMP3_VMUL _mm_mul_ps +#define DRMP3_VMAC(a, x, y) _mm_add_ps(a, _mm_mul_ps(x, y)) +#define DRMP3_VMSB(a, x, y) _mm_sub_ps(a, _mm_mul_ps(x, y)) +#define DRMP3_VMUL_S(x, s) _mm_mul_ps(x, _mm_set1_ps(s)) +#define DRMP3_VREV(x) _mm_shuffle_ps(x, x, _MM_SHUFFLE(0, 1, 2, 3)) +typedef __m128 drmp3_f4; +#if (defined(_MSC_VER) || defined(DR_MP3_ONLY_SIMD)) && !defined(__clang__) +#define drmp3_cpuid __cpuid +#else +static __inline__ __attribute__((always_inline)) void drmp3_cpuid(int CPUInfo[], const int InfoType) +{ +#if defined(__PIC__) + __asm__ __volatile__( +#if defined(__x86_64__) + "push %%rbx\n" + "cpuid\n" + "xchgl %%ebx, %1\n" + "pop %%rbx\n" +#else + "xchgl %%ebx, %1\n" + "cpuid\n" + "xchgl %%ebx, %1\n" +#endif + : "=a" (CPUInfo[0]), "=r" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3]) + : "a" (InfoType)); +#else + __asm__ __volatile__( + "cpuid" + : "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3]) + : "a" (InfoType)); +#endif +} +#endif +static int drmp3_have_simd(void) +{ +#ifdef DR_MP3_ONLY_SIMD + return 1; +#else + static int g_have_simd; + int CPUInfo[4]; +#ifdef MINIMP3_TEST + static int g_counter; + if (g_counter++ > 100) + return 0; +#endif + if (g_have_simd) + goto end; + drmp3_cpuid(CPUInfo, 0); + if (CPUInfo[0] > 0) + { + drmp3_cpuid(CPUInfo, 1); + g_have_simd = (CPUInfo[3] & (1 << 26)) + 1; /* SSE2 */ + return g_have_simd - 1; + } + +end: + return g_have_simd - 1; +#endif +} +#elif defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) +#include +#define DRMP3_HAVE_SSE 0 +#define DRMP3_HAVE_SIMD 1 +#define DRMP3_VSTORE vst1q_f32 +#define DRMP3_VLD vld1q_f32 +#define DRMP3_VSET vmovq_n_f32 +#define DRMP3_VADD vaddq_f32 +#define DRMP3_VSUB vsubq_f32 +#define DRMP3_VMUL vmulq_f32 +#define DRMP3_VMAC(a, x, y) vmlaq_f32(a, x, y) +#define DRMP3_VMSB(a, x, y) vmlsq_f32(a, x, y) +#define DRMP3_VMUL_S(x, s) vmulq_f32(x, vmovq_n_f32(s)) +#define DRMP3_VREV(x) vcombine_f32(vget_high_f32(vrev64q_f32(x)), vget_low_f32(vrev64q_f32(x))) +typedef float32x4_t drmp3_f4; +static int drmp3_have_simd(void) +{ /* TODO: detect neon for !DR_MP3_ONLY_SIMD */ + return 1; +} +#else +#define DRMP3_HAVE_SSE 0 +#define DRMP3_HAVE_SIMD 0 +#ifdef DR_MP3_ONLY_SIMD +#error DR_MP3_ONLY_SIMD used, but SSE/NEON not enabled +#endif +#endif + +#else + +#define DRMP3_HAVE_SIMD 0 + +#endif + +#if defined(__ARM_ARCH) && (__ARM_ARCH >= 6) && !defined(__aarch64__) && !defined(_M_ARM64) && !defined(_M_ARM64EC) && !defined(__ARM_ARCH_6M__) +#define DRMP3_HAVE_ARMV6 1 +static __inline__ __attribute__((always_inline)) drmp3_int32 drmp3_clip_int16_arm(drmp3_int32 a) +{ + drmp3_int32 x = 0; + __asm__ ("ssat %0, #16, %1" : "=r"(x) : "r"(a)); + return x; +} +#else +#define DRMP3_HAVE_ARMV6 0 +#endif + + +/* Standard library stuff. */ +#ifndef DRMP3_ASSERT +#include +#define DRMP3_ASSERT(expression) assert(expression) +#endif +#ifndef DRMP3_COPY_MEMORY +#define DRMP3_COPY_MEMORY(dst, src, sz) memcpy((dst), (src), (sz)) +#endif +#ifndef DRMP3_MOVE_MEMORY +#define DRMP3_MOVE_MEMORY(dst, src, sz) memmove((dst), (src), (sz)) +#endif +#ifndef DRMP3_ZERO_MEMORY +#define DRMP3_ZERO_MEMORY(p, sz) memset((p), 0, (sz)) +#endif +#define DRMP3_ZERO_OBJECT(p) DRMP3_ZERO_MEMORY((p), sizeof(*(p))) +#ifndef DRMP3_MALLOC +#define DRMP3_MALLOC(sz) malloc((sz)) +#endif +#ifndef DRMP3_REALLOC +#define DRMP3_REALLOC(p, sz) realloc((p), (sz)) +#endif +#ifndef DRMP3_FREE +#define DRMP3_FREE(p) free((p)) +#endif + + + +typedef struct +{ + float scf[3*64]; + drmp3_uint8 total_bands, stereo_bands, bitalloc[64], scfcod[64]; +} drmp3_L12_scale_info; + +typedef struct +{ + drmp3_uint8 tab_offset, code_tab_width, band_count; +} drmp3_L12_subband_alloc; + +static void drmp3_bs_init(drmp3_bs *bs, const drmp3_uint8 *data, int bytes) +{ + bs->buf = data; + bs->pos = 0; + bs->limit = bytes*8; +} + +static drmp3_uint32 drmp3_bs_get_bits(drmp3_bs *bs, int n) +{ + drmp3_uint32 next, cache = 0, s = bs->pos & 7; + int shl = n + s; + const drmp3_uint8 *p = bs->buf + (bs->pos >> 3); + if ((bs->pos += n) > bs->limit) + return 0; + next = *p++ & (255 >> s); + while ((shl -= 8) > 0) + { + cache |= next << shl; + next = *p++; + } + return cache | (next >> -shl); +} + +static int drmp3_hdr_valid(const drmp3_uint8 *h) +{ + return h[0] == 0xff && + ((h[1] & 0xF0) == 0xf0 || (h[1] & 0xFE) == 0xe2) && + (DRMP3_HDR_GET_LAYER(h) != 0) && + (DRMP3_HDR_GET_BITRATE(h) != 15) && + (DRMP3_HDR_GET_SAMPLE_RATE(h) != 3); +} + +static int drmp3_hdr_compare(const drmp3_uint8 *h1, const drmp3_uint8 *h2) +{ + return drmp3_hdr_valid(h2) && + ((h1[1] ^ h2[1]) & 0xFE) == 0 && + ((h1[2] ^ h2[2]) & 0x0C) == 0 && + !(DRMP3_HDR_IS_FREE_FORMAT(h1) ^ DRMP3_HDR_IS_FREE_FORMAT(h2)); +} + +static unsigned drmp3_hdr_bitrate_kbps(const drmp3_uint8 *h) +{ + static const drmp3_uint8 halfrate[2][3][15] = { + { { 0,4,8,12,16,20,24,28,32,40,48,56,64,72,80 }, { 0,4,8,12,16,20,24,28,32,40,48,56,64,72,80 }, { 0,16,24,28,32,40,48,56,64,72,80,88,96,112,128 } }, + { { 0,16,20,24,28,32,40,48,56,64,80,96,112,128,160 }, { 0,16,24,28,32,40,48,56,64,80,96,112,128,160,192 }, { 0,16,32,48,64,80,96,112,128,144,160,176,192,208,224 } }, + }; + return 2*halfrate[!!DRMP3_HDR_TEST_MPEG1(h)][DRMP3_HDR_GET_LAYER(h) - 1][DRMP3_HDR_GET_BITRATE(h)]; +} + +static unsigned drmp3_hdr_sample_rate_hz(const drmp3_uint8 *h) +{ + static const unsigned g_hz[3] = { 44100, 48000, 32000 }; + return g_hz[DRMP3_HDR_GET_SAMPLE_RATE(h)] >> (int)!DRMP3_HDR_TEST_MPEG1(h) >> (int)!DRMP3_HDR_TEST_NOT_MPEG25(h); +} + +static unsigned drmp3_hdr_frame_samples(const drmp3_uint8 *h) +{ + return DRMP3_HDR_IS_LAYER_1(h) ? 384 : (1152 >> (int)DRMP3_HDR_IS_FRAME_576(h)); +} + +static int drmp3_hdr_frame_bytes(const drmp3_uint8 *h, int free_format_size) +{ + int frame_bytes = drmp3_hdr_frame_samples(h)*drmp3_hdr_bitrate_kbps(h)*125/drmp3_hdr_sample_rate_hz(h); + if (DRMP3_HDR_IS_LAYER_1(h)) + { + frame_bytes &= ~3; /* slot align */ + } + return frame_bytes ? frame_bytes : free_format_size; +} + +static int drmp3_hdr_padding(const drmp3_uint8 *h) +{ + return DRMP3_HDR_TEST_PADDING(h) ? (DRMP3_HDR_IS_LAYER_1(h) ? 4 : 1) : 0; +} + +#ifndef DR_MP3_ONLY_MP3 +static const drmp3_L12_subband_alloc *drmp3_L12_subband_alloc_table(const drmp3_uint8 *hdr, drmp3_L12_scale_info *sci) +{ + const drmp3_L12_subband_alloc *alloc; + int mode = DRMP3_HDR_GET_STEREO_MODE(hdr); + int nbands, stereo_bands = (mode == DRMP3_MODE_MONO) ? 0 : (mode == DRMP3_MODE_JOINT_STEREO) ? (DRMP3_HDR_GET_STEREO_MODE_EXT(hdr) << 2) + 4 : 32; + + if (DRMP3_HDR_IS_LAYER_1(hdr)) + { + static const drmp3_L12_subband_alloc g_alloc_L1[] = { { 76, 4, 32 } }; + alloc = g_alloc_L1; + nbands = 32; + } else if (!DRMP3_HDR_TEST_MPEG1(hdr)) + { + static const drmp3_L12_subband_alloc g_alloc_L2M2[] = { { 60, 4, 4 }, { 44, 3, 7 }, { 44, 2, 19 } }; + alloc = g_alloc_L2M2; + nbands = 30; + } else + { + static const drmp3_L12_subband_alloc g_alloc_L2M1[] = { { 0, 4, 3 }, { 16, 4, 8 }, { 32, 3, 12 }, { 40, 2, 7 } }; + int sample_rate_idx = DRMP3_HDR_GET_SAMPLE_RATE(hdr); + unsigned kbps = drmp3_hdr_bitrate_kbps(hdr) >> (int)(mode != DRMP3_MODE_MONO); + if (!kbps) /* free-format */ + { + kbps = 192; + } + + alloc = g_alloc_L2M1; + nbands = 27; + if (kbps < 56) + { + static const drmp3_L12_subband_alloc g_alloc_L2M1_lowrate[] = { { 44, 4, 2 }, { 44, 3, 10 } }; + alloc = g_alloc_L2M1_lowrate; + nbands = sample_rate_idx == 2 ? 12 : 8; + } else if (kbps >= 96 && sample_rate_idx != 1) + { + nbands = 30; + } + } + + sci->total_bands = (drmp3_uint8)nbands; + sci->stereo_bands = (drmp3_uint8)DRMP3_MIN(stereo_bands, nbands); + + return alloc; +} + +static void drmp3_L12_read_scalefactors(drmp3_bs *bs, drmp3_uint8 *pba, drmp3_uint8 *scfcod, int bands, float *scf) +{ + static const float g_deq_L12[18*3] = { +#define DRMP3_DQ(x) 9.53674316e-07f/x, 7.56931807e-07f/x, 6.00777173e-07f/x + DRMP3_DQ(3),DRMP3_DQ(7),DRMP3_DQ(15),DRMP3_DQ(31),DRMP3_DQ(63),DRMP3_DQ(127),DRMP3_DQ(255),DRMP3_DQ(511),DRMP3_DQ(1023),DRMP3_DQ(2047),DRMP3_DQ(4095),DRMP3_DQ(8191),DRMP3_DQ(16383),DRMP3_DQ(32767),DRMP3_DQ(65535),DRMP3_DQ(3),DRMP3_DQ(5),DRMP3_DQ(9) + }; + int i, m; + for (i = 0; i < bands; i++) + { + float s = 0; + int ba = *pba++; + int mask = ba ? 4 + ((19 >> scfcod[i]) & 3) : 0; + for (m = 4; m; m >>= 1) + { + if (mask & m) + { + int b = drmp3_bs_get_bits(bs, 6); + s = g_deq_L12[ba*3 - 6 + b % 3]*(int)(1 << 21 >> b/3); + } + *scf++ = s; + } + } +} + +static void drmp3_L12_read_scale_info(const drmp3_uint8 *hdr, drmp3_bs *bs, drmp3_L12_scale_info *sci) +{ + static const drmp3_uint8 g_bitalloc_code_tab[] = { + 0,17, 3, 4, 5,6,7, 8,9,10,11,12,13,14,15,16, + 0,17,18, 3,19,4,5, 6,7, 8, 9,10,11,12,13,16, + 0,17,18, 3,19,4,5,16, + 0,17,18,16, + 0,17,18,19, 4,5,6, 7,8, 9,10,11,12,13,14,15, + 0,17,18, 3,19,4,5, 6,7, 8, 9,10,11,12,13,14, + 0, 2, 3, 4, 5,6,7, 8,9,10,11,12,13,14,15,16 + }; + const drmp3_L12_subband_alloc *subband_alloc = drmp3_L12_subband_alloc_table(hdr, sci); + + int i, k = 0, ba_bits = 0; + const drmp3_uint8 *ba_code_tab = g_bitalloc_code_tab; + + for (i = 0; i < sci->total_bands; i++) + { + drmp3_uint8 ba; + if (i == k) + { + k += subband_alloc->band_count; + ba_bits = subband_alloc->code_tab_width; + ba_code_tab = g_bitalloc_code_tab + subband_alloc->tab_offset; + subband_alloc++; + } + ba = ba_code_tab[drmp3_bs_get_bits(bs, ba_bits)]; + sci->bitalloc[2*i] = ba; + if (i < sci->stereo_bands) + { + ba = ba_code_tab[drmp3_bs_get_bits(bs, ba_bits)]; + } + sci->bitalloc[2*i + 1] = sci->stereo_bands ? ba : 0; + } + + for (i = 0; i < 2*sci->total_bands; i++) + { + sci->scfcod[i] = (drmp3_uint8)(sci->bitalloc[i] ? DRMP3_HDR_IS_LAYER_1(hdr) ? 2 : drmp3_bs_get_bits(bs, 2) : 6); + } + + drmp3_L12_read_scalefactors(bs, sci->bitalloc, sci->scfcod, sci->total_bands*2, sci->scf); + + for (i = sci->stereo_bands; i < sci->total_bands; i++) + { + sci->bitalloc[2*i + 1] = 0; + } +} + +static int drmp3_L12_dequantize_granule(float *grbuf, drmp3_bs *bs, drmp3_L12_scale_info *sci, int group_size) +{ + int i, j, k, choff = 576; + for (j = 0; j < 4; j++) + { + float *dst = grbuf + group_size*j; + for (i = 0; i < 2*sci->total_bands; i++) + { + int ba = sci->bitalloc[i]; + if (ba != 0) + { + if (ba < 17) + { + int half = (1 << (ba - 1)) - 1; + for (k = 0; k < group_size; k++) + { + dst[k] = (float)((int)drmp3_bs_get_bits(bs, ba) - half); + } + } else + { + unsigned mod = (2 << (ba - 17)) + 1; /* 3, 5, 9 */ + unsigned code = drmp3_bs_get_bits(bs, mod + 2 - (mod >> 3)); /* 5, 7, 10 */ + for (k = 0; k < group_size; k++, code /= mod) + { + dst[k] = (float)((int)(code % mod - mod/2)); + } + } + } + dst += choff; + choff = 18 - choff; + } + } + return group_size*4; +} + +static void drmp3_L12_apply_scf_384(drmp3_L12_scale_info *sci, const float *scf, float *dst) +{ + int i, k; + DRMP3_COPY_MEMORY(dst + 576 + sci->stereo_bands*18, dst + sci->stereo_bands*18, (sci->total_bands - sci->stereo_bands)*18*sizeof(float)); + for (i = 0; i < sci->total_bands; i++, dst += 18, scf += 6) + { + for (k = 0; k < 12; k++) + { + dst[k + 0] *= scf[0]; + dst[k + 576] *= scf[3]; + } + } +} +#endif + +static int drmp3_L3_read_side_info(drmp3_bs *bs, drmp3_L3_gr_info *gr, const drmp3_uint8 *hdr) +{ + static const drmp3_uint8 g_scf_long[8][23] = { + { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 }, + { 12,12,12,12,12,12,16,20,24,28,32,40,48,56,64,76,90,2,2,2,2,2,0 }, + { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 }, + { 6,6,6,6,6,6,8,10,12,14,16,18,22,26,32,38,46,54,62,70,76,36,0 }, + { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 }, + { 4,4,4,4,4,4,6,6,8,8,10,12,16,20,24,28,34,42,50,54,76,158,0 }, + { 4,4,4,4,4,4,6,6,6,8,10,12,16,18,22,28,34,40,46,54,54,192,0 }, + { 4,4,4,4,4,4,6,6,8,10,12,16,20,24,30,38,46,56,68,84,102,26,0 } + }; + static const drmp3_uint8 g_scf_short[8][40] = { + { 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 }, + { 8,8,8,8,8,8,8,8,8,12,12,12,16,16,16,20,20,20,24,24,24,28,28,28,36,36,36,2,2,2,2,2,2,2,2,2,26,26,26,0 }, + { 4,4,4,4,4,4,4,4,4,6,6,6,6,6,6,8,8,8,10,10,10,14,14,14,18,18,18,26,26,26,32,32,32,42,42,42,18,18,18,0 }, + { 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,32,32,32,44,44,44,12,12,12,0 }, + { 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 }, + { 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,22,22,22,30,30,30,56,56,56,0 }, + { 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,6,6,6,10,10,10,12,12,12,14,14,14,16,16,16,20,20,20,26,26,26,66,66,66,0 }, + { 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,12,12,12,16,16,16,20,20,20,26,26,26,34,34,34,42,42,42,12,12,12,0 } + }; + static const drmp3_uint8 g_scf_mixed[8][40] = { + { 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 }, + { 12,12,12,4,4,4,8,8,8,12,12,12,16,16,16,20,20,20,24,24,24,28,28,28,36,36,36,2,2,2,2,2,2,2,2,2,26,26,26,0 }, + { 6,6,6,6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,14,14,14,18,18,18,26,26,26,32,32,32,42,42,42,18,18,18,0 }, + { 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,32,32,32,44,44,44,12,12,12,0 }, + { 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 }, + { 4,4,4,4,4,4,6,6,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,22,22,22,30,30,30,56,56,56,0 }, + { 4,4,4,4,4,4,6,6,4,4,4,6,6,6,6,6,6,10,10,10,12,12,12,14,14,14,16,16,16,20,20,20,26,26,26,66,66,66,0 }, + { 4,4,4,4,4,4,6,6,4,4,4,6,6,6,8,8,8,12,12,12,16,16,16,20,20,20,26,26,26,34,34,34,42,42,42,12,12,12,0 } + }; + + unsigned tables, scfsi = 0; + int main_data_begin, part_23_sum = 0; + int gr_count = DRMP3_HDR_IS_MONO(hdr) ? 1 : 2; + int sr_idx = DRMP3_HDR_GET_MY_SAMPLE_RATE(hdr); sr_idx -= (sr_idx != 0); + + if (DRMP3_HDR_TEST_MPEG1(hdr)) + { + gr_count *= 2; + main_data_begin = drmp3_bs_get_bits(bs, 9); + scfsi = drmp3_bs_get_bits(bs, 7 + gr_count); + } else + { + main_data_begin = drmp3_bs_get_bits(bs, 8 + gr_count) >> gr_count; + } + + do + { + if (DRMP3_HDR_IS_MONO(hdr)) + { + scfsi <<= 4; + } + gr->part_23_length = (drmp3_uint16)drmp3_bs_get_bits(bs, 12); + part_23_sum += gr->part_23_length; + gr->big_values = (drmp3_uint16)drmp3_bs_get_bits(bs, 9); + if (gr->big_values > 288) + { + return -1; + } + gr->global_gain = (drmp3_uint8)drmp3_bs_get_bits(bs, 8); + gr->scalefac_compress = (drmp3_uint16)drmp3_bs_get_bits(bs, DRMP3_HDR_TEST_MPEG1(hdr) ? 4 : 9); + gr->sfbtab = g_scf_long[sr_idx]; + gr->n_long_sfb = 22; + gr->n_short_sfb = 0; + if (drmp3_bs_get_bits(bs, 1)) + { + gr->block_type = (drmp3_uint8)drmp3_bs_get_bits(bs, 2); + if (!gr->block_type) + { + return -1; + } + gr->mixed_block_flag = (drmp3_uint8)drmp3_bs_get_bits(bs, 1); + gr->region_count[0] = 7; + gr->region_count[1] = 255; + if (gr->block_type == DRMP3_SHORT_BLOCK_TYPE) + { + scfsi &= 0x0F0F; + if (!gr->mixed_block_flag) + { + gr->region_count[0] = 8; + gr->sfbtab = g_scf_short[sr_idx]; + gr->n_long_sfb = 0; + gr->n_short_sfb = 39; + } else + { + gr->sfbtab = g_scf_mixed[sr_idx]; + gr->n_long_sfb = DRMP3_HDR_TEST_MPEG1(hdr) ? 8 : 6; + gr->n_short_sfb = 30; + } + } + tables = drmp3_bs_get_bits(bs, 10); + tables <<= 5; + gr->subblock_gain[0] = (drmp3_uint8)drmp3_bs_get_bits(bs, 3); + gr->subblock_gain[1] = (drmp3_uint8)drmp3_bs_get_bits(bs, 3); + gr->subblock_gain[2] = (drmp3_uint8)drmp3_bs_get_bits(bs, 3); + } else + { + gr->block_type = 0; + gr->mixed_block_flag = 0; + tables = drmp3_bs_get_bits(bs, 15); + gr->region_count[0] = (drmp3_uint8)drmp3_bs_get_bits(bs, 4); + gr->region_count[1] = (drmp3_uint8)drmp3_bs_get_bits(bs, 3); + gr->region_count[2] = 255; + } + gr->table_select[0] = (drmp3_uint8)(tables >> 10); + gr->table_select[1] = (drmp3_uint8)((tables >> 5) & 31); + gr->table_select[2] = (drmp3_uint8)((tables) & 31); + gr->preflag = (drmp3_uint8)(DRMP3_HDR_TEST_MPEG1(hdr) ? drmp3_bs_get_bits(bs, 1) : (gr->scalefac_compress >= 500)); + gr->scalefac_scale = (drmp3_uint8)drmp3_bs_get_bits(bs, 1); + gr->count1_table = (drmp3_uint8)drmp3_bs_get_bits(bs, 1); + gr->scfsi = (drmp3_uint8)((scfsi >> 12) & 15); + scfsi <<= 4; + gr++; + } while(--gr_count); + + if (part_23_sum + bs->pos > bs->limit + main_data_begin*8) + { + return -1; + } + + return main_data_begin; +} + +static void drmp3_L3_read_scalefactors(drmp3_uint8 *scf, drmp3_uint8 *ist_pos, const drmp3_uint8 *scf_size, const drmp3_uint8 *scf_count, drmp3_bs *bitbuf, int scfsi) +{ + int i, k; + for (i = 0; i < 4 && scf_count[i]; i++, scfsi *= 2) + { + int cnt = scf_count[i]; + if (scfsi & 8) + { + DRMP3_COPY_MEMORY(scf, ist_pos, cnt); + } else + { + int bits = scf_size[i]; + if (!bits) + { + DRMP3_ZERO_MEMORY(scf, cnt); + DRMP3_ZERO_MEMORY(ist_pos, cnt); + } else + { + int max_scf = (scfsi < 0) ? (1 << bits) - 1 : -1; + for (k = 0; k < cnt; k++) + { + int s = drmp3_bs_get_bits(bitbuf, bits); + ist_pos[k] = (drmp3_uint8)(s == max_scf ? -1 : s); + scf[k] = (drmp3_uint8)s; + } + } + } + ist_pos += cnt; + scf += cnt; + } + scf[0] = scf[1] = scf[2] = 0; +} + +static float drmp3_L3_ldexp_q2(float y, int exp_q2) +{ + static const float g_expfrac[4] = { 9.31322575e-10f,7.83145814e-10f,6.58544508e-10f,5.53767716e-10f }; + int e; + do + { + e = DRMP3_MIN(30*4, exp_q2); + y *= g_expfrac[e & 3]*(1 << 30 >> (e >> 2)); + } while ((exp_q2 -= e) > 0); + return y; +} + +/* +I've had reports of GCC 14 throwing an incorrect -Wstringop-overflow warning here. This is an attempt +to silence this warning. +*/ +#if (defined(__GNUC__) && (__GNUC__ >= 13)) && !defined(__clang__) + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wstringop-overflow" +#endif +static void drmp3_L3_decode_scalefactors(const drmp3_uint8 *hdr, drmp3_uint8 *ist_pos, drmp3_bs *bs, const drmp3_L3_gr_info *gr, float *scf, int ch) +{ + static const drmp3_uint8 g_scf_partitions[3][28] = { + { 6,5,5, 5,6,5,5,5,6,5, 7,3,11,10,0,0, 7, 7, 7,0, 6, 6,6,3, 8, 8,5,0 }, + { 8,9,6,12,6,9,9,9,6,9,12,6,15,18,0,0, 6,15,12,0, 6,12,9,6, 6,18,9,0 }, + { 9,9,6,12,9,9,9,9,9,9,12,6,18,18,0,0,12,12,12,0,12, 9,9,6,15,12,9,0 } + }; + const drmp3_uint8 *scf_partition = g_scf_partitions[!!gr->n_short_sfb + !gr->n_long_sfb]; + drmp3_uint8 scf_size[4], iscf[40]; + int i, scf_shift = gr->scalefac_scale + 1, gain_exp, scfsi = gr->scfsi; + float gain; + + if (DRMP3_HDR_TEST_MPEG1(hdr)) + { + static const drmp3_uint8 g_scfc_decode[16] = { 0,1,2,3, 12,5,6,7, 9,10,11,13, 14,15,18,19 }; + int part = g_scfc_decode[gr->scalefac_compress]; + scf_size[1] = scf_size[0] = (drmp3_uint8)(part >> 2); + scf_size[3] = scf_size[2] = (drmp3_uint8)(part & 3); + } else + { + static const drmp3_uint8 g_mod[6*4] = { 5,5,4,4,5,5,4,1,4,3,1,1,5,6,6,1,4,4,4,1,4,3,1,1 }; + int k, modprod, sfc, ist = DRMP3_HDR_TEST_I_STEREO(hdr) && ch; + sfc = gr->scalefac_compress >> ist; + for (k = ist*3*4; sfc >= 0; sfc -= modprod, k += 4) + { + for (modprod = 1, i = 3; i >= 0; i--) + { + scf_size[i] = (drmp3_uint8)(sfc / modprod % g_mod[k + i]); + modprod *= g_mod[k + i]; + } + } + scf_partition += k; + scfsi = -16; + } + drmp3_L3_read_scalefactors(iscf, ist_pos, scf_size, scf_partition, bs, scfsi); + + if (gr->n_short_sfb) + { + int sh = 3 - scf_shift; + for (i = 0; i < gr->n_short_sfb; i += 3) + { + iscf[gr->n_long_sfb + i + 0] = (drmp3_uint8)(iscf[gr->n_long_sfb + i + 0] + (gr->subblock_gain[0] << sh)); + iscf[gr->n_long_sfb + i + 1] = (drmp3_uint8)(iscf[gr->n_long_sfb + i + 1] + (gr->subblock_gain[1] << sh)); + iscf[gr->n_long_sfb + i + 2] = (drmp3_uint8)(iscf[gr->n_long_sfb + i + 2] + (gr->subblock_gain[2] << sh)); + } + } else if (gr->preflag) + { + static const drmp3_uint8 g_preamp[10] = { 1,1,1,1,2,2,3,3,3,2 }; + for (i = 0; i < 10; i++) + { + iscf[11 + i] = (drmp3_uint8)(iscf[11 + i] + g_preamp[i]); + } + } + + gain_exp = gr->global_gain + DRMP3_BITS_DEQUANTIZER_OUT*4 - 210 - (DRMP3_HDR_IS_MS_STEREO(hdr) ? 2 : 0); + gain = drmp3_L3_ldexp_q2(1 << (DRMP3_MAX_SCFI/4), DRMP3_MAX_SCFI - gain_exp); + for (i = 0; i < (int)(gr->n_long_sfb + gr->n_short_sfb); i++) + { + scf[i] = drmp3_L3_ldexp_q2(gain, iscf[i] << scf_shift); + } +} +#if (defined(__GNUC__) && (__GNUC__ >= 13)) && !defined(__clang__) + #pragma GCC diagnostic pop +#endif + +static const float g_drmp3_pow43[129 + 16] = { + 0,-1,-2.519842f,-4.326749f,-6.349604f,-8.549880f,-10.902724f,-13.390518f,-16.000000f,-18.720754f,-21.544347f,-24.463781f,-27.473142f,-30.567351f,-33.741992f,-36.993181f, + 0,1,2.519842f,4.326749f,6.349604f,8.549880f,10.902724f,13.390518f,16.000000f,18.720754f,21.544347f,24.463781f,27.473142f,30.567351f,33.741992f,36.993181f,40.317474f,43.711787f,47.173345f,50.699631f,54.288352f,57.937408f,61.644865f,65.408941f,69.227979f,73.100443f,77.024898f,81.000000f,85.024491f,89.097188f,93.216975f,97.382800f,101.593667f,105.848633f,110.146801f,114.487321f,118.869381f,123.292209f,127.755065f,132.257246f,136.798076f,141.376907f,145.993119f,150.646117f,155.335327f,160.060199f,164.820202f,169.614826f,174.443577f,179.305980f,184.201575f,189.129918f,194.090580f,199.083145f,204.107210f,209.162385f,214.248292f,219.364564f,224.510845f,229.686789f,234.892058f,240.126328f,245.389280f,250.680604f,256.000000f,261.347174f,266.721841f,272.123723f,277.552547f,283.008049f,288.489971f,293.998060f,299.532071f,305.091761f,310.676898f,316.287249f,321.922592f,327.582707f,333.267377f,338.976394f,344.709550f,350.466646f,356.247482f,362.051866f,367.879608f,373.730522f,379.604427f,385.501143f,391.420496f,397.362314f,403.326427f,409.312672f,415.320884f,421.350905f,427.402579f,433.475750f,439.570269f,445.685987f,451.822757f,457.980436f,464.158883f,470.357960f,476.577530f,482.817459f,489.077615f,495.357868f,501.658090f,507.978156f,514.317941f,520.677324f,527.056184f,533.454404f,539.871867f,546.308458f,552.764065f,559.238575f,565.731879f,572.243870f,578.774440f,585.323483f,591.890898f,598.476581f,605.080431f,611.702349f,618.342238f,625.000000f,631.675540f,638.368763f,645.079578f +}; + +static float drmp3_L3_pow_43(int x) +{ + float frac; + int sign, mult = 256; + + if (x < 129) + { + return g_drmp3_pow43[16 + x]; + } + + if (x < 1024) + { + mult = 16; + x <<= 3; + } + + sign = 2*x & 64; + frac = (float)((x & 63) - sign) / ((x & ~63) + sign); + return g_drmp3_pow43[16 + ((x + sign) >> 6)]*(1.f + frac*((4.f/3) + frac*(2.f/9)))*mult; +} + +static void drmp3_L3_huffman(float *dst, drmp3_bs *bs, const drmp3_L3_gr_info *gr_info, const float *scf, int layer3gr_limit) +{ + static const drmp3_int16 tabs[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 785,785,785,785,784,784,784,784,513,513,513,513,513,513,513,513,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256, + -255,1313,1298,1282,785,785,785,785,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,290,288, + -255,1313,1298,1282,769,769,769,769,529,529,529,529,529,529,529,529,528,528,528,528,528,528,528,528,512,512,512,512,512,512,512,512,290,288, + -253,-318,-351,-367,785,785,785,785,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,819,818,547,547,275,275,275,275,561,560,515,546,289,274,288,258, + -254,-287,1329,1299,1314,1312,1057,1057,1042,1042,1026,1026,784,784,784,784,529,529,529,529,529,529,529,529,769,769,769,769,768,768,768,768,563,560,306,306,291,259, + -252,-413,-477,-542,1298,-575,1041,1041,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-383,-399,1107,1092,1106,1061,849,849,789,789,1104,1091,773,773,1076,1075,341,340,325,309,834,804,577,577,532,532,516,516,832,818,803,816,561,561,531,531,515,546,289,289,288,258, + -252,-429,-493,-559,1057,1057,1042,1042,529,529,529,529,529,529,529,529,784,784,784,784,769,769,769,769,512,512,512,512,512,512,512,512,-382,1077,-415,1106,1061,1104,849,849,789,789,1091,1076,1029,1075,834,834,597,581,340,340,339,324,804,833,532,532,832,772,818,803,817,787,816,771,290,290,290,290,288,258, + -253,-349,-414,-447,-463,1329,1299,-479,1314,1312,1057,1057,1042,1042,1026,1026,785,785,785,785,784,784,784,784,769,769,769,769,768,768,768,768,-319,851,821,-335,836,850,805,849,341,340,325,336,533,533,579,579,564,564,773,832,578,548,563,516,321,276,306,291,304,259, + -251,-572,-733,-830,-863,-879,1041,1041,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-511,-527,-543,1396,1351,1381,1366,1395,1335,1380,-559,1334,1138,1138,1063,1063,1350,1392,1031,1031,1062,1062,1364,1363,1120,1120,1333,1348,881,881,881,881,375,374,359,373,343,358,341,325,791,791,1123,1122,-703,1105,1045,-719,865,865,790,790,774,774,1104,1029,338,293,323,308,-799,-815,833,788,772,818,803,816,322,292,307,320,561,531,515,546,289,274,288,258, + -251,-525,-605,-685,-765,-831,-846,1298,1057,1057,1312,1282,785,785,785,785,784,784,784,784,769,769,769,769,512,512,512,512,512,512,512,512,1399,1398,1383,1367,1382,1396,1351,-511,1381,1366,1139,1139,1079,1079,1124,1124,1364,1349,1363,1333,882,882,882,882,807,807,807,807,1094,1094,1136,1136,373,341,535,535,881,775,867,822,774,-591,324,338,-671,849,550,550,866,864,609,609,293,336,534,534,789,835,773,-751,834,804,308,307,833,788,832,772,562,562,547,547,305,275,560,515,290,290, + -252,-397,-477,-557,-622,-653,-719,-735,-750,1329,1299,1314,1057,1057,1042,1042,1312,1282,1024,1024,785,785,785,785,784,784,784,784,769,769,769,769,-383,1127,1141,1111,1126,1140,1095,1110,869,869,883,883,1079,1109,882,882,375,374,807,868,838,881,791,-463,867,822,368,263,852,837,836,-543,610,610,550,550,352,336,534,534,865,774,851,821,850,805,593,533,579,564,773,832,578,578,548,548,577,577,307,276,306,291,516,560,259,259, + -250,-2107,-2507,-2764,-2909,-2974,-3007,-3023,1041,1041,1040,1040,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-767,-1052,-1213,-1277,-1358,-1405,-1469,-1535,-1550,-1582,-1614,-1647,-1662,-1694,-1726,-1759,-1774,-1807,-1822,-1854,-1886,1565,-1919,-1935,-1951,-1967,1731,1730,1580,1717,-1983,1729,1564,-1999,1548,-2015,-2031,1715,1595,-2047,1714,-2063,1610,-2079,1609,-2095,1323,1323,1457,1457,1307,1307,1712,1547,1641,1700,1699,1594,1685,1625,1442,1442,1322,1322,-780,-973,-910,1279,1278,1277,1262,1276,1261,1275,1215,1260,1229,-959,974,974,989,989,-943,735,478,478,495,463,506,414,-1039,1003,958,1017,927,942,987,957,431,476,1272,1167,1228,-1183,1256,-1199,895,895,941,941,1242,1227,1212,1135,1014,1014,490,489,503,487,910,1013,985,925,863,894,970,955,1012,847,-1343,831,755,755,984,909,428,366,754,559,-1391,752,486,457,924,997,698,698,983,893,740,740,908,877,739,739,667,667,953,938,497,287,271,271,683,606,590,712,726,574,302,302,738,736,481,286,526,725,605,711,636,724,696,651,589,681,666,710,364,467,573,695,466,466,301,465,379,379,709,604,665,679,316,316,634,633,436,436,464,269,424,394,452,332,438,363,347,408,393,448,331,422,362,407,392,421,346,406,391,376,375,359,1441,1306,-2367,1290,-2383,1337,-2399,-2415,1426,1321,-2431,1411,1336,-2447,-2463,-2479,1169,1169,1049,1049,1424,1289,1412,1352,1319,-2495,1154,1154,1064,1064,1153,1153,416,390,360,404,403,389,344,374,373,343,358,372,327,357,342,311,356,326,1395,1394,1137,1137,1047,1047,1365,1392,1287,1379,1334,1364,1349,1378,1318,1363,792,792,792,792,1152,1152,1032,1032,1121,1121,1046,1046,1120,1120,1030,1030,-2895,1106,1061,1104,849,849,789,789,1091,1076,1029,1090,1060,1075,833,833,309,324,532,532,832,772,818,803,561,561,531,560,515,546,289,274,288,258, + -250,-1179,-1579,-1836,-1996,-2124,-2253,-2333,-2413,-2477,-2542,-2574,-2607,-2622,-2655,1314,1313,1298,1312,1282,785,785,785,785,1040,1040,1025,1025,768,768,768,768,-766,-798,-830,-862,-895,-911,-927,-943,-959,-975,-991,-1007,-1023,-1039,-1055,-1070,1724,1647,-1103,-1119,1631,1767,1662,1738,1708,1723,-1135,1780,1615,1779,1599,1677,1646,1778,1583,-1151,1777,1567,1737,1692,1765,1722,1707,1630,1751,1661,1764,1614,1736,1676,1763,1750,1645,1598,1721,1691,1762,1706,1582,1761,1566,-1167,1749,1629,767,766,751,765,494,494,735,764,719,749,734,763,447,447,748,718,477,506,431,491,446,476,461,505,415,430,475,445,504,399,460,489,414,503,383,474,429,459,502,502,746,752,488,398,501,473,413,472,486,271,480,270,-1439,-1455,1357,-1471,-1487,-1503,1341,1325,-1519,1489,1463,1403,1309,-1535,1372,1448,1418,1476,1356,1462,1387,-1551,1475,1340,1447,1402,1386,-1567,1068,1068,1474,1461,455,380,468,440,395,425,410,454,364,467,466,464,453,269,409,448,268,432,1371,1473,1432,1417,1308,1460,1355,1446,1459,1431,1083,1083,1401,1416,1458,1445,1067,1067,1370,1457,1051,1051,1291,1430,1385,1444,1354,1415,1400,1443,1082,1082,1173,1113,1186,1066,1185,1050,-1967,1158,1128,1172,1097,1171,1081,-1983,1157,1112,416,266,375,400,1170,1142,1127,1065,793,793,1169,1033,1156,1096,1141,1111,1155,1080,1126,1140,898,898,808,808,897,897,792,792,1095,1152,1032,1125,1110,1139,1079,1124,882,807,838,881,853,791,-2319,867,368,263,822,852,837,866,806,865,-2399,851,352,262,534,534,821,836,594,594,549,549,593,593,533,533,848,773,579,579,564,578,548,563,276,276,577,576,306,291,516,560,305,305,275,259, + -251,-892,-2058,-2620,-2828,-2957,-3023,-3039,1041,1041,1040,1040,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-511,-527,-543,-559,1530,-575,-591,1528,1527,1407,1526,1391,1023,1023,1023,1023,1525,1375,1268,1268,1103,1103,1087,1087,1039,1039,1523,-604,815,815,815,815,510,495,509,479,508,463,507,447,431,505,415,399,-734,-782,1262,-815,1259,1244,-831,1258,1228,-847,-863,1196,-879,1253,987,987,748,-767,493,493,462,477,414,414,686,669,478,446,461,445,474,429,487,458,412,471,1266,1264,1009,1009,799,799,-1019,-1276,-1452,-1581,-1677,-1757,-1821,-1886,-1933,-1997,1257,1257,1483,1468,1512,1422,1497,1406,1467,1496,1421,1510,1134,1134,1225,1225,1466,1451,1374,1405,1252,1252,1358,1480,1164,1164,1251,1251,1238,1238,1389,1465,-1407,1054,1101,-1423,1207,-1439,830,830,1248,1038,1237,1117,1223,1148,1236,1208,411,426,395,410,379,269,1193,1222,1132,1235,1221,1116,976,976,1192,1162,1177,1220,1131,1191,963,963,-1647,961,780,-1663,558,558,994,993,437,408,393,407,829,978,813,797,947,-1743,721,721,377,392,844,950,828,890,706,706,812,859,796,960,948,843,934,874,571,571,-1919,690,555,689,421,346,539,539,944,779,918,873,932,842,903,888,570,570,931,917,674,674,-2575,1562,-2591,1609,-2607,1654,1322,1322,1441,1441,1696,1546,1683,1593,1669,1624,1426,1426,1321,1321,1639,1680,1425,1425,1305,1305,1545,1668,1608,1623,1667,1592,1638,1666,1320,1320,1652,1607,1409,1409,1304,1304,1288,1288,1664,1637,1395,1395,1335,1335,1622,1636,1394,1394,1319,1319,1606,1621,1392,1392,1137,1137,1137,1137,345,390,360,375,404,373,1047,-2751,-2767,-2783,1062,1121,1046,-2799,1077,-2815,1106,1061,789,789,1105,1104,263,355,310,340,325,354,352,262,339,324,1091,1076,1029,1090,1060,1075,833,833,788,788,1088,1028,818,818,803,803,561,561,531,531,816,771,546,546,289,274,288,258, + -253,-317,-381,-446,-478,-509,1279,1279,-811,-1179,-1451,-1756,-1900,-2028,-2189,-2253,-2333,-2414,-2445,-2511,-2526,1313,1298,-2559,1041,1041,1040,1040,1025,1025,1024,1024,1022,1007,1021,991,1020,975,1019,959,687,687,1018,1017,671,671,655,655,1016,1015,639,639,758,758,623,623,757,607,756,591,755,575,754,559,543,543,1009,783,-575,-621,-685,-749,496,-590,750,749,734,748,974,989,1003,958,988,973,1002,942,987,957,972,1001,926,986,941,971,956,1000,910,985,925,999,894,970,-1071,-1087,-1102,1390,-1135,1436,1509,1451,1374,-1151,1405,1358,1480,1420,-1167,1507,1494,1389,1342,1465,1435,1450,1326,1505,1310,1493,1373,1479,1404,1492,1464,1419,428,443,472,397,736,526,464,464,486,457,442,471,484,482,1357,1449,1434,1478,1388,1491,1341,1490,1325,1489,1463,1403,1309,1477,1372,1448,1418,1433,1476,1356,1462,1387,-1439,1475,1340,1447,1402,1474,1324,1461,1371,1473,269,448,1432,1417,1308,1460,-1711,1459,-1727,1441,1099,1099,1446,1386,1431,1401,-1743,1289,1083,1083,1160,1160,1458,1445,1067,1067,1370,1457,1307,1430,1129,1129,1098,1098,268,432,267,416,266,400,-1887,1144,1187,1082,1173,1113,1186,1066,1050,1158,1128,1143,1172,1097,1171,1081,420,391,1157,1112,1170,1142,1127,1065,1169,1049,1156,1096,1141,1111,1155,1080,1126,1154,1064,1153,1140,1095,1048,-2159,1125,1110,1137,-2175,823,823,1139,1138,807,807,384,264,368,263,868,838,853,791,867,822,852,837,866,806,865,790,-2319,851,821,836,352,262,850,805,849,-2399,533,533,835,820,336,261,578,548,563,577,532,532,832,772,562,562,547,547,305,275,560,515,290,290,288,258 }; + static const drmp3_uint8 tab32[] = { 130,162,193,209,44,28,76,140,9,9,9,9,9,9,9,9,190,254,222,238,126,94,157,157,109,61,173,205}; + static const drmp3_uint8 tab33[] = { 252,236,220,204,188,172,156,140,124,108,92,76,60,44,28,12 }; + static const drmp3_int16 tabindex[2*16] = { 0,32,64,98,0,132,180,218,292,364,426,538,648,746,0,1126,1460,1460,1460,1460,1460,1460,1460,1460,1842,1842,1842,1842,1842,1842,1842,1842 }; + static const drmp3_uint8 g_linbits[] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,3,4,6,8,10,13,4,5,6,7,8,9,11,13 }; + +#define DRMP3_PEEK_BITS(n) (bs_cache >> (32 - (n))) +#define DRMP3_FLUSH_BITS(n) { bs_cache <<= (n); bs_sh += (n); } +#define DRMP3_CHECK_BITS while (bs_sh >= 0) { bs_cache |= (drmp3_uint32)*bs_next_ptr++ << bs_sh; bs_sh -= 8; } +#define DRMP3_BSPOS ((bs_next_ptr - bs->buf)*8 - 24 + bs_sh) + + float one = 0.0f; + int ireg = 0, big_val_cnt = gr_info->big_values; + const drmp3_uint8 *sfb = gr_info->sfbtab; + const drmp3_uint8 *bs_next_ptr = bs->buf + bs->pos/8; + drmp3_uint32 bs_cache = (((bs_next_ptr[0]*256u + bs_next_ptr[1])*256u + bs_next_ptr[2])*256u + bs_next_ptr[3]) << (bs->pos & 7); + int pairs_to_decode, np, bs_sh = (bs->pos & 7) - 8; + bs_next_ptr += 4; + + while (big_val_cnt > 0) + { + int tab_num = gr_info->table_select[ireg]; + int sfb_cnt = gr_info->region_count[ireg++]; + const drmp3_int16 *codebook = tabs + tabindex[tab_num]; + int linbits = g_linbits[tab_num]; + if (linbits) + { + do + { + np = *sfb++ / 2; + pairs_to_decode = DRMP3_MIN(big_val_cnt, np); + one = *scf++; + do + { + int j, w = 5; + int leaf = codebook[DRMP3_PEEK_BITS(w)]; + while (leaf < 0) + { + DRMP3_FLUSH_BITS(w); + w = leaf & 7; + leaf = codebook[DRMP3_PEEK_BITS(w) - (leaf >> 3)]; + } + DRMP3_FLUSH_BITS(leaf >> 8); + + for (j = 0; j < 2; j++, dst++, leaf >>= 4) + { + int lsb = leaf & 0x0F; + if (lsb == 15) + { + lsb += DRMP3_PEEK_BITS(linbits); + DRMP3_FLUSH_BITS(linbits); + DRMP3_CHECK_BITS; + *dst = one*drmp3_L3_pow_43(lsb)*((drmp3_int32)bs_cache < 0 ? -1: 1); + } else + { + *dst = g_drmp3_pow43[16 + lsb - 16*(bs_cache >> 31)]*one; + } + DRMP3_FLUSH_BITS(lsb ? 1 : 0); + } + DRMP3_CHECK_BITS; + } while (--pairs_to_decode); + } while ((big_val_cnt -= np) > 0 && --sfb_cnt >= 0); + } else + { + do + { + np = *sfb++ / 2; + pairs_to_decode = DRMP3_MIN(big_val_cnt, np); + one = *scf++; + do + { + int j, w = 5; + int leaf = codebook[DRMP3_PEEK_BITS(w)]; + while (leaf < 0) + { + DRMP3_FLUSH_BITS(w); + w = leaf & 7; + leaf = codebook[DRMP3_PEEK_BITS(w) - (leaf >> 3)]; + } + DRMP3_FLUSH_BITS(leaf >> 8); + + for (j = 0; j < 2; j++, dst++, leaf >>= 4) + { + int lsb = leaf & 0x0F; + *dst = g_drmp3_pow43[16 + lsb - 16*(bs_cache >> 31)]*one; + DRMP3_FLUSH_BITS(lsb ? 1 : 0); + } + DRMP3_CHECK_BITS; + } while (--pairs_to_decode); + } while ((big_val_cnt -= np) > 0 && --sfb_cnt >= 0); + } + } + + for (np = 1 - big_val_cnt;; dst += 4) + { + const drmp3_uint8 *codebook_count1 = (gr_info->count1_table) ? tab33 : tab32; + int leaf = codebook_count1[DRMP3_PEEK_BITS(4)]; + if (!(leaf & 8)) + { + leaf = codebook_count1[(leaf >> 3) + (bs_cache << 4 >> (32 - (leaf & 3)))]; + } + DRMP3_FLUSH_BITS(leaf & 7); + if (DRMP3_BSPOS > layer3gr_limit) + { + break; + } +#define DRMP3_RELOAD_SCALEFACTOR if (!--np) { np = *sfb++/2; if (!np) break; one = *scf++; } +#define DRMP3_DEQ_COUNT1(s) if (leaf & (128 >> s)) { dst[s] = ((drmp3_int32)bs_cache < 0) ? -one : one; DRMP3_FLUSH_BITS(1) } + DRMP3_RELOAD_SCALEFACTOR; + DRMP3_DEQ_COUNT1(0); + DRMP3_DEQ_COUNT1(1); + DRMP3_RELOAD_SCALEFACTOR; + DRMP3_DEQ_COUNT1(2); + DRMP3_DEQ_COUNT1(3); + DRMP3_CHECK_BITS; + } + + bs->pos = layer3gr_limit; +} + +static void drmp3_L3_midside_stereo(float *left, int n) +{ + int i = 0; + float *right = left + 576; +#if DRMP3_HAVE_SIMD + if (drmp3_have_simd()) + { + for (; i < n - 3; i += 4) + { + drmp3_f4 vl = DRMP3_VLD(left + i); + drmp3_f4 vr = DRMP3_VLD(right + i); + DRMP3_VSTORE(left + i, DRMP3_VADD(vl, vr)); + DRMP3_VSTORE(right + i, DRMP3_VSUB(vl, vr)); + } +#ifdef __GNUC__ + /* Workaround for spurious -Waggressive-loop-optimizations warning from gcc. + * For more info see: https://github.com/lieff/minimp3/issues/88 + */ + if (__builtin_constant_p(n % 4 == 0) && n % 4 == 0) + return; +#endif + } +#endif + for (; i < n; i++) + { + float a = left[i]; + float b = right[i]; + left[i] = a + b; + right[i] = a - b; + } +} + +static void drmp3_L3_intensity_stereo_band(float *left, int n, float kl, float kr) +{ + int i; + for (i = 0; i < n; i++) + { + left[i + 576] = left[i]*kr; + left[i] = left[i]*kl; + } +} + +static void drmp3_L3_stereo_top_band(const float *right, const drmp3_uint8 *sfb, int nbands, int max_band[3]) +{ + int i, k; + + max_band[0] = max_band[1] = max_band[2] = -1; + + for (i = 0; i < nbands; i++) + { + for (k = 0; k < sfb[i]; k += 2) + { + if (right[k] != 0 || right[k + 1] != 0) + { + max_band[i % 3] = i; + break; + } + } + right += sfb[i]; + } +} + +static void drmp3_L3_stereo_process(float *left, const drmp3_uint8 *ist_pos, const drmp3_uint8 *sfb, const drmp3_uint8 *hdr, int max_band[3], int mpeg2_sh) +{ + static const float g_pan[7*2] = { 0,1,0.21132487f,0.78867513f,0.36602540f,0.63397460f,0.5f,0.5f,0.63397460f,0.36602540f,0.78867513f,0.21132487f,1,0 }; + unsigned i, max_pos = DRMP3_HDR_TEST_MPEG1(hdr) ? 7 : 64; + + for (i = 0; sfb[i]; i++) + { + unsigned ipos = ist_pos[i]; + if ((int)i > max_band[i % 3] && ipos < max_pos) + { + float kl, kr, s = DRMP3_HDR_TEST_MS_STEREO(hdr) ? 1.41421356f : 1; + if (DRMP3_HDR_TEST_MPEG1(hdr)) + { + kl = g_pan[2*ipos]; + kr = g_pan[2*ipos + 1]; + } else + { + kl = 1; + kr = drmp3_L3_ldexp_q2(1, (ipos + 1) >> 1 << mpeg2_sh); + if (ipos & 1) + { + kl = kr; + kr = 1; + } + } + drmp3_L3_intensity_stereo_band(left, sfb[i], kl*s, kr*s); + } else if (DRMP3_HDR_TEST_MS_STEREO(hdr)) + { + drmp3_L3_midside_stereo(left, sfb[i]); + } + left += sfb[i]; + } +} + +static void drmp3_L3_intensity_stereo(float *left, drmp3_uint8 *ist_pos, const drmp3_L3_gr_info *gr, const drmp3_uint8 *hdr) +{ + int max_band[3], n_sfb = gr->n_long_sfb + gr->n_short_sfb; + int i, max_blocks = gr->n_short_sfb ? 3 : 1; + + drmp3_L3_stereo_top_band(left + 576, gr->sfbtab, n_sfb, max_band); + if (gr->n_long_sfb) + { + max_band[0] = max_band[1] = max_band[2] = DRMP3_MAX(DRMP3_MAX(max_band[0], max_band[1]), max_band[2]); + } + for (i = 0; i < max_blocks; i++) + { + int default_pos = DRMP3_HDR_TEST_MPEG1(hdr) ? 3 : 0; + int itop = n_sfb - max_blocks + i; + int prev = itop - max_blocks; + ist_pos[itop] = (drmp3_uint8)(max_band[i] >= prev ? default_pos : ist_pos[prev]); + } + drmp3_L3_stereo_process(left, ist_pos, gr->sfbtab, hdr, max_band, gr[1].scalefac_compress & 1); +} + +static void drmp3_L3_reorder(float *grbuf, float *scratch, const drmp3_uint8 *sfb) +{ + int i, len; + float *src = grbuf, *dst = scratch; + + for (;0 != (len = *sfb); sfb += 3, src += 2*len) + { + for (i = 0; i < len; i++, src++) + { + *dst++ = src[0*len]; + *dst++ = src[1*len]; + *dst++ = src[2*len]; + } + } + DRMP3_COPY_MEMORY(grbuf, scratch, (dst - scratch)*sizeof(float)); +} + +static void drmp3_L3_antialias(float *grbuf, int nbands) +{ + static const float g_aa[2][8] = { + {0.85749293f,0.88174200f,0.94962865f,0.98331459f,0.99551782f,0.99916056f,0.99989920f,0.99999316f}, + {0.51449576f,0.47173197f,0.31337745f,0.18191320f,0.09457419f,0.04096558f,0.01419856f,0.00369997f} + }; + + for (; nbands > 0; nbands--, grbuf += 18) + { + int i = 0; +#if DRMP3_HAVE_SIMD + if (drmp3_have_simd()) for (; i < 8; i += 4) + { + drmp3_f4 vu = DRMP3_VLD(grbuf + 18 + i); + drmp3_f4 vd = DRMP3_VLD(grbuf + 14 - i); + drmp3_f4 vc0 = DRMP3_VLD(g_aa[0] + i); + drmp3_f4 vc1 = DRMP3_VLD(g_aa[1] + i); + vd = DRMP3_VREV(vd); + DRMP3_VSTORE(grbuf + 18 + i, DRMP3_VSUB(DRMP3_VMUL(vu, vc0), DRMP3_VMUL(vd, vc1))); + vd = DRMP3_VADD(DRMP3_VMUL(vu, vc1), DRMP3_VMUL(vd, vc0)); + DRMP3_VSTORE(grbuf + 14 - i, DRMP3_VREV(vd)); + } +#endif +#ifndef DR_MP3_ONLY_SIMD + for(; i < 8; i++) + { + float u = grbuf[18 + i]; + float d = grbuf[17 - i]; + grbuf[18 + i] = u*g_aa[0][i] - d*g_aa[1][i]; + grbuf[17 - i] = u*g_aa[1][i] + d*g_aa[0][i]; + } +#endif + } +} + +static void drmp3_L3_dct3_9(float *y) +{ + float s0, s1, s2, s3, s4, s5, s6, s7, s8, t0, t2, t4; + + s0 = y[0]; s2 = y[2]; s4 = y[4]; s6 = y[6]; s8 = y[8]; + t0 = s0 + s6*0.5f; + s0 -= s6; + t4 = (s4 + s2)*0.93969262f; + t2 = (s8 + s2)*0.76604444f; + s6 = (s4 - s8)*0.17364818f; + s4 += s8 - s2; + + s2 = s0 - s4*0.5f; + y[4] = s4 + s0; + s8 = t0 - t2 + s6; + s0 = t0 - t4 + t2; + s4 = t0 + t4 - s6; + + s1 = y[1]; s3 = y[3]; s5 = y[5]; s7 = y[7]; + + s3 *= 0.86602540f; + t0 = (s5 + s1)*0.98480775f; + t4 = (s5 - s7)*0.34202014f; + t2 = (s1 + s7)*0.64278761f; + s1 = (s1 - s5 - s7)*0.86602540f; + + s5 = t0 - s3 - t2; + s7 = t4 - s3 - t0; + s3 = t4 + s3 - t2; + + y[0] = s4 - s7; + y[1] = s2 + s1; + y[2] = s0 - s3; + y[3] = s8 + s5; + y[5] = s8 - s5; + y[6] = s0 + s3; + y[7] = s2 - s1; + y[8] = s4 + s7; +} + +static void drmp3_L3_imdct36(float *grbuf, float *overlap, const float *window, int nbands) +{ + int i, j; + static const float g_twid9[18] = { + 0.73727734f,0.79335334f,0.84339145f,0.88701083f,0.92387953f,0.95371695f,0.97629601f,0.99144486f,0.99904822f,0.67559021f,0.60876143f,0.53729961f,0.46174861f,0.38268343f,0.30070580f,0.21643961f,0.13052619f,0.04361938f + }; + + for (j = 0; j < nbands; j++, grbuf += 18, overlap += 9) + { + float co[9], si[9]; + co[0] = -grbuf[0]; + si[0] = grbuf[17]; + for (i = 0; i < 4; i++) + { + si[8 - 2*i] = grbuf[4*i + 1] - grbuf[4*i + 2]; + co[1 + 2*i] = grbuf[4*i + 1] + grbuf[4*i + 2]; + si[7 - 2*i] = grbuf[4*i + 4] - grbuf[4*i + 3]; + co[2 + 2*i] = -(grbuf[4*i + 3] + grbuf[4*i + 4]); + } + drmp3_L3_dct3_9(co); + drmp3_L3_dct3_9(si); + + si[1] = -si[1]; + si[3] = -si[3]; + si[5] = -si[5]; + si[7] = -si[7]; + + i = 0; + +#if DRMP3_HAVE_SIMD + if (drmp3_have_simd()) for (; i < 8; i += 4) + { + drmp3_f4 vovl = DRMP3_VLD(overlap + i); + drmp3_f4 vc = DRMP3_VLD(co + i); + drmp3_f4 vs = DRMP3_VLD(si + i); + drmp3_f4 vr0 = DRMP3_VLD(g_twid9 + i); + drmp3_f4 vr1 = DRMP3_VLD(g_twid9 + 9 + i); + drmp3_f4 vw0 = DRMP3_VLD(window + i); + drmp3_f4 vw1 = DRMP3_VLD(window + 9 + i); + drmp3_f4 vsum = DRMP3_VADD(DRMP3_VMUL(vc, vr1), DRMP3_VMUL(vs, vr0)); + DRMP3_VSTORE(overlap + i, DRMP3_VSUB(DRMP3_VMUL(vc, vr0), DRMP3_VMUL(vs, vr1))); + DRMP3_VSTORE(grbuf + i, DRMP3_VSUB(DRMP3_VMUL(vovl, vw0), DRMP3_VMUL(vsum, vw1))); + vsum = DRMP3_VADD(DRMP3_VMUL(vovl, vw1), DRMP3_VMUL(vsum, vw0)); + DRMP3_VSTORE(grbuf + 14 - i, DRMP3_VREV(vsum)); + } +#endif + for (; i < 9; i++) + { + float ovl = overlap[i]; + float sum = co[i]*g_twid9[9 + i] + si[i]*g_twid9[0 + i]; + overlap[i] = co[i]*g_twid9[0 + i] - si[i]*g_twid9[9 + i]; + grbuf[i] = ovl*window[0 + i] - sum*window[9 + i]; + grbuf[17 - i] = ovl*window[9 + i] + sum*window[0 + i]; + } + } +} + +static void drmp3_L3_idct3(float x0, float x1, float x2, float *dst) +{ + float m1 = x1*0.86602540f; + float a1 = x0 - x2*0.5f; + dst[1] = x0 + x2; + dst[0] = a1 + m1; + dst[2] = a1 - m1; +} + +static void drmp3_L3_imdct12(float *x, float *dst, float *overlap) +{ + static const float g_twid3[6] = { 0.79335334f,0.92387953f,0.99144486f, 0.60876143f,0.38268343f,0.13052619f }; + float co[3], si[3]; + int i; + + drmp3_L3_idct3(-x[0], x[6] + x[3], x[12] + x[9], co); + drmp3_L3_idct3(x[15], x[12] - x[9], x[6] - x[3], si); + si[1] = -si[1]; + + for (i = 0; i < 3; i++) + { + float ovl = overlap[i]; + float sum = co[i]*g_twid3[3 + i] + si[i]*g_twid3[0 + i]; + overlap[i] = co[i]*g_twid3[0 + i] - si[i]*g_twid3[3 + i]; + dst[i] = ovl*g_twid3[2 - i] - sum*g_twid3[5 - i]; + dst[5 - i] = ovl*g_twid3[5 - i] + sum*g_twid3[2 - i]; + } +} + +static void drmp3_L3_imdct_short(float *grbuf, float *overlap, int nbands) +{ + for (;nbands > 0; nbands--, overlap += 9, grbuf += 18) + { + float tmp[18]; + DRMP3_COPY_MEMORY(tmp, grbuf, sizeof(tmp)); + DRMP3_COPY_MEMORY(grbuf, overlap, 6*sizeof(float)); + drmp3_L3_imdct12(tmp, grbuf + 6, overlap + 6); + drmp3_L3_imdct12(tmp + 1, grbuf + 12, overlap + 6); + drmp3_L3_imdct12(tmp + 2, overlap, overlap + 6); + } +} + +static void drmp3_L3_change_sign(float *grbuf) +{ + int b, i; + for (b = 0, grbuf += 18; b < 32; b += 2, grbuf += 36) + for (i = 1; i < 18; i += 2) + grbuf[i] = -grbuf[i]; +} + +static void drmp3_L3_imdct_gr(float *grbuf, float *overlap, unsigned block_type, unsigned n_long_bands) +{ + static const float g_mdct_window[2][18] = { + { 0.99904822f,0.99144486f,0.97629601f,0.95371695f,0.92387953f,0.88701083f,0.84339145f,0.79335334f,0.73727734f,0.04361938f,0.13052619f,0.21643961f,0.30070580f,0.38268343f,0.46174861f,0.53729961f,0.60876143f,0.67559021f }, + { 1,1,1,1,1,1,0.99144486f,0.92387953f,0.79335334f,0,0,0,0,0,0,0.13052619f,0.38268343f,0.60876143f } + }; + if (n_long_bands) + { + drmp3_L3_imdct36(grbuf, overlap, g_mdct_window[0], n_long_bands); + grbuf += 18*n_long_bands; + overlap += 9*n_long_bands; + } + if (block_type == DRMP3_SHORT_BLOCK_TYPE) + drmp3_L3_imdct_short(grbuf, overlap, 32 - n_long_bands); + else + drmp3_L3_imdct36(grbuf, overlap, g_mdct_window[block_type == DRMP3_STOP_BLOCK_TYPE], 32 - n_long_bands); +} + +static void drmp3_L3_save_reservoir(drmp3dec *h, drmp3dec_scratch *s) +{ + int pos = (s->bs.pos + 7)/8u; + int remains = s->bs.limit/8u - pos; + if (remains > DRMP3_MAX_BITRESERVOIR_BYTES) + { + pos += remains - DRMP3_MAX_BITRESERVOIR_BYTES; + remains = DRMP3_MAX_BITRESERVOIR_BYTES; + } + if (remains > 0) + { + DRMP3_MOVE_MEMORY(h->reserv_buf, s->maindata + pos, remains); + } + h->reserv = remains; +} + +static int drmp3_L3_restore_reservoir(drmp3dec *h, drmp3_bs *bs, drmp3dec_scratch *s, int main_data_begin) +{ + int frame_bytes = (bs->limit - bs->pos)/8; + int bytes_have = DRMP3_MIN(h->reserv, main_data_begin); + DRMP3_COPY_MEMORY(s->maindata, h->reserv_buf + DRMP3_MAX(0, h->reserv - main_data_begin), DRMP3_MIN(h->reserv, main_data_begin)); + DRMP3_COPY_MEMORY(s->maindata + bytes_have, bs->buf + bs->pos/8, frame_bytes); + drmp3_bs_init(&s->bs, s->maindata, bytes_have + frame_bytes); + return h->reserv >= main_data_begin; +} + +static void drmp3_L3_decode(drmp3dec *h, drmp3dec_scratch *s, drmp3_L3_gr_info *gr_info, int nch) +{ + int ch; + + for (ch = 0; ch < nch; ch++) + { + int layer3gr_limit = s->bs.pos + gr_info[ch].part_23_length; + drmp3_L3_decode_scalefactors(h->header, s->ist_pos[ch], &s->bs, gr_info + ch, s->scf, ch); + drmp3_L3_huffman(s->grbuf[ch], &s->bs, gr_info + ch, s->scf, layer3gr_limit); + } + + if (DRMP3_HDR_TEST_I_STEREO(h->header)) + { + drmp3_L3_intensity_stereo(s->grbuf[0], s->ist_pos[1], gr_info, h->header); + } else if (DRMP3_HDR_IS_MS_STEREO(h->header)) + { + drmp3_L3_midside_stereo(s->grbuf[0], 576); + } + + for (ch = 0; ch < nch; ch++, gr_info++) + { + int aa_bands = 31; + int n_long_bands = (gr_info->mixed_block_flag ? 2 : 0) << (int)(DRMP3_HDR_GET_MY_SAMPLE_RATE(h->header) == 2); + + if (gr_info->n_short_sfb) + { + aa_bands = n_long_bands - 1; + drmp3_L3_reorder(s->grbuf[ch] + n_long_bands*18, s->syn[0], gr_info->sfbtab + gr_info->n_long_sfb); + } + + drmp3_L3_antialias(s->grbuf[ch], aa_bands); + drmp3_L3_imdct_gr(s->grbuf[ch], h->mdct_overlap[ch], gr_info->block_type, n_long_bands); + drmp3_L3_change_sign(s->grbuf[ch]); + } +} + +static void drmp3d_DCT_II(float *grbuf, int n) +{ + static const float g_sec[24] = { + 10.19000816f,0.50060302f,0.50241929f,3.40760851f,0.50547093f,0.52249861f,2.05778098f,0.51544732f,0.56694406f,1.48416460f,0.53104258f,0.64682180f,1.16943991f,0.55310392f,0.78815460f,0.97256821f,0.58293498f,1.06067765f,0.83934963f,0.62250412f,1.72244716f,0.74453628f,0.67480832f,5.10114861f + }; + int i, k = 0; +#if DRMP3_HAVE_SIMD + if (drmp3_have_simd()) for (; k < n; k += 4) + { + drmp3_f4 t[4][8], *x; + float *y = grbuf + k; + + for (x = t[0], i = 0; i < 8; i++, x++) + { + drmp3_f4 x0 = DRMP3_VLD(&y[i*18]); + drmp3_f4 x1 = DRMP3_VLD(&y[(15 - i)*18]); + drmp3_f4 x2 = DRMP3_VLD(&y[(16 + i)*18]); + drmp3_f4 x3 = DRMP3_VLD(&y[(31 - i)*18]); + drmp3_f4 t0 = DRMP3_VADD(x0, x3); + drmp3_f4 t1 = DRMP3_VADD(x1, x2); + drmp3_f4 t2 = DRMP3_VMUL_S(DRMP3_VSUB(x1, x2), g_sec[3*i + 0]); + drmp3_f4 t3 = DRMP3_VMUL_S(DRMP3_VSUB(x0, x3), g_sec[3*i + 1]); + x[0] = DRMP3_VADD(t0, t1); + x[8] = DRMP3_VMUL_S(DRMP3_VSUB(t0, t1), g_sec[3*i + 2]); + x[16] = DRMP3_VADD(t3, t2); + x[24] = DRMP3_VMUL_S(DRMP3_VSUB(t3, t2), g_sec[3*i + 2]); + } + for (x = t[0], i = 0; i < 4; i++, x += 8) + { + drmp3_f4 x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4], x5 = x[5], x6 = x[6], x7 = x[7], xt; + xt = DRMP3_VSUB(x0, x7); x0 = DRMP3_VADD(x0, x7); + x7 = DRMP3_VSUB(x1, x6); x1 = DRMP3_VADD(x1, x6); + x6 = DRMP3_VSUB(x2, x5); x2 = DRMP3_VADD(x2, x5); + x5 = DRMP3_VSUB(x3, x4); x3 = DRMP3_VADD(x3, x4); + x4 = DRMP3_VSUB(x0, x3); x0 = DRMP3_VADD(x0, x3); + x3 = DRMP3_VSUB(x1, x2); x1 = DRMP3_VADD(x1, x2); + x[0] = DRMP3_VADD(x0, x1); + x[4] = DRMP3_VMUL_S(DRMP3_VSUB(x0, x1), 0.70710677f); + x5 = DRMP3_VADD(x5, x6); + x6 = DRMP3_VMUL_S(DRMP3_VADD(x6, x7), 0.70710677f); + x7 = DRMP3_VADD(x7, xt); + x3 = DRMP3_VMUL_S(DRMP3_VADD(x3, x4), 0.70710677f); + x5 = DRMP3_VSUB(x5, DRMP3_VMUL_S(x7, 0.198912367f)); /* rotate by PI/8 */ + x7 = DRMP3_VADD(x7, DRMP3_VMUL_S(x5, 0.382683432f)); + x5 = DRMP3_VSUB(x5, DRMP3_VMUL_S(x7, 0.198912367f)); + x0 = DRMP3_VSUB(xt, x6); xt = DRMP3_VADD(xt, x6); + x[1] = DRMP3_VMUL_S(DRMP3_VADD(xt, x7), 0.50979561f); + x[2] = DRMP3_VMUL_S(DRMP3_VADD(x4, x3), 0.54119611f); + x[3] = DRMP3_VMUL_S(DRMP3_VSUB(x0, x5), 0.60134488f); + x[5] = DRMP3_VMUL_S(DRMP3_VADD(x0, x5), 0.89997619f); + x[6] = DRMP3_VMUL_S(DRMP3_VSUB(x4, x3), 1.30656302f); + x[7] = DRMP3_VMUL_S(DRMP3_VSUB(xt, x7), 2.56291556f); + } + + if (k > n - 3) + { +#if DRMP3_HAVE_SSE +#define DRMP3_VSAVE2(i, v) _mm_storel_pi((__m64 *)(void*)&y[i*18], v) +#else +#define DRMP3_VSAVE2(i, v) vst1_f32((float32_t *)&y[(i)*18], vget_low_f32(v)) +#endif + for (i = 0; i < 7; i++, y += 4*18) + { + drmp3_f4 s = DRMP3_VADD(t[3][i], t[3][i + 1]); + DRMP3_VSAVE2(0, t[0][i]); + DRMP3_VSAVE2(1, DRMP3_VADD(t[2][i], s)); + DRMP3_VSAVE2(2, DRMP3_VADD(t[1][i], t[1][i + 1])); + DRMP3_VSAVE2(3, DRMP3_VADD(t[2][1 + i], s)); + } + DRMP3_VSAVE2(0, t[0][7]); + DRMP3_VSAVE2(1, DRMP3_VADD(t[2][7], t[3][7])); + DRMP3_VSAVE2(2, t[1][7]); + DRMP3_VSAVE2(3, t[3][7]); + } else + { +#define DRMP3_VSAVE4(i, v) DRMP3_VSTORE(&y[(i)*18], v) + for (i = 0; i < 7; i++, y += 4*18) + { + drmp3_f4 s = DRMP3_VADD(t[3][i], t[3][i + 1]); + DRMP3_VSAVE4(0, t[0][i]); + DRMP3_VSAVE4(1, DRMP3_VADD(t[2][i], s)); + DRMP3_VSAVE4(2, DRMP3_VADD(t[1][i], t[1][i + 1])); + DRMP3_VSAVE4(3, DRMP3_VADD(t[2][1 + i], s)); + } + DRMP3_VSAVE4(0, t[0][7]); + DRMP3_VSAVE4(1, DRMP3_VADD(t[2][7], t[3][7])); + DRMP3_VSAVE4(2, t[1][7]); + DRMP3_VSAVE4(3, t[3][7]); + } + } else +#endif +#ifdef DR_MP3_ONLY_SIMD + {} /* for HAVE_SIMD=1, MINIMP3_ONLY_SIMD=1 case we do not need non-intrinsic "else" branch */ +#else + for (; k < n; k++) + { + float t[4][8], *x, *y = grbuf + k; + + for (x = t[0], i = 0; i < 8; i++, x++) + { + float x0 = y[i*18]; + float x1 = y[(15 - i)*18]; + float x2 = y[(16 + i)*18]; + float x3 = y[(31 - i)*18]; + float t0 = x0 + x3; + float t1 = x1 + x2; + float t2 = (x1 - x2)*g_sec[3*i + 0]; + float t3 = (x0 - x3)*g_sec[3*i + 1]; + x[0] = t0 + t1; + x[8] = (t0 - t1)*g_sec[3*i + 2]; + x[16] = t3 + t2; + x[24] = (t3 - t2)*g_sec[3*i + 2]; + } + for (x = t[0], i = 0; i < 4; i++, x += 8) + { + float x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4], x5 = x[5], x6 = x[6], x7 = x[7], xt; + xt = x0 - x7; x0 += x7; + x7 = x1 - x6; x1 += x6; + x6 = x2 - x5; x2 += x5; + x5 = x3 - x4; x3 += x4; + x4 = x0 - x3; x0 += x3; + x3 = x1 - x2; x1 += x2; + x[0] = x0 + x1; + x[4] = (x0 - x1)*0.70710677f; + x5 = x5 + x6; + x6 = (x6 + x7)*0.70710677f; + x7 = x7 + xt; + x3 = (x3 + x4)*0.70710677f; + x5 -= x7*0.198912367f; /* rotate by PI/8 */ + x7 += x5*0.382683432f; + x5 -= x7*0.198912367f; + x0 = xt - x6; xt += x6; + x[1] = (xt + x7)*0.50979561f; + x[2] = (x4 + x3)*0.54119611f; + x[3] = (x0 - x5)*0.60134488f; + x[5] = (x0 + x5)*0.89997619f; + x[6] = (x4 - x3)*1.30656302f; + x[7] = (xt - x7)*2.56291556f; + + } + for (i = 0; i < 7; i++, y += 4*18) + { + y[0*18] = t[0][i]; + y[1*18] = t[2][i] + t[3][i] + t[3][i + 1]; + y[2*18] = t[1][i] + t[1][i + 1]; + y[3*18] = t[2][i + 1] + t[3][i] + t[3][i + 1]; + } + y[0*18] = t[0][7]; + y[1*18] = t[2][7] + t[3][7]; + y[2*18] = t[1][7]; + y[3*18] = t[3][7]; + } +#endif +} + +#ifndef DR_MP3_FLOAT_OUTPUT +typedef drmp3_int16 drmp3d_sample_t; + +static drmp3_int16 drmp3d_scale_pcm(float sample) +{ + drmp3_int16 s; +#if DRMP3_HAVE_ARMV6 + drmp3_int32 s32 = (drmp3_int32)(sample + .5f); + s32 -= (s32 < 0); + s = (drmp3_int16)drmp3_clip_int16_arm(s32); +#else + if (sample >= 32766.5f) return (drmp3_int16) 32767; + if (sample <= -32767.5f) return (drmp3_int16)-32768; + s = (drmp3_int16)(sample + .5f); + s -= (s < 0); /* away from zero, to be compliant */ +#endif + return s; +} +#else +typedef float drmp3d_sample_t; + +static float drmp3d_scale_pcm(float sample) +{ + return sample*(1.f/32768.f); +} +#endif + +static void drmp3d_synth_pair(drmp3d_sample_t *pcm, int nch, const float *z) +{ + float a; + a = (z[14*64] - z[ 0]) * 29; + a += (z[ 1*64] + z[13*64]) * 213; + a += (z[12*64] - z[ 2*64]) * 459; + a += (z[ 3*64] + z[11*64]) * 2037; + a += (z[10*64] - z[ 4*64]) * 5153; + a += (z[ 5*64] + z[ 9*64]) * 6574; + a += (z[ 8*64] - z[ 6*64]) * 37489; + a += z[ 7*64] * 75038; + pcm[0] = drmp3d_scale_pcm(a); + + z += 2; + a = z[14*64] * 104; + a += z[12*64] * 1567; + a += z[10*64] * 9727; + a += z[ 8*64] * 64019; + a += z[ 6*64] * -9975; + a += z[ 4*64] * -45; + a += z[ 2*64] * 146; + a += z[ 0*64] * -5; + pcm[16*nch] = drmp3d_scale_pcm(a); +} + +static void drmp3d_synth(float *xl, drmp3d_sample_t *dstl, int nch, float *lins) +{ + int i; + float *xr = xl + 576*(nch - 1); + drmp3d_sample_t *dstr = dstl + (nch - 1); + + static const float g_win[] = { + -1,26,-31,208,218,401,-519,2063,2000,4788,-5517,7134,5959,35640,-39336,74992, + -1,24,-35,202,222,347,-581,2080,1952,4425,-5879,7640,5288,33791,-41176,74856, + -1,21,-38,196,225,294,-645,2087,1893,4063,-6237,8092,4561,31947,-43006,74630, + -1,19,-41,190,227,244,-711,2085,1822,3705,-6589,8492,3776,30112,-44821,74313, + -1,17,-45,183,228,197,-779,2075,1739,3351,-6935,8840,2935,28289,-46617,73908, + -1,16,-49,176,228,153,-848,2057,1644,3004,-7271,9139,2037,26482,-48390,73415, + -2,14,-53,169,227,111,-919,2032,1535,2663,-7597,9389,1082,24694,-50137,72835, + -2,13,-58,161,224,72,-991,2001,1414,2330,-7910,9592,70,22929,-51853,72169, + -2,11,-63,154,221,36,-1064,1962,1280,2006,-8209,9750,-998,21189,-53534,71420, + -2,10,-68,147,215,2,-1137,1919,1131,1692,-8491,9863,-2122,19478,-55178,70590, + -3,9,-73,139,208,-29,-1210,1870,970,1388,-8755,9935,-3300,17799,-56778,69679, + -3,8,-79,132,200,-57,-1283,1817,794,1095,-8998,9966,-4533,16155,-58333,68692, + -4,7,-85,125,189,-83,-1356,1759,605,814,-9219,9959,-5818,14548,-59838,67629, + -4,7,-91,117,177,-106,-1428,1698,402,545,-9416,9916,-7154,12980,-61289,66494, + -5,6,-97,111,163,-127,-1498,1634,185,288,-9585,9838,-8540,11455,-62684,65290 + }; + float *zlin = lins + 15*64; + const float *w = g_win; + + zlin[4*15] = xl[18*16]; + zlin[4*15 + 1] = xr[18*16]; + zlin[4*15 + 2] = xl[0]; + zlin[4*15 + 3] = xr[0]; + + zlin[4*31] = xl[1 + 18*16]; + zlin[4*31 + 1] = xr[1 + 18*16]; + zlin[4*31 + 2] = xl[1]; + zlin[4*31 + 3] = xr[1]; + + drmp3d_synth_pair(dstr, nch, lins + 4*15 + 1); + drmp3d_synth_pair(dstr + 32*nch, nch, lins + 4*15 + 64 + 1); + drmp3d_synth_pair(dstl, nch, lins + 4*15); + drmp3d_synth_pair(dstl + 32*nch, nch, lins + 4*15 + 64); + +#if DRMP3_HAVE_SIMD + if (drmp3_have_simd()) for (i = 14; i >= 0; i--) + { +#define DRMP3_VLOAD(k) drmp3_f4 w0 = DRMP3_VSET(*w++); drmp3_f4 w1 = DRMP3_VSET(*w++); drmp3_f4 vz = DRMP3_VLD(&zlin[4*i - 64*k]); drmp3_f4 vy = DRMP3_VLD(&zlin[4*i - 64*(15 - k)]); +#define DRMP3_V0(k) { DRMP3_VLOAD(k) b = DRMP3_VADD(DRMP3_VMUL(vz, w1), DRMP3_VMUL(vy, w0)) ; a = DRMP3_VSUB(DRMP3_VMUL(vz, w0), DRMP3_VMUL(vy, w1)); } +#define DRMP3_V1(k) { DRMP3_VLOAD(k) b = DRMP3_VADD(b, DRMP3_VADD(DRMP3_VMUL(vz, w1), DRMP3_VMUL(vy, w0))); a = DRMP3_VADD(a, DRMP3_VSUB(DRMP3_VMUL(vz, w0), DRMP3_VMUL(vy, w1))); } +#define DRMP3_V2(k) { DRMP3_VLOAD(k) b = DRMP3_VADD(b, DRMP3_VADD(DRMP3_VMUL(vz, w1), DRMP3_VMUL(vy, w0))); a = DRMP3_VADD(a, DRMP3_VSUB(DRMP3_VMUL(vy, w1), DRMP3_VMUL(vz, w0))); } + drmp3_f4 a, b; + zlin[4*i] = xl[18*(31 - i)]; + zlin[4*i + 1] = xr[18*(31 - i)]; + zlin[4*i + 2] = xl[1 + 18*(31 - i)]; + zlin[4*i + 3] = xr[1 + 18*(31 - i)]; + zlin[4*i + 64] = xl[1 + 18*(1 + i)]; + zlin[4*i + 64 + 1] = xr[1 + 18*(1 + i)]; + zlin[4*i - 64 + 2] = xl[18*(1 + i)]; + zlin[4*i - 64 + 3] = xr[18*(1 + i)]; + + DRMP3_V0(0) DRMP3_V2(1) DRMP3_V1(2) DRMP3_V2(3) DRMP3_V1(4) DRMP3_V2(5) DRMP3_V1(6) DRMP3_V2(7) + + { +#ifndef DR_MP3_FLOAT_OUTPUT +#if DRMP3_HAVE_SSE + static const drmp3_f4 g_max = { 32767.0f, 32767.0f, 32767.0f, 32767.0f }; + static const drmp3_f4 g_min = { -32768.0f, -32768.0f, -32768.0f, -32768.0f }; + __m128i pcm8 = _mm_packs_epi32(_mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(a, g_max), g_min)), + _mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(b, g_max), g_min))); + dstr[(15 - i)*nch] = (drmp3_int16)_mm_extract_epi16(pcm8, 1); + dstr[(17 + i)*nch] = (drmp3_int16)_mm_extract_epi16(pcm8, 5); + dstl[(15 - i)*nch] = (drmp3_int16)_mm_extract_epi16(pcm8, 0); + dstl[(17 + i)*nch] = (drmp3_int16)_mm_extract_epi16(pcm8, 4); + dstr[(47 - i)*nch] = (drmp3_int16)_mm_extract_epi16(pcm8, 3); + dstr[(49 + i)*nch] = (drmp3_int16)_mm_extract_epi16(pcm8, 7); + dstl[(47 - i)*nch] = (drmp3_int16)_mm_extract_epi16(pcm8, 2); + dstl[(49 + i)*nch] = (drmp3_int16)_mm_extract_epi16(pcm8, 6); +#else + int16x4_t pcma, pcmb; + a = DRMP3_VADD(a, DRMP3_VSET(0.5f)); + b = DRMP3_VADD(b, DRMP3_VSET(0.5f)); + pcma = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(a), vreinterpretq_s32_u32(vcltq_f32(a, DRMP3_VSET(0))))); + pcmb = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(b), vreinterpretq_s32_u32(vcltq_f32(b, DRMP3_VSET(0))))); + vst1_lane_s16(dstr + (15 - i)*nch, pcma, 1); + vst1_lane_s16(dstr + (17 + i)*nch, pcmb, 1); + vst1_lane_s16(dstl + (15 - i)*nch, pcma, 0); + vst1_lane_s16(dstl + (17 + i)*nch, pcmb, 0); + vst1_lane_s16(dstr + (47 - i)*nch, pcma, 3); + vst1_lane_s16(dstr + (49 + i)*nch, pcmb, 3); + vst1_lane_s16(dstl + (47 - i)*nch, pcma, 2); + vst1_lane_s16(dstl + (49 + i)*nch, pcmb, 2); +#endif +#else + #if DRMP3_HAVE_SSE + static const drmp3_f4 g_scale = { 1.0f/32768.0f, 1.0f/32768.0f, 1.0f/32768.0f, 1.0f/32768.0f }; + #else + const drmp3_f4 g_scale = vdupq_n_f32(1.0f/32768.0f); + #endif + a = DRMP3_VMUL(a, g_scale); + b = DRMP3_VMUL(b, g_scale); +#if DRMP3_HAVE_SSE + _mm_store_ss(dstr + (15 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 1, 1, 1))); + _mm_store_ss(dstr + (17 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(1, 1, 1, 1))); + _mm_store_ss(dstl + (15 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 0, 0, 0))); + _mm_store_ss(dstl + (17 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(0, 0, 0, 0))); + _mm_store_ss(dstr + (47 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 3, 3, 3))); + _mm_store_ss(dstr + (49 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 3, 3, 3))); + _mm_store_ss(dstl + (47 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 2, 2, 2))); + _mm_store_ss(dstl + (49 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(2, 2, 2, 2))); +#else + vst1q_lane_f32(dstr + (15 - i)*nch, a, 1); + vst1q_lane_f32(dstr + (17 + i)*nch, b, 1); + vst1q_lane_f32(dstl + (15 - i)*nch, a, 0); + vst1q_lane_f32(dstl + (17 + i)*nch, b, 0); + vst1q_lane_f32(dstr + (47 - i)*nch, a, 3); + vst1q_lane_f32(dstr + (49 + i)*nch, b, 3); + vst1q_lane_f32(dstl + (47 - i)*nch, a, 2); + vst1q_lane_f32(dstl + (49 + i)*nch, b, 2); +#endif +#endif /* DR_MP3_FLOAT_OUTPUT */ + } + } else +#endif +#ifdef DR_MP3_ONLY_SIMD + {} /* for HAVE_SIMD=1, MINIMP3_ONLY_SIMD=1 case we do not need non-intrinsic "else" branch */ +#else + for (i = 14; i >= 0; i--) + { +#define DRMP3_LOAD(k) float w0 = *w++; float w1 = *w++; float *vz = &zlin[4*i - k*64]; float *vy = &zlin[4*i - (15 - k)*64]; +#define DRMP3_S0(k) { int j; DRMP3_LOAD(k); for (j = 0; j < 4; j++) b[j] = vz[j]*w1 + vy[j]*w0, a[j] = vz[j]*w0 - vy[j]*w1; } +#define DRMP3_S1(k) { int j; DRMP3_LOAD(k); for (j = 0; j < 4; j++) b[j] += vz[j]*w1 + vy[j]*w0, a[j] += vz[j]*w0 - vy[j]*w1; } +#define DRMP3_S2(k) { int j; DRMP3_LOAD(k); for (j = 0; j < 4; j++) b[j] += vz[j]*w1 + vy[j]*w0, a[j] += vy[j]*w1 - vz[j]*w0; } + float a[4], b[4]; + + zlin[4*i] = xl[18*(31 - i)]; + zlin[4*i + 1] = xr[18*(31 - i)]; + zlin[4*i + 2] = xl[1 + 18*(31 - i)]; + zlin[4*i + 3] = xr[1 + 18*(31 - i)]; + zlin[4*(i + 16)] = xl[1 + 18*(1 + i)]; + zlin[4*(i + 16) + 1] = xr[1 + 18*(1 + i)]; + zlin[4*(i - 16) + 2] = xl[18*(1 + i)]; + zlin[4*(i - 16) + 3] = xr[18*(1 + i)]; + + DRMP3_S0(0) DRMP3_S2(1) DRMP3_S1(2) DRMP3_S2(3) DRMP3_S1(4) DRMP3_S2(5) DRMP3_S1(6) DRMP3_S2(7) + + dstr[(15 - i)*nch] = drmp3d_scale_pcm(a[1]); + dstr[(17 + i)*nch] = drmp3d_scale_pcm(b[1]); + dstl[(15 - i)*nch] = drmp3d_scale_pcm(a[0]); + dstl[(17 + i)*nch] = drmp3d_scale_pcm(b[0]); + dstr[(47 - i)*nch] = drmp3d_scale_pcm(a[3]); + dstr[(49 + i)*nch] = drmp3d_scale_pcm(b[3]); + dstl[(47 - i)*nch] = drmp3d_scale_pcm(a[2]); + dstl[(49 + i)*nch] = drmp3d_scale_pcm(b[2]); + } +#endif +} + +static void drmp3d_synth_granule(float *qmf_state, float *grbuf, int nbands, int nch, drmp3d_sample_t *pcm, float *lins) +{ + int i; + for (i = 0; i < nch; i++) + { + drmp3d_DCT_II(grbuf + 576*i, nbands); + } + + DRMP3_COPY_MEMORY(lins, qmf_state, sizeof(float)*15*64); + + for (i = 0; i < nbands; i += 2) + { + drmp3d_synth(grbuf + i, pcm + 32*nch*i, nch, lins + i*64); + } +#ifndef DR_MP3_NONSTANDARD_BUT_LOGICAL + if (nch == 1) + { + for (i = 0; i < 15*64; i += 2) + { + qmf_state[i] = lins[nbands*64 + i]; + } + } else +#endif + { + DRMP3_COPY_MEMORY(qmf_state, lins + nbands*64, sizeof(float)*15*64); + } +} + +static int drmp3d_match_frame(const drmp3_uint8 *hdr, int mp3_bytes, int frame_bytes) +{ + int i, nmatch; + for (i = 0, nmatch = 0; nmatch < DRMP3_MAX_FRAME_SYNC_MATCHES; nmatch++) + { + i += drmp3_hdr_frame_bytes(hdr + i, frame_bytes) + drmp3_hdr_padding(hdr + i); + if (i + DRMP3_HDR_SIZE > mp3_bytes) + return nmatch > 0; + if (!drmp3_hdr_compare(hdr, hdr + i)) + return 0; + } + return 1; +} + +static int drmp3d_find_frame(const drmp3_uint8 *mp3, int mp3_bytes, int *free_format_bytes, int *ptr_frame_bytes) +{ + int i, k; + for (i = 0; i < mp3_bytes - DRMP3_HDR_SIZE; i++, mp3++) + { + if (drmp3_hdr_valid(mp3)) + { + int frame_bytes = drmp3_hdr_frame_bytes(mp3, *free_format_bytes); + int frame_and_padding = frame_bytes + drmp3_hdr_padding(mp3); + + for (k = DRMP3_HDR_SIZE; !frame_bytes && k < DRMP3_MAX_FREE_FORMAT_FRAME_SIZE && i + 2*k < mp3_bytes - DRMP3_HDR_SIZE; k++) + { + if (drmp3_hdr_compare(mp3, mp3 + k)) + { + int fb = k - drmp3_hdr_padding(mp3); + int nextfb = fb + drmp3_hdr_padding(mp3 + k); + if (i + k + nextfb + DRMP3_HDR_SIZE > mp3_bytes || !drmp3_hdr_compare(mp3, mp3 + k + nextfb)) + continue; + frame_and_padding = k; + frame_bytes = fb; + *free_format_bytes = fb; + } + } + + if ((frame_bytes && i + frame_and_padding <= mp3_bytes && + drmp3d_match_frame(mp3, mp3_bytes - i, frame_bytes)) || + (!i && frame_and_padding == mp3_bytes)) + { + *ptr_frame_bytes = frame_and_padding; + return i; + } + *free_format_bytes = 0; + } + } + *ptr_frame_bytes = 0; + return mp3_bytes; +} + +DRMP3_API void drmp3dec_init(drmp3dec *dec) +{ + dec->header[0] = 0; +} + +DRMP3_API int drmp3dec_decode_frame(drmp3dec *dec, const drmp3_uint8 *mp3, int mp3_bytes, void *pcm, drmp3dec_frame_info *info) +{ + int i = 0, igr, frame_size = 0, success = 1; + const drmp3_uint8 *hdr; + drmp3_bs bs_frame[1]; + + if (mp3_bytes > 4 && dec->header[0] == 0xff && drmp3_hdr_compare(dec->header, mp3)) + { + frame_size = drmp3_hdr_frame_bytes(mp3, dec->free_format_bytes) + drmp3_hdr_padding(mp3); + if (frame_size != mp3_bytes && (frame_size + DRMP3_HDR_SIZE > mp3_bytes || !drmp3_hdr_compare(mp3, mp3 + frame_size))) + { + frame_size = 0; + } + } + if (!frame_size) + { + DRMP3_ZERO_MEMORY(dec, sizeof(drmp3dec)); + i = drmp3d_find_frame(mp3, mp3_bytes, &dec->free_format_bytes, &frame_size); + if (!frame_size || i + frame_size > mp3_bytes) + { + info->frame_bytes = i; + return 0; + } + } + + hdr = mp3 + i; + DRMP3_COPY_MEMORY(dec->header, hdr, DRMP3_HDR_SIZE); + info->frame_bytes = i + frame_size; + info->channels = DRMP3_HDR_IS_MONO(hdr) ? 1 : 2; + info->sample_rate = drmp3_hdr_sample_rate_hz(hdr); + info->layer = 4 - DRMP3_HDR_GET_LAYER(hdr); + info->bitrate_kbps = drmp3_hdr_bitrate_kbps(hdr); + + drmp3_bs_init(bs_frame, hdr + DRMP3_HDR_SIZE, frame_size - DRMP3_HDR_SIZE); + if (DRMP3_HDR_IS_CRC(hdr)) + { + drmp3_bs_get_bits(bs_frame, 16); + } + + if (info->layer == 3) + { + int main_data_begin = drmp3_L3_read_side_info(bs_frame, dec->scratch.gr_info, hdr); + if (main_data_begin < 0 || bs_frame->pos > bs_frame->limit) + { + drmp3dec_init(dec); + return 0; + } + success = drmp3_L3_restore_reservoir(dec, bs_frame, &dec->scratch, main_data_begin); + if (success && pcm != NULL) + { + for (igr = 0; igr < (DRMP3_HDR_TEST_MPEG1(hdr) ? 2 : 1); igr++, pcm = DRMP3_OFFSET_PTR(pcm, sizeof(drmp3d_sample_t)*576*info->channels)) + { + DRMP3_ZERO_MEMORY(dec->scratch.grbuf[0], 576*2*sizeof(float)); + drmp3_L3_decode(dec, &dec->scratch, dec->scratch.gr_info + igr*info->channels, info->channels); + drmp3d_synth_granule(dec->qmf_state, dec->scratch.grbuf[0], 18, info->channels, (drmp3d_sample_t*)pcm, dec->scratch.syn[0]); + } + } + drmp3_L3_save_reservoir(dec, &dec->scratch); + } else + { +#ifdef DR_MP3_ONLY_MP3 + return 0; +#else + drmp3_L12_scale_info sci[1]; + + if (pcm == NULL) { + return drmp3_hdr_frame_samples(hdr); + } + + drmp3_L12_read_scale_info(hdr, bs_frame, sci); + + DRMP3_ZERO_MEMORY(dec->scratch.grbuf[0], 576*2*sizeof(float)); + for (i = 0, igr = 0; igr < 3; igr++) + { + if (12 == (i += drmp3_L12_dequantize_granule(dec->scratch.grbuf[0] + i, bs_frame, sci, info->layer | 1))) + { + i = 0; + drmp3_L12_apply_scf_384(sci, sci->scf + igr, dec->scratch.grbuf[0]); + drmp3d_synth_granule(dec->qmf_state, dec->scratch.grbuf[0], 12, info->channels, (drmp3d_sample_t*)pcm, dec->scratch.syn[0]); + DRMP3_ZERO_MEMORY(dec->scratch.grbuf[0], 576*2*sizeof(float)); + pcm = DRMP3_OFFSET_PTR(pcm, sizeof(drmp3d_sample_t)*384*info->channels); + } + if (bs_frame->pos > bs_frame->limit) + { + drmp3dec_init(dec); + return 0; + } + } +#endif + } + + return success*drmp3_hdr_frame_samples(dec->header); +} + +DRMP3_API void drmp3dec_f32_to_s16(const float *in, drmp3_int16 *out, size_t num_samples) +{ + size_t i = 0; +#if DRMP3_HAVE_SIMD + size_t aligned_count = num_samples & ~7; + for(; i < aligned_count; i+=8) + { + drmp3_f4 scale = DRMP3_VSET(32768.0f); + drmp3_f4 a = DRMP3_VMUL(DRMP3_VLD(&in[i ]), scale); + drmp3_f4 b = DRMP3_VMUL(DRMP3_VLD(&in[i+4]), scale); +#if DRMP3_HAVE_SSE + drmp3_f4 s16max = DRMP3_VSET( 32767.0f); + drmp3_f4 s16min = DRMP3_VSET(-32768.0f); + __m128i pcm8 = _mm_packs_epi32(_mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(a, s16max), s16min)), + _mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(b, s16max), s16min))); + out[i ] = (drmp3_int16)_mm_extract_epi16(pcm8, 0); + out[i+1] = (drmp3_int16)_mm_extract_epi16(pcm8, 1); + out[i+2] = (drmp3_int16)_mm_extract_epi16(pcm8, 2); + out[i+3] = (drmp3_int16)_mm_extract_epi16(pcm8, 3); + out[i+4] = (drmp3_int16)_mm_extract_epi16(pcm8, 4); + out[i+5] = (drmp3_int16)_mm_extract_epi16(pcm8, 5); + out[i+6] = (drmp3_int16)_mm_extract_epi16(pcm8, 6); + out[i+7] = (drmp3_int16)_mm_extract_epi16(pcm8, 7); +#else + int16x4_t pcma, pcmb; + a = DRMP3_VADD(a, DRMP3_VSET(0.5f)); + b = DRMP3_VADD(b, DRMP3_VSET(0.5f)); + pcma = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(a), vreinterpretq_s32_u32(vcltq_f32(a, DRMP3_VSET(0))))); + pcmb = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(b), vreinterpretq_s32_u32(vcltq_f32(b, DRMP3_VSET(0))))); + vst1_lane_s16(out+i , pcma, 0); + vst1_lane_s16(out+i+1, pcma, 1); + vst1_lane_s16(out+i+2, pcma, 2); + vst1_lane_s16(out+i+3, pcma, 3); + vst1_lane_s16(out+i+4, pcmb, 0); + vst1_lane_s16(out+i+5, pcmb, 1); + vst1_lane_s16(out+i+6, pcmb, 2); + vst1_lane_s16(out+i+7, pcmb, 3); +#endif + } +#endif + for(; i < num_samples; i++) + { + float sample = in[i] * 32768.0f; + if (sample >= 32766.5f) + out[i] = (drmp3_int16) 32767; + else if (sample <= -32767.5f) + out[i] = (drmp3_int16)-32768; + else + { + short s = (drmp3_int16)(sample + .5f); + s -= (s < 0); /* away from zero, to be compliant */ + out[i] = s; + } + } +} + + + +/************************************************************************************************************************************************************ + + Main Public API + + ************************************************************************************************************************************************************/ +/* SIZE_MAX */ +#if defined(SIZE_MAX) + #define DRMP3_SIZE_MAX SIZE_MAX +#else + #if defined(_WIN64) || defined(_LP64) || defined(__LP64__) + #define DRMP3_SIZE_MAX ((drmp3_uint64)0xFFFFFFFFFFFFFFFF) + #else + #define DRMP3_SIZE_MAX 0xFFFFFFFF + #endif +#endif +/* End SIZE_MAX */ + +/* Options. */ +#ifndef DRMP3_SEEK_LEADING_MP3_FRAMES +#define DRMP3_SEEK_LEADING_MP3_FRAMES 2 +#endif + +#define DRMP3_MIN_DATA_CHUNK_SIZE 16384 + +/* The size in bytes of each chunk of data to read from the MP3 stream. minimp3 recommends at least 16K, but in an attempt to reduce data movement I'm making this slightly larger. */ +#ifndef DRMP3_DATA_CHUNK_SIZE +#define DRMP3_DATA_CHUNK_SIZE (DRMP3_MIN_DATA_CHUNK_SIZE*4) +#endif + + +#define DRMP3_COUNTOF(x) (sizeof(x) / sizeof(x[0])) +#define DRMP3_CLAMP(x, lo, hi) (DRMP3_MAX(lo, DRMP3_MIN(x, hi))) + +#ifndef DRMP3_PI_D +#define DRMP3_PI_D 3.14159265358979323846264 +#endif + +#define DRMP3_DEFAULT_RESAMPLER_LPF_ORDER 2 + +static DRMP3_INLINE float drmp3_mix_f32(float x, float y, float a) +{ + return x*(1-a) + y*a; +} +static DRMP3_INLINE float drmp3_mix_f32_fast(float x, float y, float a) +{ + float r0 = (y - x); + float r1 = r0*a; + return x + r1; + /*return x + (y - x)*a;*/ +} + + +/* +Greatest common factor using Euclid's algorithm iteratively. +*/ +static DRMP3_INLINE drmp3_uint32 drmp3_gcf_u32(drmp3_uint32 a, drmp3_uint32 b) +{ + for (;;) { + if (b == 0) { + break; + } else { + drmp3_uint32 t = a; + a = b; + b = t % a; + } + } + + return a; +} + + +static void* drmp3__malloc_default(size_t sz, void* pUserData) +{ + (void)pUserData; + return DRMP3_MALLOC(sz); +} + +static void* drmp3__realloc_default(void* p, size_t sz, void* pUserData) +{ + (void)pUserData; + return DRMP3_REALLOC(p, sz); +} + +static void drmp3__free_default(void* p, void* pUserData) +{ + (void)pUserData; + DRMP3_FREE(p); +} + + +static void* drmp3__malloc_from_callbacks(size_t sz, const drmp3_allocation_callbacks* pAllocationCallbacks) +{ + if (pAllocationCallbacks == NULL) { + return NULL; + } + + if (pAllocationCallbacks->onMalloc != NULL) { + return pAllocationCallbacks->onMalloc(sz, pAllocationCallbacks->pUserData); + } + + /* Try using realloc(). */ + if (pAllocationCallbacks->onRealloc != NULL) { + return pAllocationCallbacks->onRealloc(NULL, sz, pAllocationCallbacks->pUserData); + } + + return NULL; +} + +static void* drmp3__realloc_from_callbacks(void* p, size_t szNew, size_t szOld, const drmp3_allocation_callbacks* pAllocationCallbacks) +{ + if (pAllocationCallbacks == NULL) { + return NULL; + } + + if (pAllocationCallbacks->onRealloc != NULL) { + return pAllocationCallbacks->onRealloc(p, szNew, pAllocationCallbacks->pUserData); + } + + /* Try emulating realloc() in terms of malloc()/free(). */ + if (pAllocationCallbacks->onMalloc != NULL && pAllocationCallbacks->onFree != NULL) { + void* p2; + + p2 = pAllocationCallbacks->onMalloc(szNew, pAllocationCallbacks->pUserData); + if (p2 == NULL) { + return NULL; + } + + if (p != NULL) { + DRMP3_COPY_MEMORY(p2, p, szOld); + pAllocationCallbacks->onFree(p, pAllocationCallbacks->pUserData); + } + + return p2; + } + + return NULL; +} + +static void drmp3__free_from_callbacks(void* p, const drmp3_allocation_callbacks* pAllocationCallbacks) +{ + if (p == NULL || pAllocationCallbacks == NULL) { + return; + } + + if (pAllocationCallbacks->onFree != NULL) { + pAllocationCallbacks->onFree(p, pAllocationCallbacks->pUserData); + } +} + + +static drmp3_allocation_callbacks drmp3_copy_allocation_callbacks_or_defaults(const drmp3_allocation_callbacks* pAllocationCallbacks) +{ + if (pAllocationCallbacks != NULL) { + /* Copy. */ + return *pAllocationCallbacks; + } else { + /* Defaults. */ + drmp3_allocation_callbacks allocationCallbacks; + allocationCallbacks.pUserData = NULL; + allocationCallbacks.onMalloc = drmp3__malloc_default; + allocationCallbacks.onRealloc = drmp3__realloc_default; + allocationCallbacks.onFree = drmp3__free_default; + return allocationCallbacks; + } +} + + + +static size_t drmp3__on_read(drmp3* pMP3, void* pBufferOut, size_t bytesToRead) +{ + size_t bytesRead; + + DRMP3_ASSERT(pMP3 != NULL); + DRMP3_ASSERT(pMP3->onRead != NULL); + + /* + Don't try reading 0 bytes from the callback. This can happen when the stream is clamped against + ID3v1 or APE tags at the end of the stream. + */ + if (bytesToRead == 0) { + return 0; + } + + bytesRead = pMP3->onRead(pMP3->pUserData, pBufferOut, bytesToRead); + pMP3->streamCursor += bytesRead; + + return bytesRead; +} + +static size_t drmp3__on_read_clamped(drmp3* pMP3, void* pBufferOut, size_t bytesToRead) +{ + DRMP3_ASSERT(pMP3 != NULL); + DRMP3_ASSERT(pMP3->onRead != NULL); + + if (pMP3->streamLength == DRMP3_UINT64_MAX) { + return drmp3__on_read(pMP3, pBufferOut, bytesToRead); + } else { + drmp3_uint64 bytesRemaining; + + bytesRemaining = (pMP3->streamLength - pMP3->streamCursor); + if (bytesToRead > bytesRemaining) { + bytesToRead = (size_t)bytesRemaining; + } + + return drmp3__on_read(pMP3, pBufferOut, bytesToRead); + } +} + +static drmp3_bool32 drmp3__on_seek(drmp3* pMP3, int offset, drmp3_seek_origin origin) +{ + DRMP3_ASSERT(offset >= 0); + DRMP3_ASSERT(origin == DRMP3_SEEK_SET || origin == DRMP3_SEEK_CUR); + + if (!pMP3->onSeek(pMP3->pUserData, offset, origin)) { + return DRMP3_FALSE; + } + + if (origin == DRMP3_SEEK_SET) { + pMP3->streamCursor = (drmp3_uint64)offset; + } else{ + pMP3->streamCursor += offset; + } + + return DRMP3_TRUE; +} + +static drmp3_bool32 drmp3__on_seek_64(drmp3* pMP3, drmp3_uint64 offset, drmp3_seek_origin origin) +{ + if (offset <= 0x7FFFFFFF) { + return drmp3__on_seek(pMP3, (int)offset, origin); + } + + /* Getting here "offset" is too large for a 32-bit integer. We just keep seeking forward until we hit the offset. */ + if (!drmp3__on_seek(pMP3, 0x7FFFFFFF, DRMP3_SEEK_SET)) { + return DRMP3_FALSE; + } + + offset -= 0x7FFFFFFF; + while (offset > 0) { + if (offset <= 0x7FFFFFFF) { + if (!drmp3__on_seek(pMP3, (int)offset, DRMP3_SEEK_CUR)) { + return DRMP3_FALSE; + } + offset = 0; + } else { + if (!drmp3__on_seek(pMP3, 0x7FFFFFFF, DRMP3_SEEK_CUR)) { + return DRMP3_FALSE; + } + offset -= 0x7FFFFFFF; + } + } + + return DRMP3_TRUE; +} + +static void drmp3__on_meta(drmp3* pMP3, drmp3_metadata_type type, const void* pRawData, size_t rawDataSize) +{ + if (pMP3->onMeta) { + drmp3_metadata metadata; + + DRMP3_ZERO_OBJECT(&metadata); + metadata.type = type; + metadata.pRawData = pRawData; + metadata.rawDataSize = rawDataSize; + + pMP3->onMeta(pMP3->pUserDataMeta, &metadata); + } +} + + +static drmp3_uint32 drmp3_decode_next_frame_ex__callbacks(drmp3* pMP3, drmp3d_sample_t* pPCMFrames, drmp3dec_frame_info* pMP3FrameInfo, const drmp3_uint8** ppMP3FrameData) +{ + drmp3_uint32 pcmFramesRead = 0; + + DRMP3_ASSERT(pMP3 != NULL); + DRMP3_ASSERT(pMP3->onRead != NULL); + + if (pMP3->atEnd) { + return 0; + } + + for (;;) { + drmp3dec_frame_info info; + + /* minimp3 recommends doing data submission in chunks of at least 16K. If we don't have at least 16K bytes available, get more. */ + if (pMP3->dataSize < DRMP3_MIN_DATA_CHUNK_SIZE) { + size_t bytesRead; + + /* First we need to move the data down. */ + if (pMP3->pData != NULL) { + DRMP3_MOVE_MEMORY(pMP3->pData, pMP3->pData + pMP3->dataConsumed, pMP3->dataSize); + } + + pMP3->dataConsumed = 0; + + if (pMP3->dataCapacity < DRMP3_DATA_CHUNK_SIZE) { + drmp3_uint8* pNewData; + size_t newDataCap; + + newDataCap = DRMP3_DATA_CHUNK_SIZE; + + pNewData = (drmp3_uint8*)drmp3__realloc_from_callbacks(pMP3->pData, newDataCap, pMP3->dataCapacity, &pMP3->allocationCallbacks); + if (pNewData == NULL) { + return 0; /* Out of memory. */ + } + + pMP3->pData = pNewData; + pMP3->dataCapacity = newDataCap; + } + + bytesRead = drmp3__on_read_clamped(pMP3, pMP3->pData + pMP3->dataSize, (pMP3->dataCapacity - pMP3->dataSize)); + if (bytesRead == 0) { + if (pMP3->dataSize == 0) { + pMP3->atEnd = DRMP3_TRUE; + return 0; /* No data. */ + } + } + + pMP3->dataSize += bytesRead; + } + + if (pMP3->dataSize > INT_MAX) { + pMP3->atEnd = DRMP3_TRUE; + return 0; /* File too big. */ + } + + DRMP3_ASSERT(pMP3->pData != NULL); + DRMP3_ASSERT(pMP3->dataCapacity > 0); + + /* Do a runtime check here to try silencing a false-positive from clang-analyzer. */ + if (pMP3->pData == NULL) { + return 0; + } + + pcmFramesRead = drmp3dec_decode_frame(&pMP3->decoder, pMP3->pData + pMP3->dataConsumed, (int)pMP3->dataSize, pPCMFrames, &info); /* <-- Safe size_t -> int conversion thanks to the check above. */ + + /* Consume the data. */ + pMP3->dataConsumed += (size_t)info.frame_bytes; + pMP3->dataSize -= (size_t)info.frame_bytes; + + /* pcmFramesRead will be equal to 0 if decoding failed. If it is zero and info.frame_bytes > 0 then we have successfully decoded the frame. */ + if (pcmFramesRead > 0) { + pcmFramesRead = drmp3_hdr_frame_samples(pMP3->decoder.header); + pMP3->pcmFramesConsumedInMP3Frame = 0; + pMP3->pcmFramesRemainingInMP3Frame = pcmFramesRead; + pMP3->mp3FrameChannels = info.channels; + pMP3->mp3FrameSampleRate = info.sample_rate; + + if (pMP3FrameInfo != NULL) { + *pMP3FrameInfo = info; + } + + if (ppMP3FrameData != NULL) { + *ppMP3FrameData = pMP3->pData + pMP3->dataConsumed - (size_t)info.frame_bytes; + } + + break; + } else if (info.frame_bytes == 0) { + /* Need more data. minimp3 recommends doing data submission in 16K chunks. */ + size_t bytesRead; + + /* First we need to move the data down. */ + DRMP3_MOVE_MEMORY(pMP3->pData, pMP3->pData + pMP3->dataConsumed, pMP3->dataSize); + pMP3->dataConsumed = 0; + + if (pMP3->dataCapacity == pMP3->dataSize) { + /* No room. Expand. */ + drmp3_uint8* pNewData; + size_t newDataCap; + + newDataCap = pMP3->dataCapacity + DRMP3_DATA_CHUNK_SIZE; + + pNewData = (drmp3_uint8*)drmp3__realloc_from_callbacks(pMP3->pData, newDataCap, pMP3->dataCapacity, &pMP3->allocationCallbacks); + if (pNewData == NULL) { + return 0; /* Out of memory. */ + } + + pMP3->pData = pNewData; + pMP3->dataCapacity = newDataCap; + } + + /* Fill in a chunk. */ + bytesRead = drmp3__on_read_clamped(pMP3, pMP3->pData + pMP3->dataSize, (pMP3->dataCapacity - pMP3->dataSize)); + if (bytesRead == 0) { + pMP3->atEnd = DRMP3_TRUE; + return 0; /* Error reading more data. */ + } + + pMP3->dataSize += bytesRead; + } + }; + + return pcmFramesRead; +} + +static drmp3_uint32 drmp3_decode_next_frame_ex__memory(drmp3* pMP3, drmp3d_sample_t* pPCMFrames, drmp3dec_frame_info* pMP3FrameInfo, const drmp3_uint8** ppMP3FrameData) +{ + drmp3_uint32 pcmFramesRead = 0; + drmp3dec_frame_info info; + + DRMP3_ASSERT(pMP3 != NULL); + DRMP3_ASSERT(pMP3->memory.pData != NULL); + + if (pMP3->atEnd) { + return 0; + } + + for (;;) { + pcmFramesRead = drmp3dec_decode_frame(&pMP3->decoder, pMP3->memory.pData + pMP3->memory.currentReadPos, (int)(pMP3->memory.dataSize - pMP3->memory.currentReadPos), pPCMFrames, &info); + if (pcmFramesRead > 0) { + pcmFramesRead = drmp3_hdr_frame_samples(pMP3->decoder.header); + pMP3->pcmFramesConsumedInMP3Frame = 0; + pMP3->pcmFramesRemainingInMP3Frame = pcmFramesRead; + pMP3->mp3FrameChannels = info.channels; + pMP3->mp3FrameSampleRate = info.sample_rate; + + if (pMP3FrameInfo != NULL) { + *pMP3FrameInfo = info; + } + + if (ppMP3FrameData != NULL) { + *ppMP3FrameData = pMP3->memory.pData + pMP3->memory.currentReadPos; + } + + break; + } else if (info.frame_bytes > 0) { + /* No frames were read, but it looks like we skipped past one. Read the next MP3 frame. */ + pMP3->memory.currentReadPos += (size_t)info.frame_bytes; + pMP3->streamCursor += (size_t)info.frame_bytes; + } else { + /* Nothing at all was read. Abort. */ + break; + } + } + + /* Consume the data. */ + pMP3->memory.currentReadPos += (size_t)info.frame_bytes; + pMP3->streamCursor += (size_t)info.frame_bytes; + + return pcmFramesRead; +} + +static drmp3_uint32 drmp3_decode_next_frame_ex(drmp3* pMP3, drmp3d_sample_t* pPCMFrames, drmp3dec_frame_info* pMP3FrameInfo, const drmp3_uint8** ppMP3FrameData) +{ + if (pMP3->memory.pData != NULL && pMP3->memory.dataSize > 0) { + return drmp3_decode_next_frame_ex__memory(pMP3, pPCMFrames, pMP3FrameInfo, ppMP3FrameData); + } else { + return drmp3_decode_next_frame_ex__callbacks(pMP3, pPCMFrames, pMP3FrameInfo, ppMP3FrameData); + } +} + +static drmp3_uint32 drmp3_decode_next_frame(drmp3* pMP3) +{ + DRMP3_ASSERT(pMP3 != NULL); + return drmp3_decode_next_frame_ex(pMP3, (drmp3d_sample_t*)pMP3->pcmFrames, NULL, NULL); +} + +#if 0 +static drmp3_uint32 drmp3_seek_next_frame(drmp3* pMP3) +{ + drmp3_uint32 pcmFrameCount; + + DRMP3_ASSERT(pMP3 != NULL); + + pcmFrameCount = drmp3_decode_next_frame_ex(pMP3, NULL, NULL, NULL); + if (pcmFrameCount == 0) { + return 0; + } + + /* We have essentially just skipped past the frame, so just set the remaining samples to 0. */ + pMP3->currentPCMFrame += pcmFrameCount; + pMP3->pcmFramesConsumedInMP3Frame = pcmFrameCount; + pMP3->pcmFramesRemainingInMP3Frame = 0; + + return pcmFrameCount; +} +#endif + +static drmp3_bool32 drmp3_init_internal(drmp3* pMP3, drmp3_read_proc onRead, drmp3_seek_proc onSeek, drmp3_tell_proc onTell, drmp3_meta_proc onMeta, void* pUserData, void* pUserDataMeta, const drmp3_allocation_callbacks* pAllocationCallbacks) +{ + drmp3dec_frame_info firstFrameInfo; + const drmp3_uint8* pFirstFrameData; + drmp3_uint32 firstFramePCMFrameCount; + drmp3_uint32 detectedMP3FrameCount = 0xFFFFFFFF; + + DRMP3_ASSERT(pMP3 != NULL); + DRMP3_ASSERT(onRead != NULL); + + /* This function assumes the output object has already been reset to 0. Do not do that here, otherwise things will break. */ + drmp3dec_init(&pMP3->decoder); + + pMP3->onRead = onRead; + pMP3->onSeek = onSeek; + pMP3->onMeta = onMeta; + pMP3->pUserData = pUserData; + pMP3->pUserDataMeta = pUserDataMeta; + pMP3->allocationCallbacks = drmp3_copy_allocation_callbacks_or_defaults(pAllocationCallbacks); + + if (pMP3->allocationCallbacks.onFree == NULL || (pMP3->allocationCallbacks.onMalloc == NULL && pMP3->allocationCallbacks.onRealloc == NULL)) { + return DRMP3_FALSE; /* Invalid allocation callbacks. */ + } + + pMP3->streamCursor = 0; + pMP3->streamLength = DRMP3_UINT64_MAX; + pMP3->streamStartOffset = 0; + pMP3->delayInPCMFrames = 0; + pMP3->paddingInPCMFrames = 0; + pMP3->totalPCMFrameCount = DRMP3_UINT64_MAX; + + /* We'll first check for any ID3v1 or APE tags. */ + #if 1 + if (onSeek != NULL && onTell != NULL) { + if (onSeek(pUserData, 0, DRMP3_SEEK_END)) { + drmp3_int64 streamLen; + int streamEndOffset = 0; + + /* First get the length of the stream. We need this so we can ensure the stream is big enough to store the tags. */ + if (onTell(pUserData, &streamLen)) { + /* ID3v1 */ + if (streamLen > 128) { + char id3[3]; + if (onSeek(pUserData, streamEndOffset - 128, DRMP3_SEEK_END)) { + if (onRead(pUserData, id3, 3) == 3 && id3[0] == 'T' && id3[1] == 'A' && id3[2] == 'G') { + /* We have an ID3v1 tag. */ + streamEndOffset -= 128; + streamLen -= 128; + + /* Fire a metadata callback for the TAG data. */ + if (onMeta != NULL) { + drmp3_uint8 tag[128]; + tag[0] = 'T'; tag[1] = 'A'; tag[2] = 'G'; + + if (onRead(pUserData, tag + 3, 125) == 125) { + drmp3__on_meta(pMP3, DRMP3_METADATA_TYPE_ID3V1, tag, 128); + } + } + } else { + /* No ID3v1 tag. */ + } + } else { + /* Failed to seek to the ID3v1 tag. */ + } + } else { + /* Stream too short. No ID3v1 tag. */ + } + + /* APE */ + if (streamLen > 32) { + char ape[32]; /* The footer. */ + if (onSeek(pUserData, streamEndOffset - 32, DRMP3_SEEK_END)) { + if (onRead(pUserData, ape, 32) == 32 && ape[0] == 'A' && ape[1] == 'P' && ape[2] == 'E' && ape[3] == 'T' && ape[4] == 'A' && ape[5] == 'G' && ape[6] == 'E' && ape[7] == 'X') { + /* We have an APE tag. */ + drmp3_uint32 tagSize = + ((drmp3_uint32)ape[24] << 0) | + ((drmp3_uint32)ape[25] << 8) | + ((drmp3_uint32)ape[26] << 16) | + ((drmp3_uint32)ape[27] << 24); + + streamEndOffset -= 32 + tagSize; + streamLen -= 32 + tagSize; + + /* Fire a metadata callback for the APE data. Must include both the main content and footer. */ + if (onMeta != NULL) { + /* We first need to seek to the start of the APE tag. */ + if (onSeek(pUserData, streamEndOffset, DRMP3_SEEK_END)) { + size_t apeTagSize = (size_t)tagSize + 32; + drmp3_uint8* pTagData = (drmp3_uint8*)drmp3_malloc(apeTagSize, pAllocationCallbacks); + if (pTagData != NULL) { + if (onRead(pUserData, pTagData, apeTagSize) == apeTagSize) { + drmp3__on_meta(pMP3, DRMP3_METADATA_TYPE_APE, pTagData, apeTagSize); + } + + drmp3_free(pTagData, pAllocationCallbacks); + } + } + } + } + } + } else { + /* Stream too short. No APE tag. */ + } + + /* Seek back to the start. */ + if (!onSeek(pUserData, 0, DRMP3_SEEK_SET)) { + return DRMP3_FALSE; /* Failed to seek back to the start. */ + } + + pMP3->streamLength = (drmp3_uint64)streamLen; + + if (pMP3->memory.pData != NULL) { + pMP3->memory.dataSize = (size_t)pMP3->streamLength; + } + } else { + /* Failed to get the length of the stream. ID3v1 and APE tags cannot be skipped. */ + if (!onSeek(pUserData, 0, DRMP3_SEEK_SET)) { + return DRMP3_FALSE; /* Failed to seek back to the start. */ + } + } + } else { + /* Failed to seek to the end. Cannot skip ID3v1 or APE tags. */ + } + } else { + /* No onSeek or onTell callback. Cannot skip ID3v1 or APE tags. */ + } + #endif + + + /* ID3v2 tags */ + #if 1 + { + char header[10]; + if (onRead(pUserData, header, 10) == 10) { + if (header[0] == 'I' && header[1] == 'D' && header[2] == '3') { + drmp3_uint32 tagSize = + (((drmp3_uint32)header[6] & 0x7F) << 21) | + (((drmp3_uint32)header[7] & 0x7F) << 14) | + (((drmp3_uint32)header[8] & 0x7F) << 7) | + (((drmp3_uint32)header[9] & 0x7F) << 0); + + /* Account for the footer. */ + if (header[5] & 0x10) { + tagSize += 10; + } + + /* Read the tag content and fire a metadata callback. */ + if (onMeta != NULL) { + size_t tagSizeWithHeader = 10 + tagSize; + drmp3_uint8* pTagData = (drmp3_uint8*)drmp3_malloc(tagSizeWithHeader, pAllocationCallbacks); + if (pTagData != NULL) { + DRMP3_COPY_MEMORY(pTagData, header, 10); + + if (onRead(pUserData, pTagData + 10, tagSize) == tagSize) { + drmp3__on_meta(pMP3, DRMP3_METADATA_TYPE_ID3V2, pTagData, tagSizeWithHeader); + } + + drmp3_free(pTagData, pAllocationCallbacks); + } + } else { + /* Don't have a metadata callback, so just skip the tag. */ + if (onSeek != NULL) { + if (!onSeek(pUserData, tagSize, DRMP3_SEEK_CUR)) { + return DRMP3_FALSE; /* Failed to seek past the ID3v2 tag. */ + } + } else { + /* Don't have a seek callback. Read and discard. */ + char discard[1024]; + + while (tagSize > 0) { + size_t bytesToRead = tagSize; + if (bytesToRead > sizeof(discard)) { + bytesToRead = sizeof(discard); + } + + if (onRead(pUserData, discard, bytesToRead) != bytesToRead) { + return DRMP3_FALSE; /* Failed to read data. */ + } + + tagSize -= (drmp3_uint32)bytesToRead; + } + } + } + + pMP3->streamStartOffset += 10 + tagSize; /* +10 for the header. */ + pMP3->streamCursor = pMP3->streamStartOffset; + } else { + /* Not an ID3v2 tag. Seek back to the start. */ + if (onSeek != NULL) { + if (!onSeek(pUserData, 0, DRMP3_SEEK_SET)) { + return DRMP3_FALSE; /* Failed to seek back to the start. */ + } + } else { + /* Don't have a seek callback to move backwards. We'll just fall through and let the decoding process re-sync. The ideal solution here would be to read into the cache. */ + + /* + TODO: Copy the header into the cache. Will need to allocate space. See drmp3_decode_next_frame_ex__callbacks. There is not need + to handle the memory case because that will always have a seek implementation and will never hit this code path. + */ + } + } + } else { + /* Failed to read the header. We can return false here. If we couldn't read 10 bytes there's no way we'll have a valid MP3 stream. */ + return DRMP3_FALSE; + } + } + #endif + + /* + Decode the first frame to confirm that it is indeed a valid MP3 stream. Note that it's possible the first frame + is actually a Xing/LAME/VBRI header. If this is the case we need to skip over it. + */ + firstFramePCMFrameCount = drmp3_decode_next_frame_ex(pMP3, (drmp3d_sample_t*)pMP3->pcmFrames, &firstFrameInfo, &pFirstFrameData); + if (firstFramePCMFrameCount > 0) { + DRMP3_ASSERT(pFirstFrameData != NULL); + + /* + It might be a header. If so, we need to clear out the cached PCM frames in order to trigger a reload of fresh + data when decoding starts. We can assume all validation has already been performed to check if this is a valid + MP3 frame and that there is more than 0 bytes making up the frame. + + We're going to be basing this parsing code off the minimp3_ex implementation. + */ + #if 1 + DRMP3_ASSERT(firstFrameInfo.frame_bytes > 0); + { + drmp3_bs bs; + drmp3_L3_gr_info grInfo[4]; + const drmp3_uint8* pTagData = pFirstFrameData; + + drmp3_bs_init(&bs, pFirstFrameData + DRMP3_HDR_SIZE, firstFrameInfo.frame_bytes - DRMP3_HDR_SIZE); + + if (DRMP3_HDR_IS_CRC(pFirstFrameData)) { + drmp3_bs_get_bits(&bs, 16); /* CRC. */ + } + + if (drmp3_L3_read_side_info(&bs, grInfo, pFirstFrameData) >= 0) { + drmp3_bool32 isXing = DRMP3_FALSE; + drmp3_bool32 isInfo = DRMP3_FALSE; + const drmp3_uint8* pTagDataBeg; + + pTagDataBeg = pFirstFrameData + DRMP3_HDR_SIZE + (bs.pos/8); + pTagData = pTagDataBeg; + + /* Check for both "Xing" and "Info" identifiers. */ + isXing = (pTagData[0] == 'X' && pTagData[1] == 'i' && pTagData[2] == 'n' && pTagData[3] == 'g'); + isInfo = (pTagData[0] == 'I' && pTagData[1] == 'n' && pTagData[2] == 'f' && pTagData[3] == 'o'); + + if (isXing || isInfo) { + drmp3_uint32 bytes = 0; + drmp3_uint32 flags = pTagData[7]; + + pTagData += 8; /* Skip past the ID and flags. */ + + if (flags & 0x01) { /* FRAMES flag. */ + detectedMP3FrameCount = (drmp3_uint32)pTagData[0] << 24 | (drmp3_uint32)pTagData[1] << 16 | (drmp3_uint32)pTagData[2] << 8 | (drmp3_uint32)pTagData[3]; + pTagData += 4; + } + + if (flags & 0x02) { /* BYTES flag. */ + bytes = (drmp3_uint32)pTagData[0] << 24 | (drmp3_uint32)pTagData[1] << 16 | (drmp3_uint32)pTagData[2] << 8 | (drmp3_uint32)pTagData[3]; + (void)bytes; /* <-- Just to silence a warning about `bytes` being assigned but unused. Want to leave this here in case I want to make use of it later. */ + pTagData += 4; + } + + if (flags & 0x04) { /* TOC flag. */ + /* TODO: Extract and bind seek points. */ + pTagData += 100; + } + + if (flags & 0x08) { /* SCALE flag. */ + pTagData += 4; + } + + /* At this point we're done with the Xing/Info header. Now we can look at the LAME data. */ + if (pTagData[0]) { + pTagData += 21; + + if (pTagData - pFirstFrameData + 14 < firstFrameInfo.frame_bytes) { + int delayInPCMFrames; + int paddingInPCMFrames; + + delayInPCMFrames = (( (drmp3_uint32)pTagData[0] << 4) | ((drmp3_uint32)pTagData[1] >> 4)) + (528 + 1); + paddingInPCMFrames = ((((drmp3_uint32)pTagData[1] & 0xF) << 8) | ((drmp3_uint32)pTagData[2] )) - (528 + 1); + if (paddingInPCMFrames < 0) { + paddingInPCMFrames = 0; /* Padding cannot be negative. Probably a malformed file. Ignore. */ + } + + pMP3->delayInPCMFrames = (drmp3_uint32)delayInPCMFrames; + pMP3->paddingInPCMFrames = (drmp3_uint32)paddingInPCMFrames; + } + } + + /* + My understanding is that if the "Xing" header is present we can consider this to be a VBR stream and if the "Info" header is + present it's a CBR stream. If this is not the case let me know! I'm just tracking this for the time being in case I want to + look at doing some CBR optimizations later on, such as faster seeking. + */ + if (isXing) { + pMP3->isVBR = DRMP3_TRUE; + } else if (isInfo) { + pMP3->isCBR = DRMP3_TRUE; + } + + /* Post the raw data of the tag to the metadata callback. */ + if (onMeta != NULL) { + drmp3_metadata_type metadataType = isXing ? DRMP3_METADATA_TYPE_XING : DRMP3_METADATA_TYPE_VBRI; + size_t tagDataSize; + + tagDataSize = (size_t)firstFrameInfo.frame_bytes; + tagDataSize -= (size_t)(pTagDataBeg - pFirstFrameData); + + drmp3__on_meta(pMP3, metadataType, pTagDataBeg, tagDataSize); + } + + /* Since this was identified as a tag, we don't want to treat it as audio. We need to clear out the PCM cache. */ + pMP3->pcmFramesRemainingInMP3Frame = 0; + + /* The start offset needs to be moved to the end of this frame so it's not included in any audio processing after seeking. */ + pMP3->streamStartOffset += (drmp3_uint32)(firstFrameInfo.frame_bytes); + pMP3->streamCursor = pMP3->streamStartOffset; + + /* + The internal decoder needs to be reset to clear out any state. If we don't reset this state, it's possible for + there to be inconsistencies in the number of samples read when reading to the end of the stream depending on + whether or not the caller seeks to the start of the stream. + */ + drmp3dec_init(&pMP3->decoder); + } + } else { + /* Failed to read the side info. */ + } + } + #endif + } else { + /* Not a valid MP3 stream. */ + drmp3__free_from_callbacks(pMP3->pData, &pMP3->allocationCallbacks); /* The call above may have allocated memory. Need to make sure it's freed before aborting. */ + return DRMP3_FALSE; + } + + if (detectedMP3FrameCount != 0xFFFFFFFF) { + pMP3->totalPCMFrameCount = detectedMP3FrameCount * firstFramePCMFrameCount; + } + + pMP3->channels = pMP3->mp3FrameChannels; + pMP3->sampleRate = pMP3->mp3FrameSampleRate; + + return DRMP3_TRUE; +} + +DRMP3_API drmp3_bool32 drmp3_init(drmp3* pMP3, drmp3_read_proc onRead, drmp3_seek_proc onSeek, drmp3_tell_proc onTell, drmp3_meta_proc onMeta, void* pUserData, const drmp3_allocation_callbacks* pAllocationCallbacks) +{ + if (pMP3 == NULL || onRead == NULL) { + return DRMP3_FALSE; + } + + DRMP3_ZERO_OBJECT(pMP3); + return drmp3_init_internal(pMP3, onRead, onSeek, onTell, onMeta, pUserData, pUserData, pAllocationCallbacks); +} + + +static size_t drmp3__on_read_memory(void* pUserData, void* pBufferOut, size_t bytesToRead) +{ + drmp3* pMP3 = (drmp3*)pUserData; + size_t bytesRemaining; + + DRMP3_ASSERT(pMP3 != NULL); + DRMP3_ASSERT(pMP3->memory.dataSize >= pMP3->memory.currentReadPos); + + bytesRemaining = pMP3->memory.dataSize - pMP3->memory.currentReadPos; + if (bytesToRead > bytesRemaining) { + bytesToRead = bytesRemaining; + } + + if (bytesToRead > 0) { + DRMP3_COPY_MEMORY(pBufferOut, pMP3->memory.pData + pMP3->memory.currentReadPos, bytesToRead); + pMP3->memory.currentReadPos += bytesToRead; + } + + return bytesToRead; +} + +static drmp3_bool32 drmp3__on_seek_memory(void* pUserData, int byteOffset, drmp3_seek_origin origin) +{ + drmp3* pMP3 = (drmp3*)pUserData; + drmp3_int64 newCursor; + + DRMP3_ASSERT(pMP3 != NULL); + + newCursor = pMP3->memory.currentReadPos; + + if (origin == DRMP3_SEEK_SET) { + newCursor = 0; + } else if (origin == DRMP3_SEEK_CUR) { + newCursor = (drmp3_int64)pMP3->memory.currentReadPos; + } else if (origin == DRMP3_SEEK_END) { + newCursor = (drmp3_int64)pMP3->memory.dataSize; + } else { + DRMP3_ASSERT(!"Invalid seek origin"); + return DRMP3_FALSE; + } + + newCursor += byteOffset; + + if (newCursor < 0) { + return DRMP3_FALSE; /* Trying to seek prior to the start of the buffer. */ + } + if ((size_t)newCursor > pMP3->memory.dataSize) { + return DRMP3_FALSE; /* Trying to seek beyond the end of the buffer. */ + } + + pMP3->memory.currentReadPos = (size_t)newCursor; + + return DRMP3_TRUE; +} + +static drmp3_bool32 drmp3__on_tell_memory(void* pUserData, drmp3_int64* pCursor) +{ + drmp3* pMP3 = (drmp3*)pUserData; + + DRMP3_ASSERT(pMP3 != NULL); + DRMP3_ASSERT(pCursor != NULL); + + *pCursor = (drmp3_int64)pMP3->memory.currentReadPos; + return DRMP3_TRUE; +} + +DRMP3_API drmp3_bool32 drmp3_init_memory_with_metadata(drmp3* pMP3, const void* pData, size_t dataSize, drmp3_meta_proc onMeta, void* pUserDataMeta, const drmp3_allocation_callbacks* pAllocationCallbacks) +{ + drmp3_bool32 result; + + if (pMP3 == NULL) { + return DRMP3_FALSE; + } + + DRMP3_ZERO_OBJECT(pMP3); + + if (pData == NULL || dataSize == 0) { + return DRMP3_FALSE; + } + + pMP3->memory.pData = (const drmp3_uint8*)pData; + pMP3->memory.dataSize = dataSize; + pMP3->memory.currentReadPos = 0; + + result = drmp3_init_internal(pMP3, drmp3__on_read_memory, drmp3__on_seek_memory, drmp3__on_tell_memory, onMeta, pMP3, pUserDataMeta, pAllocationCallbacks); + if (result == DRMP3_FALSE) { + return DRMP3_FALSE; + } + + /* Adjust the length of the memory stream to account for ID3v1 and APE tags. */ + if (pMP3->streamLength <= (drmp3_uint64)DRMP3_SIZE_MAX) { + pMP3->memory.dataSize = (size_t)pMP3->streamLength; /* Safe cast. */ + } + + if (pMP3->streamStartOffset > (drmp3_uint64)DRMP3_SIZE_MAX) { + return DRMP3_FALSE; /* Tags too big. */ + } + + return DRMP3_TRUE; +} + +DRMP3_API drmp3_bool32 drmp3_init_memory(drmp3* pMP3, const void* pData, size_t dataSize, const drmp3_allocation_callbacks* pAllocationCallbacks) +{ + return drmp3_init_memory_with_metadata(pMP3, pData, dataSize, NULL, NULL, pAllocationCallbacks); +} + + +#ifndef DR_MP3_NO_STDIO +#include +#include /* For wcslen(), wcsrtombs() */ + +/* Errno */ +/* drmp3_result_from_errno() is only used inside DR_MP3_NO_STDIO for now. Move this out if it's ever used elsewhere. */ +#include +static drmp3_result drmp3_result_from_errno(int e) +{ + switch (e) + { + case 0: return DRMP3_SUCCESS; + #ifdef EPERM + case EPERM: return DRMP3_INVALID_OPERATION; + #endif + #ifdef ENOENT + case ENOENT: return DRMP3_DOES_NOT_EXIST; + #endif + #ifdef ESRCH + case ESRCH: return DRMP3_DOES_NOT_EXIST; + #endif + #ifdef EINTR + case EINTR: return DRMP3_INTERRUPT; + #endif + #ifdef EIO + case EIO: return DRMP3_IO_ERROR; + #endif + #ifdef ENXIO + case ENXIO: return DRMP3_DOES_NOT_EXIST; + #endif + #ifdef E2BIG + case E2BIG: return DRMP3_INVALID_ARGS; + #endif + #ifdef ENOEXEC + case ENOEXEC: return DRMP3_INVALID_FILE; + #endif + #ifdef EBADF + case EBADF: return DRMP3_INVALID_FILE; + #endif + #ifdef ECHILD + case ECHILD: return DRMP3_ERROR; + #endif + #ifdef EAGAIN + case EAGAIN: return DRMP3_UNAVAILABLE; + #endif + #ifdef ENOMEM + case ENOMEM: return DRMP3_OUT_OF_MEMORY; + #endif + #ifdef EACCES + case EACCES: return DRMP3_ACCESS_DENIED; + #endif + #ifdef EFAULT + case EFAULT: return DRMP3_BAD_ADDRESS; + #endif + #ifdef ENOTBLK + case ENOTBLK: return DRMP3_ERROR; + #endif + #ifdef EBUSY + case EBUSY: return DRMP3_BUSY; + #endif + #ifdef EEXIST + case EEXIST: return DRMP3_ALREADY_EXISTS; + #endif + #ifdef EXDEV + case EXDEV: return DRMP3_ERROR; + #endif + #ifdef ENODEV + case ENODEV: return DRMP3_DOES_NOT_EXIST; + #endif + #ifdef ENOTDIR + case ENOTDIR: return DRMP3_NOT_DIRECTORY; + #endif + #ifdef EISDIR + case EISDIR: return DRMP3_IS_DIRECTORY; + #endif + #ifdef EINVAL + case EINVAL: return DRMP3_INVALID_ARGS; + #endif + #ifdef ENFILE + case ENFILE: return DRMP3_TOO_MANY_OPEN_FILES; + #endif + #ifdef EMFILE + case EMFILE: return DRMP3_TOO_MANY_OPEN_FILES; + #endif + #ifdef ENOTTY + case ENOTTY: return DRMP3_INVALID_OPERATION; + #endif + #ifdef ETXTBSY + case ETXTBSY: return DRMP3_BUSY; + #endif + #ifdef EFBIG + case EFBIG: return DRMP3_TOO_BIG; + #endif + #ifdef ENOSPC + case ENOSPC: return DRMP3_NO_SPACE; + #endif + #ifdef ESPIPE + case ESPIPE: return DRMP3_BAD_SEEK; + #endif + #ifdef EROFS + case EROFS: return DRMP3_ACCESS_DENIED; + #endif + #ifdef EMLINK + case EMLINK: return DRMP3_TOO_MANY_LINKS; + #endif + #ifdef EPIPE + case EPIPE: return DRMP3_BAD_PIPE; + #endif + #ifdef EDOM + case EDOM: return DRMP3_OUT_OF_RANGE; + #endif + #ifdef ERANGE + case ERANGE: return DRMP3_OUT_OF_RANGE; + #endif + #ifdef EDEADLK + case EDEADLK: return DRMP3_DEADLOCK; + #endif + #ifdef ENAMETOOLONG + case ENAMETOOLONG: return DRMP3_PATH_TOO_LONG; + #endif + #ifdef ENOLCK + case ENOLCK: return DRMP3_ERROR; + #endif + #ifdef ENOSYS + case ENOSYS: return DRMP3_NOT_IMPLEMENTED; + #endif + #if defined(ENOTEMPTY) && ENOTEMPTY != EEXIST /* In AIX, ENOTEMPTY and EEXIST use the same value. */ + case ENOTEMPTY: return DRMP3_DIRECTORY_NOT_EMPTY; + #endif + #ifdef ELOOP + case ELOOP: return DRMP3_TOO_MANY_LINKS; + #endif + #ifdef ENOMSG + case ENOMSG: return DRMP3_NO_MESSAGE; + #endif + #ifdef EIDRM + case EIDRM: return DRMP3_ERROR; + #endif + #ifdef ECHRNG + case ECHRNG: return DRMP3_ERROR; + #endif + #ifdef EL2NSYNC + case EL2NSYNC: return DRMP3_ERROR; + #endif + #ifdef EL3HLT + case EL3HLT: return DRMP3_ERROR; + #endif + #ifdef EL3RST + case EL3RST: return DRMP3_ERROR; + #endif + #ifdef ELNRNG + case ELNRNG: return DRMP3_OUT_OF_RANGE; + #endif + #ifdef EUNATCH + case EUNATCH: return DRMP3_ERROR; + #endif + #ifdef ENOCSI + case ENOCSI: return DRMP3_ERROR; + #endif + #ifdef EL2HLT + case EL2HLT: return DRMP3_ERROR; + #endif + #ifdef EBADE + case EBADE: return DRMP3_ERROR; + #endif + #ifdef EBADR + case EBADR: return DRMP3_ERROR; + #endif + #ifdef EXFULL + case EXFULL: return DRMP3_ERROR; + #endif + #ifdef ENOANO + case ENOANO: return DRMP3_ERROR; + #endif + #ifdef EBADRQC + case EBADRQC: return DRMP3_ERROR; + #endif + #ifdef EBADSLT + case EBADSLT: return DRMP3_ERROR; + #endif + #ifdef EBFONT + case EBFONT: return DRMP3_INVALID_FILE; + #endif + #ifdef ENOSTR + case ENOSTR: return DRMP3_ERROR; + #endif + #ifdef ENODATA + case ENODATA: return DRMP3_NO_DATA_AVAILABLE; + #endif + #ifdef ETIME + case ETIME: return DRMP3_TIMEOUT; + #endif + #ifdef ENOSR + case ENOSR: return DRMP3_NO_DATA_AVAILABLE; + #endif + #ifdef ENONET + case ENONET: return DRMP3_NO_NETWORK; + #endif + #ifdef ENOPKG + case ENOPKG: return DRMP3_ERROR; + #endif + #ifdef EREMOTE + case EREMOTE: return DRMP3_ERROR; + #endif + #ifdef ENOLINK + case ENOLINK: return DRMP3_ERROR; + #endif + #ifdef EADV + case EADV: return DRMP3_ERROR; + #endif + #ifdef ESRMNT + case ESRMNT: return DRMP3_ERROR; + #endif + #ifdef ECOMM + case ECOMM: return DRMP3_ERROR; + #endif + #ifdef EPROTO + case EPROTO: return DRMP3_ERROR; + #endif + #ifdef EMULTIHOP + case EMULTIHOP: return DRMP3_ERROR; + #endif + #ifdef EDOTDOT + case EDOTDOT: return DRMP3_ERROR; + #endif + #ifdef EBADMSG + case EBADMSG: return DRMP3_BAD_MESSAGE; + #endif + #ifdef EOVERFLOW + case EOVERFLOW: return DRMP3_TOO_BIG; + #endif + #ifdef ENOTUNIQ + case ENOTUNIQ: return DRMP3_NOT_UNIQUE; + #endif + #ifdef EBADFD + case EBADFD: return DRMP3_ERROR; + #endif + #ifdef EREMCHG + case EREMCHG: return DRMP3_ERROR; + #endif + #ifdef ELIBACC + case ELIBACC: return DRMP3_ACCESS_DENIED; + #endif + #ifdef ELIBBAD + case ELIBBAD: return DRMP3_INVALID_FILE; + #endif + #ifdef ELIBSCN + case ELIBSCN: return DRMP3_INVALID_FILE; + #endif + #ifdef ELIBMAX + case ELIBMAX: return DRMP3_ERROR; + #endif + #ifdef ELIBEXEC + case ELIBEXEC: return DRMP3_ERROR; + #endif + #ifdef EILSEQ + case EILSEQ: return DRMP3_INVALID_DATA; + #endif + #ifdef ERESTART + case ERESTART: return DRMP3_ERROR; + #endif + #ifdef ESTRPIPE + case ESTRPIPE: return DRMP3_ERROR; + #endif + #ifdef EUSERS + case EUSERS: return DRMP3_ERROR; + #endif + #ifdef ENOTSOCK + case ENOTSOCK: return DRMP3_NOT_SOCKET; + #endif + #ifdef EDESTADDRREQ + case EDESTADDRREQ: return DRMP3_NO_ADDRESS; + #endif + #ifdef EMSGSIZE + case EMSGSIZE: return DRMP3_TOO_BIG; + #endif + #ifdef EPROTOTYPE + case EPROTOTYPE: return DRMP3_BAD_PROTOCOL; + #endif + #ifdef ENOPROTOOPT + case ENOPROTOOPT: return DRMP3_PROTOCOL_UNAVAILABLE; + #endif + #ifdef EPROTONOSUPPORT + case EPROTONOSUPPORT: return DRMP3_PROTOCOL_NOT_SUPPORTED; + #endif + #ifdef ESOCKTNOSUPPORT + case ESOCKTNOSUPPORT: return DRMP3_SOCKET_NOT_SUPPORTED; + #endif + #ifdef EOPNOTSUPP + case EOPNOTSUPP: return DRMP3_INVALID_OPERATION; + #endif + #ifdef EPFNOSUPPORT + case EPFNOSUPPORT: return DRMP3_PROTOCOL_FAMILY_NOT_SUPPORTED; + #endif + #ifdef EAFNOSUPPORT + case EAFNOSUPPORT: return DRMP3_ADDRESS_FAMILY_NOT_SUPPORTED; + #endif + #ifdef EADDRINUSE + case EADDRINUSE: return DRMP3_ALREADY_IN_USE; + #endif + #ifdef EADDRNOTAVAIL + case EADDRNOTAVAIL: return DRMP3_ERROR; + #endif + #ifdef ENETDOWN + case ENETDOWN: return DRMP3_NO_NETWORK; + #endif + #ifdef ENETUNREACH + case ENETUNREACH: return DRMP3_NO_NETWORK; + #endif + #ifdef ENETRESET + case ENETRESET: return DRMP3_NO_NETWORK; + #endif + #ifdef ECONNABORTED + case ECONNABORTED: return DRMP3_NO_NETWORK; + #endif + #ifdef ECONNRESET + case ECONNRESET: return DRMP3_CONNECTION_RESET; + #endif + #ifdef ENOBUFS + case ENOBUFS: return DRMP3_NO_SPACE; + #endif + #ifdef EISCONN + case EISCONN: return DRMP3_ALREADY_CONNECTED; + #endif + #ifdef ENOTCONN + case ENOTCONN: return DRMP3_NOT_CONNECTED; + #endif + #ifdef ESHUTDOWN + case ESHUTDOWN: return DRMP3_ERROR; + #endif + #ifdef ETOOMANYREFS + case ETOOMANYREFS: return DRMP3_ERROR; + #endif + #ifdef ETIMEDOUT + case ETIMEDOUT: return DRMP3_TIMEOUT; + #endif + #ifdef ECONNREFUSED + case ECONNREFUSED: return DRMP3_CONNECTION_REFUSED; + #endif + #ifdef EHOSTDOWN + case EHOSTDOWN: return DRMP3_NO_HOST; + #endif + #ifdef EHOSTUNREACH + case EHOSTUNREACH: return DRMP3_NO_HOST; + #endif + #ifdef EALREADY + case EALREADY: return DRMP3_IN_PROGRESS; + #endif + #ifdef EINPROGRESS + case EINPROGRESS: return DRMP3_IN_PROGRESS; + #endif + #ifdef ESTALE + case ESTALE: return DRMP3_INVALID_FILE; + #endif + #ifdef EUCLEAN + case EUCLEAN: return DRMP3_ERROR; + #endif + #ifdef ENOTNAM + case ENOTNAM: return DRMP3_ERROR; + #endif + #ifdef ENAVAIL + case ENAVAIL: return DRMP3_ERROR; + #endif + #ifdef EISNAM + case EISNAM: return DRMP3_ERROR; + #endif + #ifdef EREMOTEIO + case EREMOTEIO: return DRMP3_IO_ERROR; + #endif + #ifdef EDQUOT + case EDQUOT: return DRMP3_NO_SPACE; + #endif + #ifdef ENOMEDIUM + case ENOMEDIUM: return DRMP3_DOES_NOT_EXIST; + #endif + #ifdef EMEDIUMTYPE + case EMEDIUMTYPE: return DRMP3_ERROR; + #endif + #ifdef ECANCELED + case ECANCELED: return DRMP3_CANCELLED; + #endif + #ifdef ENOKEY + case ENOKEY: return DRMP3_ERROR; + #endif + #ifdef EKEYEXPIRED + case EKEYEXPIRED: return DRMP3_ERROR; + #endif + #ifdef EKEYREVOKED + case EKEYREVOKED: return DRMP3_ERROR; + #endif + #ifdef EKEYREJECTED + case EKEYREJECTED: return DRMP3_ERROR; + #endif + #ifdef EOWNERDEAD + case EOWNERDEAD: return DRMP3_ERROR; + #endif + #ifdef ENOTRECOVERABLE + case ENOTRECOVERABLE: return DRMP3_ERROR; + #endif + #ifdef ERFKILL + case ERFKILL: return DRMP3_ERROR; + #endif + #ifdef EHWPOISON + case EHWPOISON: return DRMP3_ERROR; + #endif + default: return DRMP3_ERROR; + } +} +/* End Errno */ + +/* fopen */ +static drmp3_result drmp3_fopen(FILE** ppFile, const char* pFilePath, const char* pOpenMode) +{ +#if defined(_MSC_VER) && _MSC_VER >= 1400 + errno_t err; +#endif + + if (ppFile != NULL) { + *ppFile = NULL; /* Safety. */ + } + + if (pFilePath == NULL || pOpenMode == NULL || ppFile == NULL) { + return DRMP3_INVALID_ARGS; + } + +#if defined(_MSC_VER) && _MSC_VER >= 1400 + err = fopen_s(ppFile, pFilePath, pOpenMode); + if (err != 0) { + return drmp3_result_from_errno(err); + } +#else +#if defined(_WIN32) || defined(__APPLE__) + *ppFile = fopen(pFilePath, pOpenMode); +#else + #if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS == 64 && defined(_LARGEFILE64_SOURCE) + *ppFile = fopen64(pFilePath, pOpenMode); + #else + *ppFile = fopen(pFilePath, pOpenMode); + #endif +#endif + if (*ppFile == NULL) { + drmp3_result result = drmp3_result_from_errno(errno); + if (result == DRMP3_SUCCESS) { + result = DRMP3_ERROR; /* Just a safety check to make sure we never ever return success when pFile == NULL. */ + } + + return result; + } +#endif + + return DRMP3_SUCCESS; +} + +/* +_wfopen() isn't always available in all compilation environments. + + * Windows only. + * MSVC seems to support it universally as far back as VC6 from what I can tell (haven't checked further back). + * MinGW-64 (both 32- and 64-bit) seems to support it. + * MinGW wraps it in !defined(__STRICT_ANSI__). + * OpenWatcom wraps it in !defined(_NO_EXT_KEYS). + +This can be reviewed as compatibility issues arise. The preference is to use _wfopen_s() and _wfopen() as opposed to the wcsrtombs() +fallback, so if you notice your compiler not detecting this properly I'm happy to look at adding support. +*/ +#if defined(_WIN32) + #if defined(_MSC_VER) || defined(__MINGW64__) || (!defined(__STRICT_ANSI__) && !defined(_NO_EXT_KEYS)) + #define DRMP3_HAS_WFOPEN + #endif +#endif + +static drmp3_result drmp3_wfopen(FILE** ppFile, const wchar_t* pFilePath, const wchar_t* pOpenMode, const drmp3_allocation_callbacks* pAllocationCallbacks) +{ + if (ppFile != NULL) { + *ppFile = NULL; /* Safety. */ + } + + if (pFilePath == NULL || pOpenMode == NULL || ppFile == NULL) { + return DRMP3_INVALID_ARGS; + } + +#if defined(DRMP3_HAS_WFOPEN) + { + /* Use _wfopen() on Windows. */ + #if defined(_MSC_VER) && _MSC_VER >= 1400 + errno_t err = _wfopen_s(ppFile, pFilePath, pOpenMode); + if (err != 0) { + return drmp3_result_from_errno(err); + } + #else + *ppFile = _wfopen(pFilePath, pOpenMode); + if (*ppFile == NULL) { + return drmp3_result_from_errno(errno); + } + #endif + (void)pAllocationCallbacks; + } +#else + /* + Use fopen() on anything other than Windows. Requires a conversion. This is annoying because + fopen() is locale specific. The only real way I can think of to do this is with wcsrtombs(). Note + that wcstombs() is apparently not thread-safe because it uses a static global mbstate_t object for + maintaining state. I've checked this with -std=c89 and it works, but if somebody get's a compiler + error I'll look into improving compatibility. + */ + + /* + Some compilers don't support wchar_t or wcsrtombs() which we're using below. In this case we just + need to abort with an error. If you encounter a compiler lacking such support, add it to this list + and submit a bug report and it'll be added to the library upstream. + */ + #if defined(__DJGPP__) + { + /* Nothing to do here. This will fall through to the error check below. */ + } + #else + { + mbstate_t mbs; + size_t lenMB; + const wchar_t* pFilePathTemp = pFilePath; + char* pFilePathMB = NULL; + char pOpenModeMB[32] = {0}; + + /* Get the length first. */ + DRMP3_ZERO_OBJECT(&mbs); + lenMB = wcsrtombs(NULL, &pFilePathTemp, 0, &mbs); + if (lenMB == (size_t)-1) { + return drmp3_result_from_errno(errno); + } + + pFilePathMB = (char*)drmp3__malloc_from_callbacks(lenMB + 1, pAllocationCallbacks); + if (pFilePathMB == NULL) { + return DRMP3_OUT_OF_MEMORY; + } + + pFilePathTemp = pFilePath; + DRMP3_ZERO_OBJECT(&mbs); + wcsrtombs(pFilePathMB, &pFilePathTemp, lenMB + 1, &mbs); + + /* The open mode should always consist of ASCII characters so we should be able to do a trivial conversion. */ + { + size_t i = 0; + for (;;) { + if (pOpenMode[i] == 0) { + pOpenModeMB[i] = '\0'; + break; + } + + pOpenModeMB[i] = (char)pOpenMode[i]; + i += 1; + } + } + + *ppFile = fopen(pFilePathMB, pOpenModeMB); + + drmp3__free_from_callbacks(pFilePathMB, pAllocationCallbacks); + } + #endif + + if (*ppFile == NULL) { + return DRMP3_ERROR; + } +#endif + + return DRMP3_SUCCESS; +} +/* End fopen */ + + +static size_t drmp3__on_read_stdio(void* pUserData, void* pBufferOut, size_t bytesToRead) +{ + return fread(pBufferOut, 1, bytesToRead, (FILE*)pUserData); +} + +static drmp3_bool32 drmp3__on_seek_stdio(void* pUserData, int offset, drmp3_seek_origin origin) +{ + int whence = SEEK_SET; + if (origin == DRMP3_SEEK_CUR) { + whence = SEEK_CUR; + } else if (origin == DRMP3_SEEK_END) { + whence = SEEK_END; + } + + return fseek((FILE*)pUserData, offset, whence) == 0; +} + +static drmp3_bool32 drmp3__on_tell_stdio(void* pUserData, drmp3_int64* pCursor) +{ + FILE* pFileStdio = (FILE*)pUserData; + drmp3_int64 result; + + /* These were all validated at a higher level. */ + DRMP3_ASSERT(pFileStdio != NULL); + DRMP3_ASSERT(pCursor != NULL); + +#if defined(_WIN32) && !defined(NXDK) + #if defined(_MSC_VER) && _MSC_VER > 1200 + result = _ftelli64(pFileStdio); + #else + result = ftell(pFileStdio); + #endif +#else + result = ftell(pFileStdio); +#endif + + *pCursor = result; + + return DRMP3_TRUE; +} + +DRMP3_API drmp3_bool32 drmp3_init_file_with_metadata(drmp3* pMP3, const char* pFilePath, drmp3_meta_proc onMeta, void* pUserDataMeta, const drmp3_allocation_callbacks* pAllocationCallbacks) +{ + drmp3_bool32 result; + FILE* pFile; + + if (pMP3 == NULL) { + return DRMP3_FALSE; + } + + DRMP3_ZERO_OBJECT(pMP3); + + if (drmp3_fopen(&pFile, pFilePath, "rb") != DRMP3_SUCCESS) { + return DRMP3_FALSE; + } + + result = drmp3_init_internal(pMP3, drmp3__on_read_stdio, drmp3__on_seek_stdio, drmp3__on_tell_stdio, onMeta, (void*)pFile, pUserDataMeta, pAllocationCallbacks); + if (result != DRMP3_TRUE) { + fclose(pFile); + return result; + } + + return DRMP3_TRUE; +} + +DRMP3_API drmp3_bool32 drmp3_init_file_with_metadata_w(drmp3* pMP3, const wchar_t* pFilePath, drmp3_meta_proc onMeta, void* pUserDataMeta, const drmp3_allocation_callbacks* pAllocationCallbacks) +{ + drmp3_bool32 result; + FILE* pFile; + + if (pMP3 == NULL) { + return DRMP3_FALSE; + } + + DRMP3_ZERO_OBJECT(pMP3); + + if (drmp3_wfopen(&pFile, pFilePath, L"rb", pAllocationCallbacks) != DRMP3_SUCCESS) { + return DRMP3_FALSE; + } + + result = drmp3_init_internal(pMP3, drmp3__on_read_stdio, drmp3__on_seek_stdio, drmp3__on_tell_stdio, onMeta, (void*)pFile, pUserDataMeta, pAllocationCallbacks); + if (result != DRMP3_TRUE) { + fclose(pFile); + return result; + } + + return DRMP3_TRUE; +} + +DRMP3_API drmp3_bool32 drmp3_init_file(drmp3* pMP3, const char* pFilePath, const drmp3_allocation_callbacks* pAllocationCallbacks) +{ + return drmp3_init_file_with_metadata(pMP3, pFilePath, NULL, NULL, pAllocationCallbacks); +} + +DRMP3_API drmp3_bool32 drmp3_init_file_w(drmp3* pMP3, const wchar_t* pFilePath, const drmp3_allocation_callbacks* pAllocationCallbacks) +{ + return drmp3_init_file_with_metadata_w(pMP3, pFilePath, NULL, NULL, pAllocationCallbacks); +} +#endif + +DRMP3_API void drmp3_uninit(drmp3* pMP3) +{ + if (pMP3 == NULL) { + return; + } + +#ifndef DR_MP3_NO_STDIO + if (pMP3->onRead == drmp3__on_read_stdio) { + FILE* pFile = (FILE*)pMP3->pUserData; + if (pFile != NULL) { + fclose(pFile); + pMP3->pUserData = NULL; /* Make sure the file handle is cleared to NULL to we don't attempt to close it a second time. */ + } + } +#endif + + drmp3__free_from_callbacks(pMP3->pData, &pMP3->allocationCallbacks); +} + +#if defined(DR_MP3_FLOAT_OUTPUT) +static void drmp3_f32_to_s16(drmp3_int16* dst, const float* src, drmp3_uint64 sampleCount) +{ + drmp3_uint64 i; + drmp3_uint64 i4; + drmp3_uint64 sampleCount4; + + /* Unrolled. */ + i = 0; + sampleCount4 = sampleCount >> 2; + for (i4 = 0; i4 < sampleCount4; i4 += 1) { + float x0 = src[i+0]; + float x1 = src[i+1]; + float x2 = src[i+2]; + float x3 = src[i+3]; + + x0 = ((x0 < -1) ? -1 : ((x0 > 1) ? 1 : x0)); + x1 = ((x1 < -1) ? -1 : ((x1 > 1) ? 1 : x1)); + x2 = ((x2 < -1) ? -1 : ((x2 > 1) ? 1 : x2)); + x3 = ((x3 < -1) ? -1 : ((x3 > 1) ? 1 : x3)); + + x0 = x0 * 32767.0f; + x1 = x1 * 32767.0f; + x2 = x2 * 32767.0f; + x3 = x3 * 32767.0f; + + dst[i+0] = (drmp3_int16)x0; + dst[i+1] = (drmp3_int16)x1; + dst[i+2] = (drmp3_int16)x2; + dst[i+3] = (drmp3_int16)x3; + + i += 4; + } + + /* Leftover. */ + for (; i < sampleCount; i += 1) { + float x = src[i]; + x = ((x < -1) ? -1 : ((x > 1) ? 1 : x)); /* clip */ + x = x * 32767.0f; /* -1..1 to -32767..32767 */ + + dst[i] = (drmp3_int16)x; + } +} +#endif + +#if !defined(DR_MP3_FLOAT_OUTPUT) +static void drmp3_s16_to_f32(float* dst, const drmp3_int16* src, drmp3_uint64 sampleCount) +{ + drmp3_uint64 i; + for (i = 0; i < sampleCount; i += 1) { + float x = (float)src[i]; + x = x * 0.000030517578125f; /* -32768..32767 to -1..0.999969482421875 */ + dst[i] = x; + } +} +#endif + + +static drmp3_uint64 drmp3_read_pcm_frames_raw(drmp3* pMP3, drmp3_uint64 framesToRead, void* pBufferOut) +{ + drmp3_uint64 totalFramesRead = 0; + + DRMP3_ASSERT(pMP3 != NULL); + DRMP3_ASSERT(pMP3->onRead != NULL); + + while (framesToRead > 0) { + drmp3_uint32 framesToConsume; + + /* Skip frames if necessary. */ + if (pMP3->currentPCMFrame < pMP3->delayInPCMFrames) { + drmp3_uint32 framesToSkip = (drmp3_uint32)DRMP3_MIN(pMP3->pcmFramesRemainingInMP3Frame, pMP3->delayInPCMFrames - pMP3->currentPCMFrame); + + pMP3->currentPCMFrame += framesToSkip; + pMP3->pcmFramesConsumedInMP3Frame += framesToSkip; + pMP3->pcmFramesRemainingInMP3Frame -= framesToSkip; + } + + framesToConsume = (drmp3_uint32)DRMP3_MIN(pMP3->pcmFramesRemainingInMP3Frame, framesToRead); + + /* Clamp the number of frames to read to the padding. */ + if (pMP3->totalPCMFrameCount != DRMP3_UINT64_MAX && pMP3->totalPCMFrameCount > pMP3->paddingInPCMFrames) { + if (pMP3->currentPCMFrame < (pMP3->totalPCMFrameCount - pMP3->paddingInPCMFrames)) { + drmp3_uint64 framesRemainigToPadding = (pMP3->totalPCMFrameCount - pMP3->paddingInPCMFrames) - pMP3->currentPCMFrame; + if (framesToConsume > framesRemainigToPadding) { + framesToConsume = (drmp3_uint32)framesRemainigToPadding; + } + } else { + /* We're into the padding. Abort. */ + break; + } + } + + if (pBufferOut != NULL) { + #if defined(DR_MP3_FLOAT_OUTPUT) + { + /* f32 */ + float* pFramesOutF32 = (float*)DRMP3_OFFSET_PTR(pBufferOut, sizeof(float) * totalFramesRead * pMP3->channels); + float* pFramesInF32 = (float*)DRMP3_OFFSET_PTR(&pMP3->pcmFrames[0], sizeof(float) * pMP3->pcmFramesConsumedInMP3Frame * pMP3->mp3FrameChannels); + DRMP3_COPY_MEMORY(pFramesOutF32, pFramesInF32, sizeof(float) * framesToConsume * pMP3->channels); + } + #else + { + /* s16 */ + drmp3_int16* pFramesOutS16 = (drmp3_int16*)DRMP3_OFFSET_PTR(pBufferOut, sizeof(drmp3_int16) * totalFramesRead * pMP3->channels); + drmp3_int16* pFramesInS16 = (drmp3_int16*)DRMP3_OFFSET_PTR(&pMP3->pcmFrames[0], sizeof(drmp3_int16) * pMP3->pcmFramesConsumedInMP3Frame * pMP3->mp3FrameChannels); + DRMP3_COPY_MEMORY(pFramesOutS16, pFramesInS16, sizeof(drmp3_int16) * framesToConsume * pMP3->channels); + } + #endif + } + + pMP3->currentPCMFrame += framesToConsume; + pMP3->pcmFramesConsumedInMP3Frame += framesToConsume; + pMP3->pcmFramesRemainingInMP3Frame -= framesToConsume; + totalFramesRead += framesToConsume; + framesToRead -= framesToConsume; + + if (framesToRead == 0) { + break; + } + + /* If the cursor is already at the padding we need to abort. */ + if (pMP3->totalPCMFrameCount != DRMP3_UINT64_MAX && pMP3->totalPCMFrameCount > pMP3->paddingInPCMFrames && pMP3->currentPCMFrame >= (pMP3->totalPCMFrameCount - pMP3->paddingInPCMFrames)) { + break; + } + + DRMP3_ASSERT(pMP3->pcmFramesRemainingInMP3Frame == 0); + + /* At this point we have exhausted our in-memory buffer so we need to re-fill. */ + if (drmp3_decode_next_frame(pMP3) == 0) { + break; + } + } + + return totalFramesRead; +} + + +DRMP3_API drmp3_uint64 drmp3_read_pcm_frames_f32(drmp3* pMP3, drmp3_uint64 framesToRead, float* pBufferOut) +{ + if (pMP3 == NULL || pMP3->onRead == NULL) { + return 0; + } + +#if defined(DR_MP3_FLOAT_OUTPUT) + /* Fast path. No conversion required. */ + return drmp3_read_pcm_frames_raw(pMP3, framesToRead, pBufferOut); +#else + /* Slow path. Convert from s16 to f32. */ + { + drmp3_int16 pTempS16[8192]; + drmp3_uint64 totalPCMFramesRead = 0; + + while (totalPCMFramesRead < framesToRead) { + drmp3_uint64 framesJustRead; + drmp3_uint64 framesRemaining = framesToRead - totalPCMFramesRead; + drmp3_uint64 framesToReadNow = DRMP3_COUNTOF(pTempS16) / pMP3->channels; + if (framesToReadNow > framesRemaining) { + framesToReadNow = framesRemaining; + } + + framesJustRead = drmp3_read_pcm_frames_raw(pMP3, framesToReadNow, pTempS16); + if (framesJustRead == 0) { + break; + } + + drmp3_s16_to_f32((float*)DRMP3_OFFSET_PTR(pBufferOut, sizeof(float) * totalPCMFramesRead * pMP3->channels), pTempS16, framesJustRead * pMP3->channels); + totalPCMFramesRead += framesJustRead; + } + + return totalPCMFramesRead; + } +#endif +} + +DRMP3_API drmp3_uint64 drmp3_read_pcm_frames_s16(drmp3* pMP3, drmp3_uint64 framesToRead, drmp3_int16* pBufferOut) +{ + if (pMP3 == NULL || pMP3->onRead == NULL) { + return 0; + } + +#if !defined(DR_MP3_FLOAT_OUTPUT) + /* Fast path. No conversion required. */ + return drmp3_read_pcm_frames_raw(pMP3, framesToRead, pBufferOut); +#else + /* Slow path. Convert from f32 to s16. */ + { + float pTempF32[4096]; + drmp3_uint64 totalPCMFramesRead = 0; + + while (totalPCMFramesRead < framesToRead) { + drmp3_uint64 framesJustRead; + drmp3_uint64 framesRemaining = framesToRead - totalPCMFramesRead; + drmp3_uint64 framesToReadNow = DRMP3_COUNTOF(pTempF32) / pMP3->channels; + if (framesToReadNow > framesRemaining) { + framesToReadNow = framesRemaining; + } + + framesJustRead = drmp3_read_pcm_frames_raw(pMP3, framesToReadNow, pTempF32); + if (framesJustRead == 0) { + break; + } + + drmp3_f32_to_s16((drmp3_int16*)DRMP3_OFFSET_PTR(pBufferOut, sizeof(drmp3_int16) * totalPCMFramesRead * pMP3->channels), pTempF32, framesJustRead * pMP3->channels); + totalPCMFramesRead += framesJustRead; + } + + return totalPCMFramesRead; + } +#endif +} + +static void drmp3_reset(drmp3* pMP3) +{ + DRMP3_ASSERT(pMP3 != NULL); + + pMP3->pcmFramesConsumedInMP3Frame = 0; + pMP3->pcmFramesRemainingInMP3Frame = 0; + pMP3->currentPCMFrame = 0; + pMP3->dataSize = 0; + pMP3->atEnd = DRMP3_FALSE; + drmp3dec_init(&pMP3->decoder); +} + +static drmp3_bool32 drmp3_seek_to_start_of_stream(drmp3* pMP3) +{ + DRMP3_ASSERT(pMP3 != NULL); + DRMP3_ASSERT(pMP3->onSeek != NULL); + + /* Seek to the start of the stream to begin with. */ + if (!drmp3__on_seek_64(pMP3, pMP3->streamStartOffset, DRMP3_SEEK_SET)) { + return DRMP3_FALSE; + } + + /* Clear any cached data. */ + drmp3_reset(pMP3); + return DRMP3_TRUE; +} + + +static drmp3_bool32 drmp3_seek_forward_by_pcm_frames__brute_force(drmp3* pMP3, drmp3_uint64 frameOffset) +{ + drmp3_uint64 framesRead; + + /* + Just using a dumb read-and-discard for now. What would be nice is to parse only the header of the MP3 frame, and then skip over leading + frames without spending the time doing a full decode. I cannot see an easy way to do this in minimp3, however, so it may involve some + kind of manual processing. + */ +#if defined(DR_MP3_FLOAT_OUTPUT) + framesRead = drmp3_read_pcm_frames_f32(pMP3, frameOffset, NULL); +#else + framesRead = drmp3_read_pcm_frames_s16(pMP3, frameOffset, NULL); +#endif + if (framesRead != frameOffset) { + return DRMP3_FALSE; + } + + return DRMP3_TRUE; +} + +static drmp3_bool32 drmp3_seek_to_pcm_frame__brute_force(drmp3* pMP3, drmp3_uint64 frameIndex) +{ + DRMP3_ASSERT(pMP3 != NULL); + + if (frameIndex == pMP3->currentPCMFrame) { + return DRMP3_TRUE; + } + + /* + If we're moving foward we just read from where we're at. Otherwise we need to move back to the start of + the stream and read from the beginning. + */ + if (frameIndex < pMP3->currentPCMFrame) { + /* Moving backward. Move to the start of the stream and then move forward. */ + if (!drmp3_seek_to_start_of_stream(pMP3)) { + return DRMP3_FALSE; + } + } + + DRMP3_ASSERT(frameIndex >= pMP3->currentPCMFrame); + return drmp3_seek_forward_by_pcm_frames__brute_force(pMP3, (frameIndex - pMP3->currentPCMFrame)); +} + +static drmp3_bool32 drmp3_find_closest_seek_point(drmp3* pMP3, drmp3_uint64 frameIndex, drmp3_uint32* pSeekPointIndex) +{ + drmp3_uint32 iSeekPoint; + + DRMP3_ASSERT(pSeekPointIndex != NULL); + + *pSeekPointIndex = 0; + + if (frameIndex < pMP3->pSeekPoints[0].pcmFrameIndex) { + return DRMP3_FALSE; + } + + /* Linear search for simplicity to begin with while I'm getting this thing working. Once it's all working change this to a binary search. */ + for (iSeekPoint = 0; iSeekPoint < pMP3->seekPointCount; ++iSeekPoint) { + if (pMP3->pSeekPoints[iSeekPoint].pcmFrameIndex > frameIndex) { + break; /* Found it. */ + } + + *pSeekPointIndex = iSeekPoint; + } + + return DRMP3_TRUE; +} + +static drmp3_bool32 drmp3_seek_to_pcm_frame__seek_table(drmp3* pMP3, drmp3_uint64 frameIndex) +{ + drmp3_seek_point seekPoint; + drmp3_uint32 priorSeekPointIndex; + drmp3_uint16 iMP3Frame; + drmp3_uint64 leftoverFrames; + + DRMP3_ASSERT(pMP3 != NULL); + DRMP3_ASSERT(pMP3->pSeekPoints != NULL); + DRMP3_ASSERT(pMP3->seekPointCount > 0); + + /* If there is no prior seekpoint it means the target PCM frame comes before the first seek point. Just assume a seekpoint at the start of the file in this case. */ + if (drmp3_find_closest_seek_point(pMP3, frameIndex, &priorSeekPointIndex)) { + seekPoint = pMP3->pSeekPoints[priorSeekPointIndex]; + } else { + seekPoint.seekPosInBytes = 0; + seekPoint.pcmFrameIndex = 0; + seekPoint.mp3FramesToDiscard = 0; + seekPoint.pcmFramesToDiscard = 0; + } + + /* First thing to do is seek to the first byte of the relevant MP3 frame. */ + if (!drmp3__on_seek_64(pMP3, seekPoint.seekPosInBytes, DRMP3_SEEK_SET)) { + return DRMP3_FALSE; /* Failed to seek. */ + } + + /* Clear any cached data. */ + drmp3_reset(pMP3); + + /* Whole MP3 frames need to be discarded first. */ + for (iMP3Frame = 0; iMP3Frame < seekPoint.mp3FramesToDiscard; ++iMP3Frame) { + drmp3_uint32 pcmFramesRead; + drmp3d_sample_t* pPCMFrames; + + /* Pass in non-null for the last frame because we want to ensure the sample rate converter is preloaded correctly. */ + pPCMFrames = NULL; + if (iMP3Frame == seekPoint.mp3FramesToDiscard-1) { + pPCMFrames = (drmp3d_sample_t*)pMP3->pcmFrames; + } + + /* We first need to decode the next frame. */ + pcmFramesRead = drmp3_decode_next_frame_ex(pMP3, pPCMFrames, NULL, NULL); + if (pcmFramesRead == 0) { + return DRMP3_FALSE; + } + } + + /* We seeked to an MP3 frame in the raw stream so we need to make sure the current PCM frame is set correctly. */ + pMP3->currentPCMFrame = seekPoint.pcmFrameIndex - seekPoint.pcmFramesToDiscard; + + /* + Now at this point we can follow the same process as the brute force technique where we just skip over unnecessary MP3 frames and then + read-and-discard at least 2 whole MP3 frames. + */ + leftoverFrames = frameIndex - pMP3->currentPCMFrame; + return drmp3_seek_forward_by_pcm_frames__brute_force(pMP3, leftoverFrames); +} + +DRMP3_API drmp3_bool32 drmp3_seek_to_pcm_frame(drmp3* pMP3, drmp3_uint64 frameIndex) +{ + if (pMP3 == NULL || pMP3->onSeek == NULL) { + return DRMP3_FALSE; + } + + if (frameIndex == 0) { + return drmp3_seek_to_start_of_stream(pMP3); + } + + /* Use the seek table if we have one. */ + if (pMP3->pSeekPoints != NULL && pMP3->seekPointCount > 0) { + return drmp3_seek_to_pcm_frame__seek_table(pMP3, frameIndex); + } else { + return drmp3_seek_to_pcm_frame__brute_force(pMP3, frameIndex); + } +} + +DRMP3_API drmp3_bool32 drmp3_get_mp3_and_pcm_frame_count(drmp3* pMP3, drmp3_uint64* pMP3FrameCount, drmp3_uint64* pPCMFrameCount) +{ + drmp3_uint64 currentPCMFrame; + drmp3_uint64 totalPCMFrameCount; + drmp3_uint64 totalMP3FrameCount; + + if (pMP3 == NULL) { + return DRMP3_FALSE; + } + + /* + The way this works is we move back to the start of the stream, iterate over each MP3 frame and calculate the frame count based + on our output sample rate, the seek back to the PCM frame we were sitting on before calling this function. + */ + + /* The stream must support seeking for this to work. */ + if (pMP3->onSeek == NULL) { + return DRMP3_FALSE; + } + + /* We'll need to seek back to where we were, so grab the PCM frame we're currently sitting on so we can restore later. */ + currentPCMFrame = pMP3->currentPCMFrame; + + if (!drmp3_seek_to_start_of_stream(pMP3)) { + return DRMP3_FALSE; + } + + totalPCMFrameCount = 0; + totalMP3FrameCount = 0; + + for (;;) { + drmp3_uint32 pcmFramesInCurrentMP3Frame; + + pcmFramesInCurrentMP3Frame = drmp3_decode_next_frame_ex(pMP3, NULL, NULL, NULL); + if (pcmFramesInCurrentMP3Frame == 0) { + break; + } + + totalPCMFrameCount += pcmFramesInCurrentMP3Frame; + totalMP3FrameCount += 1; + } + + /* Finally, we need to seek back to where we were. */ + if (!drmp3_seek_to_start_of_stream(pMP3)) { + return DRMP3_FALSE; + } + + if (!drmp3_seek_to_pcm_frame(pMP3, currentPCMFrame)) { + return DRMP3_FALSE; + } + + if (pMP3FrameCount != NULL) { + *pMP3FrameCount = totalMP3FrameCount; + } + if (pPCMFrameCount != NULL) { + *pPCMFrameCount = totalPCMFrameCount; + } + + return DRMP3_TRUE; +} + +DRMP3_API drmp3_uint64 drmp3_get_pcm_frame_count(drmp3* pMP3) +{ + drmp3_uint64 totalPCMFrameCount; + + if (pMP3 == NULL) { + return 0; + } + + if (pMP3->totalPCMFrameCount != DRMP3_UINT64_MAX) { + totalPCMFrameCount = pMP3->totalPCMFrameCount; + + if (totalPCMFrameCount >= pMP3->delayInPCMFrames) { + totalPCMFrameCount -= pMP3->delayInPCMFrames; + } else { + /* The delay is greater than the frame count reported by the Xing/Info tag. Assume it's invalid and ignore. */ + } + + if (totalPCMFrameCount >= pMP3->paddingInPCMFrames) { + totalPCMFrameCount -= pMP3->paddingInPCMFrames; + } else { + /* The padding is greater than the frame count reported by the Xing/Info tag. Assume it's invalid and ignore. */ + } + + return totalPCMFrameCount; + } else { + /* Unknown frame count. Need to calculate it. */ + if (!drmp3_get_mp3_and_pcm_frame_count(pMP3, NULL, &totalPCMFrameCount)) { + return 0; + } + + return totalPCMFrameCount; + } +} + +DRMP3_API drmp3_uint64 drmp3_get_mp3_frame_count(drmp3* pMP3) +{ + drmp3_uint64 totalMP3FrameCount; + if (!drmp3_get_mp3_and_pcm_frame_count(pMP3, &totalMP3FrameCount, NULL)) { + return 0; + } + + return totalMP3FrameCount; +} + +static void drmp3__accumulate_running_pcm_frame_count(drmp3* pMP3, drmp3_uint32 pcmFrameCountIn, drmp3_uint64* pRunningPCMFrameCount, float* pRunningPCMFrameCountFractionalPart) +{ + float srcRatio; + float pcmFrameCountOutF; + drmp3_uint32 pcmFrameCountOut; + + srcRatio = (float)pMP3->mp3FrameSampleRate / (float)pMP3->sampleRate; + DRMP3_ASSERT(srcRatio > 0); + + pcmFrameCountOutF = *pRunningPCMFrameCountFractionalPart + (pcmFrameCountIn / srcRatio); + pcmFrameCountOut = (drmp3_uint32)pcmFrameCountOutF; + *pRunningPCMFrameCountFractionalPart = pcmFrameCountOutF - pcmFrameCountOut; + *pRunningPCMFrameCount += pcmFrameCountOut; +} + +typedef struct +{ + drmp3_uint64 bytePos; + drmp3_uint64 pcmFrameIndex; /* <-- After sample rate conversion. */ +} drmp3__seeking_mp3_frame_info; + +DRMP3_API drmp3_bool32 drmp3_calculate_seek_points(drmp3* pMP3, drmp3_uint32* pSeekPointCount, drmp3_seek_point* pSeekPoints) +{ + drmp3_uint32 seekPointCount; + drmp3_uint64 currentPCMFrame; + drmp3_uint64 totalMP3FrameCount; + drmp3_uint64 totalPCMFrameCount; + + if (pMP3 == NULL || pSeekPointCount == NULL || pSeekPoints == NULL) { + return DRMP3_FALSE; /* Invalid args. */ + } + + seekPointCount = *pSeekPointCount; + if (seekPointCount == 0) { + return DRMP3_FALSE; /* The client has requested no seek points. Consider this to be invalid arguments since the client has probably not intended this. */ + } + + /* We'll need to seek back to the current sample after calculating the seekpoints so we need to go ahead and grab the current location at the top. */ + currentPCMFrame = pMP3->currentPCMFrame; + + /* We never do more than the total number of MP3 frames and we limit it to 32-bits. */ + if (!drmp3_get_mp3_and_pcm_frame_count(pMP3, &totalMP3FrameCount, &totalPCMFrameCount)) { + return DRMP3_FALSE; + } + + /* If there's less than DRMP3_SEEK_LEADING_MP3_FRAMES+1 frames we just report 1 seek point which will be the very start of the stream. */ + if (totalMP3FrameCount < DRMP3_SEEK_LEADING_MP3_FRAMES+1) { + seekPointCount = 1; + pSeekPoints[0].seekPosInBytes = 0; + pSeekPoints[0].pcmFrameIndex = 0; + pSeekPoints[0].mp3FramesToDiscard = 0; + pSeekPoints[0].pcmFramesToDiscard = 0; + } else { + drmp3_uint64 pcmFramesBetweenSeekPoints; + drmp3__seeking_mp3_frame_info mp3FrameInfo[DRMP3_SEEK_LEADING_MP3_FRAMES+1]; + drmp3_uint64 runningPCMFrameCount = 0; + float runningPCMFrameCountFractionalPart = 0; + drmp3_uint64 nextTargetPCMFrame; + drmp3_uint32 iMP3Frame; + drmp3_uint32 iSeekPoint; + + if (seekPointCount > totalMP3FrameCount-1) { + seekPointCount = (drmp3_uint32)totalMP3FrameCount-1; + } + + pcmFramesBetweenSeekPoints = totalPCMFrameCount / (seekPointCount+1); + + /* + Here is where we actually calculate the seek points. We need to start by moving the start of the stream. We then enumerate over each + MP3 frame. + */ + if (!drmp3_seek_to_start_of_stream(pMP3)) { + return DRMP3_FALSE; + } + + /* + We need to cache the byte positions of the previous MP3 frames. As a new MP3 frame is iterated, we cycle the byte positions in this + array. The value in the first item in this array is the byte position that will be reported in the next seek point. + */ + + /* We need to initialize the array of MP3 byte positions for the leading MP3 frames. */ + for (iMP3Frame = 0; iMP3Frame < DRMP3_SEEK_LEADING_MP3_FRAMES+1; ++iMP3Frame) { + drmp3_uint32 pcmFramesInCurrentMP3FrameIn; + + /* The byte position of the next frame will be the stream's cursor position, minus whatever is sitting in the buffer. */ + DRMP3_ASSERT(pMP3->streamCursor >= pMP3->dataSize); + mp3FrameInfo[iMP3Frame].bytePos = pMP3->streamCursor - pMP3->dataSize; + mp3FrameInfo[iMP3Frame].pcmFrameIndex = runningPCMFrameCount; + + /* We need to get information about this frame so we can know how many samples it contained. */ + pcmFramesInCurrentMP3FrameIn = drmp3_decode_next_frame_ex(pMP3, NULL, NULL, NULL); + if (pcmFramesInCurrentMP3FrameIn == 0) { + return DRMP3_FALSE; /* This should never happen. */ + } + + drmp3__accumulate_running_pcm_frame_count(pMP3, pcmFramesInCurrentMP3FrameIn, &runningPCMFrameCount, &runningPCMFrameCountFractionalPart); + } + + /* + At this point we will have extracted the byte positions of the leading MP3 frames. We can now start iterating over each seek point and + calculate them. + */ + nextTargetPCMFrame = 0; + for (iSeekPoint = 0; iSeekPoint < seekPointCount; ++iSeekPoint) { + nextTargetPCMFrame += pcmFramesBetweenSeekPoints; + + for (;;) { + if (nextTargetPCMFrame < runningPCMFrameCount) { + /* The next seek point is in the current MP3 frame. */ + pSeekPoints[iSeekPoint].seekPosInBytes = mp3FrameInfo[0].bytePos; + pSeekPoints[iSeekPoint].pcmFrameIndex = nextTargetPCMFrame; + pSeekPoints[iSeekPoint].mp3FramesToDiscard = DRMP3_SEEK_LEADING_MP3_FRAMES; + pSeekPoints[iSeekPoint].pcmFramesToDiscard = (drmp3_uint16)(nextTargetPCMFrame - mp3FrameInfo[DRMP3_SEEK_LEADING_MP3_FRAMES-1].pcmFrameIndex); + break; + } else { + size_t i; + drmp3_uint32 pcmFramesInCurrentMP3FrameIn; + + /* + The next seek point is not in the current MP3 frame, so continue on to the next one. The first thing to do is cycle the cached + MP3 frame info. + */ + for (i = 0; i < DRMP3_COUNTOF(mp3FrameInfo)-1; ++i) { + mp3FrameInfo[i] = mp3FrameInfo[i+1]; + } + + /* Cache previous MP3 frame info. */ + mp3FrameInfo[DRMP3_COUNTOF(mp3FrameInfo)-1].bytePos = pMP3->streamCursor - pMP3->dataSize; + mp3FrameInfo[DRMP3_COUNTOF(mp3FrameInfo)-1].pcmFrameIndex = runningPCMFrameCount; + + /* + Go to the next MP3 frame. This shouldn't ever fail, but just in case it does we just set the seek point and break. If it happens, it + should only ever do it for the last seek point. + */ + pcmFramesInCurrentMP3FrameIn = drmp3_decode_next_frame_ex(pMP3, NULL, NULL, NULL); + if (pcmFramesInCurrentMP3FrameIn == 0) { + pSeekPoints[iSeekPoint].seekPosInBytes = mp3FrameInfo[0].bytePos; + pSeekPoints[iSeekPoint].pcmFrameIndex = nextTargetPCMFrame; + pSeekPoints[iSeekPoint].mp3FramesToDiscard = DRMP3_SEEK_LEADING_MP3_FRAMES; + pSeekPoints[iSeekPoint].pcmFramesToDiscard = (drmp3_uint16)(nextTargetPCMFrame - mp3FrameInfo[DRMP3_SEEK_LEADING_MP3_FRAMES-1].pcmFrameIndex); + break; + } + + drmp3__accumulate_running_pcm_frame_count(pMP3, pcmFramesInCurrentMP3FrameIn, &runningPCMFrameCount, &runningPCMFrameCountFractionalPart); + } + } + } + + /* Finally, we need to seek back to where we were. */ + if (!drmp3_seek_to_start_of_stream(pMP3)) { + return DRMP3_FALSE; + } + if (!drmp3_seek_to_pcm_frame(pMP3, currentPCMFrame)) { + return DRMP3_FALSE; + } + } + + *pSeekPointCount = seekPointCount; + return DRMP3_TRUE; +} + +DRMP3_API drmp3_bool32 drmp3_bind_seek_table(drmp3* pMP3, drmp3_uint32 seekPointCount, drmp3_seek_point* pSeekPoints) +{ + if (pMP3 == NULL) { + return DRMP3_FALSE; + } + + if (seekPointCount == 0 || pSeekPoints == NULL) { + /* Unbinding. */ + pMP3->seekPointCount = 0; + pMP3->pSeekPoints = NULL; + } else { + /* Binding. */ + pMP3->seekPointCount = seekPointCount; + pMP3->pSeekPoints = pSeekPoints; + } + + return DRMP3_TRUE; +} + + +static float* drmp3__full_read_and_close_f32(drmp3* pMP3, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount) +{ + drmp3_uint64 totalFramesRead = 0; + drmp3_uint64 framesCapacity = 0; + float* pFrames = NULL; + float temp[4096]; + + DRMP3_ASSERT(pMP3 != NULL); + + for (;;) { + drmp3_uint64 framesToReadRightNow = DRMP3_COUNTOF(temp) / pMP3->channels; + drmp3_uint64 framesJustRead = drmp3_read_pcm_frames_f32(pMP3, framesToReadRightNow, temp); + if (framesJustRead == 0) { + break; + } + + /* Reallocate the output buffer if there's not enough room. */ + if (framesCapacity < totalFramesRead + framesJustRead) { + drmp3_uint64 oldFramesBufferSize; + drmp3_uint64 newFramesBufferSize; + drmp3_uint64 newFramesCap; + float* pNewFrames; + + newFramesCap = framesCapacity * 2; + if (newFramesCap < totalFramesRead + framesJustRead) { + newFramesCap = totalFramesRead + framesJustRead; + } + + oldFramesBufferSize = framesCapacity * pMP3->channels * sizeof(float); + newFramesBufferSize = newFramesCap * pMP3->channels * sizeof(float); + if (newFramesBufferSize > (drmp3_uint64)DRMP3_SIZE_MAX) { + break; + } + + pNewFrames = (float*)drmp3__realloc_from_callbacks(pFrames, (size_t)newFramesBufferSize, (size_t)oldFramesBufferSize, &pMP3->allocationCallbacks); + if (pNewFrames == NULL) { + drmp3__free_from_callbacks(pFrames, &pMP3->allocationCallbacks); + break; + } + + pFrames = pNewFrames; + framesCapacity = newFramesCap; + } + + DRMP3_COPY_MEMORY(pFrames + totalFramesRead*pMP3->channels, temp, (size_t)(framesJustRead*pMP3->channels*sizeof(float))); + totalFramesRead += framesJustRead; + + /* If the number of frames we asked for is less that what we actually read it means we've reached the end. */ + if (framesJustRead != framesToReadRightNow) { + break; + } + } + + if (pConfig != NULL) { + pConfig->channels = pMP3->channels; + pConfig->sampleRate = pMP3->sampleRate; + } + + drmp3_uninit(pMP3); + + if (pTotalFrameCount) { + *pTotalFrameCount = totalFramesRead; + } + + return pFrames; +} + +static drmp3_int16* drmp3__full_read_and_close_s16(drmp3* pMP3, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount) +{ + drmp3_uint64 totalFramesRead = 0; + drmp3_uint64 framesCapacity = 0; + drmp3_int16* pFrames = NULL; + drmp3_int16 temp[4096]; + + DRMP3_ASSERT(pMP3 != NULL); + + for (;;) { + drmp3_uint64 framesToReadRightNow = DRMP3_COUNTOF(temp) / pMP3->channels; + drmp3_uint64 framesJustRead = drmp3_read_pcm_frames_s16(pMP3, framesToReadRightNow, temp); + if (framesJustRead == 0) { + break; + } + + /* Reallocate the output buffer if there's not enough room. */ + if (framesCapacity < totalFramesRead + framesJustRead) { + drmp3_uint64 newFramesBufferSize; + drmp3_uint64 oldFramesBufferSize; + drmp3_uint64 newFramesCap; + drmp3_int16* pNewFrames; + + newFramesCap = framesCapacity * 2; + if (newFramesCap < totalFramesRead + framesJustRead) { + newFramesCap = totalFramesRead + framesJustRead; + } + + oldFramesBufferSize = framesCapacity * pMP3->channels * sizeof(drmp3_int16); + newFramesBufferSize = newFramesCap * pMP3->channels * sizeof(drmp3_int16); + if (newFramesBufferSize > (drmp3_uint64)DRMP3_SIZE_MAX) { + break; + } + + pNewFrames = (drmp3_int16*)drmp3__realloc_from_callbacks(pFrames, (size_t)newFramesBufferSize, (size_t)oldFramesBufferSize, &pMP3->allocationCallbacks); + if (pNewFrames == NULL) { + drmp3__free_from_callbacks(pFrames, &pMP3->allocationCallbacks); + break; + } + + pFrames = pNewFrames; + framesCapacity = newFramesCap; + } + + DRMP3_COPY_MEMORY(pFrames + totalFramesRead*pMP3->channels, temp, (size_t)(framesJustRead*pMP3->channels*sizeof(drmp3_int16))); + totalFramesRead += framesJustRead; + + /* If the number of frames we asked for is less that what we actually read it means we've reached the end. */ + if (framesJustRead != framesToReadRightNow) { + break; + } + } + + if (pConfig != NULL) { + pConfig->channels = pMP3->channels; + pConfig->sampleRate = pMP3->sampleRate; + } + + drmp3_uninit(pMP3); + + if (pTotalFrameCount) { + *pTotalFrameCount = totalFramesRead; + } + + return pFrames; +} + + +DRMP3_API float* drmp3_open_and_read_pcm_frames_f32(drmp3_read_proc onRead, drmp3_seek_proc onSeek, drmp3_tell_proc onTell, void* pUserData, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks) +{ + drmp3 mp3; + if (!drmp3_init(&mp3, onRead, onSeek, onTell, NULL, pUserData, pAllocationCallbacks)) { + return NULL; + } + + return drmp3__full_read_and_close_f32(&mp3, pConfig, pTotalFrameCount); +} + +DRMP3_API drmp3_int16* drmp3_open_and_read_pcm_frames_s16(drmp3_read_proc onRead, drmp3_seek_proc onSeek, drmp3_tell_proc onTell, void* pUserData, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks) +{ + drmp3 mp3; + if (!drmp3_init(&mp3, onRead, onSeek, onTell, NULL, pUserData, pAllocationCallbacks)) { + return NULL; + } + + return drmp3__full_read_and_close_s16(&mp3, pConfig, pTotalFrameCount); +} + + +DRMP3_API float* drmp3_open_memory_and_read_pcm_frames_f32(const void* pData, size_t dataSize, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks) +{ + drmp3 mp3; + if (!drmp3_init_memory(&mp3, pData, dataSize, pAllocationCallbacks)) { + return NULL; + } + + return drmp3__full_read_and_close_f32(&mp3, pConfig, pTotalFrameCount); +} + +DRMP3_API drmp3_int16* drmp3_open_memory_and_read_pcm_frames_s16(const void* pData, size_t dataSize, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks) +{ + drmp3 mp3; + if (!drmp3_init_memory(&mp3, pData, dataSize, pAllocationCallbacks)) { + return NULL; + } + + return drmp3__full_read_and_close_s16(&mp3, pConfig, pTotalFrameCount); +} + + +#ifndef DR_MP3_NO_STDIO +DRMP3_API float* drmp3_open_file_and_read_pcm_frames_f32(const char* filePath, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks) +{ + drmp3 mp3; + if (!drmp3_init_file(&mp3, filePath, pAllocationCallbacks)) { + return NULL; + } + + return drmp3__full_read_and_close_f32(&mp3, pConfig, pTotalFrameCount); +} + +DRMP3_API drmp3_int16* drmp3_open_file_and_read_pcm_frames_s16(const char* filePath, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks) +{ + drmp3 mp3; + if (!drmp3_init_file(&mp3, filePath, pAllocationCallbacks)) { + return NULL; + } + + return drmp3__full_read_and_close_s16(&mp3, pConfig, pTotalFrameCount); +} +#endif + +DRMP3_API void* drmp3_malloc(size_t sz, const drmp3_allocation_callbacks* pAllocationCallbacks) +{ + if (pAllocationCallbacks != NULL) { + return drmp3__malloc_from_callbacks(sz, pAllocationCallbacks); + } else { + return drmp3__malloc_default(sz, NULL); + } +} + +DRMP3_API void drmp3_free(void* p, const drmp3_allocation_callbacks* pAllocationCallbacks) +{ + if (pAllocationCallbacks != NULL) { + drmp3__free_from_callbacks(p, pAllocationCallbacks); + } else { + drmp3__free_default(p, NULL); + } +} + +#endif /* dr_mp3_c */ +#endif /*DR_MP3_IMPLEMENTATION*/ + +/* +DIFFERENCES BETWEEN minimp3 AND dr_mp3 +====================================== +- First, keep in mind that minimp3 (https://github.com/lieff/minimp3) is where all the real work was done. All of the + code relating to the actual decoding remains mostly unmodified, apart from some namespacing changes. +- dr_mp3 adds a pulling style API which allows you to deliver raw data via callbacks. So, rather than pushing data + to the decoder, the decoder _pulls_ data from your callbacks. +- In addition to callbacks, a decoder can be initialized from a block of memory and a file. +- The dr_mp3 pull API reads PCM frames rather than whole MP3 frames. +- dr_mp3 adds convenience APIs for opening and decoding entire files in one go. +- dr_mp3 is fully namespaced, including the implementation section, which is more suitable when compiling projects + as a single translation unit (aka unity builds). At the time of writing this, a unity build is not possible when + using minimp3 in conjunction with stb_vorbis. dr_mp3 addresses this. +*/ + +/* +REVISION HISTORY +================ +v0.7.2 - TBD + - Reduce stack space to improve robustness on embedded systems. + - Fix a compilation error with MSVC Clang toolset relating to cpuid. + +v0.7.1 - 2025-09-10 + - Silence a warning with GCC. + - Fix an error with the NXDK build. + - Fix a decoding inconsistency when seeking. Prior to this change, reading to the end of the stream immediately after initializing will result in a different number of samples read than if the stream is seeked to the start and read to the end. + +v0.7.0 - 2025-07-23 + - The old `DRMP3_IMPLEMENTATION` has been removed. Use `DR_MP3_IMPLEMENTATION` instead. The reason for this change is that in the future everything will eventually be using the underscored naming convention in the future, so `drmp3` will become `dr_mp3`. + - API CHANGE: Seek origins have been renamed to match the naming convention used by dr_wav and my other libraries. + - drmp3_seek_origin_start -> DRMP3_SEEK_SET + - drmp3_seek_origin_current -> DRMP3_SEEK_CUR + - DRMP3_SEEK_END (new) + - API CHANGE: Add DRMP3_SEEK_END as a seek origin for the seek callback. This is required for detection of ID3v1 and APE tags. + - API CHANGE: Add onTell callback to `drmp3_init()`. This is needed in order to track the location of ID3v1 and APE tags. + - API CHANGE: Add onMeta callback to `drmp3_init()`. This is used for reporting tag data back to the caller. Currently this only reports the raw tag data which means applications need to parse the data themselves. + - API CHANGE: Rename `drmp3dec_frame_info.hz` to `drmp3dec_frame_info.sample_rate`. + - Add detection of ID3v2, ID3v1, APE and Xing/VBRI tags. This should fix errors with some files where the decoder was reading tags as audio data. + - Delay and padding samples from LAME tags are now handled. + - Fix compilation for AIX OS. + +v0.6.40 - 2024-12-17 + - Improve detection of ARM64EC + +v0.6.39 - 2024-02-27 + - Fix a Wdouble-promotion warning. + +v0.6.38 - 2023-11-02 + - Fix build for ARMv6-M. + +v0.6.37 - 2023-07-07 + - Silence a static analysis warning. + +v0.6.36 - 2023-06-17 + - Fix an incorrect date in revision history. No functional change. + +v0.6.35 - 2023-05-22 + - Minor code restructure. No functional change. + +v0.6.34 - 2022-09-17 + - Fix compilation with DJGPP. + - Fix compilation when compiling with x86 with no SSE2. + - Remove an unnecessary variable from the drmp3 structure. + +v0.6.33 - 2022-04-10 + - Fix compilation error with the MSVC ARM64 build. + - Fix compilation error on older versions of GCC. + - Remove some unused functions. + +v0.6.32 - 2021-12-11 + - Fix a warning with Clang. + +v0.6.31 - 2021-08-22 + - Fix a bug when loading from memory. + +v0.6.30 - 2021-08-16 + - Silence some warnings. + - Replace memory operations with DRMP3_* macros. + +v0.6.29 - 2021-08-08 + - Bring up to date with minimp3. + +v0.6.28 - 2021-07-31 + - Fix platform detection for ARM64. + - Fix a compilation error with C89. + +v0.6.27 - 2021-02-21 + - Fix a warning due to referencing _MSC_VER when it is undefined. + +v0.6.26 - 2021-01-31 + - Bring up to date with minimp3. + +v0.6.25 - 2020-12-26 + - Remove DRMP3_DEFAULT_CHANNELS and DRMP3_DEFAULT_SAMPLE_RATE which are leftovers from some removed APIs. + +v0.6.24 - 2020-12-07 + - Fix a typo in version date for 0.6.23. + +v0.6.23 - 2020-12-03 + - Fix an error where a file can be closed twice when initialization of the decoder fails. + +v0.6.22 - 2020-12-02 + - Fix an error where it's possible for a file handle to be left open when initialization of the decoder fails. + +v0.6.21 - 2020-11-28 + - Bring up to date with minimp3. + +v0.6.20 - 2020-11-21 + - Fix compilation with OpenWatcom. + +v0.6.19 - 2020-11-13 + - Minor code clean up. + +v0.6.18 - 2020-11-01 + - Improve compiler support for older versions of GCC. + +v0.6.17 - 2020-09-28 + - Bring up to date with minimp3. + +v0.6.16 - 2020-08-02 + - Simplify sized types. + +v0.6.15 - 2020-07-25 + - Fix a compilation warning. + +v0.6.14 - 2020-07-23 + - Fix undefined behaviour with memmove(). + +v0.6.13 - 2020-07-06 + - Fix a bug when converting from s16 to f32 in drmp3_read_pcm_frames_f32(). + +v0.6.12 - 2020-06-23 + - Add include guard for the implementation section. + +v0.6.11 - 2020-05-26 + - Fix use of uninitialized variable error. + +v0.6.10 - 2020-05-16 + - Add compile-time and run-time version querying. + - DRMP3_VERSION_MINOR + - DRMP3_VERSION_MAJOR + - DRMP3_VERSION_REVISION + - DRMP3_VERSION_STRING + - drmp3_version() + - drmp3_version_string() + +v0.6.9 - 2020-04-30 + - Change the `pcm` parameter of drmp3dec_decode_frame() to a `const drmp3_uint8*` for consistency with internal APIs. + +v0.6.8 - 2020-04-26 + - Optimizations to decoding when initializing from memory. + +v0.6.7 - 2020-04-25 + - Fix a compilation error with DR_MP3_NO_STDIO + - Optimization to decoding by reducing some data movement. + +v0.6.6 - 2020-04-23 + - Fix a minor bug with the running PCM frame counter. + +v0.6.5 - 2020-04-19 + - Fix compilation error on ARM builds. + +v0.6.4 - 2020-04-19 + - Bring up to date with changes to minimp3. + +v0.6.3 - 2020-04-13 + - Fix some pedantic warnings. + +v0.6.2 - 2020-04-10 + - Fix a crash in drmp3_open_*_and_read_pcm_frames_*() if the output config object is NULL. + +v0.6.1 - 2020-04-05 + - Fix warnings. + +v0.6.0 - 2020-04-04 + - API CHANGE: Remove the pConfig parameter from the following APIs: + - drmp3_init() + - drmp3_init_memory() + - drmp3_init_file() + - Add drmp3_init_file_w() for opening a file from a wchar_t encoded path. + +v0.5.6 - 2020-02-12 + - Bring up to date with minimp3. + +v0.5.5 - 2020-01-29 + - Fix a memory allocation bug in high level s16 decoding APIs. + +v0.5.4 - 2019-12-02 + - Fix a possible null pointer dereference when using custom memory allocators for realloc(). + +v0.5.3 - 2019-11-14 + - Fix typos in documentation. + +v0.5.2 - 2019-11-02 + - Bring up to date with minimp3. + +v0.5.1 - 2019-10-08 + - Fix a warning with GCC. + +v0.5.0 - 2019-10-07 + - API CHANGE: Add support for user defined memory allocation routines. This system allows the program to specify their own memory allocation + routines with a user data pointer for client-specific contextual data. This adds an extra parameter to the end of the following APIs: + - drmp3_init() + - drmp3_init_file() + - drmp3_init_memory() + - drmp3_open_and_read_pcm_frames_f32() + - drmp3_open_and_read_pcm_frames_s16() + - drmp3_open_memory_and_read_pcm_frames_f32() + - drmp3_open_memory_and_read_pcm_frames_s16() + - drmp3_open_file_and_read_pcm_frames_f32() + - drmp3_open_file_and_read_pcm_frames_s16() + - API CHANGE: Renamed the following APIs: + - drmp3_open_and_read_f32() -> drmp3_open_and_read_pcm_frames_f32() + - drmp3_open_and_read_s16() -> drmp3_open_and_read_pcm_frames_s16() + - drmp3_open_memory_and_read_f32() -> drmp3_open_memory_and_read_pcm_frames_f32() + - drmp3_open_memory_and_read_s16() -> drmp3_open_memory_and_read_pcm_frames_s16() + - drmp3_open_file_and_read_f32() -> drmp3_open_file_and_read_pcm_frames_f32() + - drmp3_open_file_and_read_s16() -> drmp3_open_file_and_read_pcm_frames_s16() + +v0.4.7 - 2019-07-28 + - Fix a compiler error. + +v0.4.6 - 2019-06-14 + - Fix a compiler error. + +v0.4.5 - 2019-06-06 + - Bring up to date with minimp3. + +v0.4.4 - 2019-05-06 + - Fixes to the VC6 build. + +v0.4.3 - 2019-05-05 + - Use the channel count and/or sample rate of the first MP3 frame instead of DRMP3_DEFAULT_CHANNELS and + DRMP3_DEFAULT_SAMPLE_RATE when they are set to 0. To use the old behaviour, just set the relevant property to + DRMP3_DEFAULT_CHANNELS or DRMP3_DEFAULT_SAMPLE_RATE. + - Add s16 reading APIs + - drmp3_read_pcm_frames_s16 + - drmp3_open_memory_and_read_pcm_frames_s16 + - drmp3_open_and_read_pcm_frames_s16 + - drmp3_open_file_and_read_pcm_frames_s16 + - Add drmp3_get_mp3_and_pcm_frame_count() to the public header section. + - Add support for C89. + - Change license to choice of public domain or MIT-0. + +v0.4.2 - 2019-02-21 + - Fix a warning. + +v0.4.1 - 2018-12-30 + - Fix a warning. + +v0.4.0 - 2018-12-16 + - API CHANGE: Rename some APIs: + - drmp3_read_f32 -> to drmp3_read_pcm_frames_f32 + - drmp3_seek_to_frame -> drmp3_seek_to_pcm_frame + - drmp3_open_and_decode_f32 -> drmp3_open_and_read_pcm_frames_f32 + - drmp3_open_and_decode_memory_f32 -> drmp3_open_memory_and_read_pcm_frames_f32 + - drmp3_open_and_decode_file_f32 -> drmp3_open_file_and_read_pcm_frames_f32 + - Add drmp3_get_pcm_frame_count(). + - Add drmp3_get_mp3_frame_count(). + - Improve seeking performance. + +v0.3.2 - 2018-09-11 + - Fix a couple of memory leaks. + - Bring up to date with minimp3. + +v0.3.1 - 2018-08-25 + - Fix C++ build. + +v0.3.0 - 2018-08-25 + - Bring up to date with minimp3. This has a minor API change: the "pcm" parameter of drmp3dec_decode_frame() has + been changed from short* to void* because it can now output both s16 and f32 samples, depending on whether or + not the DR_MP3_FLOAT_OUTPUT option is set. + +v0.2.11 - 2018-08-08 + - Fix a bug where the last part of a file is not read. + +v0.2.10 - 2018-08-07 + - Improve 64-bit detection. + +v0.2.9 - 2018-08-05 + - Fix C++ build on older versions of GCC. + - Bring up to date with minimp3. + +v0.2.8 - 2018-08-02 + - Fix compilation errors with older versions of GCC. + +v0.2.7 - 2018-07-13 + - Bring up to date with minimp3. + +v0.2.6 - 2018-07-12 + - Bring up to date with minimp3. + +v0.2.5 - 2018-06-22 + - Bring up to date with minimp3. + +v0.2.4 - 2018-05-12 + - Bring up to date with minimp3. + +v0.2.3 - 2018-04-29 + - Fix TCC build. + +v0.2.2 - 2018-04-28 + - Fix bug when opening a decoder from memory. + +v0.2.1 - 2018-04-27 + - Efficiency improvements when the decoder reaches the end of the stream. + +v0.2 - 2018-04-21 + - Bring up to date with minimp3. + - Start using major.minor.revision versioning. + +v0.1d - 2018-03-30 + - Bring up to date with minimp3. + +v0.1c - 2018-03-11 + - Fix C++ build error. + +v0.1b - 2018-03-07 + - Bring up to date with minimp3. + +v0.1a - 2018-02-28 + - Fix compilation error on GCC/Clang. + - Fix some warnings. + +v0.1 - 2018-02-xx + - Initial versioned release. +*/ + +/* +This software is available as a choice of the following licenses. Choose +whichever you prefer. + +=============================================================================== +ALTERNATIVE 1 - Public Domain (www.unlicense.org) +=============================================================================== +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. + +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to + +=============================================================================== +ALTERNATIVE 2 - MIT No Attribution +=============================================================================== +Copyright 2023 David Reid + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +/* + https://github.com/lieff/minimp3 + To the extent possible under law, the author(s) have dedicated all copyright and related and neighboring rights to this software to the public domain worldwide. + This software is distributed without any warranty. + See . +*/ diff --git a/src/lib/subsonic.hpp b/src/lib/subsonic.hpp new file mode 100644 index 0000000..1c46265 --- /dev/null +++ b/src/lib/subsonic.hpp @@ -0,0 +1,708 @@ +#pragma once + +#include +#include +#include + +#include +#include + +struct ArtistInfo { + std::string id; + std::string name; + std::string coverArt; + int albumCount = 0; +}; +struct SongInfo { + std::string id; + std::string parent; + + std::string title; + std::string album; + std::string albumId; + std::string artist; + std::string artistId; + + std::string coverArt; + std::string suffix; + std::string contentType; + std::string path; + + int duration = 0; + int bitRate = 0; + int bitDepth = 0; + int samplingRate = 0; + int channelCount = 0; + int track = 0; + int discNumber = 0; + int year = 0; + int size = 0; +}; + +struct AlbumInfo { + std::string id; + std::string name; // alias "title"/"album/name" + std::string artist; + std::string artistId; + std::string coverArt; + + int songCount = 0; + int year = 0; + int duration = 0; + int playCount = 0; + + std::string genre; + std::string created; // ISO8601 string +}; + +class SubsonicClient { +public: + SubsonicClient(const std::string& baseUrl, + const std::string& user, + const std::string& pass, + const std::string& clientName, + const std::string& apiVersion) + : base_(baseUrl), + user_(user), + pass_(pass), + client_(clientName), + api_(apiVersion) + { + // strip trailing slash from base + if (!base_.empty() && base_.back() == '/') + base_.pop_back(); + } + + // Simple ping; returns true on success, false on HTTP/parse error + bool ping(std::string* outRawJson = nullptr, std::string* outError = nullptr) const { + std::string url = buildUrl("ping", ""); + std::string body; + if (!httpGet(url, body, outError)) { + return false; + } + if (outRawJson) *outRawJson = body; + + Json::Value root; + if (!parseJson(body, root, outError)) { + return false; + } + + const Json::Value& sub = root["subsonic-response"]; + if (!sub.isObject()) { + if (outError) *outError = "Missing 'subsonic-response'"; + return false; + } + const Json::Value& status = sub["status"]; + if (status.isString() && status.asString() == "ok") { + return true; + } + + if (outError) { + const Json::Value& err = sub["error"]; + if (err.isObject()) { + std::string code = err["code"].isInt() ? std::to_string(err["code"].asInt()) : "?"; + std::string msg = err["message"].isString() ? err["message"].asString() : "unknown"; + *outError = "Subsonic error " + code + ": " + msg; + } else { + *outError = "Subsonic ping failed (unknown status)"; + } + } + return false; + } + + static size_t writeBinaryCB(char* ptr, size_t size, size_t nmemb, void* userdata) + { + size_t total = size * nmemb; + std::string* out = (std::string*)userdata; + out->append(ptr, total); + return total; + } + + bool httpGetBinary(const std::string& url, + std::string& outData, + std::string* outError = nullptr) const + { + outData.clear(); + + CURL* c = curl_easy_init(); + if (!c) { + if (outError) *outError = "curl_easy_init failed"; + return false; + } + + curl_easy_setopt(c, CURLOPT_URL, url.c_str()); + curl_easy_setopt(c, CURLOPT_FOLLOWLOCATION, 1L); + + // Write binary bytes directly into string + curl_easy_setopt(c, CURLOPT_WRITEFUNCTION, writeBinaryCB); + curl_easy_setopt(c, CURLOPT_WRITEDATA, &outData); + + // no JSON / no SSL required unless needed + curl_easy_setopt(c, CURLOPT_CONNECTTIMEOUT, 10L); + curl_easy_setopt(c, CURLOPT_TIMEOUT, 30L); + + CURLcode res = curl_easy_perform(c); + + char* ctype = NULL; + curl_easy_getinfo(c, CURLINFO_CONTENT_TYPE, &ctype); + + curl_easy_cleanup(c); + + if (res != CURLE_OK) { + if (outError) + *outError = curl_easy_strerror(res); + return false; + } + + // Detect error (Subsonic returns XML error instead of image) + if (ctype && std::string(ctype).rfind("text/xml", 0) == 0) { + if (outError) + *outError = "Subsonic returned an error (XML). Invalid ID?"; + return false; + } + + return true; + } + std::string coverArtUrl(const std::string& id) const { + std::string q = buildQuery(); + std::string idEnc = urlEncode(id); + + std::string url; + url.reserve(base_.size() + q.size() + idEnc.size() + 32); + + url.append(base_) + .append("/rest/getCoverArt.view?") + .append(q) + .append("&id=") + .append(idEnc); + + return url; + } + + std::vector getAllAlbums(std::string* outError = nullptr) const + { + std::vector all; + all.reserve(4096); // reasonably large library + + const int pageSize = 500; // max allowed by spec + int offset = 0; + + while (true) { + auto page = getAlbumList("alphabeticalByName", + pageSize, + offset, + 0, 0, "", -1, + outError); + + if (page.empty()) + break; + + // append + all.insert(all.end(), page.begin(), page.end()); + + // move to next page + offset += pageSize; + } + + return all; + } + + std::vector getAlbumList(const std::string& type, + int size = 10, + int offset = 0, + int fromYear = 0, + int toYear = 0, + const std::string& genre = "", + int musicFolderId = -1, + std::string* outError = nullptr) const + { + // ---- Build query string ---- + char extra[512]; + + // Year-range mode + if (type == "byYear") { + if (fromYear == 0 || toYear == 0) { + if (outError) *outError = "byYear requires fromYear and toYear"; + return {}; + } + + if (musicFolderId >= 0) { + std::snprintf(extra, sizeof(extra), + "type=byYear&size=%d&offset=%d&fromYear=%d&toYear=%d&musicFolderId=%d", + size, offset, fromYear, toYear, musicFolderId); + } else { + std::snprintf(extra, sizeof(extra), + "type=byYear&size=%d&offset=%d&fromYear=%d&toYear=%d", + size, offset, fromYear, toYear); + } + } + // Genre mode + else if (type == "byGenre") { + if (genre.empty()) { + if (outError) *outError = "byGenre requires a genre name"; + return {}; + } + + std::string genc = urlEncode(genre); + + if (musicFolderId >= 0) { + std::snprintf(extra, sizeof(extra), + "type=byGenre&size=%d&offset=%d&genre=%s&musicFolderId=%d", + size, offset, genc.c_str(), musicFolderId); + } else { + std::snprintf(extra, sizeof(extra), + "type=byGenre&size=%d&offset=%d&genre=%s", + size, offset, genc.c_str()); + } + } + // Normal modes + else { + if (musicFolderId >= 0) { + std::snprintf(extra, sizeof(extra), + "type=%s&size=%d&offset=%d&musicFolderId=%d", + type.c_str(), size, offset, musicFolderId); + } else { + std::snprintf(extra, sizeof(extra), + "type=%s&size=%d&offset=%d", + type.c_str(), size, offset); + } + } + + // ---- HTTP ---- + std::string url = buildUrl("getAlbumList2", extra); + std::string body; + + if (!httpGet(url, body, outError)) + return {}; + + // ---- JSON parsing ---- + Json::Value root; + if (!parseJson(body, root, outError)) + return {}; + + const Json::Value& sub = root["subsonic-response"]; + if (!sub.isObject()) { + if (outError) *outError = "Missing 'subsonic-response'"; + return {}; + } + + const Json::Value& albumList = sub["albumList2"]; + if (!albumList.isObject()) { + if (outError) *outError = "Missing 'albumList2'"; + return {}; + } + + const Json::Value& albums = albumList["album"]; + if (!albums.isArray()) { + if (outError) *outError = "'albumList2.album' is missing or not an array"; + return {}; + } + + std::vector out; + out.reserve(albums.size()); + + // ---- Extract albums ---- + for (Json::ArrayIndex i = 0; i < albums.size(); ++i) { + const Json::Value& a = albums[i]; + if (!a.isObject()) continue; + + AlbumInfo info; + + if (a["id"].isString()) info.id = a["id"].asString(); + if (a["name"].isString()) info.name = a["name"].asString(); + else if (a["title"].isString()) info.name = a["title"].asString(); + else if (a["album"].isString()) info.name = a["album"].asString(); + + if (a["artist"].isString()) info.artist = a["artist"].asString(); + if (a["artistId"].isString()) info.artistId = a["artistId"].asString(); + if (a["coverArt"].isString()) info.coverArt = a["coverArt"].asString(); + if (a["genre"].isString()) info.genre = a["genre"].asString(); + if (a["created"].isString()) info.created = a["created"].asString(); + + if (a["songCount"].isInt()) info.songCount = a["songCount"].asInt(); + if (a["duration"].isInt()) info.duration = a["duration"].asInt(); + if (a["playCount"].isInt()) info.playCount = a["playCount"].asInt(); + if (a["year"].isInt()) info.year = a["year"].asInt(); + + out.push_back(info); + } + + return out; + } + + std::vector getArtists(std::string* outError = nullptr) const { + std::string url = buildUrl("getArtists", ""); + std::string body; + + // ---- HTTP ---- + if (!httpGet(url, body, outError)) + return {}; + + // ---- JSON ---- + Json::Value root; + if (!parseJson(body, root, outError)) + return {}; + + const Json::Value& sub = root["subsonic-response"]; + if (!sub.isObject()) { + if (outError) *outError = "Missing 'subsonic-response'"; + return {}; + } + + const Json::Value& artists = sub["artists"]; + if (!artists.isObject()) { + if (outError) *outError = "Missing 'artists'"; + return {}; + } + + const Json::Value& index = artists["index"]; + if (!index.isArray()) { + if (outError) *outError = "Missing 'index' array"; + return {}; + } + + std::vector out; + out.reserve(128); // typical Subsonic library size + + // artists → index[] → artist[] + for (Json::ArrayIndex i = 0; i < index.size(); ++i) { + const Json::Value& entry = index[i]; + const Json::Value& arr = entry["artist"]; + + if (!arr.isArray()) + continue; + + for (Json::ArrayIndex j = 0; j < arr.size(); ++j) { + const Json::Value& a = arr[j]; + if (!a.isObject()) + continue; + + ArtistInfo info; + + if (a["id"].isString()) info.id = a["id"].asString(); + if (a["name"].isString()) info.name = a["name"].asString(); + if (a["coverArt"].isString()) info.coverArt = a["coverArt"].asString(); + if (a["albumCount"].isInt()) info.albumCount = a["albumCount"].asInt(); + + out.push_back(info); + } + } + + return out; + } + + // Search3 → returns a vector of song IDs + std::vector searchSongIds(const std::string& query, + int count = 20, + std::string* outError = nullptr) const + { + if (count <= 0) count = 20; + + std::string qenc = urlEncode(query); + char extra[256]; + std::snprintf(extra, sizeof(extra), + "query=%s&songCount=%d", qenc.c_str(), count); + + std::string url = buildUrl("search3", extra); + std::string body; + if (!httpGet(url, body, outError)) { + return {}; + } + + Json::Value root; + if (!parseJson(body, root, outError)) { + return {}; + } + + const Json::Value& sub = root["subsonic-response"]; + if (!sub.isObject()) { + if (outError) *outError = "Missing 'subsonic-response'"; + return {}; + } + + const Json::Value& sr3 = sub["searchResult3"]; + if (!sr3.isObject()) { + if (outError) *outError = "Missing 'searchResult3'"; + return {}; + } + + const Json::Value& songs = sr3["song"]; + if (!songs.isArray()) { + if (outError) *outError = "Missing 'song' array"; + return {}; + } + + std::vector ids; + ids.reserve(static_cast(count)); + + for (Json::ArrayIndex i = 0; i < songs.size(); ++i) { + const Json::Value& song = songs[i]; + if (!song.isObject()) continue; + + const Json::Value& id = song["id"]; + if (!id.isString()) continue; + + ids.push_back(id.asString()); + if ((int)ids.size() >= count) break; + } + + return ids; + } + + // Just build a stream URL for a given song ID + std::string streamUrl(const std::string& songId) const { + std::string q = buildQuery(); + std::string enc = urlEncode(songId); + + std::string url; + url.reserve(base_.size() + 96 + q.size() + enc.size()); + url.append(base_).append("/rest/stream.view?").append(q) + .append("&id=").append(enc); + + return url; + } + + // Expose the raw GET if you want to parse manually elsewhere + bool rawGet(const std::string& endpoint, + const std::string& extraQuery, + std::string& outBody, + std::string* outError = nullptr) const + { + std::string url = buildUrl(endpoint, extraQuery); + return httpGet(url, outBody, outError); + } + std::vector getAlbum(const std::string& albumId, + std::string* outError = nullptr) const + { + // Build /rest/getAlbum.view?id=xxx + std::string enc = urlEncode(albumId); + std::string extra = "id=" + enc; + std::string url = buildUrl("getAlbum", extra); + + std::string body; + if (!httpGet(url, body, outError)) + return {}; + + Json::Value root; + if (!parseJson(body, root, outError)) + return {}; + + const Json::Value& sub = root["subsonic-response"]; + if (!sub.isObject()) { + if (outError) *outError = "Missing 'subsonic-response'"; + return {}; + } + + const Json::Value& alb = sub["album"]; + if (!alb.isObject()) { + if (outError) *outError = "Missing 'album'"; + return {}; + } + + const Json::Value& songs = alb["song"]; + if (!songs.isArray()) { + // No songs = empty album + return {}; + } + + std::vector out; + out.reserve(songs.size()); + + for (Json::ArrayIndex i = 0; i < songs.size(); ++i) { + const Json::Value& s = songs[i]; + if (!s.isObject()) + continue; + + SongInfo info; + + auto getStr = [&](const char* k, std::string& dst) { + if (s[k].isString()) dst = s[k].asString(); + }; + auto getInt = [&](const char* k, int& dst) { + if (s[k].isInt()) dst = s[k].asInt(); + }; + + // Strings + getStr("id", info.id); + getStr("parent", info.parent); + getStr("title", info.title); + getStr("album", info.album); + getStr("albumId", info.albumId); + getStr("artist", info.artist); + getStr("artistId", info.artistId); + getStr("coverArt", info.coverArt); + getStr("suffix", info.suffix); + getStr("contentType", info.contentType); + getStr("path", info.path); + + // Ints + getInt("duration", info.duration); + getInt("bitRate", info.bitRate); + getInt("bitDepth", info.bitDepth); + getInt("samplingRate", info.samplingRate); + getInt("channelCount", info.channelCount); + getInt("track", info.track); + getInt("year", info.year); + getInt("size", info.size); + getInt("discNumber", info.discNumber); + + out.push_back(info); + } + + return out; + } + +private: + std::string base_; + std::string user_; + std::string pass_; + std::string client_; + std::string api_; + + // --- Helpers ---------------------------------------------------------- + + static size_t curlWriteCallback(char* ptr, size_t size, size_t nmemb, void* userdata) { + size_t total = size * nmemb; + std::string* out = static_cast(userdata); + out->append(ptr, total); + return total; + } + + bool httpGet(const std::string& url, + std::string& outBody, + std::string* outError) const + { + outBody.clear(); + + CURL* c = curl_easy_init(); + if (!c) { + if (outError) *outError = "curl_easy_init failed"; + return false; + } + + curl_easy_setopt(c, CURLOPT_URL, url.c_str()); + curl_easy_setopt(c, CURLOPT_FOLLOWLOCATION, 1L); + curl_easy_setopt(c, CURLOPT_WRITEFUNCTION, &curlWriteCallback); + curl_easy_setopt(c, CURLOPT_WRITEDATA, &outBody); + + // Reasonable defaults; tweak if Vita/curl complains + curl_easy_setopt(c, CURLOPT_CONNECTTIMEOUT, 10L); + curl_easy_setopt(c, CURLOPT_TIMEOUT, 30L); + + // If you're on HTTPS and don't have CA bundle, you might need: + // curl_easy_setopt(c, CURLOPT_SSL_VERIFYPEER, 0L); + // curl_easy_setopt(c, CURLOPT_SSL_VERIFYHOST, 0L); + + CURLcode res = curl_easy_perform(c); + long httpCode = 0; + curl_easy_getinfo(c, CURLINFO_RESPONSE_CODE, &httpCode); + curl_easy_cleanup(c); + + if (res != CURLE_OK) { + if (outError) { + *outError = "curl error: "; + *outError += curl_easy_strerror(res); + } + return false; + } + + if (httpCode < 200 || httpCode >= 300) { + if (outError) { + *outError = "HTTP error: " + std::to_string(httpCode); + } + return false; + } + + return true; + } + + std::string buildQuery() const { + std::string userEnc = urlEncode(user_); + std::string clientEnc = urlEncode(client_); + std::string hexPw = hexEncode(pass_); + + char buf[512]; + std::snprintf(buf, sizeof(buf), + "u=%s&p=enc:%s&v=%s&c=%s&f=json", + userEnc.c_str(), hexPw.c_str(), + api_.c_str(), clientEnc.c_str()); + return std::string(buf); + } + + std::string buildUrl(const std::string& endpoint, + const std::string& extra) const + { + std::string q = buildQuery(); + std::string url; + url.reserve(base_.size() + endpoint.size() + q.size() + extra.size() + 32); + + url.append(base_) + .append("/rest/") + .append(endpoint) + .append(".view?") + .append(q); + + if (!extra.empty()) { + url.push_back('&'); + url.append(extra); + } + + return url; + } + + static std::string hexEncode(const std::string& in) { + static const char* hex = "0123456789abcdef"; + std::string out; + out.resize(in.size() * 2); + + for (size_t i = 0; i < in.size(); ++i) { + unsigned char c = static_cast(in[i]); + out[i * 2] = hex[c >> 4]; + out[i * 2 + 1] = hex[c & 0x0F]; + } + return out; + } + + static std::string urlEncode(const std::string& in) { + static const char* hex = "0123456789ABCDEF"; + std::string out; + out.reserve(in.size() * 3); + + for (size_t i = 0; i < in.size(); ++i) { + unsigned char c = static_cast(in[i]); + bool esc = (c <= 0x20) || (c == '&') || (c == '%') || + (c == '+') || (c == '?') || (c == '=') || + (c >= 0x7F); + + if (!esc) { + out.push_back(static_cast(c)); + } else { + out.push_back('%'); + out.push_back(hex[c >> 4]); + out.push_back(hex[c & 0x0F]); + } + } + return out; + } + + bool parseJson(const std::string& text, + Json::Value& root, + std::string* outError) const + { + Json::CharReaderBuilder builder; + Json::CharReader* reader = builder.newCharReader(); + + const char* start = text.data(); + const char* end = text.data() + text.size(); + + std::string errs; + bool ok = reader->parse(start, end, &root, &errs); + delete reader; + + if (!ok) { + if (outError) *outError = "JSON parse error: " + errs; + return false; + } + return true; + } +}; diff --git a/src/main.cpp b/src/main.cpp new file mode 100644 index 0000000..895c49e --- /dev/null +++ b/src/main.cpp @@ -0,0 +1,751 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "lib/subsonic.hpp" +#include "lib/audio.hpp" + +#define SCREEN_W 960 +#define SCREEN_H 544 + +struct Button { int x,y,w,h; const char* label; }; +struct Tab { int x,y,w,h; const char* label; bool active; }; +struct Item { + SDL_Rect art; + SDL_Rect titleRect; + SDL_Rect artistRect; + std::string title; + std::string id; + std::string contentType; + std::string artist; + std::string coverId; // <-- NEW: Subsonic coverArt ID + bool isTrack = false; + int trackNum = 0; +}; +struct ArtRequest { + std::string id; + std::string url; +}; + +struct ArtResult { + std::string id; + std::string data; +}; +struct AudioQueueEntry { + std::string id; + bool isFlac; +}; + +std::queue gArtRequestQ; +std::queue gArtResultQ; +Item sel; + +SceUID gArtThread = -1; +SceUID gArtMutex = -1; + +bool gArtThreadRunning = true; + +bool hit(int x,int y, SDL_Rect r){ + return x>=r.x && x<=r.x+r.w && y>=r.y && y<=r.y+r.h; +} +// scrolling +int scrollOffset = 0; +int maxScroll = 0; + +bool dragging = false; +int lastY = 0; + +// inertia +float velocity = 0.0f; // current scrolling speed +float friction = 0.92f; // deceleration factor (lower = more friction) +bool inertial = false; // true after releasing a drag +std::string selectedAlbumId = ""; +std::vector gPlayQueue; // holds Subsonic track IDs +int gQueueIndex = -1; // index in queue; -1 = not playing +const int LEFT_X = 0; +const int LEFT_Y = 60; // below the tabs +const int LEFT_W = 350; +const int LEFT_H = SCREEN_H - LEFT_Y; + +#include +#include // for fabsf if you want + +// Simple cache: coverArt ID -> SDL_Texture* +std::map gArtCache; + +void log(SDL_Window* win, const char* fmt, ...) { + char buf[512]; + + va_list args; + va_start(args, fmt); + vsnprintf(buf, sizeof(buf), fmt, args); + va_end(args); + + SDL_ShowSimpleMessageBox(SDL_MESSAGEBOX_INFORMATION, "Log", buf, win); +} + +SDL_Texture* text(SDL_Renderer* r, TTF_Font* f, std::string s, SDL_Color c){ + SDL_Surface* surf = TTF_RenderUTF8_Blended(f, s.c_str(), c); + SDL_Texture* t = SDL_CreateTextureFromSurface(r, surf); + SDL_FreeSurface(surf); + return t; +} +void requestCoverArtAsync(const std::string& coverId, const std::string& url) +{ + sceKernelLockMutex(gArtMutex, 1, NULL); + gArtRequestQ.push({coverId, url}); + sceKernelUnlockMutex(gArtMutex, 1); +} +SDL_Texture* getCoverArtTexture(SDL_Renderer* rd, + const SubsonicClient& ss, + const std::string& coverId, + SDL_Texture* fallback) +{ + if (coverId.empty()) + return fallback; + + auto it = gArtCache.find(coverId); + if (it != gArtCache.end()) { + if (it->second != nullptr) // loaded + return it->second; + return fallback; // still loading + } + + // mark as pending + gArtCache[coverId] = nullptr; + + // queue request for async thread + requestCoverArtAsync(coverId, ss.coverArtUrl(coverId)); + + return fallback; +} +int artThreadFunc(SceSize args, void* argp) +{ + while (gArtThreadRunning) + { + sceKernelLockMutex(gArtMutex, 1, NULL); + + if (!gArtRequestQ.empty()) { + ArtRequest req = gArtRequestQ.front(); + gArtRequestQ.pop(); + + sceKernelUnlockMutex(gArtMutex, 1); + + std::string err; + std::string data; + + // Perform blocking curl request + // Safe in this thread + SubsonicClient* ss = (SubsonicClient*)argp; + ss->httpGetBinary(req.url, data, &err); + + if (!data.empty()) { + sceKernelLockMutex(gArtMutex, 1, NULL); + gArtResultQ.push({req.id, data}); + sceKernelUnlockMutex(gArtMutex, 1); + } + + continue; + } + + sceKernelUnlockMutex(gArtMutex, 1); + + // avoid 100% CPU + sceKernelDelayThread(5000); // 5ms + } + + return 0; +} +void processPendingArt(SDL_Renderer* rd) +{ + sceKernelLockMutex(gArtMutex, 1, NULL); + + while (!gArtResultQ.empty()) { + ArtResult r = gArtResultQ.front(); + gArtResultQ.pop(); + + sceKernelUnlockMutex(gArtMutex, 1); + + // Convert binary → SDL texture (main thread only!) + SDL_RWops* rw = SDL_RWFromConstMem(r.data.data(), r.data.size()); + SDL_Texture* tex = IMG_LoadTexture_RW(rd, rw, 1); + + if (tex) + gArtCache[r.id] = tex; + else + gArtCache.erase(r.id); // failed + + sceKernelLockMutex(gArtMutex, 1, NULL); + } + + sceKernelUnlockMutex(gArtMutex, 1); +} + +void renderLeftList( + SDL_Renderer* rd, + SDL_Texture* listTex, + SDL_Texture* fallbackTex, + TTF_Font* fontSmall, + const std::vector& items, + int scrollOffset, + const SubsonicClient& ss) +{ + SDL_SetRenderTarget(rd, listTex); + SDL_SetRenderDrawColor(rd, 25,25,35,255); + SDL_RenderClear(rd); + + for (int i = 0; i < items.size(); i++) { + + SDL_Rect art = items[i].art; + art.y -= scrollOffset; + art.y -= LEFT_Y; // <-- YOU FORGOT THIS + art.x -= LEFT_X; + + if (art.y >= LEFT_H || art.y + art.h <= 0) + continue; + + // TEXT RECTS + SDL_Rect rt1 = items[i].titleRect; + SDL_Rect rt2 = items[i].artistRect; + + rt1.y -= scrollOffset; + rt1.y -= LEFT_Y; + rt1.x -= LEFT_X; + + rt2.y -= scrollOffset; + rt2.y -= LEFT_Y; + rt2.x -= LEFT_X; + + + // LOAD ART (cached) + SDL_Texture* artTex = fallbackTex; + if (!items[i].coverId.empty()) + artTex = getCoverArtTexture(rd, ss, items[i].coverId, fallbackTex); + + if (artTex) + SDL_RenderCopy(rd, artTex, NULL, &art); + + // TEXTURES + SDL_Texture* t1 = text(rd, fontSmall, items[i].title, {255,255,255}); + int tw1,th1; SDL_QueryTexture(t1,NULL,NULL,&tw1,&th1); + SDL_Rect td1 = { rt1.x, rt1.y, tw1, th1 }; + SDL_RenderCopy(rd, t1, NULL, &td1); + SDL_DestroyTexture(t1); + + SDL_Texture* t2 = text(rd, fontSmall, items[i].artist, {180,180,180}); + int tw2,th2; SDL_QueryTexture(t2,NULL,NULL,&tw2,&th2); + SDL_Rect td2 = { rt2.x, rt2.y, tw2, th2 }; + SDL_RenderCopy(rd, t2, NULL, &td2); + SDL_DestroyTexture(t2); + } + + SDL_SetRenderTarget(rd, NULL); + + SDL_Rect dst = {LEFT_X, LEFT_Y, LEFT_W, LEFT_H}; + SDL_RenderCopy(rd, listTex, NULL, &dst); +} +void renderRightPanel( + SDL_Renderer* rd, + TTF_Font* fontBig, TTF_Font* fontMed, + SDL_Rect selArt, + SDL_Texture* fallbackTex, + const SubsonicClient& ss) +{ + SDL_Texture* artTex = fallbackTex; + if (!sel.coverId.empty()) + artTex = getCoverArtTexture(rd, ss, sel.coverId, fallbackTex); + + if (artTex) + SDL_RenderCopy(rd, artTex, NULL, &selArt); + SDL_Texture* Tt = text(rd,fontBig, sel.title,{255,255,255}); + SDL_Texture* Ta = text(rd,fontMed, sel.artist,{200,200,200}); + + int w1,h1,w2,h2; + SDL_QueryTexture(Tt,NULL,NULL,&w1,&h1); + SDL_QueryTexture(Ta,NULL,NULL,&w2,&h2); + + SDL_Rect rT = { selArt.x+selArt.w+20, selArt.y+20, w1,h1 }; + SDL_Rect rA = { selArt.x+selArt.w+20, selArt.y+40+h1, w2,h2 }; + + SDL_RenderCopy(rd,Tt,NULL,&rT); + SDL_RenderCopy(rd,Ta,NULL,&rA); + + SDL_DestroyTexture(Tt); + SDL_DestroyTexture(Ta); +} +void renderSlider(SDL_Renderer* rd, SDL_Rect sliderBar, float sliderVal) +{ + SDL_SetRenderDrawColor(rd,150,150,150,255); + SDL_RenderFillRect(rd,&sliderBar); + + SDL_SetRenderDrawColor(rd,255,255,255,255); + SDL_Rect knob{ + sliderBar.x + (int)(sliderVal * sliderBar.w) - 8, + sliderBar.y - 3, + 16, + sliderBar.h + 6 + }; + SDL_RenderFillRect(rd,&knob); +} +void drawButton(SDL_Renderer* rd, TTF_Font* font, Button& b) { + SDL_SetRenderDrawColor(rd,60,60,90,255); + SDL_Rect r = {b.x,b.y,b.w,b.h}; + SDL_RenderFillRect(rd,&r); + + SDL_Texture* tt = text(rd, font, b.label, {255,255,255}); + int tw,th; SDL_QueryTexture(tt,NULL,NULL,&tw,&th); + + SDL_Rect td = { r.x+(r.w-tw)/2, r.y+(r.h-th)/2, tw, th }; + SDL_RenderCopy(rd,tt,NULL,&td); + SDL_DestroyTexture(tt); +} + +void renderControls(SDL_Renderer* rd, TTF_Font* fontSmall, + Button& b1, Button& b2, Button& b3) +{ + drawButton(rd, fontSmall, b1); + drawButton(rd, fontSmall, b2); + drawButton(rd, fontSmall, b3); +} + +void renderTabs(SDL_Renderer* rd, TTF_Font* fontMed, Tab tabs[3]) { + for (int i=0;i<3;i++){ + SDL_Rect r = {tabs[i].x,tabs[i].y,tabs[i].w,tabs[i].h}; + + if (tabs[i].active) + SDL_SetRenderDrawColor(rd,80,80,120,255); + else + SDL_SetRenderDrawColor(rd,60,60,80,255); + + SDL_RenderFillRect(rd,&r); + + SDL_Texture* t = text(rd,fontMed,tabs[i].label,{255,255,255}); + int tw,th; SDL_QueryTexture(t,NULL,NULL,&tw,&th); + + SDL_Rect td = {r.x+(r.w-tw)/2, r.y+(r.h-th)/2, tw, th}; + SDL_RenderCopy(rd, t, NULL, &td); + SDL_DestroyTexture(t); + } +} + +void loadTracks( + std::vector& items, + const SubsonicClient& ss, + const std::string& albumId, + SDL_Window* win) +{ + std::string err; + auto tracks = ss.getAlbum(albumId, &err); + if (!err.empty()) + log(win, "getAlbum error: %s", err.c_str()); + + items.clear(); + items.reserve(tracks.size()); + + const int artSize = 80; + const int rowH = 100; + const int baseY = 70; + + for (int k = 0; k < tracks.size(); k++) { + const auto& s = tracks[k]; + + Item it; + int y = baseY + k * rowH; + + it.art = {20, y, artSize, artSize}; + it.title = std::to_string(s.track) + ". " + s.title; + it.artist = s.artist; + it.coverId = s.coverArt; + it.isTrack = true; + it.trackNum = s.track; + it.id = s.id; // optional, depends on your Subsonic struct + it.contentType = s.contentType; + + it.titleRect = { 20 + artSize + 15, y + 10, 240, 24 }; + it.artistRect = { 20 + artSize + 15, y + 36, 240, 22 }; + + items.push_back(it); + } + + scrollOffset = 0; + maxScroll = items.size() * rowH - LEFT_H; + if (maxScroll < 0) maxScroll = 0; +} + + +void loadAlbums( + std::vector& items, + const SubsonicClient& ss) +{ + std::string err; + auto allAlbums = ss.getAllAlbums(&err); + if (!err.empty()) + SDL_Log("getAlbum error: %s", err.c_str()); + + items.clear(); + items.reserve(allAlbums.size()); + + const int artSize = 80; + const int rowH = 100; + const int baseY = 70; + + for (int k = 0; k < allAlbums.size(); k++) { + const auto& a = allAlbums[k]; + + Item it; + int y = baseY + k * rowH; + + it.art = {20, y, artSize, artSize}; + it.title = a.name; + it.artist = a.artist; + it.coverId = a.coverArt; + it.id = a.id; + it.isTrack = false; + + it.titleRect = {20 + artSize + 15, y + 10, 200, 20}; + it.artistRect = {20 + artSize + 15, y + 35, 200, 20}; + + items.push_back(it); + } + + scrollOffset = 0; + maxScroll = items.size() * rowH - LEFT_H; + if (maxScroll < 0) maxScroll = 0; +} + + +int main() +{ + SubsonicClient ss( + "http://192.168.8.102:4747", + "admin", + "admin", + "vita-player", + "1.16.1" + ); + + + SDL_setenv("VITA_DISABLE_TOUCH_BACK", "1", 1); + + Audio_Init(); + SDL_Init(SDL_INIT_VIDEO | SDL_INIT_TIMER); + IMG_Init(IMG_INIT_PNG); + TTF_Init(); + gArtMutex = sceKernelCreateMutex("art_mutex", 0, 0, NULL); + gArtThread = sceKernelCreateThread( + "art_loader", + artThreadFunc, + 0x40, // priority + 0x10000, // stack size + 0, 0, + NULL + ); + + sceKernelStartThread(gArtThread, sizeof(ss), &ss); + + SDL_Window* win = SDL_CreateWindow("UI",0,0,SCREEN_W,SCREEN_H,0); + SDL_Renderer* rd = SDL_CreateRenderer(win,-1,SDL_RENDERER_ACCELERATED); + SDL_Texture* listTex = SDL_CreateTexture( + rd, + SDL_PIXELFORMAT_RGBA8888, + SDL_TEXTUREACCESS_TARGET, + LEFT_W, + LEFT_H + ); + + TTF_Font* fontBig = TTF_OpenFont("app0:Roboto-Regular.ttf", 32); + TTF_Font* fontMed = TTF_OpenFont("app0:Roboto-Regular.ttf", 24); + TTF_Font* fontSmall = TTF_OpenFont("app0:Roboto-Regular.ttf", 18); + + SDL_Texture* albumTex = IMG_LoadTexture(rd, "app0:album.jpg"); + + Tab tabs[3] = { + { 20, 10, 150, 40, "albums", true }, + { 180, 10, 150, 40, "artists", false }, + { 340, 10, 150, 40, "tracks", false } + }; + + std::vector items; + loadAlbums(items, ss); + int artSize = 80; + int rowHeight = 100; + int startY = 70; + + // maximum scroll amount + maxScroll = items.size() * rowHeight - LEFT_H; + if (maxScroll < 0) maxScroll = 0; + + SDL_Rect selArt = { 420, 90, 180, 180 }; + + SDL_Rect sliderBar = { 430, 310, 360, 14 }; + float sliderVal = 0.3f; + + int controlsCenter = sliderBar.x + sliderBar.w/2; + + Button btnBack = { controlsCenter - 150, 340, 80, 50, "back" }; + Button btnPlay = { controlsCenter - 40, 340, 80, 50, "play" }; + Button btnNext = { controlsCenter + 70, 340, 80, 50, "next" }; + + bool running=true; + + while (running) + { + SDL_Event e; + int tx=-1, ty=-1; + bool touch=false; + + while(SDL_PollEvent(&e)){ + if(e.type==SDL_QUIT) running=false; + if (e.type == SDL_FINGERMOTION && dragging) { + int newY = e.tfinger.y * SCREEN_H; + int dy = lastY - newY; + lastY = newY; + + scrollOffset += dy; + + // clamp + if (scrollOffset < 0) scrollOffset = 0; + if (scrollOffset > maxScroll) scrollOffset = maxScroll; + + // update velocity (for inertia) + velocity = dy; // the last drag movement speed + } + + if(e.type==SDL_FINGERDOWN){ + touch=true; + tx = e.tfinger.x*SCREEN_W; + ty = e.tfinger.y*SCREEN_H; + + if (tx >= LEFT_X && tx <= LEFT_X + LEFT_W && + ty >= LEFT_Y && ty <= LEFT_Y + LEFT_H) + { + dragging = true; + inertial = false; + velocity = 0.0f; + lastY = ty; + } + } + if(e.type==SDL_FINGERUP){ + dragging = false; + inertial = true; + } + } + + if (touch) + { + // Tabs + for (int i=0;i<3;i++){ + SDL_Rect r = {tabs[i].x,tabs[i].y,tabs[i].w,tabs[i].h}; + if (hit(tx,ty,r)){ + for(int j=0;j<3;j++) tabs[j].active=false; + tabs[i].active=true; + if (strcmp(tabs[i].label, "albums") == 0) { + loadAlbums(items, ss); + } + + } + } + + for (int i = 0; i < items.size(); i++) { + SDL_Rect r = items[i].art; + r.y -= scrollOffset; + if (hit(tx, ty, {LEFT_X + r.x, r.y, r.w, r.h})) { + if (tabs[0].active) { + selectedAlbumId = items[i].id; + loadTracks(items, ss, selectedAlbumId, win); + + // switch to Tracks tab + for (int j = 0; j < 3; j++) tabs[j].active = false; + tabs[2].active = true; + } else if(tabs[2].active) { + gPlayQueue.clear(); + for (int t = i; t < items.size(); t++) { + if (items[t].isTrack) + gPlayQueue.push_back( AudioQueueEntry{ + items[t].id, + items[t].contentType=="audio/flac"||items[t].contentType=="audio/x-flac", + }); + } + + gQueueIndex = 0; // start at selected track + sel = items[i]; + auto q = gPlayQueue[gQueueIndex]; + if(q.isFlac) { + Audio_Play(ss.streamUrl(q.id).c_str()); + } else { + Audio_PlayMP3(ss.streamUrl(q.id).c_str()); + } + btnPlay.label = "Pause"; + } + break; + } + } + + // Slider + if (hit(tx,ty,sliderBar)){ + float rel = (tx - sliderBar.x) / (float)sliderBar.w; + if (rel<0) rel=0; + if (rel>1) rel=1; + sliderVal = rel; + } + + // Controls + SDL_Rect rB = {btnBack.x,btnBack.y,btnBack.w,btnBack.h}; + SDL_Rect rP = {btnPlay.x,btnPlay.y,btnPlay.w,btnPlay.h}; + SDL_Rect rN = {btnNext.x,btnNext.y,btnNext.w,btnNext.h}; + + if (hit(tx,ty,rB)) { + // Previous track in queue + if (gQueueIndex > 0) { + gQueueIndex--; + auto prevId = gPlayQueue[gQueueIndex]; + + // Update UI selection + for (auto &it : items) + if (it.id == prevId.id) + sel = it; + + if(prevId.isFlac) { + Audio_Play(ss.streamUrl(prevId.id).c_str()); + } else { + Audio_PlayMP3(ss.streamUrl(prevId.id).c_str()); + } + btnPlay.label = "Pause"; + } + } + if (hit(tx,ty,rP)) { + if(!sel.id.empty()) { + if (strcmp(btnPlay.label, "Pause") == 0) { + Audio_Pause(); + btnPlay.label = "Play"; + } else if(Audio_IsPaused()) { + btnPlay.label = "Pause"; + Audio_Resume(); + } + } + } + if (hit(tx,ty,rN)) { + if (gQueueIndex >= 0 && gQueueIndex + 1 < gPlayQueue.size()) { + gQueueIndex++; + auto nextId = gPlayQueue[gQueueIndex]; + + for (auto &it : items) + if (it.id == nextId.id) + sel = it; + + if(nextId.isFlac) { + Audio_Play(ss.streamUrl(nextId.id).c_str()); + } else { + Audio_PlayMP3(ss.streamUrl(nextId.id).c_str()); + } + btnPlay.label = "Pause"; + } + } + } + + float prog = Audio_GetProgress(); + + if (Audio_IsPlaying()) { + sliderVal = prog; + } else { + // Track finished → advance queue + if (gQueueIndex >= 0 && gQueueIndex + 1 < gPlayQueue.size()) { + + gQueueIndex++; + auto nextId = gPlayQueue[gQueueIndex]; + + // update UI selection + for (auto &it : items) + if (it.id == nextId.id) + sel = it; + + if(nextId.isFlac) { + Audio_Play(ss.streamUrl(nextId.id).c_str()); + } else { + Audio_PlayMP3(ss.streamUrl(nextId.id).c_str()); + } + btnPlay.label = "Pause"; + } else { + // queue finished + btnPlay.label = "Play"; + gQueueIndex = -1; + sel = {}; + } + } + + + // ------------------------- + // DRAW UI + // ------------------------- + SDL_SetRenderDrawColor(rd,25,25,35,255); + SDL_RenderClear(rd); + processPendingArt(rd); + renderTabs(rd, fontMed, tabs); + + // INERTIA SCROLLING UPDATE + if (inertial && !dragging) { + + // apply inertia + scrollOffset += (int)velocity; + + // decay the velocity + velocity *= friction; + + // stop when very slow + if (fabs(velocity) < 0.3f) + inertial = false; + + // clamp + if (scrollOffset < 0) { + scrollOffset = 0; + inertial = false; + velocity = 0; + } + if (scrollOffset > maxScroll) { + scrollOffset = maxScroll; + inertial = false; + velocity = 0; + } + } + renderLeftList(rd, listTex, albumTex, fontSmall, + items, scrollOffset, ss); + + if (!sel.id.empty()) { + renderRightPanel(rd, fontBig, fontMed, + selArt,albumTex, ss); + } + // Slider + renderSlider(rd, sliderBar, sliderVal); + + // Controls + renderControls(rd, fontSmall, btnBack, btnPlay, btnNext); + + SDL_RenderPresent(rd); + SDL_Delay(16); + } + + for (auto& kv : gArtCache) { + if (kv.second) + SDL_DestroyTexture(kv.second); + } + gArtCache.clear(); + + if (albumTex) SDL_DestroyTexture(albumTex); + if (listTex) SDL_DestroyTexture(listTex); + + gArtThreadRunning = false; + sceKernelWaitThreadEnd(gArtThread, NULL, NULL); + + SDL_Quit(); + sceKernelExitProcess(0); + return 0; +} diff --git a/upload.sh b/upload.sh new file mode 100755 index 0000000..5518047 --- /dev/null +++ b/upload.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# File to upload +FILE="make/subsonic.vpk" + +# FTP server + target directory +FTP_URL="ftp://192.168.8.31:1337/ux0:/VPK/" + +# Upload with curl +curl -T "$FILE" "$FTP_URL" diff --git a/vpk/Roboto-Regular.ttf b/vpk/Roboto-Regular.ttf new file mode 100644 index 0000000..bba55f6 Binary files /dev/null and b/vpk/Roboto-Regular.ttf differ diff --git a/vpk/album.jpg b/vpk/album.jpg new file mode 100644 index 0000000..a221dd5 Binary files /dev/null and b/vpk/album.jpg differ