diff --git a/.gitattributes b/.gitattributes index c7d9f3332a950355d5a77d85000f05e6f45435ea..a8cbcf2bc162fefbee144cfb64bc4a8dd3391f56 100644 --- a/.gitattributes +++ b/.gitattributes @@ -32,3 +32,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +results/test_output.wav filter=lfs diff=lfs merge=lfs -text +test_output.wav filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..c18db5d97435f411d05b6687b7821b5aab1e4237 --- /dev/null +++ b/.gitignore @@ -0,0 +1,20 @@ +.idea +*.pyc +__pycache__/ +*.sh +local_tools/ +*.ckpt +*.pth +infer_out/ +*.onnx +data/ +checkpoints/ +processcmd.py +.vscode +WPy64-38100 +Winpython64-3.8.10.0dot.exe +*.pkf +*.wav +*.json +*.flac +*.xmp \ No newline at end of file diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000000000000000000000000000000000000..0ad25db4bd1d86c452db3f9602ccdbe172438f52 --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,661 @@ + GNU AFFERO GENERAL PUBLIC LICENSE + Version 3, 19 November 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU Affero General Public License is a free, copyleft license for +software and other kinds of works, specifically designed to ensure +cooperation with the community in the case of network server software. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +our General Public Licenses are intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + Developers that use our General Public Licenses protect your rights +with two steps: (1) assert copyright on the software, and (2) offer +you this License which gives you legal permission to copy, distribute +and/or modify the software. + + A secondary benefit of defending all users' freedom is that +improvements made in alternate versions of the program, if they +receive widespread use, become available for other developers to +incorporate. Many developers of free software are heartened and +encouraged by the resulting cooperation. However, in the case of +software used on network servers, this result may fail to come about. +The GNU General Public License permits making a modified version and +letting the public access it on a server without ever releasing its +source code to the public. + + The GNU Affero General Public License is designed specifically to +ensure that, in such cases, the modified source code becomes available +to the community. It requires the operator of a network server to +provide the source code of the modified version running there to the +users of that server. 
Therefore, public use of a modified version, on +a publicly accessible server, gives the public access to the source +code of the modified version. + + An older license, called the Affero General Public License and +published by Affero, was designed to accomplish similar goals. This is +a different license, not a version of the Affero GPL, but Affero has +released a new version of the Affero GPL which permits relicensing under +this license. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU Affero General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. 
A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. 
+ + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. 
+ + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. 
+ + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. 
If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). 
To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Remote Network Interaction; Use with the GNU General Public License. 
+ + Notwithstanding any other provision of this License, if you modify the +Program, your modified version must prominently offer all users +interacting with it remotely through a computer network (if your version +supports such interaction) an opportunity to receive the Corresponding +Source of your version by providing access to the Corresponding Source +from a network server at no charge, through some standard or customary +means of facilitating copying of software. This Corresponding Source +shall include the Corresponding Source for any work covered by version 3 +of the GNU General Public License that is incorporated pursuant to the +following paragraph. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the work with which it is combined will remain governed by version +3 of the GNU General Public License. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU Affero General Public License from time to time. Such new versions +will be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU Affero General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU Affero General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU Affero General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. 
+ + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published + by the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If your software can interact with users remotely through a computer +network, you should also make sure that it provides a way for users to +get its source. For example, if your program is a web application, its +interface could display a "Source" link that leads users to an archive +of the code. There are many ways you could offer source, and different +solutions will be better for different programs; see section 13 for the +specific requirements. + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU AGPL, see +. 
diff --git a/README.md b/README.md index 74e0543c7c9b8563b1ab7786b0085a635235443e..e5f7b8080698706ff92ea738af4651a71339d5ee 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,60 @@
----
-title: Model
-emoji: 💩
-colorFrom: red
-colorTo: gray
-sdk: gradio
-sdk_version: 3.15.0
-app_file: app.py
-pinned: false
----
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+# Diff-SVC
+Singing Voice Conversion via diffusion model
+
+This fork just swaps parselmouth for WORLD as the inference-time pitch estimator (hopefully).
+
+## Updates:
+>2022.12.4 The 44.1kHz vocoder is open for application; 44.1kHz is now officially supported\
+2022.11.28 Added the no_fs2 option (enabled by default), which streamlines part of the network, speeds up training and shrinks the model; it applies to newly trained models\
+2022.11.23 Fixed a major bug that could resample the original ground-truth audio used for inference to 22.05kHz. We apologize for the impact; please check your test audio and use the updated code\
+2022.11.22 Fixed many bugs, including several that seriously affected inference quality\
+2022.11.20 Added input and output support for most audio formats at inference time; no manual conversion with other software is needed\
+2022.11.13 Fixed the epoch/steps display when resuming from a checkpoint, added a disk cache for f0 processing, and added support files for real-time voice-conversion inference\
+2022.11.11 Fixed slice-length errors, added 44.1kHz support, and added support for contentvec\
+2022.11.4 Added mel-spectrogram saving\
+2022.11.2 Integrated the new vocoder code and updated the parselmouth algorithm\
+2022.10.29 Cleaned up the inference code and added automatic slicing of long audio.\
+2022.10.28 Migrated hubert inference from onnx to torch and cleaned up the inference logic.\
+If you previously downloaded the onnx hubert model, re-download it and replace it with the pt model; the config does not need to change. GPU inference and preprocessing now run directly on a 1060 with 6GB VRAM; see the documentation for details.\
+2022.10.27 Updated the dependency file and removed redundant dependencies.\
+2022.10.27 Fixed a serious bug that made hubert run on CPU even on GPU servers, slowing preprocessing and inference by 3-5x (training was unaffected)\
+2022.10.26 Fixed data preprocessed on Windows being unusable on Linux, and updated parts of the documentation\
+2022.10.25 Wrote detailed inference/training documentation, refactored and consolidated part of the code, and added support for ogg audio (use it exactly like wav, no distinction needed)\
+2022.10.24 Added support for training on custom datasets and simplified the code\
+2022.10.22 Finished training on the opencpop dataset and created this repository
+
+## Notice:
+>This project was created for academic exchange, not for production use, and takes no responsibility for copyright issues arising from any audio produced by its models.\
+If you redistribute the code in this repository or publicly publish any output of this project (including, but not limited to, uploads to video sites), please credit the original author and the source of the code (this repository).\
+If you wish to use this project as part of any other project, please contact and inform the author of this repository in advance. Thank you.
+
+## Inference:
+
+>See ./inference.ipynb
+
+
+## Preprocessing:
+```
+export PYTHONPATH=.
+CUDA_VISIBLE_DEVICES=0 python preprocessing/binarize.py --config training/config.yaml
+```
+## Training:
+```
+CUDA_VISIBLE_DEVICES=0 python run.py --config training/config.yaml --exp_name [your project name] --reset
+```
+See the [training and inference guide](./doc/train_and_inference.markdown) for the detailed training procedure and a description of every parameter.
+### Trained models
+>This project has already been trained and tested on many datasets. Some ckpt files, demo audio, and the other files needed for inference and training can be downloaded from the QQ channel below.\
+Scan this QR code with QQ (if you cannot join, try a more suitable network environment):
+
+For English support, you can join this discord:
+
+[![Discord](https://img.shields.io/discord/1044927142900809739?color=%23738ADB&label=Discord&style=for-the-badge)](https://discord.gg/jvA5c2xzSE)
+
+## Acknowledgements
+>This project is built on [diffsinger](https://github.com/MoonInTheRiver/DiffSinger), [diffsinger (openvpi maintained fork)](https://github.com/openvpi/DiffSinger) and [soft-vc](https://github.com/bshall/soft-vc).\
+Many thanks to the openvpi members for their help during development and training.
+>Note: this project has no connection with the paper of the same name, [DiffSVC](https://arxiv.org/abs/2105.13871); please do not confuse the two.
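+
+## Appendix: minimal WORLD F0 sketch
+>The snippet below is only an illustrative sketch of estimating F0 with WORLD through the `pyworld` package (DIO + StoneMask); it is not the extractor actually wired into this repository's inference code, and the function name and default arguments are assumptions that simply mirror values found in `config.yaml` (`audio_sample_rate`, `hop_size`, `f0_min`, `f0_max`).
+```python
+import librosa
+import numpy as np
+import pyworld as pw
+
+
+def extract_f0_world(wav_path, sample_rate=24000, hop_size=128, f0_min=50.0, f0_max=1100.0):
+    """Estimate one F0 value per hop with WORLD; 0 marks unvoiced frames."""
+    audio, sr = librosa.load(wav_path, sr=sample_rate, mono=True)
+    audio = audio.astype(np.float64)         # pyworld expects float64 input
+    frame_period = 1000.0 * hop_size / sr    # hop length in milliseconds
+    f0, t = pw.dio(audio, sr, f0_floor=f0_min, f0_ceil=f0_max, frame_period=frame_period)
+    f0 = pw.stonemask(audio, f0, t, sr)      # refine the coarse DIO track
+    return f0
+
+
+if __name__ == "__main__":
+    f0 = extract_f0_world("raw/test_input.wav")  # hypothetical input path
+    print(f0.shape, f0[f0 > 0].mean())
+```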
diff --git a/batch.py b/batch.py new file mode 100644 index 0000000000000000000000000000000000000000..07b283e6de56d70c3dac2883830ab84f132ec4c5 --- /dev/null +++ b/batch.py @@ -0,0 +1,43 @@ +import soundfile + +from infer_tools import infer_tool +from infer_tools.infer_tool import Svc + + +def run_clip(svc_model, key, acc, use_pe, use_crepe, thre, use_gt_mel, add_noise_step, project_name='', f_name=None, + file_path=None, out_path=None): + raw_audio_path = f_name + infer_tool.format_wav(raw_audio_path) + _f0_tst, _f0_pred, _audio = svc_model.infer(raw_audio_path, key=key, acc=acc, singer=True, use_pe=use_pe, + use_crepe=use_crepe, + thre=thre, use_gt_mel=use_gt_mel, add_noise_step=add_noise_step) + out_path = f'./singer_data/{f_name.split("/")[-1]}' + soundfile.write(out_path, _audio, 44100, 'PCM_16') + + +if __name__ == '__main__': + # 工程文件夹名,训练时用的那个 + project_name = "firefox" + model_path = f'./checkpoints/{project_name}/clean_model_ckpt_steps_100000.ckpt' + config_path = f'./checkpoints/{project_name}/config.yaml' + + # 支持多个wav/ogg文件,放在raw文件夹下,带扩展名 + file_names = infer_tool.get_end_file("./batch", "wav") + trans = [-6] # 音高调整,支持正负(半音),数量与上一行对应,不足的自动按第一个移调参数补齐 + # 加速倍数 + accelerate = 50 + hubert_gpu = True + cut_time = 30 + + # 下面不动 + infer_tool.mkdir(["./batch", "./singer_data"]) + infer_tool.fill_a_to_b(trans, file_names) + + model = Svc(project_name, config_path, hubert_gpu, model_path) + count = 0 + for f_name, tran in zip(file_names, trans): + print(f_name) + run_clip(model, key=tran, acc=accelerate, use_crepe=False, thre=0.05, use_pe=False, use_gt_mel=False, + add_noise_step=500, f_name=f_name, project_name=project_name) + count += 1 + print(f"process:{round(count * 100 / len(file_names), 2)}%") diff --git a/ckpt.jpg b/ckpt.jpg new file mode 100644 index 0000000000000000000000000000000000000000..589c02413686da308443a1b03b5f19e1c13d6d47 Binary files /dev/null and b/ckpt.jpg differ diff --git a/config.yaml b/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d49f0ecb2e118cc255d97e6077eb8e62046f4a05 --- /dev/null +++ b/config.yaml @@ -0,0 +1,349 @@ +K_step: 1000 +accumulate_grad_batches: 1 +audio_num_mel_bins: 80 +audio_sample_rate: 24000 +binarization_args: + shuffle: false + with_align: true + with_f0: true + with_hubert: true + with_spk_embed: false + with_wav: false +binarizer_cls: preprocessing.SVCpre.SVCBinarizer +binary_data_dir: data/binary/atri +check_val_every_n_epoch: 10 +choose_test_manually: false +clip_grad_norm: 1 +config_path: training/config.yaml +content_cond_steps: [] +cwt_add_f0_loss: false +cwt_hidden_size: 128 +cwt_layers: 2 +cwt_loss: l1 +cwt_std_scale: 0.8 +datasets: +- opencpop +debug: false +dec_ffn_kernel_size: 9 +dec_layers: 4 +decay_steps: 30000 +decoder_type: fft +dict_dir: '' +diff_decoder_type: wavenet +diff_loss_type: l2 +dilation_cycle_length: 4 +dropout: 0.1 +ds_workers: 4 +dur_enc_hidden_stride_kernel: +- 0,2,3 +- 0,2,3 +- 0,1,3 +dur_loss: mse +dur_predictor_kernel: 3 +dur_predictor_layers: 5 +enc_ffn_kernel_size: 9 +enc_layers: 4 +encoder_K: 8 +encoder_type: fft +endless_ds: False +f0_bin: 256 +f0_max: 1100.0 +f0_min: 50.0 +ffn_act: gelu +ffn_padding: SAME +fft_size: 512 +fmax: 12000 +fmin: 30 +fs2_ckpt: '' +gaussian_start: true +gen_dir_name: '' +gen_tgt_spk_id: -1 +hidden_size: 256 +hop_size: 128 +hubert_gpu: true +hubert_path: checkpoints/hubert/hubert_soft.pt +infer: false +keep_bins: 80 +lambda_commit: 0.25 +lambda_energy: 0.0 +lambda_f0: 1.0 +lambda_ph_dur: 0.3 +lambda_sent_dur: 1.0 +lambda_uv: 1.0 
+lambda_word_dur: 1.0 +load_ckpt: '' +log_interval: 100 +loud_norm: false +lr: 5.0e-05 +max_beta: 0.02 +max_epochs: 3000 +max_eval_sentences: 1 +max_eval_tokens: 60000 +max_frames: 42000 +max_input_tokens: 60000 +max_sentences: 24 +max_tokens: 128000 +max_updates: 1000000 +mel_loss: ssim:0.5|l1:0.5 +mel_vmax: 1.5 +mel_vmin: -6.0 +min_level_db: -120 +norm_type: gn +num_ckpt_keep: 10 +num_heads: 2 +num_sanity_val_steps: 1 +num_spk: 1 +num_test_samples: 0 +num_valid_plots: 10 +optimizer_adam_beta1: 0.9 +optimizer_adam_beta2: 0.98 +out_wav_norm: false +pe_ckpt: checkpoints/0102_xiaoma_pe/model_ckpt_steps_60000.ckpt +pe_enable: false +perform_enhance: true +pitch_ar: false +pitch_enc_hidden_stride_kernel: +- 0,2,5 +- 0,2,5 +- 0,2,5 +pitch_extractor: parselmouth +pitch_loss: l2 +pitch_norm: log +pitch_type: frame +pndm_speedup: 10 +pre_align_args: + allow_no_txt: false + denoise: false + forced_align: mfa + txt_processor: zh_g2pM + use_sox: true + use_tone: false +pre_align_cls: data_gen.singing.pre_align.SingingPreAlign +predictor_dropout: 0.5 +predictor_grad: 0.1 +predictor_hidden: -1 +predictor_kernel: 5 +predictor_layers: 5 +prenet_dropout: 0.5 +prenet_hidden_size: 256 +pretrain_fs_ckpt: pretrain/nyaru/model_ckpt_steps_60000.ckpt +processed_data_dir: xxx +profile_infer: false +raw_data_dir: data/raw/atri +ref_norm_layer: bn +rel_pos: true +reset_phone_dict: true +residual_channels: 256 +residual_layers: 20 +save_best: false +save_ckpt: true +save_codes: +- configs +- modules +- src +- utils +save_f0: true +save_gt: false +schedule_type: linear +seed: 1234 +sort_by_len: true +speaker_id: atri +spec_max: +- 0.2987259328365326 +- 0.29721200466156006 +- 0.23978209495544434 +- 0.208412766456604 +- 0.25777050852775574 +- 0.2514476478099823 +- 0.1129382848739624 +- 0.03415697440505028 +- 0.09860049188137054 +- 0.10637332499027252 +- 0.13287633657455444 +- 0.19744250178337097 +- 0.10040587931871414 +- 0.13735432922840118 +- 0.15107455849647522 +- 0.17196381092071533 +- 0.08298977464437485 +- 0.0632769986987114 +- 0.02723858878016472 +- -0.001819317927584052 +- -0.029565516859292984 +- -0.023574354127049446 +- -0.01633293740451336 +- 0.07143621146678925 +- 0.021580500528216362 +- 0.07257916033267975 +- -0.024349519982933998 +- -0.06165708228945732 +- -0.10486568510532379 +- -0.1363687664270401 +- -0.13333871960639954 +- -0.13955898582935333 +- -0.16613495349884033 +- -0.17636367678642273 +- -0.2786925733089447 +- -0.22967253625392914 +- -0.31897130608558655 +- -0.18007366359233856 +- -0.29366692900657654 +- -0.2871025800704956 +- -0.36748355627059937 +- -0.46071451902389526 +- -0.5464922189712524 +- -0.5719417333602905 +- -0.6020897626876831 +- -0.6239874958992004 +- -0.5653440952301025 +- -0.6508013606071472 +- -0.628247857093811 +- -0.6809687614440918 +- -0.569259762763977 +- -0.5423558354377747 +- -0.5811785459518433 +- -0.5359002351760864 +- -0.6565515398979187 +- -0.7143737077713013 +- -0.8502675890922546 +- -0.7979224920272827 +- -0.7110578417778015 +- -0.763409435749054 +- -0.7984790802001953 +- -0.6927220821380615 +- -0.658117413520813 +- -0.7486468553543091 +- -0.5949879884719849 +- -0.7494576573371887 +- -0.7400822639465332 +- -0.6822793483734131 +- -0.7773582339286804 +- -0.661201536655426 +- -0.791329026222229 +- -0.8982341885566711 +- -0.8736728429794312 +- -0.7701027393341064 +- -0.8490535616874695 +- -0.7479292154312134 +- -0.9320166110992432 +- -1.2862414121627808 +- -2.8936190605163574 +- -2.924229860305786 +spec_min: +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- 
-6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -5.999454021453857 +- -5.8822431564331055 +- -5.892064571380615 +- -5.882402420043945 +- -5.786972522735596 +- -5.746835231781006 +- -5.8594512939453125 +- -5.7389445304870605 +- -5.718059539794922 +- -5.779720306396484 +- -5.801984786987305 +- -6.0 +- -6.0 +spk_cond_steps: [] +stop_token_weight: 5.0 +task_cls: training.task.SVC_task.SVCTask +test_ids: [] +test_input_dir: '' +test_num: 0 +test_prefixes: +- test +test_set_name: test +timesteps: 1000 +train_set_name: train +use_crepe: true +use_denoise: false +use_energy_embed: false +use_gt_dur: false +use_gt_f0: false +use_midi: false +use_nsf: true +use_pitch_embed: true +use_pos_embed: true +use_spk_embed: false +use_spk_id: false +use_split_spk_id: false +use_uv: false +use_var_enc: false +use_vec: false +val_check_interval: 2000 +valid_num: 0 +valid_set_name: valid +vocoder: network.vocoders.hifigan.HifiGAN +vocoder_ckpt: checkpoints/0109_hifigan_bigpopcs_hop128 +warmup_updates: 2000 +wav2spec_eps: 1e-6 +weight_decay: 0 +win_size: 512 +work_dir: checkpoints/atri +no_fs2: false \ No newline at end of file diff --git a/doc/train_and_inference.markdown b/doc/train_and_inference.markdown new file mode 100644 index 0000000000000000000000000000000000000000..eed0d6e9ad470db1d3fa8d8410d5c51fb026a19f --- /dev/null +++ b/doc/train_and_inference.markdown @@ -0,0 +1,210 @@ +# Diff-SVC(train/inference by yourself) +## 0.环境配置 +>注意:requirements文件已更新,目前分为3个版本,可自行选择使用。\ +1. requirements.txt 是此仓库测试的原始完整环境,Torch1.12.1+cu113,可选择直接pip 或删除其中与pytorch有关的项目(torch/torchvision)后再pip,并使用自己的torch环境 +``` +pip install -r requirements.txt +``` +>2. (推荐)requirements_short.txt 是上述环境的手动整理版,不含torch本体,也可以直接 +``` +pip install -r requirements_short.txt +``` +>3. 
根目录下有一份@三千整理的依赖列表requirements.png,是在某品牌云服务器上跑通的,不过此torch版本已不兼容目前版本代码,但是其他部分版本可以参考,十分感谢 + +## 1.推理 +>使用根目录下的inference.ipynb进行推理或使用经过作者适配的@小狼的infer.py\ +在第一个block中修改如下参数: +``` +config_path='checkpoints压缩包中config.yaml的位置' +如'./checkpoints/nyaru/config.yaml' +config和checkpoints是一一对应的,请不要使用其他config + +project_name='这个项目的名称' +如'nyaru' + +model_path='ckpt文件的全路径' +如'./checkpoints/nyaru/model_ckpt_steps_112000.ckpt' + +hubert_gpu=True +推理时是否使用gpu推理hubert(模型中的一个模块),不影响模型的其他部分 +目前版本已大幅减小hubert的gpu占用,在1060 6G显存下可完整推理,不需要关闭了。 +另外现已支持长音频自动切片功能(ipynb和infer.py均可),超过30s的音频将自动在静音处切片处理,感谢@小狼的代码 + +``` +### 可调节参数: +``` +wav_fn='xxx.wav'#传入音频的路径,默认在项目根目录中 + +use_crepe=True +#crepe是一个F0算法,效果好但速度慢,改成False会使用效果稍逊于crepe但较快的parselmouth算法 + +thre=0.05 +#crepe的噪声过滤阈值,源音频干净可适当调大,噪音多就保持这个数值或者调小,前面改成False后这个参数不起作用 + +pndm_speedup=20 +#推理加速算法倍数,默认是1000步,这里填成10就是只使用100步合成,是一个中规中矩的数值,这个数值可以高到50倍(20步合成)没有明显质量损失,再大可能会有可观的质量损失,注意如果下方开启了use_gt_mel, 应保证这个数值小于add_noise_step,并尽量让其能够整除 + +key=0 +#变调参数,默认为0(不是1!!),将源音频的音高升高key个半音后合成,如男声转女生,可填入8或者12等(12就是升高一整个8度) + +use_pe=True +#梅尔谱合成音频时使用的F0提取算法,如果改成False将使用源音频的F0\ +这里填True和False合成会略有差异,通常是True会好些,但也不尽然,对合成速度几乎无影响\ +(无论key填什么 这里都是可以自由选择的,不影响)\ +44.1kHz下不支持此功能,会自动关闭,开着也不报错就是了 + +use_gt_mel=False +#这个选项类似于AI画图的图生图功能,如果打开,产生的音频将是输入声音与目标说话人声音的混合,混合比例由下一个参数确定 +注意!!!:这个参数如果改成True,请确保key填成0,不支持变调 + +add_noise_step=500 +#与上个参数有关,控制两种声音的比例,填入1是完全的源声线,填入1000是完全的目标声线,能听出来是两者均等混合的数值大约在300附近(并不是线性的,另外这个参数如果调的很小,可以把pndm加速倍率调低,增加合成质量) + +wav_gen='yyy.wav'#输出音频的路径,默认在项目根目录中,可通过改变扩展名更改保存文件类型 +``` +如果使用infer.py,修改方式类似,需要修改__name__=='__main__'中的部分,然后在根目录中执行\ +python infer.py\ +这种方式需要将原音频放入raw中并在results中查找结果 +## 2.数据预处理与训练 +### 2.1 准备数据 +>目前支持wav格式和ogg格式的音频数据,采样率最好高于24kHz,程序会自动处理采样率和声道问题。采样率不可低于16kHz(一般不会的)\ +音频需要切片为5-15s为宜的短音频,长度没有具体要求,但不宜过长过短。音频需要为纯目标人干声,不可以有背景音乐和其他人声音,最好也不要有过重的混响等。若经过去伴奏等处理,请尽量保证处理后的音频质量。\ +目前仅支持单人训练,总时长尽量保证在3h或以上,不需要额外任何标注,将音频文件放在下述raw_data_dir下即可,这个目录下的结构可以自由定义,程序会自主找到所需文件。 + +### 2.2 修改超参数配置 +>首先请备份一份config.yaml(此文件对应24kHz声码器, 44.1kHz声码器请使用config_nsf.yaml),然后修改它\ +可能会用到的参数如下(以工程名为nyaru为例): +``` +K_step: 1000 +#diffusion过程总的step,建议不要修改 + +binary_data_dir: data/binary/nyaru +预处理后数据的存放地址:需要将后缀改成工程名字 + +config_path: training/config.yaml +你要使用的这份yaml自身的地址,由于预处理过程中会写入数据,所以这个地址务必修改成将要存放这份yaml文件的完整路径 + +choose_test_manually: false +手动选择测试集,默认关闭,自动随机抽取5条音频作为测试集。 +如果改为ture,请在test_prefixes:中填入测试数据的文件名前缀,程序会将以对应前缀开头的文件作为测试集 +这是个列表,可以填多个前缀,如: +test_prefixes: +- test +- aaaa +- 5012 +- speaker1024 +重要:测试集*不可以*为空,为了不产生意外影响,建议尽量不要手动选择测试集 + +endless_ds:False +如果你的数据集过小,每个epoch时间很短,请将此项打开,将把正常的1000epoch作为一个epoch计算 + +hubert_path: checkpoints/hubert/hubert.pt +hubert模型的存放地址,确保这个路径是对的,一般解压checkpoints包之后就是这个路径不需要改,现已使用torch版本推理 +hubert_gpu:True +是否在预处理时使用gpu运行hubert(模型的一个模块),关闭后使用cpu,但耗时会显著增加。另外模型训练完推理时hubert是否用gpu是在inference中单独控制的,不受此处影响。目前hubert改为torch版后已经可以做到在1060 6G显存gpu上进行预处理,与直接推理1分钟内的音频不超出显存限制,一般不需要关了。 + +lr: 0.0008 +#初始的学习率:这个数字对应于88的batchsize,如果batchsize更小,可以调低这个数值一些 + +decay_steps: 20000 +每20000步学习率衰减为原来的一半,如果batchsize比较小,请调大这个数值 + +#对于30-40左右的batchsize,推荐lr=0.0004,decay_steps=40000 + +max_frames: 42000 +max_input_tokens: 6000 +max_sentences: 88 +max_tokens: 128000 +#batchsize是由这几个参数动态算出来的,如果不太清楚具体含义,可以只改动max_sentences这个参数,填入batchsize的最大限制值,以免炸显存 + +pe_ckpt: checkpoints/0102_xiaoma_pe/model_ckpt_steps_60000.ckpt +#pe模型路径,确保这个文件存在,具体作用参考inference部分 + +raw_data_dir: data/raw/nyaru +#存放预处理前原始数据的位置,请将原始wav数据放在这个目录下,内部文件结构无所谓,会自动解构 + +residual_channels: 384 +residual_layers: 20 +#控制核心网络规模的一组参数,越大参数越多炼的越慢,但效果不一定会变好,大一点的数据集可以把第一个改成512。这个可以自行实验效果,不过不了解的话尽量不动。 + +speaker_id: nyaru 
+#训练的说话人名字,目前只支持单说话人,请在这里填写(只是观赏作用,没有实际意义的参数) + +use_crepe: true +#在数据预处理中使用crepe提取F0,追求效果请打开,追求速度可以关闭 + +val_check_interval: 2000 +#每2000steps推理测试集并保存ckpt + +vocoder_ckpt:checkpoints/0109_hifigan_bigpopcs_hop128 +#24kHz下为对应声码器的目录, 44.1kHz下为对应声码器的文件名, 注意不要填错 + +work_dir: checkpoints/nyaru +#修改后缀为工程名(也可以删掉或完全留空自动生成,但别乱填) +no_fs2: true +#对网络encoder的精简,能缩减模型体积,加快训练,且并未发现有对网络表现损害的直接证据。默认打开 + +``` +>其他的参数如果你不知道它是做什么的,请不要修改,即使你看着名称可能以为你知道它是做什么的。 + +### 2.3 数据预处理 +在diff-svc的目录下执行以下命令:\ +#windows +``` +set PYTHONPATH=. +set CUDA_VISIBLE_DEVICES=0 +python preprocessing/binarize.py --config training/config.yaml +``` +#linux +``` +export PYTHONPATH=. +CUDA_VISIBLE_DEVICES=0 python preprocessing/binarize.py --config training/config.yaml +``` +对于预处理,@小狼准备了一份可以分段处理hubert和其他特征的代码,如果正常处理显存不足,可以先python ./network/hubert/hubert_model.py +然后再运行正常的指令,能够识别提前处理好的hubert特征 +### 2.4 训练 +#windows +``` +set CUDA_VISIBLE_DEVICES=0 +python run.py --config training/config.yaml --exp_name nyaru --reset +``` +#linux +``` +CUDA_VISIBLE_DEVICES=0 python run.py --config training/config.yaml --exp_name nyaru --reset +``` +>需要将exp_name改为你的工程名,并修改config路径,请确保和预处理使用的是同一个config文件\ +*重要* :训练完成后,若之前不是在本地数据预处理,除了需要下载对应的ckpt文件,也需要将config文件下载下来,作为推理时使用的config,不可以使用本地之前上传上去那份。因为预处理时会向config文件中写入内容。推理时要保持使用的config和预处理使用的config是同一份。 + + +### 2.5 可能出现的问题: +>2.5.1 'Upsample' object has no attribute 'recompute_scale_factor'\ +此问题发现于cuda11.3对应的torch中,若出现此问题,请通过合适的方法(如ide自动跳转等)找到你的python依赖包中的torch.nn.modules.upsampling.py文件(如conda环境中为conda目录\envs\环境目录\Lib\site-packages\torch\nn\modules\upsampling.py),修改其153-154行 +``` +return F.interpolate(input, self.size, self.scale_factor, self.mode, self.align_corners,recompute_scale_factor=self.recompute_scale_factor) +``` +>改为 +``` +return F.interpolate(input, self.size, self.scale_factor, self.mode, self.align_corners) +# recompute_scale_factor=self.recompute_scale_factor) +``` +>2.5.2 no module named 'utils'\ +请在你的运行环境(如colab笔记本)中以如下方式设置: +``` +import os +os.environ['PYTHONPATH']='.' 
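+# the `!` command below runs in a subprocess that inherits this environment,
+# so PYTHONPATH=. lets binarize.py import `utils` from the project root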
+!CUDA_VISIBLE_DEVICES=0 python preprocessing/binarize.py --config training/config.yaml +``` +注意一定要在项目文件夹的根目录中执行 +>2.5.3 cannot load library 'libsndfile.so'\ +可能会在linux环境中遇到的错误,请执行以下指令 +``` +apt-get install libsndfile1 -y +``` +>2.5.4 cannot load import 'consume_prefix_in_state_dict_if_present'\ +torch版本过低,请更换高版本torch + +>2.5.5 预处理数据过慢\ +检查是否在配置中开启了use_crepe,将其关闭可显著提升速度。\ +检查配置中hubert_gpu是否开启。 + +如有其他问题,请加入QQ频道或discord频道询问。 diff --git a/flask_api.py b/flask_api.py new file mode 100644 index 0000000000000000000000000000000000000000..eaecd0b7305218ad61d0a5a23d44f3312b538f0b --- /dev/null +++ b/flask_api.py @@ -0,0 +1,54 @@ +import io +import logging + +import librosa +import soundfile +from flask import Flask, request, send_file +from flask_cors import CORS + +from infer_tools.infer_tool import Svc +from utils.hparams import hparams + +app = Flask(__name__) + +CORS(app) + +logging.getLogger('numba').setLevel(logging.WARNING) + + +@app.route("/voiceChangeModel", methods=["POST"]) +def voice_change_model(): + request_form = request.form + wave_file = request.files.get("sample", None) + # 变调信息 + f_pitch_change = float(request_form.get("fPitchChange", 0)) + # DAW所需的采样率 + daw_sample = int(float(request_form.get("sampleRate", 0))) + speaker_id = int(float(request_form.get("sSpeakId", 0))) + # http获得wav文件并转换 + input_wav_path = io.BytesIO(wave_file.read()) + # 模型推理 + _f0_tst, _f0_pred, _audio = model.infer(input_wav_path, key=f_pitch_change, acc=accelerate, use_pe=False, + use_crepe=False) + tar_audio = librosa.resample(_audio, hparams["audio_sample_rate"], daw_sample) + # 返回音频 + out_wav_path = io.BytesIO() + soundfile.write(out_wav_path, tar_audio, daw_sample, format="wav") + out_wav_path.seek(0) + return send_file(out_wav_path, download_name="temp.wav", as_attachment=True) + + +if __name__ == '__main__': + # 工程文件夹名,训练时用的那个 + project_name = "firefox" + model_path = f'./checkpoints/{project_name}/model_ckpt_steps_188000.ckpt' + config_path = f'./checkpoints/{project_name}/config.yaml' + + # 加速倍数 + accelerate = 50 + hubert_gpu = True + + model = Svc(project_name, config_path, hubert_gpu, model_path) + + # 此处与vst插件对应,不建议更改 + app.run(port=6842, host="0.0.0.0", debug=False, threaded=False) diff --git a/infer.py b/infer.py new file mode 100644 index 0000000000000000000000000000000000000000..a671ed05af4248a13fcf9225ce21133fc766ae01 --- /dev/null +++ b/infer.py @@ -0,0 +1,98 @@ +import io +import time +from pathlib import Path + +import librosa +import numpy as np +import soundfile + +from infer_tools import infer_tool +from infer_tools import slicer +from infer_tools.infer_tool import Svc +from utils.hparams import hparams + +chunks_dict = infer_tool.read_temp("./infer_tools/new_chunks_temp.json") + + +def run_clip(svc_model, key, acc, use_pe, use_crepe, thre, use_gt_mel, add_noise_step, project_name='', f_name=None, + file_path=None, out_path=None, slice_db=-40,**kwargs): + print(f'code version:2022-12-04') + use_pe = use_pe if hparams['audio_sample_rate'] == 24000 else False + if file_path is None: + raw_audio_path = f"./raw/{f_name}" + clean_name = f_name[:-4] + else: + raw_audio_path = file_path + clean_name = str(Path(file_path).name)[:-4] + infer_tool.format_wav(raw_audio_path) + wav_path = Path(raw_audio_path).with_suffix('.wav') + global chunks_dict + audio, sr = librosa.load(wav_path, mono=True,sr=None) + wav_hash = infer_tool.get_md5(audio) + if wav_hash in chunks_dict.keys(): + print("load chunks from temp") + chunks = chunks_dict[wav_hash]["chunks"] + else: + chunks = slicer.cut(wav_path, 
db_thresh=slice_db) + chunks_dict[wav_hash] = {"chunks": chunks, "time": int(time.time())} + infer_tool.write_temp("./infer_tools/new_chunks_temp.json", chunks_dict) + audio_data, audio_sr = slicer.chunks2audio(wav_path, chunks) + + count = 0 + f0_tst = [] + f0_pred = [] + audio = [] + for (slice_tag, data) in audio_data: + print(f'#=====segment start, {round(len(data) / audio_sr, 3)}s======') + length = int(np.ceil(len(data) / audio_sr * hparams['audio_sample_rate'])) + raw_path = io.BytesIO() + soundfile.write(raw_path, data, audio_sr, format="wav") + if hparams['debug']: + print(np.mean(data), np.var(data)) + raw_path.seek(0) + if slice_tag: + print('jump empty segment') + _f0_tst, _f0_pred, _audio = ( + np.zeros(int(np.ceil(length / hparams['hop_size']))), np.zeros(int(np.ceil(length / hparams['hop_size']))), + np.zeros(length)) + else: + _f0_tst, _f0_pred, _audio = svc_model.infer(raw_path, key=key, acc=acc, use_pe=use_pe, use_crepe=use_crepe, + thre=thre, use_gt_mel=use_gt_mel, add_noise_step=add_noise_step) + fix_audio = np.zeros(length) + fix_audio[:] = np.mean(_audio) + fix_audio[:len(_audio)] = _audio[0 if len(_audio) 50 * 1024 * 1024: + f_name = file_name.split("/")[-1] + print(f"clean {f_name}") + for wav_hash in list(data_dict.keys()): + if int(time.time()) - int(data_dict[wav_hash]["time"]) > 14 * 24 * 3600: + del data_dict[wav_hash] + except Exception as e: + print(e) + print(f"{file_name} error,auto rebuild file") + data_dict = {"info": "temp_dict"} + return data_dict + + +f0_dict = read_temp("./infer_tools/f0_temp.json") + + +def write_temp(file_name, data): + with open(file_name, "w") as f: + f.write(json.dumps(data)) + + +def timeit(func): + def run(*args, **kwargs): + t = time.time() + res = func(*args, **kwargs) + print('executing \'%s\' costed %.3fs' % (func.__name__, time.time() - t)) + return res + + return run + + +def format_wav(audio_path): + if Path(audio_path).suffix=='.wav': + return + raw_audio, raw_sample_rate = librosa.load(audio_path, mono=True,sr=None) + soundfile.write(Path(audio_path).with_suffix(".wav"), raw_audio, raw_sample_rate) + + +def fill_a_to_b(a, b): + if len(a) < len(b): + for _ in range(0, len(b) - len(a)): + a.append(a[0]) + + +def get_end_file(dir_path, end): + file_lists = [] + for root, dirs, files in os.walk(dir_path): + files = [f for f in files if f[0] != '.'] + dirs[:] = [d for d in dirs if d[0] != '.'] + for f_file in files: + if f_file.endswith(end): + file_lists.append(os.path.join(root, f_file).replace("\\", "/")) + return file_lists + + +def mkdir(paths: list): + for path in paths: + if not os.path.exists(path): + os.mkdir(path) + + +def get_md5(content): + return hashlib.new("md5", content).hexdigest() + + +class Svc: + def __init__(self, project_name, config_name, hubert_gpu, model_path): + self.project_name = project_name + self.DIFF_DECODERS = { + 'wavenet': lambda hp: DiffNet(hp['audio_num_mel_bins']), + 'fft': lambda hp: FFT( + hp['hidden_size'], hp['dec_layers'], hp['dec_ffn_kernel_size'], hp['num_heads']), + } + + self.model_path = model_path + self.dev = torch.device("cuda") + + self._ = set_hparams(config=config_name, exp_name=self.project_name, infer=True, + reset=True, + hparams_str='', + print_hparams=False) + + self.mel_bins = hparams['audio_num_mel_bins'] + self.model = GaussianDiffusion( + phone_encoder=Hubertencoder(hparams['hubert_path']), + out_dims=self.mel_bins, denoise_fn=self.DIFF_DECODERS[hparams['diff_decoder_type']](hparams), + timesteps=hparams['timesteps'], + K_step=hparams['K_step'], + 
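+            # timesteps and K_step are taken from the loaded config (hparams) and describe how the diffusion
+            # model was trained, which is one reason inference must reuse the same config as preprocessing/training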
loss_type=hparams['diff_loss_type'], + spec_min=hparams['spec_min'], spec_max=hparams['spec_max'], + ) + self.load_ckpt() + self.model.cuda() + hparams['hubert_gpu'] = hubert_gpu + self.hubert = Hubertencoder(hparams['hubert_path']) + self.pe = PitchExtractor().cuda() + utils.load_ckpt(self.pe, hparams['pe_ckpt'], 'model', strict=True) + self.pe.eval() + self.vocoder = get_vocoder_cls(hparams)() + + def load_ckpt(self, model_name='model', force=True, strict=True): + utils.load_ckpt(self.model, self.model_path, model_name, force, strict) + + def infer(self, in_path, key, acc, use_pe=True, use_crepe=True, thre=0.05, singer=False, **kwargs): + batch = self.pre(in_path, acc, use_crepe, thre) + spk_embed = batch.get('spk_embed') if not hparams['use_spk_id'] else batch.get('spk_ids') + hubert = batch['hubert'] + ref_mels = batch["mels"] + energy=batch['energy'] + mel2ph = batch['mel2ph'] + batch['f0'] = batch['f0'] + (key / 12) + batch['f0'][batch['f0']>np.log2(hparams['f0_max'])]=0 + f0 = batch['f0'] + uv = batch['uv'] + @timeit + def diff_infer(): + outputs = self.model( + hubert.cuda(), spk_embed=spk_embed, mel2ph=mel2ph.cuda(), f0=f0.cuda(), uv=uv.cuda(),energy=energy.cuda(), + ref_mels=ref_mels.cuda(), + infer=True, **kwargs) + return outputs + outputs=diff_infer() + batch['outputs'] = self.model.out2mel(outputs['mel_out']) + batch['mel2ph_pred'] = outputs['mel2ph'] + batch['f0_gt'] = denorm_f0(batch['f0'], batch['uv'], hparams) + if use_pe: + batch['f0_pred'] = self.pe(outputs['mel_out'])['f0_denorm_pred'].detach() + else: + batch['f0_pred'] = outputs.get('f0_denorm') + return self.after_infer(batch, singer, in_path) + + @timeit + def after_infer(self, prediction, singer, in_path): + for k, v in prediction.items(): + if type(v) is torch.Tensor: + prediction[k] = v.cpu().numpy() + + # remove paddings + mel_gt = prediction["mels"] + mel_gt_mask = np.abs(mel_gt).sum(-1) > 0 + + mel_pred = prediction["outputs"] + mel_pred_mask = np.abs(mel_pred).sum(-1) > 0 + mel_pred = mel_pred[mel_pred_mask] + mel_pred = np.clip(mel_pred, hparams['mel_vmin'], hparams['mel_vmax']) + + f0_gt = prediction.get("f0_gt") + f0_pred = prediction.get("f0_pred") + if f0_pred is not None: + f0_gt = f0_gt[mel_gt_mask] + if len(f0_pred) > len(mel_pred_mask): + f0_pred = f0_pred[:len(mel_pred_mask)] + f0_pred = f0_pred[mel_pred_mask] + torch.cuda.is_available() and torch.cuda.empty_cache() + + if singer: + data_path = in_path.replace("batch", "singer_data") + mel_path = data_path[:-4] + "_mel.npy" + f0_path = data_path[:-4] + "_f0.npy" + np.save(mel_path, mel_pred) + np.save(f0_path, f0_pred) + wav_pred = self.vocoder.spec2wav(mel_pred, f0=f0_pred) + return f0_gt, f0_pred, wav_pred + + def temporary_dict2processed_input(self, item_name, temp_dict, use_crepe=True, thre=0.05): + ''' + process data in temporary_dicts + ''' + + binarization_args = hparams['binarization_args'] + + @timeit + def get_pitch(wav, mel): + # get ground truth f0 by self.get_pitch_algorithm + global f0_dict + if use_crepe: + md5 = get_md5(wav) + if f"{md5}_gt" in f0_dict.keys(): + print("load temp crepe f0") + gt_f0 = np.array(f0_dict[f"{md5}_gt"]["f0"]) + coarse_f0 = np.array(f0_dict[f"{md5}_coarse"]["f0"]) + else: + torch.cuda.is_available() and torch.cuda.empty_cache() + gt_f0, coarse_f0 = get_pitch_crepe(wav, mel, hparams, thre) + f0_dict[f"{md5}_gt"] = {"f0": gt_f0.tolist(), "time": int(time.time())} + f0_dict[f"{md5}_coarse"] = {"f0": coarse_f0.tolist(), "time": int(time.time())} + write_temp("./infer_tools/f0_temp.json", f0_dict) + else: + md5 
= get_md5(wav) + if f"{md5}_gt_harvest" in f0_dict.keys(): + print("load temp harvest f0") + gt_f0 = np.array(f0_dict[f"{md5}_gt_harvest"]["f0"]) + coarse_f0 = np.array(f0_dict[f"{md5}_coarse_harvest"]["f0"]) + else: + gt_f0, coarse_f0 = get_pitch_world(wav, mel, hparams) + f0_dict[f"{md5}_gt_harvest"] = {"f0": gt_f0.tolist(), "time": int(time.time())} + f0_dict[f"{md5}_coarse_harvest"] = {"f0": coarse_f0.tolist(), "time": int(time.time())} + write_temp("./infer_tools/f0_temp.json", f0_dict) + processed_input['f0'] = gt_f0 + processed_input['pitch'] = coarse_f0 + + def get_align(mel, phone_encoded): + mel2ph = np.zeros([mel.shape[0]], int) + start_frame = 0 + ph_durs = mel.shape[0] / phone_encoded.shape[0] + if hparams['debug']: + print(mel.shape, phone_encoded.shape, mel.shape[0] / phone_encoded.shape[0]) + for i_ph in range(phone_encoded.shape[0]): + end_frame = int(i_ph * ph_durs + ph_durs + 0.5) + mel2ph[start_frame:end_frame + 1] = i_ph + 1 + start_frame = end_frame + 1 + + processed_input['mel2ph'] = mel2ph + + if hparams['vocoder'] in VOCODERS: + wav, mel = VOCODERS[hparams['vocoder']].wav2spec(temp_dict['wav_fn']) + else: + wav, mel = VOCODERS[hparams['vocoder'].split('.')[-1]].wav2spec(temp_dict['wav_fn']) + processed_input = { + 'item_name': item_name, 'mel': mel, + 'sec': len(wav) / hparams['audio_sample_rate'], 'len': mel.shape[0] + } + processed_input = {**temp_dict, **processed_input} # merge two dicts + + if binarization_args['with_f0']: + get_pitch(wav, mel) + if binarization_args['with_hubert']: + st = time.time() + hubert_encoded = processed_input['hubert'] = self.hubert.encode(temp_dict['wav_fn']) + et = time.time() + dev = 'cuda' if hparams['hubert_gpu'] and torch.cuda.is_available() else 'cpu' + print(f'hubert (on {dev}) time used {et - st}') + + if binarization_args['with_align']: + get_align(mel, hubert_encoded) + return processed_input + + def pre(self, wav_fn, accelerate, use_crepe=True, thre=0.05): + if isinstance(wav_fn, BytesIO): + item_name = self.project_name + else: + song_info = wav_fn.split('/') + item_name = song_info[-1].split('.')[-2] + temp_dict = {'wav_fn': wav_fn, 'spk_id': self.project_name} + + temp_dict = self.temporary_dict2processed_input(item_name, temp_dict, use_crepe, thre) + hparams['pndm_speedup'] = accelerate + batch = processed_input2batch([getitem(temp_dict)]) + return batch + + +def getitem(item): + max_frames = hparams['max_frames'] + spec = torch.Tensor(item['mel'])[:max_frames] + energy = (spec.exp() ** 2).sum(-1).sqrt() + mel2ph = torch.LongTensor(item['mel2ph'])[:max_frames] if 'mel2ph' in item else None + f0, uv = norm_interp_f0(item["f0"][:max_frames], hparams) + hubert = torch.Tensor(item['hubert'][:hparams['max_input_tokens']]) + pitch = torch.LongTensor(item.get("pitch"))[:max_frames] + sample = { + "item_name": item['item_name'], + "hubert": hubert, + "mel": spec, + "pitch": pitch, + "energy": energy, + "f0": f0, + "uv": uv, + "mel2ph": mel2ph, + "mel_nonpadding": spec.abs().sum(-1) > 0, + } + return sample + + +def processed_input2batch(samples): + ''' + Args: + samples: one batch of processed_input + NOTE: + the batch size is controlled by hparams['max_sentences'] + ''' + if len(samples) == 0: + return {} + item_names = [s['item_name'] for s in samples] + hubert = utils.collate_2d([s['hubert'] for s in samples], 0.0) + f0 = utils.collate_1d([s['f0'] for s in samples], 0.0) + pitch = utils.collate_1d([s['pitch'] for s in samples]) + uv = utils.collate_1d([s['uv'] for s in samples]) + energy = utils.collate_1d([s['energy'] for 
s in samples], 0.0) + mel2ph = utils.collate_1d([s['mel2ph'] for s in samples], 0.0) \ + if samples[0]['mel2ph'] is not None else None + mels = utils.collate_2d([s['mel'] for s in samples], 0.0) + mel_lengths = torch.LongTensor([s['mel'].shape[0] for s in samples]) + + batch = { + 'item_name': item_names, + 'nsamples': len(samples), + 'hubert': hubert, + 'mels': mels, + 'mel_lengths': mel_lengths, + 'mel2ph': mel2ph, + 'energy': energy, + 'pitch': pitch, + 'f0': f0, + 'uv': uv, + } + return batch diff --git a/infer_tools/slicer.py b/infer_tools/slicer.py new file mode 100644 index 0000000000000000000000000000000000000000..35a888b906e7df8634cfdcec914f650c6cefd26a --- /dev/null +++ b/infer_tools/slicer.py @@ -0,0 +1,158 @@ +import time + +import numpy as np +import torch +import torchaudio +from scipy.ndimage import maximum_filter1d, uniform_filter1d + + +def timeit(func): + def run(*args, **kwargs): + t = time.time() + res = func(*args, **kwargs) + print('executing \'%s\' costed %.3fs' % (func.__name__, time.time() - t)) + return res + + return run + + +# @timeit +def _window_maximum(arr, win_sz): + return maximum_filter1d(arr, size=win_sz)[win_sz // 2: win_sz // 2 + arr.shape[0] - win_sz + 1] + + +# @timeit +def _window_rms(arr, win_sz): + filtered = np.sqrt(uniform_filter1d(np.power(arr, 2), win_sz) - np.power(uniform_filter1d(arr, win_sz), 2)) + return filtered[win_sz // 2: win_sz // 2 + arr.shape[0] - win_sz + 1] + + +def level2db(levels, eps=1e-12): + return 20 * np.log10(np.clip(levels, a_min=eps, a_max=1)) + + +def _apply_slice(audio, begin, end): + if len(audio.shape) > 1: + return audio[:, begin: end] + else: + return audio[begin: end] + + +class Slicer: + def __init__(self, + sr: int, + db_threshold: float = -40, + min_length: int = 5000, + win_l: int = 300, + win_s: int = 20, + max_silence_kept: int = 500): + self.db_threshold = db_threshold + self.min_samples = round(sr * min_length / 1000) + self.win_ln = round(sr * win_l / 1000) + self.win_sn = round(sr * win_s / 1000) + self.max_silence = round(sr * max_silence_kept / 1000) + if not self.min_samples >= self.win_ln >= self.win_sn: + raise ValueError('The following condition must be satisfied: min_length >= win_l >= win_s') + if not self.max_silence >= self.win_sn: + raise ValueError('The following condition must be satisfied: max_silence_kept >= win_s') + + @timeit + def slice(self, audio): + samples = audio + if samples.shape[0] <= self.min_samples: + return {"0": {"slice": False, "split_time": f"0,{len(audio)}"}} + # get absolute amplitudes + abs_amp = np.abs(samples - np.mean(samples)) + # calculate local maximum with large window + win_max_db = level2db(_window_maximum(abs_amp, win_sz=self.win_ln)) + sil_tags = [] + left = right = 0 + while right < win_max_db.shape[0]: + if win_max_db[right] < self.db_threshold: + right += 1 + elif left == right: + left += 1 + right += 1 + else: + if left == 0: + split_loc_l = left + else: + sil_left_n = min(self.max_silence, (right + self.win_ln - left) // 2) + rms_db_left = level2db(_window_rms(samples[left: left + sil_left_n], win_sz=self.win_sn)) + split_win_l = left + np.argmin(rms_db_left) + split_loc_l = split_win_l + np.argmin(abs_amp[split_win_l: split_win_l + self.win_sn]) + if len(sil_tags) != 0 and split_loc_l - sil_tags[-1][1] < self.min_samples and right < win_max_db.shape[ + 0] - 1: + right += 1 + left = right + continue + if right == win_max_db.shape[0] - 1: + split_loc_r = right + self.win_ln + else: + sil_right_n = min(self.max_silence, (right + self.win_ln - left) // 2) 
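+                    # mirror of the left-hand search above: within the trailing sil_right_n samples
+                    # (capped at max_silence), find the quietest RMS window and then the single sample
+                    # with the smallest absolute amplitude, and place the right-hand cut point there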
+ rms_db_right = level2db(_window_rms(samples[right + self.win_ln - sil_right_n: right + self.win_ln], + win_sz=self.win_sn)) + split_win_r = right + self.win_ln - sil_right_n + np.argmin(rms_db_right) + split_loc_r = split_win_r + np.argmin(abs_amp[split_win_r: split_win_r + self.win_sn]) + sil_tags.append((split_loc_l, split_loc_r)) + right += 1 + left = right + if left != right: + sil_left_n = min(self.max_silence, (right + self.win_ln - left) // 2) + rms_db_left = level2db(_window_rms(samples[left: left + sil_left_n], win_sz=self.win_sn)) + split_win_l = left + np.argmin(rms_db_left) + split_loc_l = split_win_l + np.argmin(abs_amp[split_win_l: split_win_l + self.win_sn]) + sil_tags.append((split_loc_l, samples.shape[0])) + if len(sil_tags) == 0: + return {"0": {"slice": False, "split_time": f"0,{len(audio)}"}} + else: + chunks = [] + # 第一段静音并非从头开始,补上有声片段 + if sil_tags[0][0]: + chunks.append({"slice": False, "split_time": f"0,{sil_tags[0][0]}"}) + for i in range(0, len(sil_tags)): + # 标识有声片段(跳过第一段) + if i: + chunks.append({"slice": False, "split_time": f"{sil_tags[i - 1][1]},{sil_tags[i][0]}"}) + # 标识所有静音片段 + chunks.append({"slice": True, "split_time": f"{sil_tags[i][0]},{sil_tags[i][1]}"}) + # 最后一段静音并非结尾,补上结尾片段 + if sil_tags[-1][1] != len(audio): + chunks.append({"slice": False, "split_time": f"{sil_tags[-1][1]},{len(audio)}"}) + chunk_dict = {} + for i in range(len(chunks)): + chunk_dict[str(i)] = chunks[i] + return chunk_dict + + +def cut(audio_path, db_thresh=-30, min_len=5000, win_l=300, win_s=20, max_sil_kept=500): + audio, sr = torchaudio.load(audio_path) + if len(audio.shape) == 2 and audio.shape[1] >= 2: + audio = torch.mean(audio, dim=0).unsqueeze(0) + audio = audio.cpu().numpy()[0] + + slicer = Slicer( + sr=sr, + db_threshold=db_thresh, + min_length=min_len, + win_l=win_l, + win_s=win_s, + max_silence_kept=max_sil_kept + ) + chunks = slicer.slice(audio) + return chunks + + +def chunks2audio(audio_path, chunks): + chunks = dict(chunks) + audio, sr = torchaudio.load(audio_path) + if len(audio.shape) == 2 and audio.shape[1] >= 2: + audio = torch.mean(audio, dim=0).unsqueeze(0) + audio = audio.cpu().numpy()[0] + result = [] + for k, v in chunks.items(): + tag = v["split_time"].split(",") + result.append((v["slice"], audio[int(tag[0]):int(tag[1])])) + return result, sr + + diff --git a/inference.ipynb b/inference.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..129c5ee984fddc9b505b3a9bd3b065ef2ba988c5 --- /dev/null +++ b/inference.ipynb @@ -0,0 +1,245 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "e:\\Software\\Anaconda3\\envs\\diffsvc\\lib\\site-packages\\tqdm\\auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "| load 'model' from './checkpoints/nyaru/model_ckpt_steps_112000.ckpt'.\n", + "| load 'model' from 'checkpoints/0102_xiaoma_pe/model_ckpt_steps_60000.ckpt'.\n", + "| load HifiGAN: checkpoints/0109_hifigan_bigpopcs_hop128\\model_ckpt_steps_1512000.pth\n", + "| Loaded model parameters from checkpoints/0109_hifigan_bigpopcs_hop128\\model_ckpt_steps_1512000.pth.\n", + "| HifiGAN device: cuda.\n", + "model loaded\n" + ] + } + ], + "source": [ + "from utils.hparams import hparams\n", + "from preprocessing.data_gen_utils import get_pitch_parselmouth,get_pitch_crepe\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import IPython.display as ipd\n", + "import utils\n", + "import librosa\n", + "import torchcrepe\n", + "from infer import *\n", + "import logging\n", + "from infer_tools.infer_tool import *\n", + "\n", + "logging.getLogger('numba').setLevel(logging.WARNING)\n", + "\n", + "# 工程文件夹名,训练时用的那个\n", + "project_name = \"nyaru\"\n", + "model_path = f'./checkpoints/{project_name}/model_ckpt_steps_112000.ckpt'\n", + "config_path=f'./checkpoints/{project_name}/config.yaml'\n", + "hubert_gpu=True\n", + "svc_model = Svc(project_name,config_path,hubert_gpu, model_path)\n", + "print('model loaded')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "load chunks from temp\n", + "#=====segment start, 0.46s======\n", + "jump empty segment\n", + "#=====segment start, 6.702s======\n", + "load temp crepe f0\n", + "executing 'get_pitch' costed 0.066s\n", + "hubert (on cpu) time used 0.6847963333129883\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "sample time step: 100%|██████████| 50/50 [00:02<00:00, 21.95it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "executing 'diff_infer' costed 2.310s\n", + "executing 'after_infer' costed 1.167s\n", + "#=====segment start, 8.831s======\n", + "load temp crepe f0\n", + "executing 'get_pitch' costed 0.063s\n", + "hubert (on cpu) time used 0.8832910060882568\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "sample time step: 100%|██████████| 50/50 [00:02<00:00, 18.36it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "executing 'diff_infer' costed 2.749s\n", + "executing 'after_infer' costed 1.894s\n", + "#=====segment start, 5.265s======\n", + "load temp crepe f0\n", + "executing 'get_pitch' costed 0.065s\n", + "hubert (on cpu) time used 0.5448079109191895\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "sample time step: 100%|██████████| 50/50 [00:01<00:00, 28.39it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "executing 'diff_infer' costed 1.780s\n", + "executing 'after_infer' costed 1.038s\n", + "#=====segment start, 1.377s======\n", + "jump empty segment\n" + ] + } + ], + "source": [ + "wav_fn='raw/test_input.wav'#支持多数音频格式,无需手动转为wav\n", + "demoaudio, sr = librosa.load(wav_fn)\n", + "key = 0 # 音高调整,支持正负(半音)\n", + "# 加速倍数\n", + "pndm_speedup = 20\n", + "wav_gen='test_output.wav'#直接改后缀可以保存不同格式音频,如flac可无损压缩\n", + "f0_tst, f0_pred, audio = run_clip(svc_model,file_path=wav_fn, key=key, acc=pndm_speedup, use_crepe=True, use_pe=True, thre=0.05,\n", + " 
use_gt_mel=False, add_noise_step=500,project_name=project_name,out_path=wav_gen)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ipd.display(ipd.Audio(demoaudio, rate=sr))\n", + "ipd.display(ipd.Audio(audio, rate=hparams['audio_sample_rate'], normalize=False))" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAABMIAAAGsCAYAAAA/sQstAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8o6BhiAAAACXBIWXMAAA9hAAAPYQGoP6dpAADfKklEQVR4nOzdd3hb1f3H8beG944znL1DBiuQEMIeYVM2LTtsShmllEKhlD37K6sUKJSWTSm7YYY9ExJCEpJAduI4ceIM7y3JOr8/jmTZWV4atvV5PY8fS+ce3XtkyXd87/ec4zDGGERERERERERERLo5Z6wbICIiIiIiIiIiEg0KhImIiIiIiIiISFxQIExEREREREREROKCAmEiIiIiIiIiIhIXFAgTEREREREREZG4oECYiIiIiIiIiIjEBQXCREREREREREQkLrhj3YD28Pv9rF+/noyMDBwOR6ybIyIiIiIiIiIiMWSMobKykn79+uF07jjvq0sGwtavX8/AgQNj3QwREREREREREelE1q5dy4ABA3a4vEsGwjIyMgD75jIzM2PcGhERERERERERiaWKigoGDhzYGDPakS4ZCAt2h8zMzFQgTEREREREREREAFocQkuD5YuIiIiIiIiISFxQIExEREREREREROKCAmEiIiIiIiIiIhIXFAgTEREREREREZG4oECYiIiIiIiIiIjEBQXCREREREREREQkLigQJiIiIiIiIiIicUGBMBERERERERERiQsKhImIiIiIiIiISFxQIExEREREREREROKCAmEiIiIiIiIiIhIXFAgTEREREREREZG4oECYiIiIiIiIiIjEBXesG9Ah1dWQkQEOh33u8YDXC243JCU1rweQkgLOQOzP67X1XS5ITm5f3ZoaMMaWuVy2zOeD+nr72pSU9tWtrQW/374Hd+AjamiAurq21XU4IDU1VLeuzi5LTISEhLbX9fvt9gDS0kJ16+vte0lIsPXbWtcY+/cB24atP8+21G3NZx+O78n2Ps9wfE+Cn2dHvydbf54d/Z7s6PPs6Pek6efZ0e/Jjj7P9n5PtI9oe13tI0J1tY9oe13tI2yZ9hFtr6t9RKiu9hGtq6t9hPYRoH2E9hHaR3THfURrmS6ovLzcAKYcjNm0KbTgrruMAWMuvrj5C1JTbfnq1aGyhx6yZWed1bxuz562fNGiUNlTT9myE09sXnfwYFs+e3ao7MUXbdmUKc3rjh1ryz//PFT21lu2bL/9mtedMMGWv/tuqOyjj2zZHns0r3vwwbb81VdDZd98Y8tGjGhe99hjbfkzz4TK5s2zZf36Na972mm2/O9/D5UtW2bLsrKa15061Zb/5S+hsnXrbJnb3bzub35jy2+9NVRWWmrLwBiPJ1R+3XW27LrrQmUeT6huaWmo/NZbbdlvftN8e263LV+3LlT2l7/YsqlTm9fNyrLly5aFyv7+d1t22mnN6/brZ8vnzQuVPfOMLTv22OZ1R4yw5d98Eyp79VVbdvDBzevusYct/+ijUNm779qyCROa191vP1v+1luhss8/t2VjxzavO2WKLX/xxVDZ7Nm2bPDg5nVPPNGWP/VUqGzRIlvWs2fzumedZcsfeihUtnq1LUtNbV734ott+V13hco2bQp9nk399re27KabQmVVVaG6VVWh8ptusmW//W3zdQTrah+hfYQx2kcEaR8Ron2EpX2EpX2EpX1EiPYRlvYRlvYRlvYRIdpHWJ1sH9EYKyovNzujrpEiIiIiIiIiIhIXHMYYE+tGtFVFRQVZWVmUr19PZl6eUhE7Uyqi0pWVrqx05VBd7SMs7SPaXlf7CFumfUT76mofYR9rH9G6utpHaB8B2kdoH6F9hPYRobpdeB9RUVdnY0Xl5WRmZrIjXTsQ1sKbExERERERERGR7q+1sSJ1jRQRERERERERkbigQJiIiIiIiIiIiMQFBcJERERERERERCQuKBAmIiIiIiIiIiJxQYEwERERERERERGJCwqEiYiIiIiIiIhIXFAgTERERERERERE4oICYSIiIiIiIiIiEhcUCBMRERERERERkbigQJiIiIiIiIiIiMQFBcJERERERERERCQuKBAmIiIiIiIiIiJxQYEwERERERERERGJCwqEiYiIiIiIiIhIXFAgTERERERERERE4oICYSIiIiIiIiIiEhcUCBMRERERERERkbigQJiIiIiIiIiIiMQFBcJERERERERERCQuKBAmIiIiIiIiIiJxoU2BsCFDhuBwOLb5ueKKKwCoq6vjiiuuIDc3l/T0dE499VQ2btzYbB0FBQUcd9xxpKam0rt3b/7whz/g8/nC945ERERERERERES2o02BsO+//54NGzY0/nz88ccAnH766QD87ne/45133uG1117jyy+/ZP369ZxyyimNr29oaOC4447D4/EwY8YMnnvuOZ599lluueWWML4lERERERERERGRbTmMMaa9L77mmmt49913Wb58ORUVFfTq1YuXX36Z0047DYAlS5YwZswYZs6cyb777ssHH3zA8ccfz/r16+nTpw8A//jHP7jhhhvYvHkziYmJrdpuRUUFWVlZlJeXk5mZ2d7mi4iIiIiIiIhIN9DaWFG7xwjzeDy8+OKLXHjhhTgcDn744Qe8Xi9TpkxprDN69GgGDRrEzJkzAZg5cya77bZbYxAM4KijjqKiooKffvpph9uqr6+noqKi2Y+IiIiIiIiIiEhbtDsQ9vbbb1NWVsb5558PQFFREYmJiWRnZzer16dPH4qKihrrNA2CBZcHl+3IvffeS1ZWVuPPwIED29tsE
RERERERERGJU+0OhP3rX//imGOOoV+/fuFsz3bdeOONlJeXN/6sXbs24tsUEREREREREZHuxd2eF61Zs4ZPPvmEN998s7EsLy8Pj8dDWVlZs6ywjRs3kpeX11hn9uzZzdYVnFUyWGd7kpKSSEpKak9TRUREREREREREgHZmhD3zzDP07t2b4447rrFs7733JiEhgU8//bSxbOnSpRQUFDB58mQAJk+ezMKFC9m0aVNjnY8//pjMzEzGjh3b3vcgIiIiIiIiIiLSojZnhPn9fp555hmmTp2K2x16eVZWFhdddBHXXnstPXr0IDMzk6uuuorJkyez7777AnDkkUcyduxYzj33XP7yl79QVFTEzTffzBVXXKGMLxERERERERERiag2B8I++eQTCgoKuPDCC7dZ9tBDD+F0Ojn11FOpr6/nqKOO4vHHH29c7nK5ePfdd7n88suZPHkyaWlpTJ06lTvuuKNj70JERERERERERKQFDmOMiXUj2qqiooKsrCzKy8vJzMyMdXNERERERERERCSGWhsraveskSIiIiIiIiIiIl2JAmEiIiIiIiIiIhIXFAgTEREREREREZG4oECYiIiIiIiIiIjEBQXCREREREREREQkLigQJiIiIiIiIiIicUGBMBERERERERERiQsKhImIiIiIiIiISFxQIExEREREREREROKCAmEiIiIiIiIiIhIXFAgTEREREREREZG4oECYiIiIiIiIiIjEBQXCREREREREREQkLigQJiIiIiIiIiIicUGBMBERERERERERiQsKhImIiIiIiIiISFxQIExEREREREREROKCAmEiIiIiIiIiIhIXFAgTEREREREREZG4oECYiIiIiIiIiIjEBQXCREREREREREQkLigQJiIiIiIiIiIicUGBMBERERERERERiQsKhImIiIiIiIiISFxQIExEREREREREROKCAmEiIiIiIiIiIhIXFAgTEREREREREZG4oECYiIiIiIiIiIjEBQXCREREREREREQkLigQJiIiIiIiIiIicUGBMBERERERERERiQsKhImIiIiIiIiISFxQIExEREREREQiqri4mJdffpkhQ4bw/fffx7o5IhLH3LFugIiISFzzVsJXJ0NSD9j/FXDoHpWIiHQvzz77LBdccEHj81NOOYW1a9fGsEUiEs90ti0iIhJLBa/Bxk/t77IFsW6NiIhI2L300kvNntfX18eoJSIiCoSJiIjEVskPYLA/ZQtj3RoREZGwKykpafa8R48eMWqJiIi6RoqIiMTWljlwZ+Dxq/mxbImIiEhEbNmypdnzgoICvF4vCQkJMWqRiMQzZYSJSNg1NDRwzjnnMHr0aDZt2hTr5oh0Xn4fFPwIS7E/q5bEukUiIiJh5fV6WbduHQDffPMNALW1tWRmZrJhw4ZYNk1E4pQCYSISVsYYzjnnHF566SWWLl3K9OnTY90kkc6rYjGUNhknZU1h7NoiIiISAevWrcPv95OcnMx+++3XWF5XV8c777wTw5aJSLxSIExEwmrOnDm88sorjc/nz58fu8aIdHbFc6Ci6fOSHVYVERHpitasWQPAoEGDcDgcfP7559ssExGJJgXCRCSs5s6d2+z5Rx99RHV1dYxaI9LJbf4amsa+Kit2WLVLWbYMxo6Fo4+OdUtERCTGCgoKSHTB+z3Wwy6ZHFK0gPvuu69xmYhItCkQJiJhNW/ePACODlwAL1q0iL59+1JUVNRYp6GhISZtE+kMZs2axTHHHM0XrzwEN70ATzZZWFUZs3aFVXExLF4My5bx4osv8vLLL+Pz+fD7/cyZMwefzxfrFoqE3Ztvvsnjjz9OfX09fr+/sbysrEw3hCSurVmzhr/tCcO/q4JllXDebxllvABs3rw5to0TkbikQJiIhNXy5csBOP300+nZsycAlZWV7L777gA8//zzZGZm8sEHH8SsjSKxdMstt/Ab93QOueha+Lx5QMjfDS6W/X4/0158EYAat5tzzz2Xs88+m4SEBFwuFxMnTuT++++PcStFwqusrIxTTz2VK664guTkZFwuF++88w4///wzOTk57LrrrhhjYt1MkZhYu3Ytv2pa4IVD3n8JQJMqiUhMKBAmImFVUmL7efXr148ZM2bQp08fwN7xq6mpYerUqdTU1HDhhRfGspkiMVNbto7jVgI1YDKbL3PW1uOpr41Ju8LlD3/4Ay88/jgApTvI/rz55psVFJBuZXvjHJ1wwgmMGzcOgPz8fObNm8eJJ57Ihx9+GO3micRWXRHZ+YHHVx8AQPZ3S8lCGWEiEhsKhIlIWJWWlgLQIyODkRkZrF69unHZggULGh87HI6ot02kM9jNVYhzMRgHDK6AXsBXwYX1sGjuNzFsXcd89dVXPPjggwTjexvr6nZYV+PCSHeydu3aFusceOCBTJs2jWOOOSYKLRLpPHZx5ENx4Mm1D0A/cHgNpyTZbEoRkWhTIExEwiqYEbbnb8+Gvn1J+cddHH744QBMmzatsd7w4cNj0j6RWDLGMMlbDkBVbxdrgS3A3B49bIU6yF82d4ev7+y++sqG9IKBsFU7udO/cePGKLRIJDq2Duymp6dvU6empiZazRHpVMbU2P29t2cyDN4H9rbHvNOzoKqqSmPHikjUKRAmImHj8/morKzklB6Q+EOgm8hjDzF27FgAPvnkk8a6FRXdZHY8kTYoLy9nV499nDZ5V+rr63n22We54MorbWE9+Gq2xK6BHbRixQoAegeeF3ntYMhjx47llltu4ZNPPmHMmDGAxoWRTsjnsxM9tLGrlt/v57XXXgPgwgsv5K233qKwsJAvvviCTz75hHPPPTcSrRXpMoZV2olg6oba4TLYb28A9rKHCJ0TikjUKRAmImETTG+/uGeTwvxaJo+0d/6+//77xmJlg0g8Ki4uZlCVfezcaw8SExOZOnUqWf362cJ6cPjKYta+jgpmhB4ReJ4f+P3jjz9y++23c/jhhzNkyBBA48JIJ/Ttt9CzJxx8cJte9p///IcvvvgCgF122YWTTjqJzMxMDj74YA4//HAmTJgQgcaKdB19y2w3+bpRI23BxMMA6FkJDuxNIhGRaFIgTETCJngRPMHTZPyvBjik8XI4ZPPmzUqFl7hTXFxMVlngyW5NLo7T0uzvenD6uu6d8bKyMvoDwXe2JPDb7XY31snIyABsdxiRTiUx0f72eNr0socffpicNHj8MDgwc3XzhbNn88sXXuD6JkWpqakda6dIV+L3klFpJ0fxjNrTlu1+BLjB5YPBaJwwEYk+BcJEJGxKS0txAbkbArPB9UoCIG/VT+y2227N6vr9foqLixGJJ2WFa3CXBp6MPzS0IDieUD24Giqj3q5wKSsrY0rgcT7wAXYWyabSAkG/6urqaDZNpGXtDIT17duX9ybA5Z/B5Ov+AdP/Flo4fz55c+ZwZr9+nHPOOYAdK0yzpkrcqFqFM5AA7Bg90T7oMQ762of7pykjTESiT4EwEQmbsrIyJqWAsx5IBs4+AQDHvMU8//zzAIwZM4acnBwAfvrppxi1VCQ23D9/i8OASQMGjg0t
CGaE1UGC6bqZUuXl5ewReNz/iiv44quvuPfee5vVCQbClBEmnU47A2G+2mImrQo8qQZOuwZe/Yd9vmwZAHv+8pc89thjja+pr6/vWFtFuoiG0pWNM0YmjR5tH7iSYZDNDj4wQ4EwEYk+BcJEJGwqKio4NCvwZHAiHH6GffxTNXuO6IExhp8/+4w/JySQDhx22GGNs8yJxIOMVQsBqO/jAmeTQ3BW4B+nGhJM182UKi0tZUjgccKYMRx44IG4XK5mdYKz6SkjTDqddgbCRlQvxbk28KRvElQZOPNymP4m5Ofb8mHDSElJaXxNbW1tx9sr0gV4l80DP/hdkDZiRGjBiP4A7OFWIExEok+BMBEJm4qKCvZMCDwZ0gMOPQpSnbAFePNxW37eefxu0yaeCVQLzrQlEg8y1hcAUN0rpfmCAQPs7zJI8nfNAFFw1thBwYL6D8Hv3aZeWloaPTNgz4y54Oua71W6qXYEwowxTPDZ/s4NwzNh4WLb18sP3Pw7CE4Mk5dHQkJCY2C4rq4unC0X6bR8y2z2f1U6JDUJBjNmHABDvQqEiUj0KRAmImFTXl7OqOCwJyMG2u5eJ+5lnz/1L/v7448BOC1QbZdddolqG0ViKaPUTihRnZPdfEGfPvidDvBDehftMhgc7HhocK4Mz7uw5tVt6qWlpfHeH2Dq6K/gp3u3WS4SM+0IhNXV1bEHfvtkzBDIHQr33WOfzy2AFSvs4z59AEhOTgaUESbxw7Ha9hsuSXXgcDSZTGmPSQDkVkBZaen2XioiEjEKhIlI2FRUVJAXvH4Ipr///n77e8YW+ODZUOU/QYITDRgscSW9wmZAVffKa77A5aI+x46dlVFRE+1mhUVJSQmpQI/gv3QusOmLbeqlpaWxz/DAkwJlhEonEgyEeb3QymNTWVkZQwIxLcf4wEDgux8DOdissKIiWxYIhAW7RyoQJvHCsd7+D2xJSWy+YM8p4AB3Lfg3rI9By0QknikQJiJhU1FRQWawt8fgQKbX3ofBPnlggGMvCCwDxkKfbGhoaIh+Q0ViJKXCRorr+g7bZlldbiYA6RVdcxDt0tJShgRv9qcAacDmr7epl5meHHqSNmib5SIxk9jkQt27bbfe7akoLSJzi33s3PdI+yB9GAzd6hRbgTCJU65NZQBsTt5qSIDeu2J62Yd5RUuj26goqqur44cffsDTxrEHRSSy2hwIKyws5JxzziE3N5eUlBR222035syZ07jcGMMtt9xC3759SUlJYcqUKSxfvrzZOkpKSjj77LPJzMwkOzubiy66SLNHiXQDlWVlJAX/lQePCi146nlIb1Jxgv3VJ0uBMIkj/gYSymyWiW/IHtssru3dE4C00q55slxSUsJR/QNPcgO/K5aCp6xZvZS0LE57JPDE1zWz36SbahoIa+VFa93Kr3EEhgFjwiH2t9MFo5tkfSYmNk6IoUCYxBv3Fruf35yS2XyBMwFvvyQAfLM/73YzqW7evJmRI0eSkpLChAkTuPvuu2PdJBFpok2BsNLSUvbff38SEhL44IMP+Pnnn3nggQfIyclprPOXv/yFv/3tb/zjH/9g1qxZpKWlcdRRRzUbFPTss8/mp59+4uOPP+bdd9/lq6++4tJLLw3fuxKRmHBs3ozDDziAgU0CYXscAW/+DQ7sAacPhF/Y4t6ZCoRJHNmyDEdgbHj36EnbLK4dYKNIyVu65v9EaWkpk4Kzxg7oA8k2A4aqVc3qpaWlcewK4B6geF00myiyc+0JhH39PzDgzXBC796hBbs1OQb27QuBsZGCgTANli9xwd+As8RmV25O77XN4vqh9jgxwQ3Tp0+PatMi7eWXX2ZFcIxA4Nlnn41dY0RkG20KhN1///0MHDiQZ555hn322YehQ4dy5JFHMny4HezDGMPDDz/MzTffzIknnsjuu+/O888/z/r163n77bcBWLx4MR9++CFPP/00kyZN4oADDuDRRx/llVdeYf169Q8X6cqSNm+yD7KB9H7NFx5xFXxVDK8WwKCjAWWESZxZPhsAfxJkDx66zeKaQfZYmrjZgL/r/V+UlJQw3B14MrA/JAcyYsoXN6uXlprKhSXAT8Bbha0ei0kk4lwucAZOjVsZCMtevQSAqv4ZzRfsNTH0OC+UHabB8iWu1G/CYeeIoSyn/zaL0w44AIARtbBgwYJotix8fDWw4DbY8FGz4hUrVjAyA/7dGy5xQ22NMqBFOpM2BcKmTZvGhAkTOP300+nduzfjx4/nn//8Z+Py1atXU1RUxJQpUxrLsrKymDRpEjNnzgRg5syZZGdnM2HChMY6U6ZMwel0MmvWrO1ut76+noqKimY/ItL55FYX2wc5QFLPHVcMZIooECbxxLN8EQC16dCz17Z3xuuGjgWw3azqN0ezaW3z01x46BQontusuLS0lP6ByfMYMhzcdvB/Zp4Dfl9jvSy/P/SitQ3g0Wxh0okEs8JambHVs8ge9yqHbDXe3eh9ITXweMyYxmJ1jZS4smUlBDKhfX23DYQ597XXjNklsGjevGi2LHwW/xX+fjucejQsDw0XVLhqCQuy4IJN8JQPjqnYwqZNm2LYUBFpqk2BsFWrVvHEE08wcuRIpk+fzuWXX87VV1/Nc889B0BRYGacPoEBQYP69OnTuKyoqIjeTVPHAbfbTY8ePRrrbO3ee+8lKyur8WfgwIFtabaIREnfehuk9ue6wZmw44rJdh+grpEST7wr7XiZlamQnp6+zXL/iJH2QQWwcfk2yzsFjwcOPhiufQsu27tZ5lpJSQk9gtf2w8fBoNPBlQLj/w8crsZ6/Zpm2hRATUmo64hIzAX/N6urW65rDFmb7LhG9eP2br4sawz8FjjcDfff31isQJjElfyFADQkQvJW138A7HEkJIPTB7XzZke5cWGy4B14Gphp4DfnNxb/rupbkpv0/v9jNnz88cfRbp2I7ECbAmF+v5+99tqLe+65h/Hjx3PppZdyySWX8I9//CNS7QPgxhtvpLy8vPFn7dq1Ed2eiLRPntee2Pt7JO+8ojLCJA6ZggIAylNdOByObZYn5ubiDfau+nnuNss7hVmzoDgwI8YHwLrQrJA1FZtIKg88GbUPjL4GTq+AMdc1jo8EkL333kwNPlkPK7+eFoWGi7RSMBBWWdliVVO1BnfgQjdh/+OaL8wYAbu54UIfVH0ADTZgpkCYxJV8OxtkTTpkNxlTulFqP8xgezm6S8k6Sku7YIbwjJ/tzOgAn/wES5fiW/0ZB/xo/8d9U2zX6JFl8NzTT8emjSKyjTYFwvr27cvYsWOblY0ZM4aCwMl9XmAMhI0bNzars3HjxsZleXl526SF+nw+SkpKGutsLSkpiczMzGY/ItL59AlMN9/QK2PnFRUIkzjk2LQFgPKUpO0uT05OpjI42PyyxdutE3NLm0xxXwO89+/Gp6n1BRDoHc2o3e1vp5ttZGVxwuuvUxE4lKfO+mjbOiKxkhE4flWUtVi15ru3oAb8Luhz2BHNFzoTICkwfep358P8PwIaLF/iTMFqAMqSaDa5WiOHA8eIbAAmJsJnn30WxcaFgac
MCpqP/eW76wYcV5+GoxyqMsH5/DuQDm4P+Od+j9G4mCKdQpsCYfvvvz9Lm54EA8uWLWPw4MEADB06lLy8PD799NPG5RUVFcyaNYvJkycDMHnyZMrKyvjhhx8a63z22Wf4/X4mTdp2Fi0R6Tp61ttxgPx9tnOy01Sga2S/HAXCJH44SmzX4eq07QeKMzIy2BIYVsusWBmtZrXN999v9Tw0psvgmkLwg0l0QL+tJsvYyqmnnsqmYTYg3vOnpTutKxJVifaGDvMeaLGq77O3ACjOdZCSlbVthX5NssT6HAIoI0zizDqbMrnJDf37bztGGADD7PFiBDB//vzotCtcatZBYK63dbvY3+4X/4frXZvZdltuH5x5e2GG2eEBDnBVa5wwkU6iTYGw3/3ud3z33Xfcc889rFixgpdffpmnnnqKK664AgCHw8E111zDXXfdxbRp01i4cCHnnXce/fr146STTgJsBtnRRx/NJZdcwuzZs/n222+58sorOeOMM+jXwomziHRuOfU2qNWwg+zORlm7AjC2P6Q5yyLcKpHOwVVhL3xrs3K3uzw7O5tNgaH1zNpOOIuyMfDee/ZxYL4bM39N4+KhtTYdzJeX1qwr5I6sHbsXABlLKqChdTP0iUReoFdD/if89a9/pWYnM70l//AjACtzdpAFPeFR6HMoDDkb+h1rX6NZIyWebLATvxQCAwYM2H6dYXbG5H4+Ox51l1K/GTbYh7cUQ8Oo0KJn8+CnXcaDw4ljF3sDeL8EmDu3kw59IBJn2hQImzhxIm+99Rb/+c9/2HXXXbnzzjt5+OGHOfvssxvrXH/99Vx11VVceumlTJw4kaqqKj788MPGAz/ASy+9xOjRozn88MM59thjOeCAA3jqqafC965EJPqMITN4Xt9/Byc7Qan9KK7PBuC0gdMj2iyRzsJVaTMmPbl9t7s8LS2N4AiYZv2WKLWqDRwOmDMHrugFRwXK1lTaABkwqC4wHsqAbWfE3J6EE88HwFkArPw8zI0Vaac+PezvD2HPz//Ag/ffuf16qz4iaabN8pw7aNft13GnwuGfwX4vNk4go4wwiSemyGZGrW7YSUbYLrsB0KMaNqzvhDeBdqZuE5TYh19ugUnl8J/RcHEvuKAIhg0bZhfuZifDGeOlWc8pEYmd7QzesXPHH388xx9//A6XOxwO7rjjDu64444d1unRowcvv/xyWzctIp2Yr7SUBHudj2Pw8Bbrr6kZSG5SGWmuGnsh3YoMEpEuyxhcFX4AvL0Hb7eKw+FgdaCnsHddMa7t1oqxvDw4PAHsTX4cJUBJPuQOZUCl7VLmH71Lq1a165FHUtEDMkvA8++7SLzvqJZfJBJhZpcMHO8D+TAlH3J9jwP3blvvoRtxVEFZFniOOqXV61cgTOLKZptRWeBI3u5syQCMsjOuJniget267dfprIrXgZ0Hg43Aqo1wVpOhsocOHWof7DUB+Iq+FbBwwYJot1JEtqNNGWEiIjtSX1hoHySBu+f2L/Sb+mDzMRxxL/x14SkKgkn35ynHEZhs0d93xA6rFSakApBQ1nnHzvPVVUA6EOgNVjfnc2prqukRSGJz7ntoq9aTnZ3Nh7l2xPyEJ7+BNYsi0FqRtlm9WyaMo/EMec+ZFVC8vHklfwO8acfHezLJwS/PPLPV65+Qu4RnL4Mx6UvC1GKRTspXg6PY3gAqztp+JjQAucMhOD5mV8sI25APgNcN1cA111zTbHFjRtjoieAAdwOUL9W4mCKdgQJhIhIW3qIi+yAdEjJ2kP7ehM+RyieLwOuLcMNEOoP1SxunV08atONA2K/vfxQAVx1QURGFhrVDQzUA9YEekPU/zKB81fe4ArHw5ENObPWqPtjraGp7gqMMuP3K8LZTpB3+s/5IHD9Boh/qM8BRCeaVu5tX+ulzHOsNxgkf9Nu9TWPcDkzbwNSDoH/KxpYri3RlpSug3D7sOeGAHddLHQjZgXqeGqqqqiLetLApsge+yiTYa6+9ePDBB5uNcxacUI7sERDodZ1YWKiJokQ6AQXCRCQsGgNhaeBMaWGwfMDlsh2/dDIgcWH9CgAaUiCnd+8dVhu6zxSwSWFULvoxGi1rswdWXMXIa2GD7eGFWfIzjo9eAx94s8AxYtTOV9DEnpP347lAooB5ewb4/RFosUjrlZTYAX8uveIKZgYmQC5/6dXmx6qPXwOgMg8G775Hm9bvcCXZB/76DrdVpFNbabMm/W6YcsZOsiYTsiDHXpKOSYX1XSkrLHDuW+aGM844A4fDwaBBg3AEejqMGhU4HqYNgcChfzgNrOtqXUBFuiEFwkQkLBo2Be5uZwBJLQ+WrUCYxJUN9g6xJ8WOk7kj6TkD8QcWl879Ohota7MNZQ5WbISCwAyX7tVrSfrqKwDWD05oU1fn8ePHc99aIAkcpV7WvaPxQyW2goGwAQMG8HKlLctcXctnn3wYqvTVNwAsy4Jx48a1af0Ot40gO41mSpXurX7JDwDUpsPEffbZcUWHA3ra8cNGJUNhcKiNrmCTnS15ixPyAjOmu1wuCgsLyc/PJyMjMIZAUi70tkNz757SBWfHFOmGFAgTkbAwmwrs7zTsAb8FCoRJPDEb7HyQNUk7D4ThcODNsoGkusXzo9CytqustNGBZYF4V8K6ElLmrgRg1cCW//ebmjRpEpXOHpQPtM+3PPt/YWunSHuUltpZ7nJycpiXYwe6dhZB/qcvNtYxP64G4CsDhx7aujHxgpyBQJhDgTDp5jzL7LiPZSkOcnNbODb0tumXg9x0rWypzWWAHSi/Z8+ejcV9+/YNdYsEG+zrZ9/jqERYvXp1FBspItujQJiIhIVrs01lr08GnC1PSKtAmMQT33obKK5IaCEQBtRlBVKt1qyIdLPapSIwdtmPgev4pPwaktbaGfDWj53YpnUlJSXx6aefsijTno70X74yfA0VaYdgRliPHj3480MPUxW4fu+1KJChWVMFBfb7/i057L333m1avyswIYYTb3gaLNJJmYI1AJSlJbVcuY/tN9jPAZs3b45ks8Kr2I5nts7fPBC2Xf3tOAADHcoIE+kMFAgTkbBwlmwCoCapdbsVBcIknjQUbQCg3O0gJSVlp3Vrc2wXkcSNRRFvV3vU19uxjRb605qfRfSD2sH7t3l9e+65J6knngVAj/xqjRMmMRXMCOvRowcnnHACZryd9W2X4sC0qLPew+EHfzrUD52E09m2U2lXog2EudBMMdK9uYvs/0xZRmbLlfsNAKCnPxSM7vSMwZTYoHiBF3r1amFYkIE2Q6xPAyxcuDDSrRORFigQJiJh4Sqz4yRUJ7acDQYKhEk35/fBZ0fB3Ovs8y32xL4sIbHFl9bl2oyx1LLOOWukz2cv4MsTBmGanPeXD4UefUe2a525v/w9Jglc1fCnoyZ1nQsh6XaC372cHNuNyTPRjm00cFNgcPuZdqyw8l4wZtyubV5/MCNMgTDp7hKK7QzD5T1ankCJ/rYbcmZ9FwqE+SpxlNnpoFd7WpERNsQOnJ9dBwsWLIh060SkBQqEiUhYuAMX7ZXuhFbVDwbC/Mr+kO
6oZA4UfQRLHgBfLc4SO65WefLOs8EA6vP6AZBW3jlnlQsGwvr2H0JFk2FffkxrMkNWGw0avSdVQ+w+YfSqOdx5550dbqdIezTtGgmQdJzNVkxbB6t/+hHmzgZgWRKMHTu2zet3JaYB4HYqECbdmLeKhBJ7o7Om/+iW6w/aBYDUGiguLo5ky8KnfguU24cbHAmkpaXtvP7Q3QFIqoPSwkKMMRFuoIjsjAJhIhIWrkp7568isRVjQUBjdxIFwqRbcrhCjysW4yy13SdKU1vuIuIbOAKA5IrOeaEczOIcMmQI3wdvgCfDKzUpjBkzpt3rLRtru40caGDJkiUdbaZIm9XW1lJXVweEMsLSJx+HPxMcPih765/wUz4A39a3LxCWkBQIhDmUDS3dV23RPDuCPDD6hF+2/IIhuwHgqoOqrjJGWO3mxkCYLzcXR0szJvcbC4FT5F5eb9fJfBPpphQIE5GwcFfZC/3yxJYzXoDGEwYFwqRbaqgNPa5eg7PMDoxdnt5C1wnADA1cENQD5eWRaF2HBDPCJkyYwKU/Q9HFMPtcqBhwbGOmZ3vknXs5AAM3QVVF5+wWKt1bcHwwl8tFZmYgaO10UjHEdmnO/eELzOoaAN7eQrsCvwnJGQC4nTr2Sfe1+Zv/QT34XTDuhBNafkHeGAh0KEgp6ZzjY26jeC0EJo3xtzQ+GED6UAjMlTPcBevXr49c20SkRQqEiUhYuCrt2UBFUkar6gczwpQaLt2Sr0kgrGolznL7Pa/M6dfiSxP6joVgD4vArFudSTAQlpOTgztnJH2fhkn/hLG77dWh9SYcdVHjOGHDSjrnjJnSvQUDYTk5Oc2yO9YPsn2A+0//CUc9+JNgfe6QULCsDRKS7WQYCS6/jn/SbTkW2C7EVT1cOBJaMWRGUjYm2/7P9a3vIhlhhXbmR18ipPfp03L9xB7Q294smpgOhYWFkWydiLRAgTAR6TifD1etvbtdlpzVqpeoa6R0a74amAesAzbMAptEgqf34BZfmpo7AhO4a8yqnyLVwnYLBsLcbjezZs0CbAbNiSee2LEVp+ZgxtrAwtH+zQoSSNRtPVB+0KbRgSzNQHx7zWCgndmPwYywJDd4PJ72NVSkE/P7/VR+MwOA4p7prX6dybX9Bvt7u0hG8Ia1ANSktGLGSACHA4ZmAzAhQRlhIrGmQJiIhNTWwl13we03QmCclFYJ3EUHqEzJ3UnFEAXCpFt75n/wV+B2YMFMAIwbUvq2nBGWmZ1LfSDRxLf8x8i1sZ2CY4S53W5ycnIoLCzk+++/Z9y4cR1f+YE2q2yyz7Bly5aOry9SKpaDp3Tb8spK+POf4fPPo98m6bCtB8oP2vv3j2KaJDs/64DrrruuXdtICgTCkhPsmGQi3c1r/32FEWX2OLF2YMs3f4JMIGg2EE/XuBESCIRVJbZixsig0cMA2BUoKuoiXUBFuikFwkQk5JFH7EXcbffBObtCa09EgjP8pEJ9QutOBhQIk27LGHh+un1cA0y33R/qM2HAwIEtvjw9PZ3ywFB7vpWLI9TI9muaEQbQr18/xo8fH5Z1O/c7GoD+1bBu3bqwrDPs5vwdDhsFxw+A8rXNl918kb2ZcNhh8PUXO15HwWyY+3nr97ESFU27RjaVkTeK2gt3gyQoOwiG3/APLr300nZtwx0YLD9JgTDpptYveJNE22uQ/FH7t/6FeTaraqATqqurI9CyMCuyGV1bXG0IhE04AIBBFbB5Rxlh/gZY8jCUzA1DI0VkRxQIE5GQjz8OPf7fSvj5g9a9LnBXjEzwupQRJnHO4YCXrwk9/8b+Ks2wMy22xOVysSUpMD5RQX64W9dhwUBYRwbG36F9TgIgYQu88vTD4V9/ONx9K/wAfFwDFx8dKq8qghdeDz0//WhYMh/O3hfOGw1Fc2z5U7+CMZNg78PgqMHgq49m62UndpQRBpD61+/4+a1b+PzXT3DeBZc1HsPayuFKBmzXSAXCpDvavfxHqAGfG3IOndLq1zn724zpPD9UVlZGqnnhU7QJsJNjtqprJMD+J0M6JHjgxyce2/4Nn/wXYO7v4MO9w9fWMFm3bp26dEu3oUCYiIQsXWp/OwAfLL/2Qqbs6uCQ/fekvn4nF2uFywHwZ4AjKbtVm1IgTLqz4oRUys8KPKmyvzaktC4QBlDotLPUOdd3vq4TW2eEhdWQUZhkoAHWv/9855teviofvmjSptd/hneft48fvxxKjZ35LNcBG+thzHh4eRa8sBTOORRe/wv8/tXGMeP4eC3ceJR9vGwxXDYZThwC/7g3eu9JGgUzwrYXCMOdythjbufkM3/dsY0EA2EJUNeWIQhEuohdiuzN0fzcRI476aRWv84xdDgAPeu6SCCs0B4LVvrbkBHWez8a9rTH9+t7wH777cdLL73Ee++91zjsAJu+tseITwET5XPk+hJ4/wi4eRdY9l2zRW+//TaDBg3i3HPPjW6bRCJEgTARserrIZimfab9NfKjjXyyAl7v/SNfTH9jx69dvxqA2hTIyGjdLFrBGbkUCJPuaNqP6Ux6uXnZapftRtgaBS7bfcq1uTzcTeuwiAbCHA7ob8eJmZAOP//8c/i30RFz34YyIMEBRwe6uV56Gbz+T7j7bfv88tPg1ee2fe2nVXD6DTYwOq43/Ol8W/7Ql3BMBuy1Kzz1HUxbA5ffBP+4OdLvRrayo8Hyw8ppBwRXRph0R8ZXT79Ce+M0++RftS1zctyeAGSWQEV55zv2NWMMbLDdN3+qb0MgzOnCd8YxABxRDhVr13LOOedw/PHH8/TTT9s6nhq4G/g3cN/5YW/6Ti39G/zhE7h7GUw6CDZutOVlpZT+7ixu7G949dVXqamp2fl6RLoABcJExCostAf2BPhpDJhgD8d66Pk29P/2/h2/dkMBAJUJtHo6+eDJUZcYEFWkjbZs2cJSwNdkwqzCQYNa3Z0w320vxJ1b6jvdOFJNB8uPBEc/28VkdDKsXr06Ittot9lf2d/DcuHJNyAT2FAHp18KFcCwNLj3OUr2PI5XRuZSn+bAe88FcO2poXUc7obPZ8Cd/4YjR0ED8GEVVPthCLBH4O96/d0w/+movr14t7OukWHjSgIDyS6o1cWkdDMb1/yIc4l9nHnqeW178a6TwAHuWqhcuTL8jQsnTwlssjdy51e3oWskkHTeg5gB4PLCLU0mWv/666/tg+/mQz6QBOx9eLha3DLjhxceh+D9pzIvXPQL+PYVzK59uSC/lrvXwTejYNWqVdFrl0iEKBAmIlahHdCbHJi+BC4eBEW/3oXNw+xResz7C8G3g7vXGzcAUOZueyBMGWHSHQUzPVaMDRQMh18++vqOX7CV1Ql9AXDUA1s2hLl1HRPRMcIA+ucB0M8PmzZtisw22mvBIvt73AgYNBGeuBd6OyAb2C8Bpn8Mqam8//77nLm8mORqw8PuMfB/r8LTU/E/Mp66p17Fmz3IZr+99xP+B66BX+wN542Dj16G70pgWBZUgjnvMuo3LIzhG44vOxosP6x+fwucC853oK62KnLbEYmBzZ+9BxVgEiDxwAPb9uLck
dDbPnzkvDM774QpABt+sNnBwBIf9OnTp/WvzRiG49wJAPzOn8B3N94IwI8//mhvfP0YCDIdPQWOnBrGRu+AvwFWPQs/3AHPbLZlh462UYL3vocDzsRRWA+BQ/7+y6D6nf9Evl0iEaZAmIhYwROOHJi3Bl5Zmkqfxxfz6cV/wDjBtcjAl//Y/ms32i6VhT7IyMjYfp2tKBAm3Vmw28DfRxxM1e05eJ44n7zhE1v9enevofjTAk+WzIhAC9svol0jAQIza/b0wcZgt4zOojAwZtvoXe3vs/4Iy36m+P07+OD3j+EZZAc3/vbbbxtf8sgjj2AcDv6vZBzJ1y0iZfgpZGZm8sADD/DIY4/h+v3DON75gdOqR8PIMyE5A/77Mf4kB46FfhYfMb7zdRHtpqKSEZZgM8LwQH1tJ+/+JdJG5osvAKgb4oKkpLa9OCED73A7bMbRSfDHP/4Rv7dm52PUxkjJO/8GoC4ValJSWt81MuiqRyAPHJVeJt17Ly8MgMWLF+OtLIDVdjD6NXlDWbNmTbibvq1lj8I/L4CTb4diIC8Nps2i9peh/WD93jmMS4GNgQmix/zzcfugZj1UF0S+jSIRoECYiFj5PwFgejj48EfIy8vD4XCQs9cElvcN1Hnqse2/dp3N2vi5ToEwEQhlhGUN2Z/0W0pIPOKZNr1+6NBhVAXPq+d8FebWdUzEA2GDRgCQVdPJMsKMHzYHMngGjaayspKTTjqJex57k9En/I1jT72U5ORk0tLS+Mc/QjcNCgsLmTp1Ktdffz1erxewg6Rfd911XHPNNY313njjDf70pz/x8ccf0/vY47i7v+0Su8eyBv5+y01Re5vxbKeD5YdLeqAvVD3460ojtx2RGMhcaCddqh7VunPBrXnH2XE5DnTAqbVv4dgjjfX7pvDz/Hlha2NHLVy4kI1v/ReA5emQmJTUOO5tq/XdD56/Ag62T88qhhxHA/O+fNl2iwQue/KfjBw5snG/FDGLX4OHgHXYybIevhuTlsGI6SU8egJcORFS5pbycxXcnwq4IXN1GZ4vP4O3+8P/BsMP10S2jSIRoECYiFirbfeb+h6ZbKm0gTCAgQMH8mjgZpz5ZCV4KkKv2bgKNqzDrLMXh99V2PqtoUCYdGfBjLCUlJR2vX7QoEGsD44dMndumFoVHpEeI4xBowBIroGNGzvRrJl1G+2skABDxvHyyy/zv//9jz/96U9s2bIFsGMeBj/7gQMHMnr0aABeeOEFwO5XKysr+b//+7/tbuKee+7hyCOPZPPmzdy+Grz9weGFScu/jPCbE4jSYPlpgVTPevDXFUduOyIxkLvWBm2qdmn9mFlNpR45BYBxlXDyWzU4FsPQ+YaPT9yLDRs6wTAB9cUk/+tXjFlgn35YByeffHL71jXlUXh1JvR14qyFvw6CH975FwQOe/MAr9fL/ffvZIzecPhqHtQDvVLgg7/Cr37LE088wfpSuHoaPPZ9aKjS534E/7728eabLg+tY+kjkW2jSAQoECYiVqk9Ia9OstM6BwNhw4cP5zVPJv4EcJQAM16x9Z+9AoYMh34DcdQYTCLMqEtnwIABrdqcZo2U7iyYEZaamtqu1/ft25fFCYEni1aEFlQsg/z/ULb2ez744IPQdOtRFPkxwmzwyFkFNeWd4MInqCofAjfmv99QxOzZs3da/aWXXuLArcbIue+++0hPT+e6667jkUfshcOll15KWjA40sQRRx3NksH2+3NweRn5+fkdfguyY36/PzoZYcF9Qj12wG2R7sLnI63cduurHdi6GZK3sdcxEOyF0GSemOOdcMMNN3SsfeHw6lmMfHQxFIM3zYnn/Ku59dZb27cuhwN67wsXHAvA6aUwZMVKMFCZ5SKYD/3ZZ5+Fp+3b4ymDHwPj/55zCRz1ewDefPPN7Va/9o93MStwv7vvvGXgjVzTRCJNgTARsersgbC42v6ePHkyAElJSVz35z+zPjAOaOGTD8La1XD1E1AXennRcOg3cvdWp4crI0y6s2AgrL0ZYf369WNm8ARz2WYIBJ/47nyYcRZznzmGY489luuvv55zzjmHU045hS+/jE7WUMS7RvYNnGVXQUNlJ8oIK14O1fbhkRdcwL//bceIueGGG3j55ZebVf3iiy848MADmTRpUmPZ999/z9SpoYGPr776aowxPPnkk3z66afb7DvffPNNdrvhXgAGlsMnn3wUiXclARUVFY2zGEc0IywYCPOAw1sWue2IRNuGDTgN4AJv3pD2rWPACZgrMmA4lO4HDwSGYxy4BT7+KMb7wAYPvPYp+GF5Frx+6z386ZFHGDx4cMfW+9u/YtIhtRSO+dwW1U4ax5IldvrNRYsWRW6G9eoCOy4Y8Ng3M7nkkku46667+PTTT5tVe+utt/j888+56aab8Bx+PmSDsxa4EHgaTFkCjz/+OJs3b45MO0UiQIEwEbECgbCNlbabY9NU70suuYSvbKIYabOW8uMRe0OloWEQfHw8LDkcji+h2UVfS4KBsIgd3EViKNg9riMZYW8UAmlAjR+++Rw85bBlJgAr19oz1wcffJCXXnqJt956i0MOOYS6urodrzRMIh4ICw467IeMui2dZx+xxs4Y6XM1ThYGwNSpUznzzDMb92kABx9sB34544wzOOmkk7jsssuYMGHCDlc9adIkVq1aRVFREbfddhvz58+3QdSDz8U4wFUGVXPfi8CbkqBgt8jU1FSS2jrId1s07RrprY7cdkSirSowhmIyOLKGtW8didk4rpzFjbsmMHoRvFIKuCCxClwbN+LxeMLW3DarWglLbBb2XeUw/OBDw7Pe3rtQelLfZkXVx59Iv342q662trbxnCLs6jdDYM6Od7//nqeffpo///nPgL0RXlBQwOzZsznppJM45JBDcDgc7H/mX/EFD2d+4HNwJHi56sorOO200yLTTpEIUCBMRKzABXSlzx78hg4d2rgoKyuL4sNOBSB7Neyx1HYfeWYYHPkujPkUFhS7ufLKK1u9OWWESXfW0Yywnj17sqoYqm0vQT487Ujee+kuqAH88K8vtv+69evXt2t7bRHxQFhiIibNdrsc6PJTVlYWme20VcFyACqSQ0W5ubmMGmXHNJs+fTq9evXi9ddfb1yelpbGW2+91Wzw/B0ZMmQIffr04dZbb2WPPfawhVk5NPSxfWT7rl0Upjci2xOVbpHQrGuk31sb2W2JRFPguEciJGaPbP96ssaQte8d1PjTuefJVyEw3NjuCcR2nLCSJbDaPpzpcLDrrruGbdXJv7uz8bHJhIxfXk56ejoJCXb/X1wcofEEPSWNgbCt52h+9dVXGThwIBMnNp/x2p2ayyd9mwyD0gdIg5RE+OqrzjW5j8jOKBAmIlZgeuoqHwwePLhZdgPAVf98He+gUJnZDW4LDBY6ZcoUli1bxrBhrb8DqECYdGcdzQgL/n+8E+ihdVQJrPvXX+EJMFNh1IrtT0xRWFjYvga3kt/vb8zQitgYYYAj22bkDErsRDNHBv62JW7Yc889eeONN/jss88a/w5Tpkxh48aNnHrqqWHdrLd/NgC9
yra+TJFwispA+dCsa6TxRT6DUyRaTGVgEMVESO01tkPr+uMf/0hpaSlHHHc69LM3XSamRP4Yt1Mrl4APfAng3mWXdh/ftyd1z/OoPjcZ+sCsgyC3t525PTfXzqIZ3D+FXe1mqLQPR+2/P5mZmY2LTjjhhB2+rOLY/+Pj4IQ+dogzUgI9RyKWvSYSZgqEiYhVb9PNK3w2M2F7nDeeCG4gDb7YJ4HCErjsssv4+OOPm2WQtYYCYdKddTQjDODjjz/mraR98U4Gh4HLFgEF4PDbIT3uvfdevF4vPp+vcUy/4OyFkdJ0cP6IZYQBZNu/Wz83bNzYSQJAm+zfdqMLdtttN0455RR23333ZlVaO0Zimwy2d977VFd3nm6i3VDwQjPiGWHBrpEe8PuUESbdh2dTPgD+BMjqNaTD62s8xgy0EZfdE6KT9bxDK1cCUJkBe+y5Z3jX7UzA93/zufPY4byzy68bjyXB/VHEAmGbCyFwWB990EHceOONQMsTE4waPZojy2FAjx6YI+2Nq9RAj/KKioqdvFKk84jgWayIdCmtCIS5zn0EchaBr5RDT5zF7MuKGTu2fXf9FAiT7qyjGWFgM4ymTJnCwv/dy9jLb8LVpEfIWmD06NGNFwrBWQcjfSc22C0SIhwI65EJFJPn7EQZYZts/5H10Kbs145KHLsrvDmPPvVQVFRE3759W36RtFmwa2TUMsLqgAZlhEn3UVu8kSSgwQ0ZTTKLOmxYHlDMcAOfFRSEb71tlW+3XZxqs4LDLavPLvz53yualQUztCorK8O+PQCKbIZdbSL0yMvj6quv5pJLLmnxhsDIkbbra2FJCX5HFi7qSQnMdF0VHCtOpJNTRpiIWB57gVvm3XEgjLSB8MulcGYRpA9j4sSJjRfgbRW826VAmHRH4cgIC9rtxBvx/fWfzcoKsIGwoGDArdsEwnpm21+GzjML1WZ7cr/GB3369InaZl0j7c2G9EpYvXp11LYbb6KWEdakayT++shuSySK6krsvtqfEObs2GE2K3ZAPcyaNSt8622rNfZuVFESoXEcIywYCItYltVGOzNzZQL06mUHY8vNzW3x80tLS2PAAPu51PlsOCGYEVZdrUlApGtQIExErGAgzLOTQBiAwwHOjo8NpIww6c6CAalwBMIAks68iLLABfQ67Ni2TYPQwceRPgFt2jUykmOE0dMGI7IaiMpMmC3yN0CxzZpd5YXevXtHb9uDRwCQWAn5+fnR226cifpg+R6gQYEw6T48ZTaY7E8IcxfxYXbojexa+CaGg7H7C+2A9YWuyGSEbU9Wlu0WWl5eHpkNbLTBy1J3KBDWWsGJYqpq7XlBcIwwZYRJV6FAmIhYHnsgazEQFibBQJjGvJHuKJgRFrbBdB0OsmfO5MfUVO5n20Fsg9uJdCAsahlhOXaA4HQf1Nd3gmCBpwQC40Avr6PNYyJ2yEB7seGsgI2FMewW1M1FbbD8YADbC04FwqQb8ZXZnWTYA2EDh4MD3H7wFRVFfCzMHfEX2WBUcUoqeXl5UdlmxDPCttjPbIuz7Td4dtllFwBKygPnOwqESRejQJiIAGC8NjOrpD66gTBlhEl309DQgMdjs4fClREGwO6703PZMtzXXMP999/fbFG0xwhzOBzbzCwbVoFAWEpnyQir39IYCCtNTYtaNgAAA0aBA/CDt2Bp9LYbZ6LeNRJI8CgQJt1H/qSh8Bhs/GUYj3sAGf0g2z4cBCxZsiS8628l5xb7/1qTG72u8ZEOhJliG9zbRNszwg488EAAyqu9QCgjTF0jpatQIExEwN+Aw2szsyr9CVEZ/0aBMOmugtlgEMaMsID+/fvz0EMPNRsfrOl2opURFtFsMIAce0Ke5O0kGWElayDwsWaOHh3ZIODWEpMxGfZhwvpV0dtunIla18jk5MaHbq83stsSiSIfHsiG+vSk8K44uQ/YeyMMAr744ovwrr81fD4cpbbnxKbUjKhtNvIZYfacobABevbs2aaXHnHEEQDUBA7RygiTrkaBMBEBXxUEzseTcvtF5SJPgTDprpoGwpKbXPRGUpsywhrq4NMpMOvSNo9RFAyERXR8MIAcG4xPrO8kgbACO5NXQwL0C3QHiSZ/tg08Jm9ZH/Vtx4uodY10OPAn2P+fxAYFwqT7cBqbCe31h/n40CQQNhB49NFHo3/uWFSEwwAuKEvuJoEw44cye3zd5E4mISGhTS/v2bMn06ZNo9Z+7BojTLocBcJEBLwVjYGw3AFDorJJBcKkuwoGo5KTk6OWOdSmjLCqVbDxU1jzCjgT27Sd4GD5Ec8Iy7Xjr7jqO0nXyHV2tsbaNBgZGCA4mvw59nNKqyiO+rbjRdQywgCTZP9/3E3G3BPp6pzYiIgv7IGw3o2BsIeBLzZtYv2334Z3Gy1Zt87+zgZ/QnrUNhvRQJi3AkdgDP7ytPbt937xi19w9PGnAHDIARMBdY2UrkOBMBGB+koIjFmfF4XxwSA0tbYCYdLdBDPCwjo+WAvalBFWudL+zhhhZ4Ftg6h1jcztD4CzFhq8neCkev1aACqTYcSIEdHffrYNdKbW6E57pEQtIwwwSTbzIrFBgTDpPiKWEZaYCz1Dx6oxQMMzz4R3Gy1ZY2+G0AP8zvAOebAzEQ2E1RdDcLV5/du9GleCPf/ISLXnBcoIk65CgTARgdrQhWa/KM2Gpoww6a6Cwahwjw+2M8FAWKvuxNasDbxoSJu3E71AWF/7uwZcvgiNjdIW64sAKE2MTSDM2cNmIKTVeztHhlw3U19f3/h/G42MMBJtICzB1xD5bYlEyYKaSWRcBE/+uG94V+x0QV5ms6LkmTPDu42WbN5gf2eCI4oZYVlZWQCUl5eHf+X1xRBYbfLgwe1fj9ue66Qn2/N6ZYRJV6FAmIhAfejCamCULvKCgTBjTFS2JxItwYywaI0PBqGgW6sywrxl9ndi2zNfohYIyw60zQ+pntgHwryBQNhmJ4wbNy7q23f2sBdD2X4oKiqK+va7u2C3SKfT2ZiBEVHJdjDxRN0Ikm6kwe+gqg7q/WEeLB+gX/OB3PssWQKffx7+7exIjY0YNbjBnRi9bO+IZoSVriPQm5WMjpz7u+zfIy3ZZu0pI0y6CgXCRATq7IW7ccLgYcOiskllhEl35Q3MBJeY2LbxtzqiTRlhnsAt4ISsNm8nOEZYxAfLT0/HBHrCZHpjHwjzb7Bjc1Wlp5GeHr1sgCBHrv2sshpCQRsJn2C3yOzs7OiM65diAwUJDUY3g6TbCJ7PReT4MGg7XfeiOU5YtT0OeV3RvckV0UDYOjsJjC8B+nTk3D+QEZaSqECYdC0KhIkIvsBBy7hhSJTGCFMgTLqrqAWLmmhbRlggEJbY9kBY1DLCHA5Mit1HpHtif1LtKLEBxroevWLTgBz7WWX4ItRFJs5Fc6B8AEdg/MAkAx6PJyrbFIm04PlcRILJfQbCZCCJxoHzieLwA9RUAuBxQFJSBDLediCigbD1+QDUJkNeXl771xPICEtJsJ+/ukZKV6FAmIhQsqEQsIGwPn36RGWbCoRJdxW1YFETbcoI87Y/Iyy
a782k2kBiprcVwb0Ic5XZLD9/30GxaUCuDdCkehUIi4RoDpQPzQNhGvNNuouIBsKSesNvgKeAsbaooqws/NvZkSaBsFhkhHk8Hurr68O78iI7E2ZlIuTm5rZQeSdcNiCZnGCzW5URJl1F9M7SI8FXDSYjNOtVgweMFxxucCU1rwc2Yu0I7Jz9XvB7wOECV3I769YABpzJdiBHAL8P/PWAE9wp7axbC/jBmQTOwEfkbwB/XRvrOhrTVe3fpw5MAzgTwZnQ9rrGDw22Cx3utCZ168H4wJEArsR21DXQELjQcaVu5/NsS91WfPZh+Z5s7/MMx/ck+Hl29Huy1efZQt3i9evpDZgEcBkP+Fr72bf/exI8UUp2N9i/fYe/Jzv6PNv5PdE+ou11tY+wL29oICUR0pMd9u8XhX1E7/oZbHwcCsuK7GfjSt7xZ19nu/k1C4S18nvi8/lISoCMFKd9PxHcR/hTE3DhJdtRF9t9hDE46/yQBAlDdwm9Ppr7iNxcSIQUAxXlTbpGah8RqNuxfURJSQkJLujbO9OuP8L7CEdqKiRCIlBbU9U4IHZXPo9o/2ff0e9Jk89T5xFtqBv+fYTf7ycpIXBe1/T4EI7vSVKuTd8I/gAb1q0lM/i3jPR5RE01uKA+EVKTtsr2juC1RnrVd3x9C/z+JZsV1is3O3z7iCI7AUCJC3rmZgfa1o7PPlA/yeUjNQlqqytDr9c+Yjt143cfYf8+UTiPaKWunRH2Zj+o3xJ6vvj/4NV0mHNl83pv9Lbl1QWhsmWP2bLvLmpe939DbHn54lDZqmdt2bdnNK/73lhbXjo3VLbmv7bsqxOa150+0ZZv/jpUVviuLftsSvO6nxxkyzdMD5Vt/MyWfTS5ed0vjrHl694KlRV/Z8ve36N53a9PteX5L4XKyhfasndGNq8741xbvuKpUFnlSlv21lb99GdfZsuXPhIqq91gy17Lbl537rW2/Kd7QmXeclv2arr9Agct+JMtW/CnUJnxheoGsxrAru/VdLv+pl7LtuW1G0JlSx+xZbMva173rf62vHJlqGzFU7ZsxrnN674zMvA9WRgqy3/Jln19avO67+9hy4u/C5Wte8uWfXFM87ofTbblGz8LlW2Ybss+Oah53c+m2PLCd0Nlm7+2ZdMnNq/71Qm2fM1/Q2Wlc23Ze2MpXb/elrkd9nv+arr93geVL7Zl/xvSfL3fXWTLlz0WKqsusGVv9G5ed86Vtnzx/wHgcDjomQEFfy2x5U3Nv8GWLbo9VNZQE/rsgwcpsHVeTbevaSpYV/sI7SMgqvsIn8/H8gfg66vmRW0fMWD1VfTOgvGDG2DzDFu+o33Exo/t46ZdI1vYRwQ1NDTwypUw78blEd9H+NMSIQNOO2tTbPcRtSU4/gL8G3rt2mRA4WjuI7Jz4U5IfQzSa+aFyrWPsDq4jygtLeWmE+F/Z30alX2EIzUVHoDev4CG4vmhul34PKKZKJ1HAPb/N/h5NqXzCCuK+4iGhgbe+C08fsjz4d9HbPgw9DgQb9i0aVn0ziNqq+FE6H8RHNW7yd8MInqt4fryaA7YBWbdEegeGcZ9hL/BziC92QF9zYL27yMCGWE9zRKq/w0HDCoM1dU+IkT7CCua5xEt6NqBMBEJi7JNG+2DBEfUthmVAYlFYiA4Rli0+cqAGmgo+bF1L+hA18ho8KdHb2aunTEFPzc+zhs7PjaNyA51W6lvzThw0ibBrpFRkxz6bqtrpHQXER3qIm2I/Z2zV+PV6+aiwh1WD7va0H7X5Ypyh6pA8kt5mLuCmlKbubXeQFp6Wgu1dyKQwePAfv5h78IpEimmCyovLzeAKS9eb4zfH1rgqzfGW2WMr675C7xV9sffECpr8ATq1nagbrUtb/A1qesNrKOmA3VrAnW9Ter62lG3unldX22grqd9df0Nob9Ps7p1gb9PfTvr+pv83bf3ebalbis++7B8T7b3eYbje1LTjrqt+OxbqPvKKXsZA8YzyN3Gz77935Ply5cbwOT1zAjT92RHn6f2EdpHmKjuI1599VWTkog58rD9o7aP8N78B2MSMGYXzIYPztuq7laf/Zv9jHkJY7Z8v5317vx78v7775ukBMx+++wR8X1E1aEjjAEz75CEmO4jaqY9akwSxtsLU1VZ0eTvE8V9xMo3jEnEmCTM/Tf8Yed1jdE+orFu6/YRV155pUlwYW7/8/XROY+48AxjEjG1p2DmfD9r53WNCcs+ItLnEc1E6TzC1vW3r67OI8K+j7j11ltNUgLmmisvidw+wltn/Ic7jAHzn5F9o7aPMEfsZowL8/2hmKefemyrv08E9hHG2L/Xtbsa48CY32BmfPyf8O0jPBXGv6f9O96Zl9qxfcS6d4x5CVP52miTmoQZNnTQjus2/n20j7B/n/jaRxhjonIe0RgrKi83O9O1xwhzp4X640Kgf+92pqt3byfK7UwI9TVtd93tzFbidIf6xra77nbugjtd4NxO29pSt2l/4vbUdTi3//dxJWGncWlvXccO6m7n82xLXeh43TZ99uH4nmzv84z896Rqix0zyJHgbONn3/7vSTAjrKrObFu/o98TiOBnr33EDutqHwHYjLBaD/hICo3BsIO6dr0d30e4eg8EL7AUfnr/f+QdvaO6LvAGxu9omhHWyu+Jz+ej3ht8b03eSwT2ESbDdmNIqfPHdB9Rs2whKfVQlwDp6RmhBdHcR6TlgAE8YMord14XtI9orNu6fURJSQneBkjP7tN8TBeIzLEkJQ084K6H2jrPzutClziPaCZK5xG27o4+e51H7Lxu+D97v9+//eNDmPcRfmcCLjw4fZ7onUfU1UEDVHohMTmj+bJIXms8uMg+fhychy0E5xnb1jX+wFhSW/Xq2NlnX1OIY4sd3H5NQk7HvieBrpEuPNTUg9dndly38TXaRwBxt48AonQe0bpMa/VNEhFqA1PHOxJdLdQMH80aKd1VsPugyxW9/yfHVVexOstub2hB7Y4r+hvAFwikJGa3eTvRfG8mKxuAVF9s9xG+NXYsl5rU6H2e23ClQeB80FRW7ryutFmwa2SPHj2is8FUexLvboC62p38v4p0IRGdNbIpl12/8Xkju52mau2FdbUfkpKSWqgcJvX1cHBoDKysn2Ztv976DwLjhp3W+nVXroDAsFZVPXvvvG5LXDYI4jQ2qB+r4SFE2kqBMJE45yueTy93BQCOZAXCRDoqeBLodkc36TrnrJMB6L3cs+P/K19F6HE7xgiL6nvLskGJlKZ3l2OhyE4mUpO+nTu80ZKQ3niDuaG6Onbt6KZKAzeDohYISwkM2uyB+tqKndcV6SKiFghzxiAQFsjcrPJDcvJ2slQiISkJbhwGgXHJ875YsP16VavtwO6mDefT6+Y3Js009OvXoWYGs4ycxq5Q5/XSVSgQJhLPGupwTx/PrybYp47E6F24KxAm3VUsMsIA0s6+wP5eDhuWzNt+JU9gBixXcmi68DYIvrdoBMIcPXoBkOQFY2IXDEsq2gxARUZGCzUjyJ0GwVnGNVh+2AUzwnJycqKzwdRAIMwLnlpl+E
n3ELxREvlAWODYGs1AWL0NhFU0RDEjDKChFg6xDzO/3wLbO2euWmV/pw9t/Xrn2VkDK1Mho0+fjrUxmBHmt4PkKyNMugoFwkTimafM/g5M8BLNrpGOwFgGCoRJdxOrjLCE/Y7BlwMOL5Q/e/f2KwWngm9HNhhENxDmzLUn54ne6M5WubXkIpuxU9I7L2ZtwBUKhFGjjLBwi3pGWGooI8xTp0CYdA/B87lI3wRyBNfv90XvJkm9DbpV+KIdCKuB0UASOKsMLNjOTa5am7VM6sDWr3ehnV16VRL07NmzY21szAiz3bx1Xi9dhQJhIvEsmEZdFXieuZ1BGCMkeMcwlpkeIpEQq4wwHA42jbMntGmfTG+2qLCwkMMOO4wvP5lmCzoYCIvGe3P2st01EupjOx174kabCVA5cFTM2oA7rbFrpNujQFg4+f3+GGSEBQYs9oJXgTDpJqI3Rpg9/riAurrWDYrdYXU2EFbmjWLXSICxN/Bp6aEQPPx8/Pq2dco3ww3A1H9Aa86pjYHFBQD80BCGQFhSLzjsY9aOeh5QRph0HQqEicQzEzhYzQ88z4zeGDjqGindVawywgBSzzgXgP4rayjetKmx/MYbb+Tzzz/nr/feYgtWueD442HOnDatP5rvzd1nEADOOqivKY/49rarZCPOwBBOdaP2iU0bAFzJmMB9CrdHXSPDqbKysvE4FLVAWPBC2gM+BTalm4hWIMwROP64HFBVVdVC7TDx2GNfiTfKGWHDLyI//WzWBO9dzf522zrfroZ1wOylMHduy+usWQur7Q2ezyshNze3Y210JUHeFDxZEwGd10vXoUCYSDwzXigEAsMLxCIQZoxRVph0KzHLCAOyp/4JkwzuCrhkbB/mz5/Pb3/7W1544QUAsoIzZ79dAu+9BxMntu4OckA0u0a6e/W3D2rAU71p55UjZaEdR8WfCRlDdo1NGwAcDkyi3Wcm+zTLYDgFu0WmpKSQkrKdKeAjIbgdD3jro3QhLxJhUQuEBcYIS3BAdbQmD6m3763UE+WMMGDkyJEsCtwIMT8u3rbCpiYTbmze3PIKN88FmxDGbBOGjLCA4OeujDDpKhQIE4lnDR5Y3eT5uCiNj0LzEyUFwqQ7iWVGGOm5VIyyd6svzoZTTz2Vv/3tb42Ls4OBsMImXQ3XvtXq1UczEEZPO1g+VeCr3Bj57W3PotkA1PSAvh2dWauDTIK9+Ev0xa6baHcU9W6REAqEeaFBGWHSTURtsHyXXb+LaGaE2UDYlvooZ4QB++67L3OcgeDbqi2wdXfQskCW8MVnwtFHt7zC+Z/aIHwCLAf69u0blnYGb/4pI0y6ijbtqW677TYcDkezn9GjRzcur6ur44orriA3N5f09HROPfVUNm5sfvJaUFDAcccdR2pqKr179+YPf/hDTAfBFYlr/nooCjyeBOzawfToNmh6oqSDpnQnscwIAygfbzOXJvpg1apVzZY1ZoQNanLRP/+frV53VN9br14YB2DArM+P/Pa2w//zfAA2p0JeXgwHywdMYFbfxAYFwsIp6gPlQ7OukQ3q6irdRNTHCItW10ifDwJJTlvqo58RlpiYSI/9ToJMcDRA3fffN69Qbrs50ruVsz/OmQXA2nQwwPDhw8PSTmWESVfT5j3VuHHj2LBhQ+PPN99807jsd7/7He+88w6vvfYaX375JevXr+eUU05pXN7Q0MBxxx2Hx+NhxowZPPfcczz77LPccsst4Xk3ItImPm9NKBA2DHBE78JdgTDprmKaEQYMvOA6AHputF1HACZPnsy1115L78xApcQmXcA2FNFaUc0Ic7vxBSbXo3D1TqtGin/pMgBWOsPXfaTdkuzfPLnBG9t2dDMxzQhTIEy6kWjNGkmga6Q7Wl0jm8zUW1wX/YwwgINOuAKG2Me3TDmIsWPHctlll/HOtP9BeSDw1KuVgbCFywH40QlZWVlh2/cpI0y6mjYHwtxuN3l5eY0/wRPD8vJy/vWvf/Hggw9y2GGHsffee/PMM88wY8YMvvvuOwA++ugjfv75Z1588UX23HNPjjnmGO68804ee+wxPB5PeN+ZiLRoc1FhKBCWR+PJRTQ4HI7GxzpoSncS64wwx+STMcngqIOTAz0ebrnlFs455xz6BAbcNabJDLGFrRhTJCDaQb76dLufcKxfF5Xtbc2xxo5NtsKdGrPPM8gk2TEck/zKog+nYCAsqhlhTbpGmgaN+SbdQ/Qywuzxx+2A2too/P9Uho6Rm2IUCNtjwgFUDLfHgOsToWLZYp566il+dfpJjTO/v/zpF3i9Ldwo8ZTCT2UAfFINw4YNa3Y+3hHKCJOups17quXLl9OvXz+GDRvG2WefTUFBYPrVH37A6/UyZcqUxrqjR49m0KBBzJw5E4CZM2ey22670adPKGJ91FFHUVFRwU8//bTDbdbX11NRUdHsR0Q6buP6guaBsBhlhGmMMOlOgieBMQucJCbhGGu7Of83GT7/lYMpB+3NrrvuSt8c+3/304Imx9zFG6GhHr/f3+IJbFQzwgBPpg3Y1a5aGZXtNdPQgHOdvchantLKO+0R5Ei2F18KhIVXTLpGNskI83sVCJPuIWqBsJp8APYdFt1AmHFDfUP0u0YG1Z9zFqRCzypYlQYDnJCaCFTa5f9+dzqHHHII6xcssLNCjx+/7WQ4S99rnCDrf3U2EBYuwXMeTYIlXUWb9lSTJk3i2Wef5cMPP+SJJ55g9erVHHjggVRWVlJUVERiYiLZ2dnNXtOnTx+KiuyVdlFRUbMgWHB5cNmO3HvvvWRlZTX+DBw4sC3NFpEdKM5fBcFziF6oa6RIGETtYmBnfnGw/b0KDnnL4P7uPhISEhiU7IRyqKpqEkz53AcLHuWwww5j7NixO83Qjna2W0OPNLvdNWublW/ZsoX999+fk08+ucMn3HV1ddTUhLqnLV68mMMPP5z3H7wNRx2YBFiVHZ4xVDokcPGVbPy6yAijmHSNDF5Ie8Hvq9t5XZEuImrHPmOPUUmu9gfCPvroI0aPHt3Ya+mZZ57hr3/9K3VbD0QPUFVsN+u27y1Wwx70OvEJ+PeFmDwHiRXwfjakJtEYCKtNS2PGjBlMOOwwzAcfwPz5ULA8dLyo3QwP3QEGqvsmUUh4A2E6r5eupk17qmOOOYbTTz+d3XffnaOOOor333+fsrIyXn311Ui1D4Abb7yR8vLyxp+1a9e2/CIRaVFlIKMTN5CEAmEiYRA86QxXd4N2ue4xOD1wYe8B3n8P8vMZcocProfspsMSFYO58ja+/vJLli1bxty5c3e42mCQLDExMXJtb8IMsX070zcUNyv/05/+xIwZM3j77bf5fuuBg3dgXcFqfvGL4+nTpw8TJ05kzZo1NDQ0cNBBBzFm6EA2ffQvStevZbfdduOzzz5j4ZN3AVDXG3LyOsENuEAWUQo2U17CI6YZYV7ApzHCpHuI2qyRwdX72x8IO/dXx/H0QUsZcelkano6OO3qC6n40x+48sort61cYZM1/G7IzMzcdnm0uJLhV//C8fT14ILdSuD3hx7cGAh7afp0xo8fz4biY
kqT7Xn1uocn43K5eOSyY2HXfvCMHR/s0cDsw7vvvnv4mtfkBpnO66Ur6NCeKjs7m1GjRrFixQry8vLweDyUlZU1q7Nx48bGmZby8vK2mUUy+HxnszElJSWRmZnZ7EdEOq62aL19EJxJToEwkQ7rFIGw9Dx4ZRP89lz7fG4+THsLRz1QBaMDCWH/dIJxgWNGNXcEkp62e2Hhq4UFt7JX2rc4HJCWlhaNd0HSXhMAyCmubzy/MMYw68v/cft+8IYDHHfeudN1rF27lqtPPYL0q4fxt4Hv4a3exJw5c7jnnnt47fl/81D596yuKKH3UReTOnYQo9z2Yu7owO5wVVrsZ4wEcKbaHXUyUeoOFCdiOlg+4PYqI0y6h6gNlt//KADqPO3fF54zwccBb0HPhZBaDBlVcIcHqp57btsB+DfZyVrqE9jmOjcmjvgjTLDn0L9NAAKn0EP23psZM2Zw6aWXkhBI9BrweQmrfmm4+r0PYJUPvwu+6AV/Dgx7dvzxx4etWU3P6zVOmHQFHQqEVVVVsXLlSvr27cvee+9NQkICn376aePypUuXUlBQwOTJkwE7a9XChQvZtGlTY52PP/6YzMxMxo4d25GmiEg7+DYH/hcVCBMJm2AgLKZdIwGcbjj8JPv453r4dvo2VT7xw3cj7OMrA2PsFhYWbruuuo2w6A6OH/A1xkB6evq2dSIg8yB7wZOxGTafcQb4/SxZsoQ3em7klhlwioG8QNeW7THGcNxxx3H+jE/I/h8MfQLeHWWXFeTnc+B9V7H/MnAGYhFJ5fBuX8hwwq4bbNlbxg4NEWuOVBt8TEGBsHCKyWD5TcYYSvAou0+6h6h1jcweDYDX2/594XkuYAt40uHfk8B/qC2/xeXjk08+aV650A6qVZVkZ1mMucRsOGQ3+/gDOw43KU5ITiY5OZknn3wSz8mD7FX+jzDkv+AohPo0GO+GQzeDD7jjjju2GdKoI5QRJl1Nm/ZU1113HV9++SX5+fnMmDGDk08+GZfLxZlnnklWVhYXXXQR1157LZ9//jk//PADF1xwAZMnT2bfffcF4Mgjj2Ts2LGce+65/Pjjj0yfPp2bb76ZK664IiYzcIjEvTLbJUSBMJHwCX6fY5oRFnRA4Ox+I/Dp19ssPu+CC3g0azTGCVkFcM5gmt3QCqrcYu+IV3psl8hoZYQ5xh5gxy8ERk6fDm+9xdxpLzJ8RqhOSWCWrA0bNnD11VdzyCGHsHq1be+3337L8oUL2X1LqP5+8+Dy3nDEyh/ov6we3PDYWLh4GOCAYflQPDYZVyU09IAxtz7HCSecEJX3uzPBQFiyUSAsnGLSNdLtxrjs/sHtVSBMuoeoBcLcNpDsdmDHd9wyG/PDtfhL5u/0ZcYYVq9ejd9by7jA0NRPu+CiWTBtVCq4YGw9zH7llWav862xk8tsBKZNmxbud9M+hx9jf5cFxvTMTGi2OHffHLgOGA/sAmuHZzChGhbUw9SpU/nVr37F5ZdfHtYmKSNMupo2jfa3bt06zjzzTIqLi+nVqxcHHHAA3333Hb162bPUhx56CKfTyamnnkp9fT1HHXUUjz/+eOPrXS4X7777LpdffjmTJ08mLS2NqVOncscdd4T3XYlIi3w+H66qwJzLwV4aUQyENQ0SKBAm3Umn6BoZlJMDg1KhoAaKtx2L6LgTT+S4f/8b7wEDSfh2HfelwoBnn+Wqq65ir732aqx3yx+v4KETYO0mG4CJVkYYaQOouyKJ5NtssGDBbbfRL2sDNBkr/uPqcsb5fFxyySW89957AIwbN47x48djjOEwwO0DeoBvLwfuTwyPVoAps+OO/byfg7G3fcr+OTlwy5nwzhISFtkUMded13Lq2edF5722JNX+zROBUgXCwiYmXSMBk+TCUeMj0bvjySlEupKoBcJcoUDYolnvYW5/AMfLtdTu8hDr/zmLJH8KZcaw6267NR6Hi4qKePLJJ7ntttt46s/ncUlg4uQp/36D1XvtRaYpgM8OhuWQOHtmkzflgyWLAdiQksWRBx4Y2ffWWrsfA1n3QXngefZWCSVHzYIDCuAPpZA7kYEOBwux5yeROjdRRph0NW0KhL2yVYR8a8nJyTz22GM89thjO6wzePBg3n///bZsVkQiYN26dWQEj1MxyAhreiDWDGjSnXSqQBjAHoOgYMn2lwVmv0q4+2+YQ0+h/2J4Kg+OPvpoZs+ezZAhQ6ioqGDTWnshsCUwKG+0MsJwONkyem8G3DoDbocxixfh7B1YdhKQBh98BX3+8x8WLFjQ+LLa2lpmzJhBEvCaEzuGyl7gfuYbzD4H4doQuFvdG/6SOIxnDw1kzv3jQ1g+EZZshguOhsv/Gp332RrJ9m+e2IEBomVbMckIA0xKAtT4SApkNIp0dVEbLD8pA7CBsGsGzMPxOOCH1JmQctYB9FvkpaEnPHzDA5x08sn88Y9/bDaxm+eV58EHNT1g1Mkng8MBZhD+XRJxLvewR8laGhoacLlcmA2f414QGFTzoBM7z3E9Zw8YCswPPM9Obb7clQSZI7d5WSTbr4ww6WpiPICJiMRKfn5+TANhEDpo6s6RdCedLhC2Xyizi6FbLUsIdKc4+GQcV+4HwCVF8Bibuf322wH46aef6JMFzIXywLCCgwcPjmybm0gZcymMALIgoQFcG8AkAr8aA8dCz/52vNF169Zt89prgV8Edy8H5MKA/XB88gXl+7thL3h9Mph++4de0G8w/LwRSkrg3x/YC6TOIpARlqBAWNh4PB6qApnRUc8IS7HdjBO9vqhuVyRSopYRlmQnTXMYOHgVjYPFA/Rf4MXhhyGb4IM/X8/48eObBcFeAq6wEyeyenBCaB/vcOLY0449trvxs2qVHRes9Nn7YCN4EmHiHTufmCWqErNgdHboeY/YTySnjDDpahQIE4lT+fn5pG8TCGtTkmiHKRAm3VGnC4Sddnqo+/OeQNPMF3eT//kHP4bzhwBw+mZI+9SOhbJo0SJGpQAPwSnPOZnx+uvst99+UWi4lbv7VL6oP4d5h2fT4AZfMqy85mwYYgewH5kHL7zwAsYYhg8fzoQJExpf+2+gtlc6nA8cZMcrZewBNPxvA1fufQavuE/l5ptvbr5Bh8N2Ke1s0uyFjrtBgbBwCWaDORyO6A+CnWoDYSk+BcKke4jarJHB9XuBn+3DWwb3x2x1VXtSRgPl5bbv4B577AFA03DRvLzmwSPHXgcAMKAGCtasASDxnW8BWDKmDz0GDQrfewiHPUaHHg/oE7t2BDQ951FGmHQF0b3qFZFOIz8/n8Y8CGWEiYRNpwuEDdgfrgeWA2dOhgKPzXiC5oEwdyr8axn+4n4439nCNbUl5Ofns3DhQs4vBvzgGDeAyaeeGvW3cMhFL8BFsDY/n/VFRUzad19YdDcAI5qc/x9++OHceOONvPrqq3g8Hurq6kg6fjOsegpydm+s1yO3J39/6j/Rfhsdk2oDNQqEhU8wEJadnR35i/etpdtxjpJ8/oiO2yMSLVHLCBs3zv5eaH8ZB9yxcDH/PmgYvyreQloqsBT2Coy6cc011/Dggw8y
[remainder of base64-encoded PNG data omitted: matplotlib figure comparing ground-truth, PE-predicted, and generated f0 curves with note reference lines]", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#f0_gen,_=get_pitch_crepe(*vocoder.wav2spec(wav_gen),hparams,threshold=0.05)\n", + "%matplotlib inline\n", + "f0_gen,_=get_pitch_parselmouth(*svc_model.vocoder.wav2spec(wav_gen),hparams)\n", + "f0_tst[f0_tst==0]=np.nan#ground truth f0\n", + "f0_pred[f0_pred==0]=np.nan#f0 pe predicted\n", + "f0_gen[f0_gen==0]=np.nan#f0 generated\n", + "fig=plt.figure(figsize=[15,5])\n", + "plt.plot(np.arange(0,len(f0_tst)),f0_tst,color='black')\n", + "plt.plot(np.arange(0,len(f0_pred)),f0_pred,color='orange')\n", + "plt.plot(np.arange(0,len(f0_gen)),f0_gen,color='red')\n", + "plt.axhline(librosa.note_to_hz('C4'),ls=\":\",c=\"blue\")\n", + "plt.axhline(librosa.note_to_hz('G4'),ls=\":\",c=\"green\")\n", + "plt.axhline(librosa.note_to_hz('C5'),ls=\":\",c=\"orange\")\n", + "plt.axhline(librosa.note_to_hz('F#5'),ls=\":\",c=\"red\")\n", + "#plt.axhline(librosa.note_to_hz('A#5'),ls=\":\",c=\"black\")\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.8.13 ('diffsvc')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.13" + }, + "vscode": { + "interpreter": { + "hash": "5cf89e54348a1bdadbb0ca2d227dcc30cc7e2d47cc75a8605923523671b5b7c7" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/modules/commons/__pycache__/common_layers.cpython-38.pyc b/modules/commons/__pycache__/common_layers.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cf94aa9e8449eb39488f124f7743df1da2ded023 Binary files /dev/null and b/modules/commons/__pycache__/common_layers.cpython-38.pyc differ diff --git a/modules/commons/__pycache__/espnet_positional_embedding.cpython-38.pyc b/modules/commons/__pycache__/espnet_positional_embedding.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8b1b81e35f49db1b199633befefadfce70c38f68 Binary files /dev/null and b/modules/commons/__pycache__/espnet_positional_embedding.cpython-38.pyc differ diff --git a/modules/commons/__pycache__/ssim.cpython-38.pyc b/modules/commons/__pycache__/ssim.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8fc7255a8b79296ddb4fcf41fad1710de2538a07 Binary files /dev/null and b/modules/commons/__pycache__/ssim.cpython-38.pyc differ diff --git a/modules/commons/common_layers.py b/modules/commons/common_layers.py new file mode 100644 index 0000000000000000000000000000000000000000..192997cee5b265525b244e1f421b41dab9874b99 --- /dev/null +++ b/modules/commons/common_layers.py @@ -0,0 +1,671 @@ +import math +import torch +from torch import nn +from torch.nn import Parameter +import torch.onnx.operators +import torch.nn.functional as F +import utils + + +class Reshape(nn.Module): + def __init__(self, *args): + super(Reshape, self).__init__() + self.shape = args + + def forward(self, x): + return x.view(self.shape) + + +class Permute(nn.Module): + def __init__(self, *args): + super(Permute, self).__init__() + self.args = args + + def forward(self, x): + return x.permute(self.args) + + +class LinearNorm(torch.nn.Module): + def __init__(self, in_dim, out_dim, bias=True, w_init_gain='linear'): + super(LinearNorm, self).__init__() + self.linear_layer = torch.nn.Linear(in_dim, out_dim, bias=bias) + + 
torch.nn.init.xavier_uniform_( + self.linear_layer.weight, + gain=torch.nn.init.calculate_gain(w_init_gain)) + + def forward(self, x): + return self.linear_layer(x) + + +class ConvNorm(torch.nn.Module): + def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, + padding=None, dilation=1, bias=True, w_init_gain='linear'): + super(ConvNorm, self).__init__() + if padding is None: + assert (kernel_size % 2 == 1) + padding = int(dilation * (kernel_size - 1) / 2) + + self.conv = torch.nn.Conv1d(in_channels, out_channels, + kernel_size=kernel_size, stride=stride, + padding=padding, dilation=dilation, + bias=bias) + + torch.nn.init.xavier_uniform_( + self.conv.weight, gain=torch.nn.init.calculate_gain(w_init_gain)) + + def forward(self, signal): + conv_signal = self.conv(signal) + return conv_signal + + +def Embedding(num_embeddings, embedding_dim, padding_idx=None): + m = nn.Embedding(num_embeddings, embedding_dim, padding_idx=padding_idx) + nn.init.normal_(m.weight, mean=0, std=embedding_dim ** -0.5) + if padding_idx is not None: + nn.init.constant_(m.weight[padding_idx], 0) + return m + + +def LayerNorm(normalized_shape, eps=1e-5, elementwise_affine=True, export=False): + if not export and torch.cuda.is_available(): + try: + from apex.normalization import FusedLayerNorm + return FusedLayerNorm(normalized_shape, eps, elementwise_affine) + except ImportError: + pass + return torch.nn.LayerNorm(normalized_shape, eps, elementwise_affine) + + +def Linear(in_features, out_features, bias=True): + m = nn.Linear(in_features, out_features, bias) + nn.init.xavier_uniform_(m.weight) + if bias: + nn.init.constant_(m.bias, 0.) + return m + + +class SinusoidalPositionalEmbedding(nn.Module): + """This module produces sinusoidal positional embeddings of any length. + + Padding symbols are ignored. + """ + + def __init__(self, embedding_dim, padding_idx, init_size=1024): + super().__init__() + self.embedding_dim = embedding_dim + self.padding_idx = padding_idx + self.weights = SinusoidalPositionalEmbedding.get_embedding( + init_size, + embedding_dim, + padding_idx, + ) + self.register_buffer('_float_tensor', torch.FloatTensor(1)) + + @staticmethod + def get_embedding(num_embeddings, embedding_dim, padding_idx=None): + """Build sinusoidal embeddings. + + This matches the implementation in tensor2tensor, but differs slightly + from the description in Section 3.5 of "Attention Is All You Need". 
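+         (Concretely, the sine and cosine halves are concatenated along the channel dimension rather than interleaved, which is the slight difference from the paper; the frequencies still decrease geometrically from 1 down to 1/10000.)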
+ """ + half_dim = embedding_dim // 2 + emb = math.log(10000) / (half_dim - 1) + emb = torch.exp(torch.arange(half_dim, dtype=torch.float) * -emb) + emb = torch.arange(num_embeddings, dtype=torch.float).unsqueeze(1) * emb.unsqueeze(0) + emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1).view(num_embeddings, -1) + if embedding_dim % 2 == 1: + # zero pad + emb = torch.cat([emb, torch.zeros(num_embeddings, 1)], dim=1) + if padding_idx is not None: + emb[padding_idx, :] = 0 + return emb + + def forward(self, input, incremental_state=None, timestep=None, positions=None, **kwargs): + """Input is expected to be of size [bsz x seqlen].""" + bsz, seq_len = input.shape[:2] + max_pos = self.padding_idx + 1 + seq_len + if self.weights is None or max_pos > self.weights.size(0): + # recompute/expand embeddings if needed + self.weights = SinusoidalPositionalEmbedding.get_embedding( + max_pos, + self.embedding_dim, + self.padding_idx, + ) + self.weights = self.weights.to(self._float_tensor) + + if incremental_state is not None: + # positions is the same for every token when decoding a single step + pos = timestep.view(-1)[0] + 1 if timestep is not None else seq_len + return self.weights[self.padding_idx + pos, :].expand(bsz, 1, -1) + + positions = utils.make_positions(input, self.padding_idx) if positions is None else positions + return self.weights.index_select(0, positions.view(-1)).view(bsz, seq_len, -1).detach() + + def max_positions(self): + """Maximum number of supported positions.""" + return int(1e5) # an arbitrary large number + + +class ConvTBC(nn.Module): + def __init__(self, in_channels, out_channels, kernel_size, padding=0): + super(ConvTBC, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = kernel_size + self.padding = padding + + self.weight = torch.nn.Parameter(torch.Tensor( + self.kernel_size, in_channels, out_channels)) + self.bias = torch.nn.Parameter(torch.Tensor(out_channels)) + + def forward(self, input): + return torch.conv_tbc(input.contiguous(), self.weight, self.bias, self.padding) + + +class MultiheadAttention(nn.Module): + def __init__(self, embed_dim, num_heads, kdim=None, vdim=None, dropout=0., bias=True, + add_bias_kv=False, add_zero_attn=False, self_attention=False, + encoder_decoder_attention=False): + super().__init__() + self.embed_dim = embed_dim + self.kdim = kdim if kdim is not None else embed_dim + self.vdim = vdim if vdim is not None else embed_dim + self.qkv_same_dim = self.kdim == embed_dim and self.vdim == embed_dim + + self.num_heads = num_heads + self.dropout = dropout + self.head_dim = embed_dim // num_heads + assert self.head_dim * num_heads == self.embed_dim, "embed_dim must be divisible by num_heads" + self.scaling = self.head_dim ** -0.5 + + self.self_attention = self_attention + self.encoder_decoder_attention = encoder_decoder_attention + + assert not self.self_attention or self.qkv_same_dim, 'Self-attention requires query, key and ' \ + 'value to be of the same size' + + if self.qkv_same_dim: + self.in_proj_weight = Parameter(torch.Tensor(3 * embed_dim, embed_dim)) + else: + self.k_proj_weight = Parameter(torch.Tensor(embed_dim, self.kdim)) + self.v_proj_weight = Parameter(torch.Tensor(embed_dim, self.vdim)) + self.q_proj_weight = Parameter(torch.Tensor(embed_dim, embed_dim)) + + if bias: + self.in_proj_bias = Parameter(torch.Tensor(3 * embed_dim)) + else: + self.register_parameter('in_proj_bias', None) + + self.out_proj = nn.Linear(embed_dim, embed_dim, bias=bias) + + if add_bias_kv: + 
self.bias_k = Parameter(torch.Tensor(1, 1, embed_dim)) + self.bias_v = Parameter(torch.Tensor(1, 1, embed_dim)) + else: + self.bias_k = self.bias_v = None + + self.add_zero_attn = add_zero_attn + + self.reset_parameters() + + self.enable_torch_version = False + if hasattr(F, "multi_head_attention_forward"): + self.enable_torch_version = True + else: + self.enable_torch_version = False + self.last_attn_probs = None + + def reset_parameters(self): + if self.qkv_same_dim: + nn.init.xavier_uniform_(self.in_proj_weight) + else: + nn.init.xavier_uniform_(self.k_proj_weight) + nn.init.xavier_uniform_(self.v_proj_weight) + nn.init.xavier_uniform_(self.q_proj_weight) + + nn.init.xavier_uniform_(self.out_proj.weight) + if self.in_proj_bias is not None: + nn.init.constant_(self.in_proj_bias, 0.) + nn.init.constant_(self.out_proj.bias, 0.) + if self.bias_k is not None: + nn.init.xavier_normal_(self.bias_k) + if self.bias_v is not None: + nn.init.xavier_normal_(self.bias_v) + + def forward( + self, + query, key, value, + key_padding_mask=None, + incremental_state=None, + need_weights=True, + static_kv=False, + attn_mask=None, + before_softmax=False, + need_head_weights=False, + enc_dec_attn_constraint_mask=None, + reset_attn_weight=None + ): + """Input shape: Time x Batch x Channel + + Args: + key_padding_mask (ByteTensor, optional): mask to exclude + keys that are pads, of shape `(batch, src_len)`, where + padding elements are indicated by 1s. + need_weights (bool, optional): return the attention weights, + averaged over heads (default: False). + attn_mask (ByteTensor, optional): typically used to + implement causal attention, where the mask prevents the + attention from looking forward in time (default: None). + before_softmax (bool, optional): return the raw attention + weights and values before the attention softmax. + need_head_weights (bool, optional): return the attention + weights for each head. Implies *need_weights*. Default: + return the average attention weights over all heads. 
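+             reset_attn_weight (bool, optional): if True, cache the freshly computed attention probabilities; if False, reuse the previously cached probabilities instead of recomputing them.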
+ """ + if need_head_weights: + need_weights = True + + tgt_len, bsz, embed_dim = query.size() + assert embed_dim == self.embed_dim + assert list(query.size()) == [tgt_len, bsz, embed_dim] + + if self.enable_torch_version and incremental_state is None and not static_kv and reset_attn_weight is None: + if self.qkv_same_dim: + return F.multi_head_attention_forward(query, key, value, + self.embed_dim, self.num_heads, + self.in_proj_weight, + self.in_proj_bias, self.bias_k, self.bias_v, + self.add_zero_attn, self.dropout, + self.out_proj.weight, self.out_proj.bias, + self.training, key_padding_mask, need_weights, + attn_mask) + else: + return F.multi_head_attention_forward(query, key, value, + self.embed_dim, self.num_heads, + torch.empty([0]), + self.in_proj_bias, self.bias_k, self.bias_v, + self.add_zero_attn, self.dropout, + self.out_proj.weight, self.out_proj.bias, + self.training, key_padding_mask, need_weights, + attn_mask, use_separate_proj_weight=True, + q_proj_weight=self.q_proj_weight, + k_proj_weight=self.k_proj_weight, + v_proj_weight=self.v_proj_weight) + + if incremental_state is not None: + print('Not implemented error.') + exit() + else: + saved_state = None + + if self.self_attention: + # self-attention + q, k, v = self.in_proj_qkv(query) + elif self.encoder_decoder_attention: + # encoder-decoder attention + q = self.in_proj_q(query) + if key is None: + assert value is None + k = v = None + else: + k = self.in_proj_k(key) + v = self.in_proj_v(key) + + else: + q = self.in_proj_q(query) + k = self.in_proj_k(key) + v = self.in_proj_v(value) + q *= self.scaling + + if self.bias_k is not None: + assert self.bias_v is not None + k = torch.cat([k, self.bias_k.repeat(1, bsz, 1)]) + v = torch.cat([v, self.bias_v.repeat(1, bsz, 1)]) + if attn_mask is not None: + attn_mask = torch.cat([attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1) + if key_padding_mask is not None: + key_padding_mask = torch.cat( + [key_padding_mask, key_padding_mask.new_zeros(key_padding_mask.size(0), 1)], dim=1) + + q = q.contiguous().view(tgt_len, bsz * self.num_heads, self.head_dim).transpose(0, 1) + if k is not None: + k = k.contiguous().view(-1, bsz * self.num_heads, self.head_dim).transpose(0, 1) + if v is not None: + v = v.contiguous().view(-1, bsz * self.num_heads, self.head_dim).transpose(0, 1) + + if saved_state is not None: + print('Not implemented error.') + exit() + + src_len = k.size(1) + + # This is part of a workaround to get around fork/join parallelism + # not supporting Optional types. 
+ if key_padding_mask is not None and key_padding_mask.shape == torch.Size([]): + key_padding_mask = None + + if key_padding_mask is not None: + assert key_padding_mask.size(0) == bsz + assert key_padding_mask.size(1) == src_len + + if self.add_zero_attn: + src_len += 1 + k = torch.cat([k, k.new_zeros((k.size(0), 1) + k.size()[2:])], dim=1) + v = torch.cat([v, v.new_zeros((v.size(0), 1) + v.size()[2:])], dim=1) + if attn_mask is not None: + attn_mask = torch.cat([attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1) + if key_padding_mask is not None: + key_padding_mask = torch.cat( + [key_padding_mask, torch.zeros(key_padding_mask.size(0), 1).type_as(key_padding_mask)], dim=1) + + attn_weights = torch.bmm(q, k.transpose(1, 2)) + attn_weights = self.apply_sparse_mask(attn_weights, tgt_len, src_len, bsz) + + assert list(attn_weights.size()) == [bsz * self.num_heads, tgt_len, src_len] + + if attn_mask is not None: + if len(attn_mask.shape) == 2: + attn_mask = attn_mask.unsqueeze(0) + elif len(attn_mask.shape) == 3: + attn_mask = attn_mask[:, None].repeat([1, self.num_heads, 1, 1]).reshape( + bsz * self.num_heads, tgt_len, src_len) + attn_weights = attn_weights + attn_mask + + if enc_dec_attn_constraint_mask is not None: # bs x head x L_kv + attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len) + attn_weights = attn_weights.masked_fill( + enc_dec_attn_constraint_mask.unsqueeze(2).bool(), + -1e9, + ) + attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) + + if key_padding_mask is not None: + # don't attend to padding symbols + attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len) + attn_weights = attn_weights.masked_fill( + key_padding_mask.unsqueeze(1).unsqueeze(2), + -1e9, + ) + attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) + + attn_logits = attn_weights.view(bsz, self.num_heads, tgt_len, src_len) + + if before_softmax: + return attn_weights, v + + attn_weights_float = utils.softmax(attn_weights, dim=-1) + attn_weights = attn_weights_float.type_as(attn_weights) + attn_probs = F.dropout(attn_weights_float.type_as(attn_weights), p=self.dropout, training=self.training) + + if reset_attn_weight is not None: + if reset_attn_weight: + self.last_attn_probs = attn_probs.detach() + else: + assert self.last_attn_probs is not None + attn_probs = self.last_attn_probs + attn = torch.bmm(attn_probs, v) + assert list(attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim] + attn = attn.transpose(0, 1).contiguous().view(tgt_len, bsz, embed_dim) + attn = self.out_proj(attn) + + if need_weights: + attn_weights = attn_weights_float.view(bsz, self.num_heads, tgt_len, src_len).transpose(1, 0) + if not need_head_weights: + # average attention weights over heads + attn_weights = attn_weights.mean(dim=0) + else: + attn_weights = None + + return attn, (attn_weights, attn_logits) + + def in_proj_qkv(self, query): + return self._in_proj(query).chunk(3, dim=-1) + + def in_proj_q(self, query): + if self.qkv_same_dim: + return self._in_proj(query, end=self.embed_dim) + else: + bias = self.in_proj_bias + if bias is not None: + bias = bias[:self.embed_dim] + return F.linear(query, self.q_proj_weight, bias) + + def in_proj_k(self, key): + if self.qkv_same_dim: + return self._in_proj(key, start=self.embed_dim, end=2 * self.embed_dim) + else: + weight = self.k_proj_weight + bias = self.in_proj_bias + if bias is not None: + bias = bias[self.embed_dim:2 * self.embed_dim] + return F.linear(key, weight, bias) + + def in_proj_v(self, 
value): + if self.qkv_same_dim: + return self._in_proj(value, start=2 * self.embed_dim) + else: + weight = self.v_proj_weight + bias = self.in_proj_bias + if bias is not None: + bias = bias[2 * self.embed_dim:] + return F.linear(value, weight, bias) + + def _in_proj(self, input, start=0, end=None): + weight = self.in_proj_weight + bias = self.in_proj_bias + weight = weight[start:end, :] + if bias is not None: + bias = bias[start:end] + return F.linear(input, weight, bias) + + + def apply_sparse_mask(self, attn_weights, tgt_len, src_len, bsz): + return attn_weights + + +class Swish(torch.autograd.Function): + @staticmethod + def forward(ctx, i): + result = i * torch.sigmoid(i) + ctx.save_for_backward(i) + return result + + @staticmethod + def backward(ctx, grad_output): + i = ctx.saved_variables[0] + sigmoid_i = torch.sigmoid(i) + return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i))) + + +class CustomSwish(nn.Module): + def forward(self, input_tensor): + return Swish.apply(input_tensor) + +class Mish(nn.Module): + def forward(self, x): + return x * torch.tanh(F.softplus(x)) + +class TransformerFFNLayer(nn.Module): + def __init__(self, hidden_size, filter_size, padding="SAME", kernel_size=1, dropout=0., act='gelu'): + super().__init__() + self.kernel_size = kernel_size + self.dropout = dropout + self.act = act + if padding == 'SAME': + self.ffn_1 = nn.Conv1d(hidden_size, filter_size, kernel_size, padding=kernel_size // 2) + elif padding == 'LEFT': + self.ffn_1 = nn.Sequential( + nn.ConstantPad1d((kernel_size - 1, 0), 0.0), + nn.Conv1d(hidden_size, filter_size, kernel_size) + ) + self.ffn_2 = Linear(filter_size, hidden_size) + if self.act == 'swish': + self.swish_fn = CustomSwish() + + def forward(self, x, incremental_state=None): + # x: T x B x C + if incremental_state is not None: + assert incremental_state is None, 'Nar-generation does not allow this.' 
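+             # incremental (step-by-step) decoding is never used for non-autoregressive generation, so abort here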
+ exit(1) + + x = self.ffn_1(x.permute(1, 2, 0)).permute(2, 0, 1) + x = x * self.kernel_size ** -0.5 + + if incremental_state is not None: + x = x[-1:] + if self.act == 'gelu': + x = F.gelu(x) + if self.act == 'relu': + x = F.relu(x) + if self.act == 'swish': + x = self.swish_fn(x) + x = F.dropout(x, self.dropout, training=self.training) + x = self.ffn_2(x) + return x + + +class BatchNorm1dTBC(nn.Module): + def __init__(self, c): + super(BatchNorm1dTBC, self).__init__() + self.bn = nn.BatchNorm1d(c) + + def forward(self, x): + """ + + :param x: [T, B, C] + :return: [T, B, C] + """ + x = x.permute(1, 2, 0) # [B, C, T] + x = self.bn(x) # [B, C, T] + x = x.permute(2, 0, 1) # [T, B, C] + return x + + +class EncSALayer(nn.Module): + def __init__(self, c, num_heads, dropout, attention_dropout=0.1, + relu_dropout=0.1, kernel_size=9, padding='SAME', norm='ln', act='gelu'): + super().__init__() + self.c = c + self.dropout = dropout + self.num_heads = num_heads + if num_heads > 0: + if norm == 'ln': + self.layer_norm1 = LayerNorm(c) + elif norm == 'bn': + self.layer_norm1 = BatchNorm1dTBC(c) + self.self_attn = MultiheadAttention( + self.c, num_heads, self_attention=True, dropout=attention_dropout, bias=False, + ) + if norm == 'ln': + self.layer_norm2 = LayerNorm(c) + elif norm == 'bn': + self.layer_norm2 = BatchNorm1dTBC(c) + self.ffn = TransformerFFNLayer( + c, 4 * c, kernel_size=kernel_size, dropout=relu_dropout, padding=padding, act=act) + + def forward(self, x, encoder_padding_mask=None, **kwargs): + layer_norm_training = kwargs.get('layer_norm_training', None) + if layer_norm_training is not None: + self.layer_norm1.training = layer_norm_training + self.layer_norm2.training = layer_norm_training + if self.num_heads > 0: + residual = x + x = self.layer_norm1(x) + x, _, = self.self_attn( + query=x, + key=x, + value=x, + key_padding_mask=encoder_padding_mask + ) + x = F.dropout(x, self.dropout, training=self.training) + x = residual + x + x = x * (1 - encoder_padding_mask.float()).transpose(0, 1)[..., None] + + residual = x + x = self.layer_norm2(x) + x = self.ffn(x) + x = F.dropout(x, self.dropout, training=self.training) + x = residual + x + x = x * (1 - encoder_padding_mask.float()).transpose(0, 1)[..., None] + return x + + +class DecSALayer(nn.Module): + def __init__(self, c, num_heads, dropout, attention_dropout=0.1, relu_dropout=0.1, kernel_size=9, act='gelu'): + super().__init__() + self.c = c + self.dropout = dropout + self.layer_norm1 = LayerNorm(c) + self.self_attn = MultiheadAttention( + c, num_heads, self_attention=True, dropout=attention_dropout, bias=False + ) + self.layer_norm2 = LayerNorm(c) + self.encoder_attn = MultiheadAttention( + c, num_heads, encoder_decoder_attention=True, dropout=attention_dropout, bias=False, + ) + self.layer_norm3 = LayerNorm(c) + self.ffn = TransformerFFNLayer( + c, 4 * c, padding='LEFT', kernel_size=kernel_size, dropout=relu_dropout, act=act) + + def forward( + self, + x, + encoder_out=None, + encoder_padding_mask=None, + incremental_state=None, + self_attn_mask=None, + self_attn_padding_mask=None, + attn_out=None, + reset_attn_weight=None, + **kwargs, + ): + layer_norm_training = kwargs.get('layer_norm_training', None) + if layer_norm_training is not None: + self.layer_norm1.training = layer_norm_training + self.layer_norm2.training = layer_norm_training + self.layer_norm3.training = layer_norm_training + residual = x + x = self.layer_norm1(x) + x, _ = self.self_attn( + query=x, + key=x, + value=x, + key_padding_mask=self_attn_padding_mask, + 
incremental_state=incremental_state, + attn_mask=self_attn_mask + ) + x = F.dropout(x, self.dropout, training=self.training) + x = residual + x + + residual = x + x = self.layer_norm2(x) + if encoder_out is not None: + x, attn = self.encoder_attn( + query=x, + key=encoder_out, + value=encoder_out, + key_padding_mask=encoder_padding_mask, + incremental_state=incremental_state, + static_kv=True, + enc_dec_attn_constraint_mask=None, #utils.get_incremental_state(self, incremental_state, 'enc_dec_attn_constraint_mask'), + reset_attn_weight=reset_attn_weight + ) + attn_logits = attn[1] + else: + assert attn_out is not None + x = self.encoder_attn.in_proj_v(attn_out.transpose(0, 1)) + attn_logits = None + x = F.dropout(x, self.dropout, training=self.training) + x = residual + x + + residual = x + x = self.layer_norm3(x) + x = self.ffn(x, incremental_state=incremental_state) + x = F.dropout(x, self.dropout, training=self.training) + x = residual + x + # if len(attn_logits.size()) > 3: + # indices = attn_logits.softmax(-1).max(-1).values.sum(-1).argmax(-1) + # attn_logits = attn_logits.gather(1, + # indices[:, None, None, None].repeat(1, 1, attn_logits.size(-2), attn_logits.size(-1))).squeeze(1) + return x, attn_logits diff --git a/modules/commons/espnet_positional_embedding.py b/modules/commons/espnet_positional_embedding.py new file mode 100644 index 0000000000000000000000000000000000000000..74decb6ab300951490ae08a4b93041a0542b5bb7 --- /dev/null +++ b/modules/commons/espnet_positional_embedding.py @@ -0,0 +1,113 @@ +import math +import torch + + +class PositionalEncoding(torch.nn.Module): + """Positional encoding. + Args: + d_model (int): Embedding dimension. + dropout_rate (float): Dropout rate. + max_len (int): Maximum input length. + reverse (bool): Whether to reverse the input position. + """ + + def __init__(self, d_model, dropout_rate, max_len=5000, reverse=False): + """Construct an PositionalEncoding object.""" + super(PositionalEncoding, self).__init__() + self.d_model = d_model + self.reverse = reverse + self.xscale = math.sqrt(self.d_model) + self.dropout = torch.nn.Dropout(p=dropout_rate) + self.pe = None + self.extend_pe(torch.tensor(0.0).expand(1, max_len)) + + def extend_pe(self, x): + """Reset the positional encodings.""" + if self.pe is not None: + if self.pe.size(1) >= x.size(1): + if self.pe.dtype != x.dtype or self.pe.device != x.device: + self.pe = self.pe.to(dtype=x.dtype, device=x.device) + return + pe = torch.zeros(x.size(1), self.d_model) + if self.reverse: + position = torch.arange( + x.size(1) - 1, -1, -1.0, dtype=torch.float32 + ).unsqueeze(1) + else: + position = torch.arange(0, x.size(1), dtype=torch.float32).unsqueeze(1) + div_term = torch.exp( + torch.arange(0, self.d_model, 2, dtype=torch.float32) + * -(math.log(10000.0) / self.d_model) + ) + pe[:, 0::2] = torch.sin(position * div_term) + pe[:, 1::2] = torch.cos(position * div_term) + pe = pe.unsqueeze(0) + self.pe = pe.to(device=x.device, dtype=x.dtype) + + def forward(self, x: torch.Tensor): + """Add positional encoding. + Args: + x (torch.Tensor): Input tensor (batch, time, `*`). + Returns: + torch.Tensor: Encoded tensor (batch, time, `*`). + """ + self.extend_pe(x) + x = x * self.xscale + self.pe[:, : x.size(1)] + return self.dropout(x) + + +class ScaledPositionalEncoding(PositionalEncoding): + """Scaled positional encoding module. + See Sec. 3.2 https://arxiv.org/abs/1809.08895 + Args: + d_model (int): Embedding dimension. + dropout_rate (float): Dropout rate. + max_len (int): Maximum input length. 
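+ Example (illustrative sketch only; sizes are arbitrary):
+ >>> pe = ScaledPositionalEncoding(d_model=256, dropout_rate=0.1)
+ >>> x = torch.randn(2, 100, 256) # (batch, time, d_model)
+ >>> y = pe(x) # x + alpha * positional table; shape unchanged: (2, 100, 256)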
+ """ + + def __init__(self, d_model, dropout_rate, max_len=5000): + """Initialize class.""" + super().__init__(d_model=d_model, dropout_rate=dropout_rate, max_len=max_len) + self.alpha = torch.nn.Parameter(torch.tensor(1.0)) + + def reset_parameters(self): + """Reset parameters.""" + self.alpha.data = torch.tensor(1.0) + + def forward(self, x): + """Add positional encoding. + Args: + x (torch.Tensor): Input tensor (batch, time, `*`). + Returns: + torch.Tensor: Encoded tensor (batch, time, `*`). + """ + self.extend_pe(x) + x = x + self.alpha * self.pe[:, : x.size(1)] + return self.dropout(x) + + +class RelPositionalEncoding(PositionalEncoding): + """Relative positional encoding module. + See : Appendix B in https://arxiv.org/abs/1901.02860 + Args: + d_model (int): Embedding dimension. + dropout_rate (float): Dropout rate. + max_len (int): Maximum input length. + """ + + def __init__(self, d_model, dropout_rate, max_len=5000): + """Initialize class.""" + super().__init__(d_model, dropout_rate, max_len, reverse=True) + + def forward(self, x): + """Compute positional encoding. + Args: + x (torch.Tensor): Input tensor (batch, time, `*`). + Returns: + torch.Tensor: Encoded tensor (batch, time, `*`). + torch.Tensor: Positional embedding tensor (1, time, `*`). + """ + self.extend_pe(x) + x = x * self.xscale + pos_emb = self.pe[:, : x.size(1)] + return self.dropout(x) + self.dropout(pos_emb) \ No newline at end of file diff --git a/modules/commons/ssim.py b/modules/commons/ssim.py new file mode 100644 index 0000000000000000000000000000000000000000..0d0241f267ef58b24979e022b05f2a9adf768826 --- /dev/null +++ b/modules/commons/ssim.py @@ -0,0 +1,391 @@ +# ''' +# https://github.com/One-sixth/ms_ssim_pytorch/blob/master/ssim.py +# ''' +# +# import torch +# import torch.jit +# import torch.nn.functional as F +# +# +# @torch.jit.script +# def create_window(window_size: int, sigma: float, channel: int): +# ''' +# Create 1-D gauss kernel +# :param window_size: the size of gauss kernel +# :param sigma: sigma of normal distribution +# :param channel: input channel +# :return: 1D kernel +# ''' +# coords = torch.arange(window_size, dtype=torch.float) +# coords -= window_size // 2 +# +# g = torch.exp(-(coords ** 2) / (2 * sigma ** 2)) +# g /= g.sum() +# +# g = g.reshape(1, 1, 1, -1).repeat(channel, 1, 1, 1) +# return g +# +# +# @torch.jit.script +# def _gaussian_filter(x, window_1d, use_padding: bool): +# ''' +# Blur input with 1-D kernel +# :param x: batch of tensors to be blured +# :param window_1d: 1-D gauss kernel +# :param use_padding: padding image before conv +# :return: blured tensors +# ''' +# C = x.shape[1] +# padding = 0 +# if use_padding: +# window_size = window_1d.shape[3] +# padding = window_size // 2 +# out = F.conv2d(x, window_1d, stride=1, padding=(0, padding), groups=C) +# out = F.conv2d(out, window_1d.transpose(2, 3), stride=1, padding=(padding, 0), groups=C) +# return out +# +# +# @torch.jit.script +# def ssim(X, Y, window, data_range: float, use_padding: bool = False): +# ''' +# Calculate ssim index for X and Y +# :param X: images [B, C, H, N_bins] +# :param Y: images [B, C, H, N_bins] +# :param window: 1-D gauss kernel +# :param data_range: value range of input images. 
(usually 1.0 or 255) +# :param use_padding: padding image before conv +# :return: +# ''' +# +# K1 = 0.01 +# K2 = 0.03 +# compensation = 1.0 +# +# C1 = (K1 * data_range) ** 2 +# C2 = (K2 * data_range) ** 2 +# +# mu1 = _gaussian_filter(X, window, use_padding) +# mu2 = _gaussian_filter(Y, window, use_padding) +# sigma1_sq = _gaussian_filter(X * X, window, use_padding) +# sigma2_sq = _gaussian_filter(Y * Y, window, use_padding) +# sigma12 = _gaussian_filter(X * Y, window, use_padding) +# +# mu1_sq = mu1.pow(2) +# mu2_sq = mu2.pow(2) +# mu1_mu2 = mu1 * mu2 +# +# sigma1_sq = compensation * (sigma1_sq - mu1_sq) +# sigma2_sq = compensation * (sigma2_sq - mu2_sq) +# sigma12 = compensation * (sigma12 - mu1_mu2) +# +# cs_map = (2 * sigma12 + C2) / (sigma1_sq + sigma2_sq + C2) +# # Fixed the issue that the negative value of cs_map caused ms_ssim to output Nan. +# cs_map = cs_map.clamp_min(0.) +# ssim_map = ((2 * mu1_mu2 + C1) / (mu1_sq + mu2_sq + C1)) * cs_map +# +# ssim_val = ssim_map.mean(dim=(1, 2, 3)) # reduce along CHW +# cs = cs_map.mean(dim=(1, 2, 3)) +# +# return ssim_val, cs +# +# +# @torch.jit.script +# def ms_ssim(X, Y, window, data_range: float, weights, use_padding: bool = False, eps: float = 1e-8): +# ''' +# interface of ms-ssim +# :param X: a batch of images, (N,C,H,W) +# :param Y: a batch of images, (N,C,H,W) +# :param window: 1-D gauss kernel +# :param data_range: value range of input images. (usually 1.0 or 255) +# :param weights: weights for different levels +# :param use_padding: padding image before conv +# :param eps: use for avoid grad nan. +# :return: +# ''' +# levels = weights.shape[0] +# cs_vals = [] +# ssim_vals = [] +# for _ in range(levels): +# ssim_val, cs = ssim(X, Y, window=window, data_range=data_range, use_padding=use_padding) +# # Use for fix a issue. When c = a ** b and a is 0, c.backward() will cause the a.grad become inf. +# ssim_val = ssim_val.clamp_min(eps) +# cs = cs.clamp_min(eps) +# cs_vals.append(cs) +# +# ssim_vals.append(ssim_val) +# padding = (X.shape[2] % 2, X.shape[3] % 2) +# X = F.avg_pool2d(X, kernel_size=2, stride=2, padding=padding) +# Y = F.avg_pool2d(Y, kernel_size=2, stride=2, padding=padding) +# +# cs_vals = torch.stack(cs_vals, dim=0) +# ms_ssim_val = torch.prod((cs_vals[:-1] ** weights[:-1].unsqueeze(1)) * (ssim_vals[-1] ** weights[-1]), dim=0) +# return ms_ssim_val +# +# +# class SSIM(torch.jit.ScriptModule): +# __constants__ = ['data_range', 'use_padding'] +# +# def __init__(self, window_size=11, window_sigma=1.5, data_range=255., channel=3, use_padding=False): +# ''' +# :param window_size: the size of gauss kernel +# :param window_sigma: sigma of normal distribution +# :param data_range: value range of input images. (usually 1.0 or 255) +# :param channel: input channels (default: 3) +# :param use_padding: padding image before conv +# ''' +# super().__init__() +# assert window_size % 2 == 1, 'Window size must be odd.' 
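+# # create_window() above returns a 1-D Gaussian of shape (channel, 1, 1, window_size);
+# # _gaussian_filter() applies it twice (along W, then transposed along H), i.e. a
+# # separable 2-D Gaussian blur. An odd window_size keeps the kernel centred, which is
+# # why the assert above insists on it.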
+# window = create_window(window_size, window_sigma, channel) +# self.register_buffer('window', window) +# self.data_range = data_range +# self.use_padding = use_padding +# +# @torch.jit.script_method +# def forward(self, X, Y): +# r = ssim(X, Y, window=self.window, data_range=self.data_range, use_padding=self.use_padding) +# return r[0] +# +# +# class MS_SSIM(torch.jit.ScriptModule): +# __constants__ = ['data_range', 'use_padding', 'eps'] +# +# def __init__(self, window_size=11, window_sigma=1.5, data_range=255., channel=3, use_padding=False, weights=None, +# levels=None, eps=1e-8): +# ''' +# class for ms-ssim +# :param window_size: the size of gauss kernel +# :param window_sigma: sigma of normal distribution +# :param data_range: value range of input images. (usually 1.0 or 255) +# :param channel: input channels +# :param use_padding: padding image before conv +# :param weights: weights for different levels. (default [0.0448, 0.2856, 0.3001, 0.2363, 0.1333]) +# :param levels: number of downsampling +# :param eps: Use for fix a issue. When c = a ** b and a is 0, c.backward() will cause the a.grad become inf. +# ''' +# super().__init__() +# assert window_size % 2 == 1, 'Window size must be odd.' +# self.data_range = data_range +# self.use_padding = use_padding +# self.eps = eps +# +# window = create_window(window_size, window_sigma, channel) +# self.register_buffer('window', window) +# +# if weights is None: +# weights = [0.0448, 0.2856, 0.3001, 0.2363, 0.1333] +# weights = torch.tensor(weights, dtype=torch.float) +# +# if levels is not None: +# weights = weights[:levels] +# weights = weights / weights.sum() +# +# self.register_buffer('weights', weights) +# +# @torch.jit.script_method +# def forward(self, X, Y): +# return ms_ssim(X, Y, window=self.window, data_range=self.data_range, weights=self.weights, +# use_padding=self.use_padding, eps=self.eps) +# +# +# if __name__ == '__main__': +# print('Simple Test') +# im = torch.randint(0, 255, (5, 3, 256, 256), dtype=torch.float, device='cuda') +# img1 = im / 255 +# img2 = img1 * 0.5 +# +# losser = SSIM(data_range=1.).cuda() +# loss = losser(img1, img2).mean() +# +# losser2 = MS_SSIM(data_range=1.).cuda() +# loss2 = losser2(img1, img2).mean() +# +# print(loss.item()) +# print(loss2.item()) +# +# if __name__ == '__main__': +# print('Training Test') +# import cv2 +# import torch.optim +# import numpy as np +# import imageio +# import time +# +# out_test_video = False +# # 最好不要直接输出gif图,会非常大,最好先输出mkv文件后用ffmpeg转换到GIF +# video_use_gif = False +# +# im = cv2.imread('test_img1.jpg', 1) +# t_im = torch.from_numpy(im).cuda().permute(2, 0, 1).float()[None] / 255. +# +# if out_test_video: +# if video_use_gif: +# fps = 0.5 +# out_wh = (im.shape[1] // 2, im.shape[0] // 2) +# suffix = '.gif' +# else: +# fps = 5 +# out_wh = (im.shape[1], im.shape[0]) +# suffix = '.mkv' +# video_last_time = time.perf_counter() +# video = imageio.get_writer('ssim_test' + suffix, fps=fps) +# +# # 测试ssim +# print('Training SSIM') +# rand_im = torch.randint_like(t_im, 0, 255, dtype=torch.float32) / 255. 
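+# # The loop below is a sanity check rather than training proper: the random image is
+# # the only parameter, Adam maximises SSIM against t_im, and the loop stops once the
+# # score exceeds 0.999, so the optimised image should converge towards test_img1.jpg.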
+# rand_im.requires_grad = True +# optim = torch.optim.Adam([rand_im], 0.003, eps=1e-8) +# losser = SSIM(data_range=1., channel=t_im.shape[1]).cuda() +# ssim_score = 0 +# while ssim_score < 0.999: +# optim.zero_grad() +# loss = losser(rand_im, t_im) +# (-loss).sum().backward() +# ssim_score = loss.item() +# optim.step() +# r_im = np.transpose(rand_im.detach().cpu().numpy().clip(0, 1) * 255, [0, 2, 3, 1]).astype(np.uint8)[0] +# r_im = cv2.putText(r_im, 'ssim %f' % ssim_score, (10, 30), cv2.FONT_HERSHEY_PLAIN, 2, (255, 0, 0), 2) +# +# if out_test_video: +# if time.perf_counter() - video_last_time > 1. / fps: +# video_last_time = time.perf_counter() +# out_frame = cv2.cvtColor(r_im, cv2.COLOR_BGR2RGB) +# out_frame = cv2.resize(out_frame, out_wh, interpolation=cv2.INTER_AREA) +# if isinstance(out_frame, cv2.UMat): +# out_frame = out_frame.get() +# video.append_data(out_frame) +# +# cv2.imshow('ssim', r_im) +# cv2.setWindowTitle('ssim', 'ssim %f' % ssim_score) +# cv2.waitKey(1) +# +# if out_test_video: +# video.close() +# +# # 测试ms_ssim +# if out_test_video: +# if video_use_gif: +# fps = 0.5 +# out_wh = (im.shape[1] // 2, im.shape[0] // 2) +# suffix = '.gif' +# else: +# fps = 5 +# out_wh = (im.shape[1], im.shape[0]) +# suffix = '.mkv' +# video_last_time = time.perf_counter() +# video = imageio.get_writer('ms_ssim_test' + suffix, fps=fps) +# +# print('Training MS_SSIM') +# rand_im = torch.randint_like(t_im, 0, 255, dtype=torch.float32) / 255. +# rand_im.requires_grad = True +# optim = torch.optim.Adam([rand_im], 0.003, eps=1e-8) +# losser = MS_SSIM(data_range=1., channel=t_im.shape[1]).cuda() +# ssim_score = 0 +# while ssim_score < 0.999: +# optim.zero_grad() +# loss = losser(rand_im, t_im) +# (-loss).sum().backward() +# ssim_score = loss.item() +# optim.step() +# r_im = np.transpose(rand_im.detach().cpu().numpy().clip(0, 1) * 255, [0, 2, 3, 1]).astype(np.uint8)[0] +# r_im = cv2.putText(r_im, 'ms_ssim %f' % ssim_score, (10, 30), cv2.FONT_HERSHEY_PLAIN, 2, (255, 0, 0), 2) +# +# if out_test_video: +# if time.perf_counter() - video_last_time > 1. 
/ fps: +# video_last_time = time.perf_counter() +# out_frame = cv2.cvtColor(r_im, cv2.COLOR_BGR2RGB) +# out_frame = cv2.resize(out_frame, out_wh, interpolation=cv2.INTER_AREA) +# if isinstance(out_frame, cv2.UMat): +# out_frame = out_frame.get() +# video.append_data(out_frame) +# +# cv2.imshow('ms_ssim', r_im) +# cv2.setWindowTitle('ms_ssim', 'ms_ssim %f' % ssim_score) +# cv2.waitKey(1) +# +# if out_test_video: +# video.close() + +""" +Adapted from https://github.com/Po-Hsun-Su/pytorch-ssim +""" + +import torch +import torch.nn.functional as F +from torch.autograd import Variable +import numpy as np +from math import exp + + +def gaussian(window_size, sigma): + gauss = torch.Tensor([exp(-(x - window_size // 2) ** 2 / float(2 * sigma ** 2)) for x in range(window_size)]) + return gauss / gauss.sum() + + +def create_window(window_size, channel): + _1D_window = gaussian(window_size, 1.5).unsqueeze(1) + _2D_window = _1D_window.mm(_1D_window.t()).float().unsqueeze(0).unsqueeze(0) + window = Variable(_2D_window.expand(channel, 1, window_size, window_size).contiguous()) + return window + + +def _ssim(img1, img2, window, window_size, channel, size_average=True): + mu1 = F.conv2d(img1, window, padding=window_size // 2, groups=channel) + mu2 = F.conv2d(img2, window, padding=window_size // 2, groups=channel) + + mu1_sq = mu1.pow(2) + mu2_sq = mu2.pow(2) + mu1_mu2 = mu1 * mu2 + + sigma1_sq = F.conv2d(img1 * img1, window, padding=window_size // 2, groups=channel) - mu1_sq + sigma2_sq = F.conv2d(img2 * img2, window, padding=window_size // 2, groups=channel) - mu2_sq + sigma12 = F.conv2d(img1 * img2, window, padding=window_size // 2, groups=channel) - mu1_mu2 + + C1 = 0.01 ** 2 + C2 = 0.03 ** 2 + + ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / ((mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2)) + + if size_average: + return ssim_map.mean() + else: + return ssim_map.mean(1) + + +class SSIM(torch.nn.Module): + def __init__(self, window_size=11, size_average=True): + super(SSIM, self).__init__() + self.window_size = window_size + self.size_average = size_average + self.channel = 1 + self.window = create_window(window_size, self.channel) + + def forward(self, img1, img2): + (_, channel, _, _) = img1.size() + + if channel == self.channel and self.window.data.type() == img1.data.type(): + window = self.window + else: + window = create_window(self.window_size, channel) + + if img1.is_cuda: + window = window.cuda(img1.get_device()) + window = window.type_as(img1) + + self.window = window + self.channel = channel + + return _ssim(img1, img2, window, self.window_size, channel, self.size_average) + + +window = None + + +def ssim(img1, img2, window_size=11, size_average=True): + (_, channel, _, _) = img1.size() + global window + if window is None: + window = create_window(window_size, channel) + if img1.is_cuda: + window = window.cuda(img1.get_device()) + window = window.type_as(img1) + return _ssim(img1, img2, window, window_size, channel, size_average) diff --git a/modules/fastspeech/__pycache__/fs2.cpython-38.pyc b/modules/fastspeech/__pycache__/fs2.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dee5fcdd0a76202567f83557c7acee622e5b6e6e Binary files /dev/null and b/modules/fastspeech/__pycache__/fs2.cpython-38.pyc differ diff --git a/modules/fastspeech/__pycache__/pe.cpython-38.pyc b/modules/fastspeech/__pycache__/pe.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6775c82edef051b7b6ddb3840c487526e026b4d9 Binary files /dev/null and 
b/modules/fastspeech/__pycache__/pe.cpython-38.pyc differ diff --git a/modules/fastspeech/__pycache__/tts_modules.cpython-38.pyc b/modules/fastspeech/__pycache__/tts_modules.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ce853baf7c08eabd8a9da96f476a2331baa75881 Binary files /dev/null and b/modules/fastspeech/__pycache__/tts_modules.cpython-38.pyc differ diff --git a/modules/fastspeech/fs2.py b/modules/fastspeech/fs2.py new file mode 100644 index 0000000000000000000000000000000000000000..085448a8dd44cdd8a8e106e9bf1b983fae29cb55 --- /dev/null +++ b/modules/fastspeech/fs2.py @@ -0,0 +1,255 @@ +from modules.commons.common_layers import * +from modules.commons.common_layers import Embedding +from modules.fastspeech.tts_modules import FastspeechDecoder, DurationPredictor, LengthRegulator, PitchPredictor, \ + EnergyPredictor, FastspeechEncoder +from utils.cwt import cwt2f0 +from utils.hparams import hparams +from utils.pitch_utils import f0_to_coarse, denorm_f0, norm_f0 + +FS_ENCODERS = { + 'fft': lambda hp: FastspeechEncoder( + hp['hidden_size'], hp['enc_layers'], hp['enc_ffn_kernel_size'], + num_heads=hp['num_heads']), +} + +FS_DECODERS = { + 'fft': lambda hp: FastspeechDecoder( + hp['hidden_size'], hp['dec_layers'], hp['dec_ffn_kernel_size'], hp['num_heads']), +} + + +class FastSpeech2(nn.Module): + def __init__(self, dictionary, out_dims=None): + super().__init__() + # self.dictionary = dictionary + self.padding_idx = 0 + if not hparams['no_fs2'] if 'no_fs2' in hparams.keys() else True: + self.enc_layers = hparams['enc_layers'] + self.dec_layers = hparams['dec_layers'] + self.encoder = FS_ENCODERS[hparams['encoder_type']](hparams) + self.decoder = FS_DECODERS[hparams['decoder_type']](hparams) + self.hidden_size = hparams['hidden_size'] + # self.encoder_embed_tokens = self.build_embedding(self.dictionary, self.hidden_size) + self.out_dims = out_dims + if out_dims is None: + self.out_dims = hparams['audio_num_mel_bins'] + self.mel_out = Linear(self.hidden_size, self.out_dims, bias=True) + #=========not used=========== + # if hparams['use_spk_id']: + # self.spk_embed_proj = Embedding(hparams['num_spk'] + 1, self.hidden_size) + # if hparams['use_split_spk_id']: + # self.spk_embed_f0 = Embedding(hparams['num_spk'] + 1, self.hidden_size) + # self.spk_embed_dur = Embedding(hparams['num_spk'] + 1, self.hidden_size) + # elif hparams['use_spk_embed']: + # self.spk_embed_proj = Linear(256, self.hidden_size, bias=True) + predictor_hidden = hparams['predictor_hidden'] if hparams['predictor_hidden'] > 0 else self.hidden_size + # self.dur_predictor = DurationPredictor( + # self.hidden_size, + # n_chans=predictor_hidden, + # n_layers=hparams['dur_predictor_layers'], + # dropout_rate=hparams['predictor_dropout'], padding=hparams['ffn_padding'], + # kernel_size=hparams['dur_predictor_kernel']) + # self.length_regulator = LengthRegulator() + if hparams['use_pitch_embed']: + self.pitch_embed = Embedding(300, self.hidden_size, self.padding_idx) + if hparams['pitch_type'] == 'cwt': + h = hparams['cwt_hidden_size'] + cwt_out_dims = 10 + if hparams['use_uv']: + cwt_out_dims = cwt_out_dims + 1 + self.cwt_predictor = nn.Sequential( + nn.Linear(self.hidden_size, h), + PitchPredictor( + h, + n_chans=predictor_hidden, + n_layers=hparams['predictor_layers'], + dropout_rate=hparams['predictor_dropout'], odim=cwt_out_dims, + padding=hparams['ffn_padding'], kernel_size=hparams['predictor_kernel'])) + self.cwt_stats_layers = nn.Sequential( + nn.Linear(self.hidden_size, h), nn.ReLU(), + 
nn.Linear(h, h), nn.ReLU(), nn.Linear(h, 2) + ) + else: + self.pitch_predictor = PitchPredictor( + self.hidden_size, + n_chans=predictor_hidden, + n_layers=hparams['predictor_layers'], + dropout_rate=hparams['predictor_dropout'], + odim=2 if hparams['pitch_type'] == 'frame' else 1, + padding=hparams['ffn_padding'], kernel_size=hparams['predictor_kernel']) + if hparams['use_energy_embed']: + self.energy_embed = Embedding(256, self.hidden_size, self.padding_idx) + # self.energy_predictor = EnergyPredictor( + # self.hidden_size, + # n_chans=predictor_hidden, + # n_layers=hparams['predictor_layers'], + # dropout_rate=hparams['predictor_dropout'], odim=1, + # padding=hparams['ffn_padding'], kernel_size=hparams['predictor_kernel']) + + # def build_embedding(self, dictionary, embed_dim): + # num_embeddings = len(dictionary) + # emb = Embedding(num_embeddings, embed_dim, self.padding_idx) + # return emb + + def forward(self, hubert, mel2ph=None, spk_embed=None, + ref_mels=None, f0=None, uv=None, energy=None, skip_decoder=True, + spk_embed_dur_id=None, spk_embed_f0_id=None, infer=False, **kwargs): + ret = {} + if not hparams['no_fs2'] if 'no_fs2' in hparams.keys() else True: + encoder_out =self.encoder(hubert) # [B, T, C] + else: + encoder_out =hubert + src_nonpadding = (hubert!=0).any(-1)[:,:,None] + + # add ref style embed + # Not implemented + # variance encoder + var_embed = 0 + + # encoder_out_dur denotes encoder outputs for duration predictor + # in speech adaptation, duration predictor use old speaker embedding + if hparams['use_spk_embed']: + spk_embed_dur = spk_embed_f0 = spk_embed = self.spk_embed_proj(spk_embed)[:, None, :] + elif hparams['use_spk_id']: + spk_embed_id = spk_embed + if spk_embed_dur_id is None: + spk_embed_dur_id = spk_embed_id + if spk_embed_f0_id is None: + spk_embed_f0_id = spk_embed_id + spk_embed = self.spk_embed_proj(spk_embed_id)[:, None, :] + spk_embed_dur = spk_embed_f0 = spk_embed + if hparams['use_split_spk_id']: + spk_embed_dur = self.spk_embed_dur(spk_embed_dur_id)[:, None, :] + spk_embed_f0 = self.spk_embed_f0(spk_embed_f0_id)[:, None, :] + else: + spk_embed_dur = spk_embed_f0 = spk_embed = 0 + + # add dur + # dur_inp = (encoder_out + var_embed + spk_embed_dur) * src_nonpadding + + # mel2ph = self.add_dur(dur_inp, mel2ph, hubert, ret) + ret['mel2ph'] = mel2ph + + decoder_inp = F.pad(encoder_out, [0, 0, 1, 0]) + + mel2ph_ = mel2ph[..., None].repeat([1, 1, encoder_out.shape[-1]]) + decoder_inp_origin = decoder_inp = torch.gather(decoder_inp, 1, mel2ph_) # [B, T, H] + + tgt_nonpadding = (mel2ph > 0).float()[:, :, None] + + # add pitch and energy embed + pitch_inp = (decoder_inp_origin + var_embed + spk_embed_f0) * tgt_nonpadding + if hparams['use_pitch_embed']: + pitch_inp_ph = (encoder_out + var_embed + spk_embed_f0) * src_nonpadding + decoder_inp = decoder_inp + self.add_pitch(pitch_inp, f0, uv, mel2ph, ret, encoder_out=pitch_inp_ph) + if hparams['use_energy_embed']: + decoder_inp = decoder_inp + self.add_energy(pitch_inp, energy, ret) + + ret['decoder_inp'] = decoder_inp = (decoder_inp + spk_embed) * tgt_nonpadding + if not hparams['no_fs2'] if 'no_fs2' in hparams.keys() else True: + if skip_decoder: + return ret + ret['mel_out'] = self.run_decoder(decoder_inp, tgt_nonpadding, ret, infer=infer, **kwargs) + + return ret + + def add_dur(self, dur_input, mel2ph, hubert, ret): + src_padding = (hubert==0).all(-1) + dur_input = dur_input.detach() + hparams['predictor_grad'] * (dur_input - dur_input.detach()) + if mel2ph is None: + dur, xs = 
self.dur_predictor.inference(dur_input, src_padding) + ret['dur'] = xs + ret['dur_choice'] = dur + mel2ph = self.length_regulator(dur, src_padding).detach() + else: + ret['dur'] = self.dur_predictor(dur_input, src_padding) + ret['mel2ph'] = mel2ph + return mel2ph + + def run_decoder(self, decoder_inp, tgt_nonpadding, ret, infer, **kwargs): + x = decoder_inp # [B, T, H] + x = self.decoder(x) + x = self.mel_out(x) + return x * tgt_nonpadding + + def cwt2f0_norm(self, cwt_spec, mean, std, mel2ph): + f0 = cwt2f0(cwt_spec, mean, std, hparams['cwt_scales']) + f0 = torch.cat( + [f0] + [f0[:, -1:]] * (mel2ph.shape[1] - f0.shape[1]), 1) + f0_norm = norm_f0(f0, None, hparams) + return f0_norm + + def out2mel(self, out): + return out + + def add_pitch(self,decoder_inp, f0, uv, mel2ph, ret, encoder_out=None): + # if hparams['pitch_type'] == 'ph': + # pitch_pred_inp = encoder_out.detach() + hparams['predictor_grad'] * (encoder_out - encoder_out.detach()) + # pitch_padding = (encoder_out.sum().abs() == 0) + # ret['pitch_pred'] = pitch_pred = self.pitch_predictor(pitch_pred_inp) + # if f0 is None: + # f0 = pitch_pred[:, :, 0] + # ret['f0_denorm'] = f0_denorm = denorm_f0(f0, None, hparams, pitch_padding=pitch_padding) + # pitch = f0_to_coarse(f0_denorm) # start from 0 [B, T_txt] + # pitch = F.pad(pitch, [1, 0]) + # pitch = torch.gather(pitch, 1, mel2ph) # [B, T_mel] + # pitch_embedding = pitch_embed(pitch) + # return pitch_embedding + + decoder_inp = decoder_inp.detach() + hparams['predictor_grad'] * (decoder_inp - decoder_inp.detach()) + + pitch_padding = (mel2ph == 0) + + # if hparams['pitch_type'] == 'cwt': + # # NOTE: this part of script is *isolated* from other scripts, which means + # # it may not be compatible with the current version. + # pass + # # pitch_padding = None + # # ret['cwt'] = cwt_out = self.cwt_predictor(decoder_inp) + # # stats_out = self.cwt_stats_layers(encoder_out[:, 0, :]) # [B, 2] + # # mean = ret['f0_mean'] = stats_out[:, 0] + # # std = ret['f0_std'] = stats_out[:, 1] + # # cwt_spec = cwt_out[:, :, :10] + # # if f0 is None: + # # std = std * hparams['cwt_std_scale'] + # # f0 = self.cwt2f0_norm(cwt_spec, mean, std, mel2ph) + # # if hparams['use_uv']: + # # assert cwt_out.shape[-1] == 11 + # # uv = cwt_out[:, :, -1] > 0 + # elif hparams['pitch_ar']: + # ret['pitch_pred'] = pitch_pred = self.pitch_predictor(decoder_inp, f0 if is_training else None) + # if f0 is None: + # f0 = pitch_pred[:, :, 0] + # else: + #ret['pitch_pred'] = pitch_pred = self.pitch_predictor(decoder_inp) + # if f0 is None: + # f0 = pitch_pred[:, :, 0] + # if hparams['use_uv'] and uv is None: + # uv = pitch_pred[:, :, 1] > 0 + ret['f0_denorm'] = f0_denorm = denorm_f0(f0, uv, hparams, pitch_padding=pitch_padding) + if pitch_padding is not None: + f0[pitch_padding] = 0 + + pitch = f0_to_coarse(f0_denorm,hparams) # start from 0 + ret['pitch_pred']=pitch.unsqueeze(-1) + # print(ret['pitch_pred'].shape) + # print(pitch.shape) + pitch_embedding = self.pitch_embed(pitch) + return pitch_embedding + + def add_energy(self,decoder_inp, energy, ret): + decoder_inp = decoder_inp.detach() + hparams['predictor_grad'] * (decoder_inp - decoder_inp.detach()) + ret['energy_pred'] = energy#energy_pred = self.energy_predictor(decoder_inp)[:, :, 0] + # if energy is None: + # energy = energy_pred + energy = torch.clamp(energy * 256 // 4, max=255).long() # energy_to_coarse + energy_embedding = self.energy_embed(energy) + return energy_embedding + + @staticmethod + def mel_norm(x): + return (x + 5.5) / (6.3 / 2) - 1 + + @staticmethod + 
def mel_denorm(x): + return (x + 1) * (6.3 / 2) - 5.5 diff --git a/modules/fastspeech/pe.py b/modules/fastspeech/pe.py new file mode 100644 index 0000000000000000000000000000000000000000..da0d46e3446bbf45d8ee3682edcaf0d8d64dcdfb --- /dev/null +++ b/modules/fastspeech/pe.py @@ -0,0 +1,149 @@ +from modules.commons.common_layers import * +from utils.hparams import hparams +from modules.fastspeech.tts_modules import PitchPredictor +from utils.pitch_utils import denorm_f0 + + +class Prenet(nn.Module): + def __init__(self, in_dim=80, out_dim=256, kernel=5, n_layers=3, strides=None): + super(Prenet, self).__init__() + padding = kernel // 2 + self.layers = [] + self.strides = strides if strides is not None else [1] * n_layers + for l in range(n_layers): + self.layers.append(nn.Sequential( + nn.Conv1d(in_dim, out_dim, kernel_size=kernel, padding=padding, stride=self.strides[l]), + nn.ReLU(), + nn.BatchNorm1d(out_dim) + )) + in_dim = out_dim + self.layers = nn.ModuleList(self.layers) + self.out_proj = nn.Linear(out_dim, out_dim) + + def forward(self, x): + """ + + :param x: [B, T, 80] + :return: [L, B, T, H], [B, T, H] + """ + # padding_mask = x.abs().sum(-1).eq(0).data # [B, T] + padding_mask = x.abs().sum(-1).eq(0).detach() + nonpadding_mask_TB = 1 - padding_mask.float()[:, None, :] # [B, 1, T] + x = x.transpose(1, 2) + hiddens = [] + for i, l in enumerate(self.layers): + nonpadding_mask_TB = nonpadding_mask_TB[:, :, ::self.strides[i]] + x = l(x) * nonpadding_mask_TB + hiddens.append(x) + hiddens = torch.stack(hiddens, 0) # [L, B, H, T] + hiddens = hiddens.transpose(2, 3) # [L, B, T, H] + x = self.out_proj(x.transpose(1, 2)) # [B, T, H] + x = x * nonpadding_mask_TB.transpose(1, 2) + return hiddens, x + + +class ConvBlock(nn.Module): + def __init__(self, idim=80, n_chans=256, kernel_size=3, stride=1, norm='gn', dropout=0): + super().__init__() + self.conv = ConvNorm(idim, n_chans, kernel_size, stride=stride) + self.norm = norm + if self.norm == 'bn': + self.norm = nn.BatchNorm1d(n_chans) + elif self.norm == 'in': + self.norm = nn.InstanceNorm1d(n_chans, affine=True) + elif self.norm == 'gn': + self.norm = nn.GroupNorm(n_chans // 16, n_chans) + elif self.norm == 'ln': + self.norm = LayerNorm(n_chans // 16, n_chans) + elif self.norm == 'wn': + self.conv = torch.nn.utils.weight_norm(self.conv.conv) + self.dropout = nn.Dropout(dropout) + self.relu = nn.ReLU() + + def forward(self, x): + """ + + :param x: [B, C, T] + :return: [B, C, T] + """ + x = self.conv(x) + if not isinstance(self.norm, str): + if self.norm == 'none': + pass + elif self.norm == 'ln': + x = self.norm(x.transpose(1, 2)).transpose(1, 2) + else: + x = self.norm(x) + x = self.relu(x) + x = self.dropout(x) + return x + + +class ConvStacks(nn.Module): + def __init__(self, idim=80, n_layers=5, n_chans=256, odim=32, kernel_size=5, norm='gn', + dropout=0, strides=None, res=True): + super().__init__() + self.conv = torch.nn.ModuleList() + self.kernel_size = kernel_size + self.res = res + self.in_proj = Linear(idim, n_chans) + if strides is None: + strides = [1] * n_layers + else: + assert len(strides) == n_layers + for idx in range(n_layers): + self.conv.append(ConvBlock( + n_chans, n_chans, kernel_size, stride=strides[idx], norm=norm, dropout=dropout)) + self.out_proj = Linear(n_chans, odim) + + def forward(self, x, return_hiddens=False): + """ + + :param x: [B, T, H] + :return: [B, T, H] + """ + x = self.in_proj(x) + x = x.transpose(1, -1) # (B, idim, Tmax) + hiddens = [] + for f in self.conv: + x_ = f(x) + x = x + x_ if self.res else x_ # 
(B, C, Tmax) + hiddens.append(x) + x = x.transpose(1, -1) + x = self.out_proj(x) # (B, Tmax, H) + if return_hiddens: + hiddens = torch.stack(hiddens, 1) # [B, L, C, T] + return x, hiddens + return x + + +class PitchExtractor(nn.Module): + def __init__(self, n_mel_bins=80, conv_layers=2): + super().__init__() + self.hidden_size = hparams['hidden_size'] + self.predictor_hidden = hparams['predictor_hidden'] if hparams['predictor_hidden'] > 0 else self.hidden_size + self.conv_layers = conv_layers + + self.mel_prenet = Prenet(n_mel_bins, self.hidden_size, strides=[1, 1, 1]) + if self.conv_layers > 0: + self.mel_encoder = ConvStacks( + idim=self.hidden_size, n_chans=self.hidden_size, odim=self.hidden_size, n_layers=self.conv_layers) + self.pitch_predictor = PitchPredictor( + self.hidden_size, n_chans=self.predictor_hidden, + n_layers=5, dropout_rate=0.1, odim=2, + padding=hparams['ffn_padding'], kernel_size=hparams['predictor_kernel']) + + def forward(self, mel_input=None): + ret = {} + mel_hidden = self.mel_prenet(mel_input)[1] + if self.conv_layers > 0: + mel_hidden = self.mel_encoder(mel_hidden) + + ret['pitch_pred'] = pitch_pred = self.pitch_predictor(mel_hidden) + + pitch_padding = mel_input.abs().sum(-1) == 0 + use_uv = hparams['pitch_type'] == 'frame' #and hparams['use_uv'] + ret['f0_denorm_pred'] = denorm_f0( + pitch_pred[:, :, 0], (pitch_pred[:, :, 1] > 0) if use_uv else None, + hparams, pitch_padding=pitch_padding) + return ret \ No newline at end of file diff --git a/modules/fastspeech/tts_modules.py b/modules/fastspeech/tts_modules.py new file mode 100644 index 0000000000000000000000000000000000000000..3fdd417d3cc354a2d65f905326f8fe053dea6d97 --- /dev/null +++ b/modules/fastspeech/tts_modules.py @@ -0,0 +1,364 @@ +import logging +import math + +import torch +import torch.nn as nn +from torch.nn import functional as F + +from modules.commons.espnet_positional_embedding import RelPositionalEncoding +from modules.commons.common_layers import SinusoidalPositionalEmbedding, Linear, EncSALayer, DecSALayer, BatchNorm1dTBC +from utils.hparams import hparams + +DEFAULT_MAX_SOURCE_POSITIONS = 2000 +DEFAULT_MAX_TARGET_POSITIONS = 2000 + + +class TransformerEncoderLayer(nn.Module): + def __init__(self, hidden_size, dropout, kernel_size=None, num_heads=2, norm='ln'): + super().__init__() + self.hidden_size = hidden_size + self.dropout = dropout + self.num_heads = num_heads + self.op = EncSALayer( + hidden_size, num_heads, dropout=dropout, + attention_dropout=0.0, relu_dropout=dropout, + kernel_size=kernel_size + if kernel_size is not None else hparams['enc_ffn_kernel_size'], + padding=hparams['ffn_padding'], + norm=norm, act=hparams['ffn_act']) + + def forward(self, x, **kwargs): + return self.op(x, **kwargs) + + +###################### +# fastspeech modules +###################### +class LayerNorm(torch.nn.LayerNorm): + """Layer normalization module. + :param int nout: output dim size + :param int dim: dimension to be normalized + """ + + def __init__(self, nout, dim=-1): + """Construct an LayerNorm object.""" + super(LayerNorm, self).__init__(nout, eps=1e-12) + self.dim = dim + + def forward(self, x): + """Apply layer normalization. + :param torch.Tensor x: input tensor + :return: layer normalized tensor + :rtype torch.Tensor + """ + if self.dim == -1: + return super(LayerNorm, self).forward(x) + return super(LayerNorm, self).forward(x.transpose(1, -1)).transpose(1, -1) + + +class DurationPredictor(torch.nn.Module): + """Duration predictor module. 
+ This is a module of duration predictor described in `FastSpeech: Fast, Robust and Controllable Text to Speech`_. + The duration predictor predicts a duration of each frame in log domain from the hidden embeddings of encoder. + .. _`FastSpeech: Fast, Robust and Controllable Text to Speech`: + https://arxiv.org/pdf/1905.09263.pdf + Note: + The calculation domain of outputs is different between in `forward` and in `inference`. In `forward`, + the outputs are calculated in log domain but in `inference`, those are calculated in linear domain. + """ + + def __init__(self, idim, n_layers=2, n_chans=384, kernel_size=3, dropout_rate=0.1, offset=1.0, padding='SAME'): + """Initilize duration predictor module. + Args: + idim (int): Input dimension. + n_layers (int, optional): Number of convolutional layers. + n_chans (int, optional): Number of channels of convolutional layers. + kernel_size (int, optional): Kernel size of convolutional layers. + dropout_rate (float, optional): Dropout rate. + offset (float, optional): Offset value to avoid nan in log domain. + """ + super(DurationPredictor, self).__init__() + self.offset = offset + self.conv = torch.nn.ModuleList() + self.kernel_size = kernel_size + self.padding = padding + for idx in range(n_layers): + in_chans = idim if idx == 0 else n_chans + self.conv += [torch.nn.Sequential( + torch.nn.ConstantPad1d(((kernel_size - 1) // 2, (kernel_size - 1) // 2) + if padding == 'SAME' + else (kernel_size - 1, 0), 0), + torch.nn.Conv1d(in_chans, n_chans, kernel_size, stride=1, padding=0), + torch.nn.ReLU(), + LayerNorm(n_chans, dim=1), + torch.nn.Dropout(dropout_rate) + )] + if hparams['dur_loss'] in ['mse', 'huber']: + odims = 1 + elif hparams['dur_loss'] == 'mog': + odims = 15 + elif hparams['dur_loss'] == 'crf': + odims = 32 + from torchcrf import CRF + self.crf = CRF(odims, batch_first=True) + self.linear = torch.nn.Linear(n_chans, odims) + + def _forward(self, xs, x_masks=None, is_inference=False): + xs = xs.transpose(1, -1) # (B, idim, Tmax) + for f in self.conv: + xs = f(xs) # (B, C, Tmax) + if x_masks is not None: + xs = xs * (1 - x_masks.float())[:, None, :] + + xs = self.linear(xs.transpose(1, -1)) # [B, T, C] + xs = xs * (1 - x_masks.float())[:, :, None] # (B, T, C) + if is_inference: + return self.out2dur(xs), xs + else: + if hparams['dur_loss'] in ['mse']: + xs = xs.squeeze(-1) # (B, Tmax) + return xs + + def out2dur(self, xs): + if hparams['dur_loss'] in ['mse']: + # NOTE: calculate in log domain + xs = xs.squeeze(-1) # (B, Tmax) + dur = torch.clamp(torch.round(xs.exp() - self.offset), min=0).long() # avoid negative value + elif hparams['dur_loss'] == 'mog': + return NotImplementedError + elif hparams['dur_loss'] == 'crf': + dur = torch.LongTensor(self.crf.decode(xs)).cuda() + return dur + + def forward(self, xs, x_masks=None): + """Calculate forward propagation. + Args: + xs (Tensor): Batch of input sequences (B, Tmax, idim). + x_masks (ByteTensor, optional): Batch of masks indicating padded part (B, Tmax). + Returns: + Tensor: Batch of predicted durations in log domain (B, Tmax). + """ + return self._forward(xs, x_masks, False) + + def inference(self, xs, x_masks=None): + """Inference duration. + Args: + xs (Tensor): Batch of input sequences (B, Tmax, idim). + x_masks (ByteTensor, optional): Batch of masks indicating padded part (B, Tmax). + Returns: + LongTensor: Batch of predicted durations in linear domain (B, Tmax). 
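+ Example (illustrative sketch; assumes hparams['dur_loss'] == 'mse' and idim matching the encoder width):
+ >>> dp = DurationPredictor(idim=256)
+ >>> xs = torch.randn(2, 50, 256) # encoder outputs (B, Tmax, idim)
+ >>> masks = torch.zeros(2, 50, dtype=torch.bool) # no padded positions
+ >>> dur, dur_raw = dp.inference(xs, masks) # dur: LongTensor (B, Tmax); dur_raw: log-domain predictions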
+ """ + return self._forward(xs, x_masks, True) + + +class LengthRegulator(torch.nn.Module): + def __init__(self, pad_value=0.0): + super(LengthRegulator, self).__init__() + self.pad_value = pad_value + + def forward(self, dur, dur_padding=None, alpha=1.0): + """ + Example (no batch dim version): + 1. dur = [2,2,3] + 2. token_idx = [[1],[2],[3]], dur_cumsum = [2,4,7], dur_cumsum_prev = [0,2,4] + 3. token_mask = [[1,1,0,0,0,0,0], + [0,0,1,1,0,0,0], + [0,0,0,0,1,1,1]] + 4. token_idx * token_mask = [[1,1,0,0,0,0,0], + [0,0,2,2,0,0,0], + [0,0,0,0,3,3,3]] + 5. (token_idx * token_mask).sum(0) = [1,1,2,2,3,3,3] + + :param dur: Batch of durations of each frame (B, T_txt) + :param dur_padding: Batch of padding of each frame (B, T_txt) + :param alpha: duration rescale coefficient + :return: + mel2ph (B, T_speech) + """ + assert alpha > 0 + dur = torch.round(dur.float() * alpha).long() + if dur_padding is not None: + dur = dur * (1 - dur_padding.long()) + token_idx = torch.arange(1, dur.shape[1] + 1)[None, :, None].to(dur.device) + dur_cumsum = torch.cumsum(dur, 1) + dur_cumsum_prev = F.pad(dur_cumsum, [1, -1], mode='constant', value=0) + + pos_idx = torch.arange(dur.sum(-1).max())[None, None].to(dur.device) + token_mask = (pos_idx >= dur_cumsum_prev[:, :, None]) & (pos_idx < dur_cumsum[:, :, None]) + mel2ph = (token_idx * token_mask.long()).sum(1) + return mel2ph + + +class PitchPredictor(torch.nn.Module): + def __init__(self, idim, n_layers=5, n_chans=384, odim=2, kernel_size=5, + dropout_rate=0.1, padding='SAME'): + """Initilize pitch predictor module. + Args: + idim (int): Input dimension. + n_layers (int, optional): Number of convolutional layers. + n_chans (int, optional): Number of channels of convolutional layers. + kernel_size (int, optional): Kernel size of convolutional layers. + dropout_rate (float, optional): Dropout rate. 
+ """ + super(PitchPredictor, self).__init__() + self.conv = torch.nn.ModuleList() + self.kernel_size = kernel_size + self.padding = padding + for idx in range(n_layers): + in_chans = idim if idx == 0 else n_chans + self.conv += [torch.nn.Sequential( + torch.nn.ConstantPad1d(((kernel_size - 1) // 2, (kernel_size - 1) // 2) + if padding == 'SAME' + else (kernel_size - 1, 0), 0), + torch.nn.Conv1d(in_chans, n_chans, kernel_size, stride=1, padding=0), + torch.nn.ReLU(), + LayerNorm(n_chans, dim=1), + torch.nn.Dropout(dropout_rate) + )] + self.linear = torch.nn.Linear(n_chans, odim) + self.embed_positions = SinusoidalPositionalEmbedding(idim, 0, init_size=4096) + self.pos_embed_alpha = nn.Parameter(torch.Tensor([1])) + + def forward(self, xs): + """ + + :param xs: [B, T, H] + :return: [B, T, H] + """ + positions = self.pos_embed_alpha * self.embed_positions(xs[..., 0]) + xs = xs + positions + xs = xs.transpose(1, -1) # (B, idim, Tmax) + for f in self.conv: + xs = f(xs) # (B, C, Tmax) + # NOTE: calculate in log domain + xs = self.linear(xs.transpose(1, -1)) # (B, Tmax, H) + return xs + + +class EnergyPredictor(PitchPredictor): + pass + + +def mel2ph_to_dur(mel2ph, T_txt, max_dur=None): + B, _ = mel2ph.shape + dur = mel2ph.new_zeros(B, T_txt + 1).scatter_add(1, mel2ph, torch.ones_like(mel2ph)) + dur = dur[:, 1:] + if max_dur is not None: + dur = dur.clamp(max=max_dur) + return dur + + +class FFTBlocks(nn.Module): + def __init__(self, hidden_size, num_layers, ffn_kernel_size=9, dropout=None, num_heads=2, + use_pos_embed=True, use_last_norm=True, norm='ln', use_pos_embed_alpha=True): + super().__init__() + self.num_layers = num_layers + embed_dim = self.hidden_size = hidden_size + self.dropout = dropout if dropout is not None else hparams['dropout'] + self.use_pos_embed = use_pos_embed + self.use_last_norm = use_last_norm + if use_pos_embed: + self.max_source_positions = DEFAULT_MAX_TARGET_POSITIONS + self.padding_idx = 0 + self.pos_embed_alpha = nn.Parameter(torch.Tensor([1])) if use_pos_embed_alpha else 1 + self.embed_positions = SinusoidalPositionalEmbedding( + embed_dim, self.padding_idx, init_size=DEFAULT_MAX_TARGET_POSITIONS, + ) + + self.layers = nn.ModuleList([]) + self.layers.extend([ + TransformerEncoderLayer(self.hidden_size, self.dropout, + kernel_size=ffn_kernel_size, num_heads=num_heads) + for _ in range(self.num_layers) + ]) + if self.use_last_norm: + if norm == 'ln': + self.layer_norm = nn.LayerNorm(embed_dim) + elif norm == 'bn': + self.layer_norm = BatchNorm1dTBC(embed_dim) + else: + self.layer_norm = None + + def forward(self, x, padding_mask=None, attn_mask=None, return_hiddens=False): + """ + :param x: [B, T, C] + :param padding_mask: [B, T] + :return: [B, T, C] or [L, B, T, C] + """ + # padding_mask = x.abs().sum(-1).eq(0).data if padding_mask is None else padding_mask + padding_mask = x.abs().sum(-1).eq(0).detach() if padding_mask is None else padding_mask + nonpadding_mask_TB = 1 - padding_mask.transpose(0, 1).float()[:, :, None] # [T, B, 1] + if self.use_pos_embed: + positions = self.pos_embed_alpha * self.embed_positions(x[..., 0]) + x = x + positions + x = F.dropout(x, p=self.dropout, training=self.training) + # B x T x C -> T x B x C + x = x.transpose(0, 1) * nonpadding_mask_TB + hiddens = [] + for layer in self.layers: + x = layer(x, encoder_padding_mask=padding_mask, attn_mask=attn_mask) * nonpadding_mask_TB + hiddens.append(x) + if self.use_last_norm: + x = self.layer_norm(x) * nonpadding_mask_TB + if return_hiddens: + x = torch.stack(hiddens, 0) # [L, T, B, C] + x = 
x.transpose(1, 2) # [L, B, T, C] + else: + x = x.transpose(0, 1) # [B, T, C] + return x + + +class FastspeechEncoder(FFTBlocks): + ''' + compared to FFTBlocks: + - input is [B, T, H], not [B, T, C] + - supports "relative" positional encoding + ''' + def __init__(self, hidden_size=None, num_layers=None, kernel_size=None, num_heads=2): + hidden_size = hparams['hidden_size'] if hidden_size is None else hidden_size + kernel_size = hparams['enc_ffn_kernel_size'] if kernel_size is None else kernel_size + num_layers = hparams['dec_layers'] if num_layers is None else num_layers + super().__init__(hidden_size, num_layers, kernel_size, num_heads=num_heads, + use_pos_embed=False) # use_pos_embed_alpha for compatibility + #self.embed_tokens = embed_tokens + self.embed_scale = math.sqrt(hidden_size) + self.padding_idx = 0 + if hparams.get('rel_pos') is not None and hparams['rel_pos']: + self.embed_positions = RelPositionalEncoding(hidden_size, dropout_rate=0.0) + else: + self.embed_positions = SinusoidalPositionalEmbedding( + hidden_size, self.padding_idx, init_size=DEFAULT_MAX_TARGET_POSITIONS, + ) + + def forward(self, hubert): + """ + + :param hubert: [B, T, H ] + :return: { + 'encoder_out': [T x B x C] + } + """ + # encoder_padding_mask = txt_tokens.eq(self.padding_idx).data + encoder_padding_mask = (hubert==0).all(-1) + x = self.forward_embedding(hubert) # [B, T, H] + x = super(FastspeechEncoder, self).forward(x, encoder_padding_mask) + return x + + def forward_embedding(self, hubert): + # embed tokens and positions + x = self.embed_scale * hubert + if hparams['use_pos_embed']: + positions = self.embed_positions(hubert) + x = x + positions + x = F.dropout(x, p=self.dropout, training=self.training) + return x + + +class FastspeechDecoder(FFTBlocks): + def __init__(self, hidden_size=None, num_layers=None, kernel_size=None, num_heads=None): + num_heads = hparams['num_heads'] if num_heads is None else num_heads + hidden_size = hparams['hidden_size'] if hidden_size is None else hidden_size + kernel_size = hparams['dec_ffn_kernel_size'] if kernel_size is None else kernel_size + num_layers = hparams['dec_layers'] if num_layers is None else num_layers + super().__init__(hidden_size, num_layers, kernel_size, num_heads=num_heads) + diff --git a/modules/hifigan/__pycache__/hifigan.cpython-38.pyc b/modules/hifigan/__pycache__/hifigan.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2e8491c1afef1e7e079b27962e8cb06b6efea0b4 Binary files /dev/null and b/modules/hifigan/__pycache__/hifigan.cpython-38.pyc differ diff --git a/modules/hifigan/hifigan.py b/modules/hifigan/hifigan.py new file mode 100644 index 0000000000000000000000000000000000000000..ae7e61f56b00d60bcc49a18ece3edbe54746f7ea --- /dev/null +++ b/modules/hifigan/hifigan.py @@ -0,0 +1,365 @@ +import torch +import torch.nn.functional as F +import torch.nn as nn +from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d +from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm + +from modules.parallel_wavegan.layers import UpsampleNetwork, ConvInUpsampleNetwork +from modules.parallel_wavegan.models.source import SourceModuleHnNSF +import numpy as np + +LRELU_SLOPE = 0.1 + + +def init_weights(m, mean=0.0, std=0.01): + classname = m.__class__.__name__ + if classname.find("Conv") != -1: + m.weight.data.normal_(mean, std) + + +def apply_weight_norm(m): + classname = m.__class__.__name__ + if classname.find("Conv") != -1: + weight_norm(m) + + +def get_padding(kernel_size, dilation=1): + return 
int((kernel_size * dilation - dilation) / 2) + + +class ResBlock1(torch.nn.Module): + def __init__(self, h, channels, kernel_size=3, dilation=(1, 3, 5)): + super(ResBlock1, self).__init__() + self.h = h + self.convs1 = nn.ModuleList([ + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[0], + padding=get_padding(kernel_size, dilation[0]))), + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[1], + padding=get_padding(kernel_size, dilation[1]))), + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[2], + padding=get_padding(kernel_size, dilation[2]))) + ]) + self.convs1.apply(init_weights) + + self.convs2 = nn.ModuleList([ + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1, + padding=get_padding(kernel_size, 1))), + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1, + padding=get_padding(kernel_size, 1))), + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1, + padding=get_padding(kernel_size, 1))) + ]) + self.convs2.apply(init_weights) + + def forward(self, x): + for c1, c2 in zip(self.convs1, self.convs2): + xt = F.leaky_relu(x, LRELU_SLOPE) + xt = c1(xt) + xt = F.leaky_relu(xt, LRELU_SLOPE) + xt = c2(xt) + x = xt + x + return x + + def remove_weight_norm(self): + for l in self.convs1: + remove_weight_norm(l) + for l in self.convs2: + remove_weight_norm(l) + + +class ResBlock2(torch.nn.Module): + def __init__(self, h, channels, kernel_size=3, dilation=(1, 3)): + super(ResBlock2, self).__init__() + self.h = h + self.convs = nn.ModuleList([ + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[0], + padding=get_padding(kernel_size, dilation[0]))), + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[1], + padding=get_padding(kernel_size, dilation[1]))) + ]) + self.convs.apply(init_weights) + + def forward(self, x): + for c in self.convs: + xt = F.leaky_relu(x, LRELU_SLOPE) + xt = c(xt) + x = xt + x + return x + + def remove_weight_norm(self): + for l in self.convs: + remove_weight_norm(l) + + +class Conv1d1x1(Conv1d): + """1x1 Conv1d with customized initialization.""" + + def __init__(self, in_channels, out_channels, bias): + """Initialize 1x1 Conv1d module.""" + super(Conv1d1x1, self).__init__(in_channels, out_channels, + kernel_size=1, padding=0, + dilation=1, bias=bias) + + +class HifiGanGenerator(torch.nn.Module): + def __init__(self, h, c_out=1): + super(HifiGanGenerator, self).__init__() + self.h = h + self.num_kernels = len(h['resblock_kernel_sizes']) + self.num_upsamples = len(h['upsample_rates']) + + if h['use_pitch_embed']: + self.harmonic_num = 8 + self.f0_upsamp = torch.nn.Upsample(scale_factor=np.prod(h['upsample_rates'])) + self.m_source = SourceModuleHnNSF( + sampling_rate=h['audio_sample_rate'], + harmonic_num=self.harmonic_num) + self.noise_convs = nn.ModuleList() + self.conv_pre = weight_norm(Conv1d(80, h['upsample_initial_channel'], 7, 1, padding=3)) + resblock = ResBlock1 if h['resblock'] == '1' else ResBlock2 + + self.ups = nn.ModuleList() + for i, (u, k) in enumerate(zip(h['upsample_rates'], h['upsample_kernel_sizes'])): + c_cur = h['upsample_initial_channel'] // (2 ** (i + 1)) + self.ups.append(weight_norm( + ConvTranspose1d(c_cur * 2, c_cur, k, u, padding=(k - u) // 2))) + if h['use_pitch_embed']: + if i + 1 < len(h['upsample_rates']): + stride_f0 = np.prod(h['upsample_rates'][i + 1:]) + self.noise_convs.append(Conv1d( + 1, c_cur, kernel_size=stride_f0 * 2, stride=stride_f0, padding=stride_f0 // 2)) + else: + 
self.noise_convs.append(Conv1d(1, c_cur, kernel_size=1)) + + self.resblocks = nn.ModuleList() + for i in range(len(self.ups)): + ch = h['upsample_initial_channel'] // (2 ** (i + 1)) + for j, (k, d) in enumerate(zip(h['resblock_kernel_sizes'], h['resblock_dilation_sizes'])): + self.resblocks.append(resblock(h, ch, k, d)) + + self.conv_post = weight_norm(Conv1d(ch, c_out, 7, 1, padding=3)) + self.ups.apply(init_weights) + self.conv_post.apply(init_weights) + + def forward(self, x, f0=None): + if f0 is not None: + # harmonic-source signal, noise-source signal, uv flag + f0 = self.f0_upsamp(f0[:, None]).transpose(1, 2) + har_source, noi_source, uv = self.m_source(f0) + har_source = har_source.transpose(1, 2) + + x = self.conv_pre(x) + for i in range(self.num_upsamples): + x = F.leaky_relu(x, LRELU_SLOPE) + x = self.ups[i](x) + if f0 is not None: + x_source = self.noise_convs[i](har_source) + x = x + x_source + xs = None + for j in range(self.num_kernels): + if xs is None: + xs = self.resblocks[i * self.num_kernels + j](x) + else: + xs += self.resblocks[i * self.num_kernels + j](x) + x = xs / self.num_kernels + x = F.leaky_relu(x) + x = self.conv_post(x) + x = torch.tanh(x) + + return x + + def remove_weight_norm(self): + print('Removing weight norm...') + for l in self.ups: + remove_weight_norm(l) + for l in self.resblocks: + l.remove_weight_norm() + remove_weight_norm(self.conv_pre) + remove_weight_norm(self.conv_post) + + +class DiscriminatorP(torch.nn.Module): + def __init__(self, period, kernel_size=5, stride=3, use_spectral_norm=False, use_cond=False, c_in=1): + super(DiscriminatorP, self).__init__() + self.use_cond = use_cond + if use_cond: + from utils.hparams import hparams + t = hparams['hop_size'] + self.cond_net = torch.nn.ConvTranspose1d(80, 1, t * 2, stride=t, padding=t // 2) + c_in = 2 + + self.period = period + norm_f = weight_norm if use_spectral_norm == False else spectral_norm + self.convs = nn.ModuleList([ + norm_f(Conv2d(c_in, 32, (kernel_size, 1), (stride, 1), padding=(get_padding(5, 1), 0))), + norm_f(Conv2d(32, 128, (kernel_size, 1), (stride, 1), padding=(get_padding(5, 1), 0))), + norm_f(Conv2d(128, 512, (kernel_size, 1), (stride, 1), padding=(get_padding(5, 1), 0))), + norm_f(Conv2d(512, 1024, (kernel_size, 1), (stride, 1), padding=(get_padding(5, 1), 0))), + norm_f(Conv2d(1024, 1024, (kernel_size, 1), 1, padding=(2, 0))), + ]) + self.conv_post = norm_f(Conv2d(1024, 1, (3, 1), 1, padding=(1, 0))) + + def forward(self, x, mel): + fmap = [] + if self.use_cond: + x_mel = self.cond_net(mel) + x = torch.cat([x_mel, x], 1) + # 1d to 2d + b, c, t = x.shape + if t % self.period != 0: # pad first + n_pad = self.period - (t % self.period) + x = F.pad(x, (0, n_pad), "reflect") + t = t + n_pad + x = x.view(b, c, t // self.period, self.period) + + for l in self.convs: + x = l(x) + x = F.leaky_relu(x, LRELU_SLOPE) + fmap.append(x) + x = self.conv_post(x) + fmap.append(x) + x = torch.flatten(x, 1, -1) + + return x, fmap + + +class MultiPeriodDiscriminator(torch.nn.Module): + def __init__(self, use_cond=False, c_in=1): + super(MultiPeriodDiscriminator, self).__init__() + self.discriminators = nn.ModuleList([ + DiscriminatorP(2, use_cond=use_cond, c_in=c_in), + DiscriminatorP(3, use_cond=use_cond, c_in=c_in), + DiscriminatorP(5, use_cond=use_cond, c_in=c_in), + DiscriminatorP(7, use_cond=use_cond, c_in=c_in), + DiscriminatorP(11, use_cond=use_cond, c_in=c_in), + ]) + + def forward(self, y, y_hat, mel=None): + y_d_rs = [] + y_d_gs = [] + fmap_rs = [] + fmap_gs = [] + for i, d in 
enumerate(self.discriminators): + y_d_r, fmap_r = d(y, mel) + y_d_g, fmap_g = d(y_hat, mel) + y_d_rs.append(y_d_r) + fmap_rs.append(fmap_r) + y_d_gs.append(y_d_g) + fmap_gs.append(fmap_g) + + return y_d_rs, y_d_gs, fmap_rs, fmap_gs + + +class DiscriminatorS(torch.nn.Module): + def __init__(self, use_spectral_norm=False, use_cond=False, upsample_rates=None, c_in=1): + super(DiscriminatorS, self).__init__() + self.use_cond = use_cond + if use_cond: + t = np.prod(upsample_rates) + self.cond_net = torch.nn.ConvTranspose1d(80, 1, t * 2, stride=t, padding=t // 2) + c_in = 2 + norm_f = weight_norm if use_spectral_norm == False else spectral_norm + self.convs = nn.ModuleList([ + norm_f(Conv1d(c_in, 128, 15, 1, padding=7)), + norm_f(Conv1d(128, 128, 41, 2, groups=4, padding=20)), + norm_f(Conv1d(128, 256, 41, 2, groups=16, padding=20)), + norm_f(Conv1d(256, 512, 41, 4, groups=16, padding=20)), + norm_f(Conv1d(512, 1024, 41, 4, groups=16, padding=20)), + norm_f(Conv1d(1024, 1024, 41, 1, groups=16, padding=20)), + norm_f(Conv1d(1024, 1024, 5, 1, padding=2)), + ]) + self.conv_post = norm_f(Conv1d(1024, 1, 3, 1, padding=1)) + + def forward(self, x, mel): + if self.use_cond: + x_mel = self.cond_net(mel) + x = torch.cat([x_mel, x], 1) + fmap = [] + for l in self.convs: + x = l(x) + x = F.leaky_relu(x, LRELU_SLOPE) + fmap.append(x) + x = self.conv_post(x) + fmap.append(x) + x = torch.flatten(x, 1, -1) + + return x, fmap + + +class MultiScaleDiscriminator(torch.nn.Module): + def __init__(self, use_cond=False, c_in=1): + super(MultiScaleDiscriminator, self).__init__() + from utils.hparams import hparams + self.discriminators = nn.ModuleList([ + DiscriminatorS(use_spectral_norm=True, use_cond=use_cond, + upsample_rates=[4, 4, hparams['hop_size'] // 16], + c_in=c_in), + DiscriminatorS(use_cond=use_cond, + upsample_rates=[4, 4, hparams['hop_size'] // 32], + c_in=c_in), + DiscriminatorS(use_cond=use_cond, + upsample_rates=[4, 4, hparams['hop_size'] // 64], + c_in=c_in), + ]) + self.meanpools = nn.ModuleList([ + AvgPool1d(4, 2, padding=1), + AvgPool1d(4, 2, padding=1) + ]) + + def forward(self, y, y_hat, mel=None): + y_d_rs = [] + y_d_gs = [] + fmap_rs = [] + fmap_gs = [] + for i, d in enumerate(self.discriminators): + if i != 0: + y = self.meanpools[i - 1](y) + y_hat = self.meanpools[i - 1](y_hat) + y_d_r, fmap_r = d(y, mel) + y_d_g, fmap_g = d(y_hat, mel) + y_d_rs.append(y_d_r) + fmap_rs.append(fmap_r) + y_d_gs.append(y_d_g) + fmap_gs.append(fmap_g) + + return y_d_rs, y_d_gs, fmap_rs, fmap_gs + + +def feature_loss(fmap_r, fmap_g): + loss = 0 + for dr, dg in zip(fmap_r, fmap_g): + for rl, gl in zip(dr, dg): + loss += torch.mean(torch.abs(rl - gl)) + + return loss * 2 + + +def discriminator_loss(disc_real_outputs, disc_generated_outputs): + r_losses = 0 + g_losses = 0 + for dr, dg in zip(disc_real_outputs, disc_generated_outputs): + r_loss = torch.mean((1 - dr) ** 2) + g_loss = torch.mean(dg ** 2) + r_losses += r_loss + g_losses += g_loss + r_losses = r_losses / len(disc_real_outputs) + g_losses = g_losses / len(disc_real_outputs) + return r_losses, g_losses + + +def cond_discriminator_loss(outputs): + loss = 0 + for dg in outputs: + g_loss = torch.mean(dg ** 2) + loss += g_loss + loss = loss / len(outputs) + return loss + + +def generator_loss(disc_outputs): + loss = 0 + for dg in disc_outputs: + l = torch.mean((1 - dg) ** 2) + loss += l + loss = loss / len(disc_outputs) + return loss diff --git a/modules/hifigan/mel_utils.py b/modules/hifigan/mel_utils.py new file mode 100644 index 
0000000000000000000000000000000000000000..06e0f7d4d16fa3e4aefc8949347455f5a6e938da --- /dev/null +++ b/modules/hifigan/mel_utils.py @@ -0,0 +1,80 @@ +import numpy as np +import torch +import torch.utils.data +from librosa.filters import mel as librosa_mel_fn +from scipy.io.wavfile import read + +MAX_WAV_VALUE = 32768.0 + + +def load_wav(full_path): + sampling_rate, data = read(full_path) + return data, sampling_rate + + +def dynamic_range_compression(x, C=1, clip_val=1e-5): + return np.log(np.clip(x, a_min=clip_val, a_max=None) * C) + + +def dynamic_range_decompression(x, C=1): + return np.exp(x) / C + + +def dynamic_range_compression_torch(x, C=1, clip_val=1e-5): + return torch.log(torch.clamp(x, min=clip_val) * C) + + +def dynamic_range_decompression_torch(x, C=1): + return torch.exp(x) / C + + +def spectral_normalize_torch(magnitudes): + output = dynamic_range_compression_torch(magnitudes) + return output + + +def spectral_de_normalize_torch(magnitudes): + output = dynamic_range_decompression_torch(magnitudes) + return output + + +mel_basis = {} +hann_window = {} + + +def mel_spectrogram(y, hparams, center=False, complex=False): + # hop_size: 512 # For 22050Hz, 275 ~= 12.5 ms (0.0125 * sample_rate) + # win_size: 2048 # For 22050Hz, 1100 ~= 50 ms (If None, win_size: fft_size) (0.05 * sample_rate) + # fmin: 55 # Set this to 55 if your speaker is male! if female, 95 should help taking off noise. (To test depending on dataset. Pitch info: male~[65, 260], female~[100, 525]) + # fmax: 10000 # To be increased/reduced depending on data. + # fft_size: 2048 # Extra window size is filled with 0 paddings to match this parameter + # n_fft, num_mels, sampling_rate, hop_size, win_size, fmin, fmax, + n_fft = hparams['fft_size'] + num_mels = hparams['audio_num_mel_bins'] + sampling_rate = hparams['audio_sample_rate'] + hop_size = hparams['hop_size'] + win_size = hparams['win_size'] + fmin = hparams['fmin'] + fmax = hparams['fmax'] + y = y.clamp(min=-1., max=1.) 
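+ # note (added): y is expected to be a waveform batch of shape [B, T] with values in [-1, 1];
+ # the result is a log-compressed mel spectrogram [B, audio_num_mel_bins, frames], or the raw
+ # one-sided STFT [B, frames, n_fft // 2 + 1, 2] when complex=True.
+ # The mel filterbank and Hann window below are cached per device, but the guard checks the
+ # bare fmax value while entries are stored under f"{fmax}_{device}", so in practice the
+ # basis may simply be rebuilt on each call (redundant but harmless).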
+ global mel_basis, hann_window + if fmax not in mel_basis: + mel = librosa_mel_fn(sampling_rate, n_fft, num_mels, fmin, fmax) + mel_basis[str(fmax) + '_' + str(y.device)] = torch.from_numpy(mel).float().to(y.device) + hann_window[str(y.device)] = torch.hann_window(win_size).to(y.device) + + y = torch.nn.functional.pad(y.unsqueeze(1), (int((n_fft - hop_size) / 2), int((n_fft - hop_size) / 2)), + mode='reflect') + y = y.squeeze(1) + + spec = torch.stft(y, n_fft, hop_length=hop_size, win_length=win_size, window=hann_window[str(y.device)], + center=center, pad_mode='reflect', normalized=False, onesided=True) + + if not complex: + spec = torch.sqrt(spec.pow(2).sum(-1) + (1e-9)) + spec = torch.matmul(mel_basis[str(fmax) + '_' + str(y.device)], spec) + spec = spectral_normalize_torch(spec) + else: + B, C, T, _ = spec.shape + spec = spec.transpose(1, 2) # [B, T, n_fft, 2] + return spec diff --git a/modules/nsf_hifigan/__pycache__/env.cpython-38.pyc b/modules/nsf_hifigan/__pycache__/env.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e221d3323729f3a79720201e7e1601fba9cdbf32 Binary files /dev/null and b/modules/nsf_hifigan/__pycache__/env.cpython-38.pyc differ diff --git a/modules/nsf_hifigan/__pycache__/models.cpython-38.pyc b/modules/nsf_hifigan/__pycache__/models.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9d79fb29e052aac1f1d9961e2bf745385947aa88 Binary files /dev/null and b/modules/nsf_hifigan/__pycache__/models.cpython-38.pyc differ diff --git a/modules/nsf_hifigan/__pycache__/nvSTFT.cpython-38.pyc b/modules/nsf_hifigan/__pycache__/nvSTFT.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0e19b55b614866765e2de2048952b21780cab49f Binary files /dev/null and b/modules/nsf_hifigan/__pycache__/nvSTFT.cpython-38.pyc differ diff --git a/modules/nsf_hifigan/__pycache__/utils.cpython-38.pyc b/modules/nsf_hifigan/__pycache__/utils.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..39e546b658232b77a1250b0e54fee8a7a56e070f Binary files /dev/null and b/modules/nsf_hifigan/__pycache__/utils.cpython-38.pyc differ diff --git a/modules/nsf_hifigan/env.py b/modules/nsf_hifigan/env.py new file mode 100644 index 0000000000000000000000000000000000000000..02a2739a3eb25764527a14b46347ee386b0241b0 --- /dev/null +++ b/modules/nsf_hifigan/env.py @@ -0,0 +1,15 @@ +import os +import shutil + + +class AttrDict(dict): + def __init__(self, *args, **kwargs): + super(AttrDict, self).__init__(*args, **kwargs) + self.__dict__ = self + + +def build_env(config, config_name, path): + t_path = os.path.join(path, config_name) + if config != t_path: + os.makedirs(path, exist_ok=True) + shutil.copyfile(config, os.path.join(path, config_name)) \ No newline at end of file diff --git a/modules/nsf_hifigan/models.py b/modules/nsf_hifigan/models.py new file mode 100644 index 0000000000000000000000000000000000000000..7c82cc8cfcdcf2fb2cc2cb358573f4b15f12392d --- /dev/null +++ b/modules/nsf_hifigan/models.py @@ -0,0 +1,549 @@ +import os +import json +from .env import AttrDict +import numpy as np +import torch +import torch.nn.functional as F +import torch.nn as nn +from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d +from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm +from .utils import init_weights, get_padding + +LRELU_SLOPE = 0.1 + +def load_model(model_path, device='cuda'): + config_file = os.path.join(os.path.split(model_path)[0], 'config.json') + with 
open(config_file) as f: + data = f.read() + + global h + json_config = json.loads(data) + h = AttrDict(json_config) + + generator = Generator(h).to(device) + + cp_dict = torch.load(model_path) + generator.load_state_dict(cp_dict['generator']) + generator.eval() + generator.remove_weight_norm() + del cp_dict + return generator, h + + +class ResBlock1(torch.nn.Module): + def __init__(self, h, channels, kernel_size=3, dilation=(1, 3, 5)): + super(ResBlock1, self).__init__() + self.h = h + self.convs1 = nn.ModuleList([ + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[0], + padding=get_padding(kernel_size, dilation[0]))), + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[1], + padding=get_padding(kernel_size, dilation[1]))), + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[2], + padding=get_padding(kernel_size, dilation[2]))) + ]) + self.convs1.apply(init_weights) + + self.convs2 = nn.ModuleList([ + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1, + padding=get_padding(kernel_size, 1))), + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1, + padding=get_padding(kernel_size, 1))), + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1, + padding=get_padding(kernel_size, 1))) + ]) + self.convs2.apply(init_weights) + + def forward(self, x): + for c1, c2 in zip(self.convs1, self.convs2): + xt = F.leaky_relu(x, LRELU_SLOPE) + xt = c1(xt) + xt = F.leaky_relu(xt, LRELU_SLOPE) + xt = c2(xt) + x = xt + x + return x + + def remove_weight_norm(self): + for l in self.convs1: + remove_weight_norm(l) + for l in self.convs2: + remove_weight_norm(l) + + +class ResBlock2(torch.nn.Module): + def __init__(self, h, channels, kernel_size=3, dilation=(1, 3)): + super(ResBlock2, self).__init__() + self.h = h + self.convs = nn.ModuleList([ + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[0], + padding=get_padding(kernel_size, dilation[0]))), + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[1], + padding=get_padding(kernel_size, dilation[1]))) + ]) + self.convs.apply(init_weights) + + def forward(self, x): + for c in self.convs: + xt = F.leaky_relu(x, LRELU_SLOPE) + xt = c(xt) + x = xt + x + return x + + def remove_weight_norm(self): + for l in self.convs: + remove_weight_norm(l) + + +class Generator(torch.nn.Module): + def __init__(self, h): + super(Generator, self).__init__() + self.h = h + self.num_kernels = len(h.resblock_kernel_sizes) + self.num_upsamples = len(h.upsample_rates) + self.conv_pre = weight_norm(Conv1d(h.num_mels, h.upsample_initial_channel, 7, 1, padding=3)) + resblock = ResBlock1 if h.resblock == '1' else ResBlock2 + + self.ups = nn.ModuleList() + for i, (u, k) in enumerate(zip(h.upsample_rates, h.upsample_kernel_sizes)): + self.ups.append(weight_norm( + ConvTranspose1d(h.upsample_initial_channel//(2**i), h.upsample_initial_channel//(2**(i+1)), + k, u, padding=(k-u)//2))) + + self.resblocks = nn.ModuleList() + for i in range(len(self.ups)): + ch = h.upsample_initial_channel//(2**(i+1)) + for j, (k, d) in enumerate(zip(h.resblock_kernel_sizes, h.resblock_dilation_sizes)): + self.resblocks.append(resblock(h, ch, k, d)) + + self.conv_post = weight_norm(Conv1d(ch, 1, 7, 1, padding=3)) + self.ups.apply(init_weights) + self.conv_post.apply(init_weights) + + def forward(self, x): + x = self.conv_pre(x) + for i in range(self.num_upsamples): + x = F.leaky_relu(x, LRELU_SLOPE) + x = self.ups[i](x) + xs = None + for j in 
range(self.num_kernels): + if xs is None: + xs = self.resblocks[i*self.num_kernels+j](x) + else: + xs += self.resblocks[i*self.num_kernels+j](x) + x = xs / self.num_kernels + x = F.leaky_relu(x) + x = self.conv_post(x) + x = torch.tanh(x) + + return x + + def remove_weight_norm(self): + print('Removing weight norm...') + for l in self.ups: + remove_weight_norm(l) + for l in self.resblocks: + l.remove_weight_norm() + remove_weight_norm(self.conv_pre) + remove_weight_norm(self.conv_post) +class SineGen(torch.nn.Module): + """ Definition of sine generator + SineGen(samp_rate, harmonic_num = 0, + sine_amp = 0.1, noise_std = 0.003, + voiced_threshold = 0, + flag_for_pulse=False) + samp_rate: sampling rate in Hz + harmonic_num: number of harmonic overtones (default 0) + sine_amp: amplitude of sine-wavefrom (default 0.1) + noise_std: std of Gaussian noise (default 0.003) + voiced_thoreshold: F0 threshold for U/V classification (default 0) + flag_for_pulse: this SinGen is used inside PulseGen (default False) + Note: when flag_for_pulse is True, the first time step of a voiced + segment is always sin(np.pi) or cos(0) + """ + + def __init__(self, samp_rate, harmonic_num=0, + sine_amp=0.1, noise_std=0.003, + voiced_threshold=0, + flag_for_pulse=False): + super(SineGen, self).__init__() + self.sine_amp = sine_amp + self.noise_std = noise_std + self.harmonic_num = harmonic_num + self.dim = self.harmonic_num + 1 + self.sampling_rate = samp_rate + self.voiced_threshold = voiced_threshold + self.flag_for_pulse = flag_for_pulse + + def _f02uv(self, f0): + # generate uv signal + uv = torch.ones_like(f0) + uv = uv * (f0 > self.voiced_threshold) + return uv + + def _f02sine(self, f0_values): + """ f0_values: (batchsize, length, dim) + where dim indicates fundamental tone and overtones + """ + # convert to F0 in rad. The interger part n can be ignored + # because 2 * np.pi * n doesn't affect phase + rad_values = (f0_values / self.sampling_rate) % 1 + + # initial phase noise (no noise for fundamental component) + rand_ini = torch.rand(f0_values.shape[0], f0_values.shape[2], \ + device=f0_values.device) + rand_ini[:, 0] = 0 + rad_values[:, 0, :] = rad_values[:, 0, :] + rand_ini + + # instantanouse phase sine[t] = sin(2*pi \sum_i=1 ^{t} rad) + if not self.flag_for_pulse: + # for normal case + + # To prevent torch.cumsum numerical overflow, + # it is necessary to add -1 whenever \sum_k=1^n rad_value_k > 1. + # Buffer tmp_over_one_idx indicates the time step to add -1. 
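+ # (added note) tmp_over_one below holds the fractional part of the cumulative phase; a drop
+ # between consecutive steps marks a wrap past 1.0, which is exactly where cumsum_shift
+ # inserts the -1 before the final cumulative sum.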
+ # This will not change F0 of sine because (x-1) * 2*pi = x * 2*pi + tmp_over_one = torch.cumsum(rad_values, 1) % 1 + tmp_over_one_idx = (tmp_over_one[:, 1:, :] - + tmp_over_one[:, :-1, :]) < 0 + cumsum_shift = torch.zeros_like(rad_values) + cumsum_shift[:, 1:, :] = tmp_over_one_idx * -1.0 + + sines = torch.sin(torch.cumsum(rad_values + cumsum_shift, dim=1) + * 2 * np.pi) + else: + # If necessary, make sure that the first time step of every + # voiced segments is sin(pi) or cos(0) + # This is used for pulse-train generation + + # identify the last time step in unvoiced segments + uv = self._f02uv(f0_values) + uv_1 = torch.roll(uv, shifts=-1, dims=1) + uv_1[:, -1, :] = 1 + u_loc = (uv < 1) * (uv_1 > 0) + + # get the instantanouse phase + tmp_cumsum = torch.cumsum(rad_values, dim=1) + # different batch needs to be processed differently + for idx in range(f0_values.shape[0]): + temp_sum = tmp_cumsum[idx, u_loc[idx, :, 0], :] + temp_sum[1:, :] = temp_sum[1:, :] - temp_sum[0:-1, :] + # stores the accumulation of i.phase within + # each voiced segments + tmp_cumsum[idx, :, :] = 0 + tmp_cumsum[idx, u_loc[idx, :, 0], :] = temp_sum + + # rad_values - tmp_cumsum: remove the accumulation of i.phase + # within the previous voiced segment. + i_phase = torch.cumsum(rad_values - tmp_cumsum, dim=1) + + # get the sines + sines = torch.cos(i_phase * 2 * np.pi) + return sines + + def forward(self, f0): + """ sine_tensor, uv = forward(f0) + input F0: tensor(batchsize=1, length, dim=1) + f0 for unvoiced steps should be 0 + output sine_tensor: tensor(batchsize=1, length, dim) + output uv: tensor(batchsize=1, length, 1) + """ + with torch.no_grad(): + f0_buf = torch.zeros(f0.shape[0], f0.shape[1], self.dim, + device=f0.device) + # fundamental component + f0_buf[:, :, 0] = f0[:, :, 0] + for idx in np.arange(self.harmonic_num): + # idx + 2: the (idx+1)-th overtone, (idx+2)-th harmonic + f0_buf[:, :, idx + 1] = f0_buf[:, :, 0] * (idx + 2) + + # generate sine waveforms + sine_waves = self._f02sine(f0_buf) * self.sine_amp + + # generate uv signal + # uv = torch.ones(f0.shape) + # uv = uv * (f0 > self.voiced_threshold) + uv = self._f02uv(f0) + + # noise: for unvoiced should be similar to sine_amp + # std = self.sine_amp/3 -> max value ~ self.sine_amp + # . 
for voiced regions is self.noise_std + noise_amp = uv * self.noise_std + (1 - uv) * self.sine_amp / 3 + noise = noise_amp * torch.randn_like(sine_waves) + + # first: set the unvoiced part to 0 by uv + # then: additive noise + sine_waves = sine_waves * uv + noise + return sine_waves, uv, noise +class SourceModuleHnNSF(torch.nn.Module): + """ SourceModule for hn-nsf + SourceModule(sampling_rate, harmonic_num=0, sine_amp=0.1, + add_noise_std=0.003, voiced_threshod=0) + sampling_rate: sampling_rate in Hz + harmonic_num: number of harmonic above F0 (default: 0) + sine_amp: amplitude of sine source signal (default: 0.1) + add_noise_std: std of additive Gaussian noise (default: 0.003) + note that amplitude of noise in unvoiced is decided + by sine_amp + voiced_threshold: threhold to set U/V given F0 (default: 0) + Sine_source, noise_source = SourceModuleHnNSF(F0_sampled) + F0_sampled (batchsize, length, 1) + Sine_source (batchsize, length, 1) + noise_source (batchsize, length 1) + uv (batchsize, length, 1) + """ + + def __init__(self, sampling_rate, harmonic_num=0, sine_amp=0.1, + add_noise_std=0.003, voiced_threshod=0): + super(SourceModuleHnNSF, self).__init__() + + self.sine_amp = sine_amp + self.noise_std = add_noise_std + + # to produce sine waveforms + self.l_sin_gen = SineGen(sampling_rate, harmonic_num, + sine_amp, add_noise_std, voiced_threshod) + + # to merge source harmonics into a single excitation + self.l_linear = torch.nn.Linear(harmonic_num + 1, 1) + self.l_tanh = torch.nn.Tanh() + + def forward(self, x): + """ + Sine_source, noise_source = SourceModuleHnNSF(F0_sampled) + F0_sampled (batchsize, length, 1) + Sine_source (batchsize, length, 1) + noise_source (batchsize, length 1) + """ + # source for harmonic branch + sine_wavs, uv, _ = self.l_sin_gen(x) + sine_merge = self.l_tanh(self.l_linear(sine_wavs)) + + # source for noise branch, in the same shape as uv + noise = torch.randn_like(uv) * self.sine_amp / 3 + return sine_merge, noise, uv + +class Generator(torch.nn.Module): + def __init__(self, h): + super(Generator, self).__init__() + self.h = h + self.num_kernels = len(h.resblock_kernel_sizes) + self.num_upsamples = len(h.upsample_rates) + self.f0_upsamp = torch.nn.Upsample(scale_factor=np.prod(h.upsample_rates)) + self.m_source = SourceModuleHnNSF( + sampling_rate=h.sampling_rate, + harmonic_num=8) + self.noise_convs = nn.ModuleList() + self.conv_pre = weight_norm(Conv1d(h.num_mels, h.upsample_initial_channel, 7, 1, padding=3)) + resblock = ResBlock1 if h.resblock == '1' else ResBlock2 + + self.ups = nn.ModuleList() + for i, (u, k) in enumerate(zip(h.upsample_rates, h.upsample_kernel_sizes)): + c_cur = h.upsample_initial_channel // (2 ** (i + 1)) + self.ups.append(weight_norm( + ConvTranspose1d(h.upsample_initial_channel//(2**i), h.upsample_initial_channel//(2**(i+1)), + k, u, padding=(k-u)//2))) + if i + 1 < len(h.upsample_rates):# + stride_f0 = np.prod(h.upsample_rates[i + 1:]) + self.noise_convs.append(Conv1d( + 1, c_cur, kernel_size=stride_f0 * 2, stride=stride_f0, padding=stride_f0 // 2)) + else: + self.noise_convs.append(Conv1d(1, c_cur, kernel_size=1)) + self.resblocks = nn.ModuleList() + for i in range(len(self.ups)): + ch = h.upsample_initial_channel//(2**(i+1)) + for j, (k, d) in enumerate(zip(h.resblock_kernel_sizes, h.resblock_dilation_sizes)): + self.resblocks.append(resblock(h, ch, k, d)) + + self.conv_post = weight_norm(Conv1d(ch, 1, 7, 1, padding=3)) + self.ups.apply(init_weights) + self.conv_post.apply(init_weights) + + def forward(self, x,f0): + # 
print(1,x.shape,f0.shape,f0[:, None].shape) + f0 = self.f0_upsamp(f0[:, None]).transpose(1, 2)#bs,n,t + # print(2,f0.shape) + har_source, noi_source, uv = self.m_source(f0) + har_source = har_source.transpose(1, 2) + x = self.conv_pre(x) + # print(124,x.shape,har_source.shape) + for i in range(self.num_upsamples): + x = F.leaky_relu(x, LRELU_SLOPE) + # print(3,x.shape) + x = self.ups[i](x) + x_source = self.noise_convs[i](har_source) + # print(4,x_source.shape,har_source.shape,x.shape) + x = x + x_source + xs = None + for j in range(self.num_kernels): + if xs is None: + xs = self.resblocks[i*self.num_kernels+j](x) + else: + xs += self.resblocks[i*self.num_kernels+j](x) + x = xs / self.num_kernels + x = F.leaky_relu(x) + x = self.conv_post(x) + x = torch.tanh(x) + + return x + + def remove_weight_norm(self): + print('Removing weight norm...') + for l in self.ups: + remove_weight_norm(l) + for l in self.resblocks: + l.remove_weight_norm() + remove_weight_norm(self.conv_pre) + remove_weight_norm(self.conv_post) + +class DiscriminatorP(torch.nn.Module): + def __init__(self, period, kernel_size=5, stride=3, use_spectral_norm=False): + super(DiscriminatorP, self).__init__() + self.period = period + norm_f = weight_norm if use_spectral_norm == False else spectral_norm + self.convs = nn.ModuleList([ + norm_f(Conv2d(1, 32, (kernel_size, 1), (stride, 1), padding=(get_padding(5, 1), 0))), + norm_f(Conv2d(32, 128, (kernel_size, 1), (stride, 1), padding=(get_padding(5, 1), 0))), + norm_f(Conv2d(128, 512, (kernel_size, 1), (stride, 1), padding=(get_padding(5, 1), 0))), + norm_f(Conv2d(512, 1024, (kernel_size, 1), (stride, 1), padding=(get_padding(5, 1), 0))), + norm_f(Conv2d(1024, 1024, (kernel_size, 1), 1, padding=(2, 0))), + ]) + self.conv_post = norm_f(Conv2d(1024, 1, (3, 1), 1, padding=(1, 0))) + + def forward(self, x): + fmap = [] + + # 1d to 2d + b, c, t = x.shape + if t % self.period != 0: # pad first + n_pad = self.period - (t % self.period) + x = F.pad(x, (0, n_pad), "reflect") + t = t + n_pad + x = x.view(b, c, t // self.period, self.period) + + for l in self.convs: + x = l(x) + x = F.leaky_relu(x, LRELU_SLOPE) + fmap.append(x) + x = self.conv_post(x) + fmap.append(x) + x = torch.flatten(x, 1, -1) + + return x, fmap + + +class MultiPeriodDiscriminator(torch.nn.Module): + def __init__(self, periods=None): + super(MultiPeriodDiscriminator, self).__init__() + self.periods = periods if periods is not None else [2, 3, 5, 7, 11] + self.discriminators = nn.ModuleList() + for period in self.periods: + self.discriminators.append(DiscriminatorP(period)) + + def forward(self, y, y_hat): + y_d_rs = [] + y_d_gs = [] + fmap_rs = [] + fmap_gs = [] + for i, d in enumerate(self.discriminators): + y_d_r, fmap_r = d(y) + y_d_g, fmap_g = d(y_hat) + y_d_rs.append(y_d_r) + fmap_rs.append(fmap_r) + y_d_gs.append(y_d_g) + fmap_gs.append(fmap_g) + + return y_d_rs, y_d_gs, fmap_rs, fmap_gs + + +class DiscriminatorS(torch.nn.Module): + def __init__(self, use_spectral_norm=False): + super(DiscriminatorS, self).__init__() + norm_f = weight_norm if use_spectral_norm == False else spectral_norm + self.convs = nn.ModuleList([ + norm_f(Conv1d(1, 128, 15, 1, padding=7)), + norm_f(Conv1d(128, 128, 41, 2, groups=4, padding=20)), + norm_f(Conv1d(128, 256, 41, 2, groups=16, padding=20)), + norm_f(Conv1d(256, 512, 41, 4, groups=16, padding=20)), + norm_f(Conv1d(512, 1024, 41, 4, groups=16, padding=20)), + norm_f(Conv1d(1024, 1024, 41, 1, groups=16, padding=20)), + norm_f(Conv1d(1024, 1024, 5, 1, padding=2)), + ]) + self.conv_post = 
norm_f(Conv1d(1024, 1, 3, 1, padding=1)) + + def forward(self, x): + fmap = [] + for l in self.convs: + x = l(x) + x = F.leaky_relu(x, LRELU_SLOPE) + fmap.append(x) + x = self.conv_post(x) + fmap.append(x) + x = torch.flatten(x, 1, -1) + + return x, fmap + + +class MultiScaleDiscriminator(torch.nn.Module): + def __init__(self): + super(MultiScaleDiscriminator, self).__init__() + self.discriminators = nn.ModuleList([ + DiscriminatorS(use_spectral_norm=True), + DiscriminatorS(), + DiscriminatorS(), + ]) + self.meanpools = nn.ModuleList([ + AvgPool1d(4, 2, padding=2), + AvgPool1d(4, 2, padding=2) + ]) + + def forward(self, y, y_hat): + y_d_rs = [] + y_d_gs = [] + fmap_rs = [] + fmap_gs = [] + for i, d in enumerate(self.discriminators): + if i != 0: + y = self.meanpools[i-1](y) + y_hat = self.meanpools[i-1](y_hat) + y_d_r, fmap_r = d(y) + y_d_g, fmap_g = d(y_hat) + y_d_rs.append(y_d_r) + fmap_rs.append(fmap_r) + y_d_gs.append(y_d_g) + fmap_gs.append(fmap_g) + + return y_d_rs, y_d_gs, fmap_rs, fmap_gs + + +def feature_loss(fmap_r, fmap_g): + loss = 0 + for dr, dg in zip(fmap_r, fmap_g): + for rl, gl in zip(dr, dg): + loss += torch.mean(torch.abs(rl - gl)) + + return loss*2 + + +def discriminator_loss(disc_real_outputs, disc_generated_outputs): + loss = 0 + r_losses = [] + g_losses = [] + for dr, dg in zip(disc_real_outputs, disc_generated_outputs): + r_loss = torch.mean((1-dr)**2) + g_loss = torch.mean(dg**2) + loss += (r_loss + g_loss) + r_losses.append(r_loss.item()) + g_losses.append(g_loss.item()) + + return loss, r_losses, g_losses + + +def generator_loss(disc_outputs): + loss = 0 + gen_losses = [] + for dg in disc_outputs: + l = torch.mean((1-dg)**2) + gen_losses.append(l) + loss += l + + return loss, gen_losses \ No newline at end of file diff --git a/modules/nsf_hifigan/nvSTFT.py b/modules/nsf_hifigan/nvSTFT.py new file mode 100644 index 0000000000000000000000000000000000000000..35635c844ea1ae6258112f0ba92e417e81a22642 --- /dev/null +++ b/modules/nsf_hifigan/nvSTFT.py @@ -0,0 +1,111 @@ +import math +import os +os.environ["LRU_CACHE_CAPACITY"] = "3" +import random +import torch +import torch.utils.data +import numpy as np +import librosa +from librosa.util import normalize +from librosa.filters import mel as librosa_mel_fn +from scipy.io.wavfile import read +import soundfile as sf + +def load_wav_to_torch(full_path, target_sr=None, return_empty_on_exception=False): + sampling_rate = None + try: + data, sampling_rate = sf.read(full_path, always_2d=True)# than soundfile. + except Exception as ex: + print(f"'{full_path}' failed to load.\nException:") + print(ex) + if return_empty_on_exception: + return [], sampling_rate or target_sr or 48000 + else: + raise Exception(ex) + + if len(data.shape) > 1: + data = data[:, 0] + assert len(data) > 2# check duration of audio file is > 2 samples (because otherwise the slice operation was on the wrong dimension) + + if np.issubdtype(data.dtype, np.integer): # if audio data is type int + max_mag = -np.iinfo(data.dtype).min # maximum magnitude = min possible value of intXX + else: # if audio data is type fp32 + max_mag = max(np.amax(data), -np.amin(data)) + max_mag = (2**31)+1 if max_mag > (2**15) else ((2**15)+1 if max_mag > 1.01 else 1.0) # data should be either 16-bit INT, 32-bit INT or [-1 to 1] float32 + + data = torch.FloatTensor(data.astype(np.float32))/max_mag + + if (torch.isinf(data) | torch.isnan(data)).any() and return_empty_on_exception:# resample will crash with inf/NaN inputs. 
return_empty_on_exception will return empty arr instead of except + return [], sampling_rate or target_sr or 48000 + if target_sr is not None and sampling_rate != target_sr: + data = torch.from_numpy(librosa.core.resample(data.numpy(), orig_sr=sampling_rate, target_sr=target_sr)) + sampling_rate = target_sr + + return data, sampling_rate + +def dynamic_range_compression(x, C=1, clip_val=1e-5): + return np.log(np.clip(x, a_min=clip_val, a_max=None) * C) + +def dynamic_range_decompression(x, C=1): + return np.exp(x) / C + +def dynamic_range_compression_torch(x, C=1, clip_val=1e-5): + return torch.log(torch.clamp(x, min=clip_val) * C) + +def dynamic_range_decompression_torch(x, C=1): + return torch.exp(x) / C + +class STFT(): + def __init__(self, sr=22050, n_mels=80, n_fft=1024, win_size=1024, hop_length=256, fmin=20, fmax=11025, clip_val=1e-5): + self.target_sr = sr + + self.n_mels = n_mels + self.n_fft = n_fft + self.win_size = win_size + self.hop_length = hop_length + self.fmin = fmin + self.fmax = fmax + self.clip_val = clip_val + self.mel_basis = {} + self.hann_window = {} + + def get_mel(self, y, center=False): + sampling_rate = self.target_sr + n_mels = self.n_mels + n_fft = self.n_fft + win_size = self.win_size + hop_length = self.hop_length + fmin = self.fmin + fmax = self.fmax + clip_val = self.clip_val + + if torch.min(y) < -1.: + print('min value is ', torch.min(y)) + if torch.max(y) > 1.: + print('max value is ', torch.max(y)) + + if fmax not in self.mel_basis: + mel = librosa_mel_fn(sr=sampling_rate, n_fft=n_fft, n_mels=n_mels, fmin=fmin, fmax=fmax) + self.mel_basis[str(fmax)+'_'+str(y.device)] = torch.from_numpy(mel).float().to(y.device) + self.hann_window[str(y.device)] = torch.hann_window(self.win_size).to(y.device) + + y = torch.nn.functional.pad(y.unsqueeze(1), (int((n_fft-hop_length)/2), int((n_fft-hop_length)/2)), mode='reflect') + y = y.squeeze(1) + + spec = torch.stft(y, n_fft, hop_length=hop_length, win_length=win_size, window=self.hann_window[str(y.device)], + center=center, pad_mode='reflect', normalized=False, onesided=True) + # print(111,spec) + spec = torch.sqrt(spec.pow(2).sum(-1)+(1e-9)) + # print(222,spec) + spec = torch.matmul(self.mel_basis[str(fmax)+'_'+str(y.device)], spec) + # print(333,spec) + spec = dynamic_range_compression_torch(spec, clip_val=clip_val) + # print(444,spec) + return spec + + def __call__(self, audiopath): + audio, sr = load_wav_to_torch(audiopath, target_sr=self.target_sr) + spect = self.get_mel(audio.unsqueeze(0)).squeeze(0) + return spect + +stft = STFT() \ No newline at end of file diff --git a/modules/nsf_hifigan/utils.py b/modules/nsf_hifigan/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..6ad5a37507987bd6e1200cb9241edf97989e3ead --- /dev/null +++ b/modules/nsf_hifigan/utils.py @@ -0,0 +1,67 @@ +import glob +import os +import matplotlib +import torch +from torch.nn.utils import weight_norm +matplotlib.use("Agg") +import matplotlib.pylab as plt + + +def plot_spectrogram(spectrogram): + fig, ax = plt.subplots(figsize=(10, 2)) + im = ax.imshow(spectrogram, aspect="auto", origin="lower", + interpolation='none') + plt.colorbar(im, ax=ax) + + fig.canvas.draw() + plt.close() + + return fig + + +def init_weights(m, mean=0.0, std=0.01): + classname = m.__class__.__name__ + if classname.find("Conv") != -1: + m.weight.data.normal_(mean, std) + + +def apply_weight_norm(m): + classname = m.__class__.__name__ + if classname.find("Conv") != -1: + weight_norm(m) + + +def get_padding(kernel_size, dilation=1): + return 
int((kernel_size*dilation - dilation)/2) + + +def load_checkpoint(filepath, device): + assert os.path.isfile(filepath) + print("Loading '{}'".format(filepath)) + checkpoint_dict = torch.load(filepath, map_location=device) + print("Complete.") + return checkpoint_dict + + +def save_checkpoint(filepath, obj): + print("Saving checkpoint to {}".format(filepath)) + torch.save(obj, filepath) + print("Complete.") + + +def del_old_checkpoints(cp_dir, prefix, n_models=2): + pattern = os.path.join(cp_dir, prefix + '????????') + cp_list = glob.glob(pattern) # get checkpoint paths + cp_list = sorted(cp_list)# sort by iter + if len(cp_list) > n_models: # if more than n_models models are found + for cp in cp_list[:-n_models]:# delete the oldest models other than lastest n_models + open(cp, 'w').close()# empty file contents + os.unlink(cp)# delete file (move to trash when using Colab) + + +def scan_checkpoint(cp_dir, prefix): + pattern = os.path.join(cp_dir, prefix + '????????') + cp_list = glob.glob(pattern) + if len(cp_list) == 0: + return None + return sorted(cp_list)[-1] \ No newline at end of file diff --git a/modules/parallel_wavegan/__init__.py b/modules/parallel_wavegan/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/modules/parallel_wavegan/__pycache__/__init__.cpython-38.pyc b/modules/parallel_wavegan/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6a72dc078a6888fa6ecc164fddba4000a1021029 Binary files /dev/null and b/modules/parallel_wavegan/__pycache__/__init__.cpython-38.pyc differ diff --git a/modules/parallel_wavegan/layers/__init__.py b/modules/parallel_wavegan/layers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e477f51116a3157781b1aefefbaf32fe4d4bd1f0 --- /dev/null +++ b/modules/parallel_wavegan/layers/__init__.py @@ -0,0 +1,5 @@ +from .causal_conv import * # NOQA +from .pqmf import * # NOQA +from .residual_block import * # NOQA +from modules.parallel_wavegan.layers.residual_stack import * # NOQA +from .upsample import * # NOQA diff --git a/modules/parallel_wavegan/layers/__pycache__/__init__.cpython-38.pyc b/modules/parallel_wavegan/layers/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..86f5d78d83d5daf5c56a45255dbaa381804cbcc2 Binary files /dev/null and b/modules/parallel_wavegan/layers/__pycache__/__init__.cpython-38.pyc differ diff --git a/modules/parallel_wavegan/layers/__pycache__/causal_conv.cpython-38.pyc b/modules/parallel_wavegan/layers/__pycache__/causal_conv.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d6ab116b8671479cd5fa56a9553544a1924a5fb9 Binary files /dev/null and b/modules/parallel_wavegan/layers/__pycache__/causal_conv.cpython-38.pyc differ diff --git a/modules/parallel_wavegan/layers/__pycache__/pqmf.cpython-38.pyc b/modules/parallel_wavegan/layers/__pycache__/pqmf.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..291742627bc50217be5d191aa0b9838c4e961c6b Binary files /dev/null and b/modules/parallel_wavegan/layers/__pycache__/pqmf.cpython-38.pyc differ diff --git a/modules/parallel_wavegan/layers/__pycache__/residual_block.cpython-38.pyc b/modules/parallel_wavegan/layers/__pycache__/residual_block.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7d3b43a4af9460e84063fecd2496273512d6428f Binary files /dev/null and 
b/modules/parallel_wavegan/layers/__pycache__/residual_block.cpython-38.pyc differ diff --git a/modules/parallel_wavegan/layers/__pycache__/residual_stack.cpython-38.pyc b/modules/parallel_wavegan/layers/__pycache__/residual_stack.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f88f24a4aeb61a2ffdbb8c4c17832d5d8cfcdf45 Binary files /dev/null and b/modules/parallel_wavegan/layers/__pycache__/residual_stack.cpython-38.pyc differ diff --git a/modules/parallel_wavegan/layers/__pycache__/upsample.cpython-38.pyc b/modules/parallel_wavegan/layers/__pycache__/upsample.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f7942fd2ecd2b6a54b524cb71217440ebcf21581 Binary files /dev/null and b/modules/parallel_wavegan/layers/__pycache__/upsample.cpython-38.pyc differ diff --git a/modules/parallel_wavegan/layers/causal_conv.py b/modules/parallel_wavegan/layers/causal_conv.py new file mode 100644 index 0000000000000000000000000000000000000000..fca77daf65f234e6fbe355ed148fc8f0ee85038a --- /dev/null +++ b/modules/parallel_wavegan/layers/causal_conv.py @@ -0,0 +1,56 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Tomoki Hayashi +# MIT License (https://opensource.org/licenses/MIT) + +"""Causal convolusion layer modules.""" + + +import torch + + +class CausalConv1d(torch.nn.Module): + """CausalConv1d module with customized initialization.""" + + def __init__(self, in_channels, out_channels, kernel_size, + dilation=1, bias=True, pad="ConstantPad1d", pad_params={"value": 0.0}): + """Initialize CausalConv1d module.""" + super(CausalConv1d, self).__init__() + self.pad = getattr(torch.nn, pad)((kernel_size - 1) * dilation, **pad_params) + self.conv = torch.nn.Conv1d(in_channels, out_channels, kernel_size, + dilation=dilation, bias=bias) + + def forward(self, x): + """Calculate forward propagation. + + Args: + x (Tensor): Input tensor (B, in_channels, T). + + Returns: + Tensor: Output tensor (B, out_channels, T). + + """ + return self.conv(self.pad(x))[:, :, :x.size(2)] + + +class CausalConvTranspose1d(torch.nn.Module): + """CausalConvTranspose1d module with customized initialization.""" + + def __init__(self, in_channels, out_channels, kernel_size, stride, bias=True): + """Initialize CausalConvTranspose1d module.""" + super(CausalConvTranspose1d, self).__init__() + self.deconv = torch.nn.ConvTranspose1d( + in_channels, out_channels, kernel_size, stride, bias=bias) + self.stride = stride + + def forward(self, x): + """Calculate forward propagation. + + Args: + x (Tensor): Input tensor (B, in_channels, T_in). + + Returns: + Tensor: Output tensor (B, out_channels, T_out). + + """ + return self.deconv(x)[:, :, :-self.stride] diff --git a/modules/parallel_wavegan/layers/pqmf.py b/modules/parallel_wavegan/layers/pqmf.py new file mode 100644 index 0000000000000000000000000000000000000000..ac21074fd32a370a099fa2facb62cfd3253d7579 --- /dev/null +++ b/modules/parallel_wavegan/layers/pqmf.py @@ -0,0 +1,129 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Tomoki Hayashi +# MIT License (https://opensource.org/licenses/MIT) + +"""Pseudo QMF modules.""" + +import numpy as np +import torch +import torch.nn.functional as F + +from scipy.signal import kaiser + + +def design_prototype_filter(taps=62, cutoff_ratio=0.15, beta=9.0): + """Design prototype filter for PQMF. + + This method is based on `A Kaiser window approach for the design of prototype + filters of cosine modulated filterbanks`_. + + Args: + taps (int): The number of filter taps. 
+ cutoff_ratio (float): Cut-off frequency ratio. + beta (float): Beta coefficient for kaiser window. + + Returns: + ndarray: Impluse response of prototype filter (taps + 1,). + + .. _`A Kaiser window approach for the design of prototype filters of cosine modulated filterbanks`: + https://ieeexplore.ieee.org/abstract/document/681427 + + """ + # check the arguments are valid + assert taps % 2 == 0, "The number of taps mush be even number." + assert 0.0 < cutoff_ratio < 1.0, "Cutoff ratio must be > 0.0 and < 1.0." + + # make initial filter + omega_c = np.pi * cutoff_ratio + with np.errstate(invalid='ignore'): + h_i = np.sin(omega_c * (np.arange(taps + 1) - 0.5 * taps)) \ + / (np.pi * (np.arange(taps + 1) - 0.5 * taps)) + h_i[taps // 2] = np.cos(0) * cutoff_ratio # fix nan due to indeterminate form + + # apply kaiser window + w = kaiser(taps + 1, beta) + h = h_i * w + + return h + + +class PQMF(torch.nn.Module): + """PQMF module. + + This module is based on `Near-perfect-reconstruction pseudo-QMF banks`_. + + .. _`Near-perfect-reconstruction pseudo-QMF banks`: + https://ieeexplore.ieee.org/document/258122 + + """ + + def __init__(self, subbands=4, taps=62, cutoff_ratio=0.15, beta=9.0): + """Initilize PQMF module. + + Args: + subbands (int): The number of subbands. + taps (int): The number of filter taps. + cutoff_ratio (float): Cut-off frequency ratio. + beta (float): Beta coefficient for kaiser window. + + """ + super(PQMF, self).__init__() + + # define filter coefficient + h_proto = design_prototype_filter(taps, cutoff_ratio, beta) + h_analysis = np.zeros((subbands, len(h_proto))) + h_synthesis = np.zeros((subbands, len(h_proto))) + for k in range(subbands): + h_analysis[k] = 2 * h_proto * np.cos( + (2 * k + 1) * (np.pi / (2 * subbands)) * + (np.arange(taps + 1) - ((taps - 1) / 2)) + + (-1) ** k * np.pi / 4) + h_synthesis[k] = 2 * h_proto * np.cos( + (2 * k + 1) * (np.pi / (2 * subbands)) * + (np.arange(taps + 1) - ((taps - 1) / 2)) - + (-1) ** k * np.pi / 4) + + # convert to tensor + analysis_filter = torch.from_numpy(h_analysis).float().unsqueeze(1) + synthesis_filter = torch.from_numpy(h_synthesis).float().unsqueeze(0) + + # register coefficients as beffer + self.register_buffer("analysis_filter", analysis_filter) + self.register_buffer("synthesis_filter", synthesis_filter) + + # filter for downsampling & upsampling + updown_filter = torch.zeros((subbands, subbands, subbands)).float() + for k in range(subbands): + updown_filter[k, k, 0] = 1.0 + self.register_buffer("updown_filter", updown_filter) + self.subbands = subbands + + # keep padding info + self.pad_fn = torch.nn.ConstantPad1d(taps // 2, 0.0) + + def analysis(self, x): + """Analysis with PQMF. + + Args: + x (Tensor): Input tensor (B, 1, T). + + Returns: + Tensor: Output tensor (B, subbands, T // subbands). + + """ + x = F.conv1d(self.pad_fn(x), self.analysis_filter) + return F.conv1d(x, self.updown_filter, stride=self.subbands) + + def synthesis(self, x): + """Synthesis with PQMF. + + Args: + x (Tensor): Input tensor (B, subbands, T // subbands). + + Returns: + Tensor: Output tensor (B, 1, T). 
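+
+ Example (illustrative sketch, not part of the original code):
+ >>> pqmf = PQMF(subbands=4)
+ >>> x = torch.randn(1, 1, 16000)
+ >>> bands = pqmf.analysis(x)       # (1, 4, 4000)
+ >>> x_hat = pqmf.synthesis(bands)  # (1, 1, 16000)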
+ + """ + x = F.conv_transpose1d(x, self.updown_filter * self.subbands, stride=self.subbands) + return F.conv1d(self.pad_fn(x), self.synthesis_filter) diff --git a/modules/parallel_wavegan/layers/residual_block.py b/modules/parallel_wavegan/layers/residual_block.py new file mode 100644 index 0000000000000000000000000000000000000000..7a267a86c1fa521c2824addf9dda304c43f1ff1f --- /dev/null +++ b/modules/parallel_wavegan/layers/residual_block.py @@ -0,0 +1,129 @@ +# -*- coding: utf-8 -*- + +"""Residual block module in WaveNet. + +This code is modified from https://github.com/r9y9/wavenet_vocoder. + +""" + +import math + +import torch +import torch.nn.functional as F + + +class Conv1d(torch.nn.Conv1d): + """Conv1d module with customized initialization.""" + + def __init__(self, *args, **kwargs): + """Initialize Conv1d module.""" + super(Conv1d, self).__init__(*args, **kwargs) + + def reset_parameters(self): + """Reset parameters.""" + torch.nn.init.kaiming_normal_(self.weight, nonlinearity="relu") + if self.bias is not None: + torch.nn.init.constant_(self.bias, 0.0) + + +class Conv1d1x1(Conv1d): + """1x1 Conv1d with customized initialization.""" + + def __init__(self, in_channels, out_channels, bias): + """Initialize 1x1 Conv1d module.""" + super(Conv1d1x1, self).__init__(in_channels, out_channels, + kernel_size=1, padding=0, + dilation=1, bias=bias) + + +class ResidualBlock(torch.nn.Module): + """Residual block module in WaveNet.""" + + def __init__(self, + kernel_size=3, + residual_channels=64, + gate_channels=128, + skip_channels=64, + aux_channels=80, + dropout=0.0, + dilation=1, + bias=True, + use_causal_conv=False + ): + """Initialize ResidualBlock module. + + Args: + kernel_size (int): Kernel size of dilation convolution layer. + residual_channels (int): Number of channels for residual connection. + skip_channels (int): Number of channels for skip connection. + aux_channels (int): Local conditioning channels i.e. auxiliary input dimension. + dropout (float): Dropout probability. + dilation (int): Dilation factor. + bias (bool): Whether to add bias parameter in convolution layers. + use_causal_conv (bool): Whether to use use_causal_conv or non-use_causal_conv convolution. + + """ + super(ResidualBlock, self).__init__() + self.dropout = dropout + # no future time stamps available + if use_causal_conv: + padding = (kernel_size - 1) * dilation + else: + assert (kernel_size - 1) % 2 == 0, "Not support even number kernel size." + padding = (kernel_size - 1) // 2 * dilation + self.use_causal_conv = use_causal_conv + + # dilation conv + self.conv = Conv1d(residual_channels, gate_channels, kernel_size, + padding=padding, dilation=dilation, bias=bias) + + # local conditioning + if aux_channels > 0: + self.conv1x1_aux = Conv1d1x1(aux_channels, gate_channels, bias=False) + else: + self.conv1x1_aux = None + + # conv output is split into two groups + gate_out_channels = gate_channels // 2 + self.conv1x1_out = Conv1d1x1(gate_out_channels, residual_channels, bias=bias) + self.conv1x1_skip = Conv1d1x1(gate_out_channels, skip_channels, bias=bias) + + def forward(self, x, c): + """Calculate forward propagation. + + Args: + x (Tensor): Input tensor (B, residual_channels, T). + c (Tensor): Local conditioning auxiliary tensor (B, aux_channels, T). + + Returns: + Tensor: Output tensor for residual connection (B, residual_channels, T). + Tensor: Output tensor for skip connection (B, skip_channels, T). 
+ + """ + residual = x + x = F.dropout(x, p=self.dropout, training=self.training) + x = self.conv(x) + + # remove future time steps if use_causal_conv conv + x = x[:, :, :residual.size(-1)] if self.use_causal_conv else x + + # split into two part for gated activation + splitdim = 1 + xa, xb = x.split(x.size(splitdim) // 2, dim=splitdim) + + # local conditioning + if c is not None: + assert self.conv1x1_aux is not None + c = self.conv1x1_aux(c) + ca, cb = c.split(c.size(splitdim) // 2, dim=splitdim) + xa, xb = xa + ca, xb + cb + + x = torch.tanh(xa) * torch.sigmoid(xb) + + # for skip connection + s = self.conv1x1_skip(x) + + # for residual connection + x = (self.conv1x1_out(x) + residual) * math.sqrt(0.5) + + return x, s diff --git a/modules/parallel_wavegan/layers/residual_stack.py b/modules/parallel_wavegan/layers/residual_stack.py new file mode 100644 index 0000000000000000000000000000000000000000..6e07c8803ad348dd923f6b7c0f7aff14aab9cf78 --- /dev/null +++ b/modules/parallel_wavegan/layers/residual_stack.py @@ -0,0 +1,75 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Tomoki Hayashi +# MIT License (https://opensource.org/licenses/MIT) + +"""Residual stack module in MelGAN.""" + +import torch + +from . import CausalConv1d + + +class ResidualStack(torch.nn.Module): + """Residual stack module introduced in MelGAN.""" + + def __init__(self, + kernel_size=3, + channels=32, + dilation=1, + bias=True, + nonlinear_activation="LeakyReLU", + nonlinear_activation_params={"negative_slope": 0.2}, + pad="ReflectionPad1d", + pad_params={}, + use_causal_conv=False, + ): + """Initialize ResidualStack module. + + Args: + kernel_size (int): Kernel size of dilation convolution layer. + channels (int): Number of channels of convolution layers. + dilation (int): Dilation factor. + bias (bool): Whether to add bias parameter in convolution layers. + nonlinear_activation (str): Activation function module name. + nonlinear_activation_params (dict): Hyperparameters for activation function. + pad (str): Padding function module name before dilated convolution layer. + pad_params (dict): Hyperparameters for padding function. + use_causal_conv (bool): Whether to use causal convolution. + + """ + super(ResidualStack, self).__init__() + + # defile residual stack part + if not use_causal_conv: + assert (kernel_size - 1) % 2 == 0, "Not support even number kernel size." + self.stack = torch.nn.Sequential( + getattr(torch.nn, nonlinear_activation)(**nonlinear_activation_params), + getattr(torch.nn, pad)((kernel_size - 1) // 2 * dilation, **pad_params), + torch.nn.Conv1d(channels, channels, kernel_size, dilation=dilation, bias=bias), + getattr(torch.nn, nonlinear_activation)(**nonlinear_activation_params), + torch.nn.Conv1d(channels, channels, 1, bias=bias), + ) + else: + self.stack = torch.nn.Sequential( + getattr(torch.nn, nonlinear_activation)(**nonlinear_activation_params), + CausalConv1d(channels, channels, kernel_size, dilation=dilation, + bias=bias, pad=pad, pad_params=pad_params), + getattr(torch.nn, nonlinear_activation)(**nonlinear_activation_params), + torch.nn.Conv1d(channels, channels, 1, bias=bias), + ) + + # defile extra layer for skip connection + self.skip_layer = torch.nn.Conv1d(channels, channels, 1, bias=bias) + + def forward(self, c): + """Calculate forward propagation. + + Args: + c (Tensor): Input tensor (B, channels, T). + + Returns: + Tensor: Output tensor (B, chennels, T). 
+ + """ + return self.stack(c) + self.skip_layer(c) diff --git a/modules/parallel_wavegan/layers/tf_layers.py b/modules/parallel_wavegan/layers/tf_layers.py new file mode 100644 index 0000000000000000000000000000000000000000..c0f46bd755c161cda2ac904fe37f3f3c6357a88d --- /dev/null +++ b/modules/parallel_wavegan/layers/tf_layers.py @@ -0,0 +1,129 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 MINH ANH (@dathudeptrai) +# MIT License (https://opensource.org/licenses/MIT) + +"""Tensorflow Layer modules complatible with pytorch.""" + +import tensorflow as tf + + +class TFReflectionPad1d(tf.keras.layers.Layer): + """Tensorflow ReflectionPad1d module.""" + + def __init__(self, padding_size): + """Initialize TFReflectionPad1d module. + + Args: + padding_size (int): Padding size. + + """ + super(TFReflectionPad1d, self).__init__() + self.padding_size = padding_size + + @tf.function + def call(self, x): + """Calculate forward propagation. + + Args: + x (Tensor): Input tensor (B, T, 1, C). + + Returns: + Tensor: Padded tensor (B, T + 2 * padding_size, 1, C). + + """ + return tf.pad(x, [[0, 0], [self.padding_size, self.padding_size], [0, 0], [0, 0]], "REFLECT") + + +class TFConvTranspose1d(tf.keras.layers.Layer): + """Tensorflow ConvTranspose1d module.""" + + def __init__(self, channels, kernel_size, stride, padding): + """Initialize TFConvTranspose1d( module. + + Args: + channels (int): Number of channels. + kernel_size (int): kernel size. + strides (int): Stride width. + padding (str): Padding type ("same" or "valid"). + + """ + super(TFConvTranspose1d, self).__init__() + self.conv1d_transpose = tf.keras.layers.Conv2DTranspose( + filters=channels, + kernel_size=(kernel_size, 1), + strides=(stride, 1), + padding=padding, + ) + + @tf.function + def call(self, x): + """Calculate forward propagation. + + Args: + x (Tensor): Input tensor (B, T, 1, C). + + Returns: + Tensors: Output tensor (B, T', 1, C'). + + """ + x = self.conv1d_transpose(x) + return x + + +class TFResidualStack(tf.keras.layers.Layer): + """Tensorflow ResidualStack module.""" + + def __init__(self, + kernel_size, + channels, + dilation, + bias, + nonlinear_activation, + nonlinear_activation_params, + padding, + ): + """Initialize TFResidualStack module. + + Args: + kernel_size (int): Kernel size. + channles (int): Number of channels. + dilation (int): Dilation ine. + bias (bool): Whether to add bias parameter in convolution layers. + nonlinear_activation (str): Activation function module name. + nonlinear_activation_params (dict): Hyperparameters for activation function. + padding (str): Padding type ("same" or "valid"). + + """ + super(TFResidualStack, self).__init__() + self.block = [ + getattr(tf.keras.layers, nonlinear_activation)(**nonlinear_activation_params), + TFReflectionPad1d(dilation), + tf.keras.layers.Conv2D( + filters=channels, + kernel_size=(kernel_size, 1), + dilation_rate=(dilation, 1), + use_bias=bias, + padding="valid", + ), + getattr(tf.keras.layers, nonlinear_activation)(**nonlinear_activation_params), + tf.keras.layers.Conv2D(filters=channels, kernel_size=1, use_bias=bias) + ] + self.shortcut = tf.keras.layers.Conv2D(filters=channels, kernel_size=1, use_bias=bias) + + @tf.function + def call(self, x): + """Calculate forward propagation. + + Args: + x (Tensor): Input tensor (B, T, 1, C). + + Returns: + Tensor: Output tensor (B, T, 1, C). 
+ + """ + _x = tf.identity(x) + for i, layer in enumerate(self.block): + _x = layer(_x) + shortcut = self.shortcut(x) + return shortcut + _x diff --git a/modules/parallel_wavegan/layers/upsample.py b/modules/parallel_wavegan/layers/upsample.py new file mode 100644 index 0000000000000000000000000000000000000000..18c6397c420a81fadc5320e3a48f3249534decd8 --- /dev/null +++ b/modules/parallel_wavegan/layers/upsample.py @@ -0,0 +1,183 @@ +# -*- coding: utf-8 -*- + +"""Upsampling module. + +This code is modified from https://github.com/r9y9/wavenet_vocoder. + +""" + +import numpy as np +import torch +import torch.nn.functional as F + +from . import Conv1d + + +class Stretch2d(torch.nn.Module): + """Stretch2d module.""" + + def __init__(self, x_scale, y_scale, mode="nearest"): + """Initialize Stretch2d module. + + Args: + x_scale (int): X scaling factor (Time axis in spectrogram). + y_scale (int): Y scaling factor (Frequency axis in spectrogram). + mode (str): Interpolation mode. + + """ + super(Stretch2d, self).__init__() + self.x_scale = x_scale + self.y_scale = y_scale + self.mode = mode + + def forward(self, x): + """Calculate forward propagation. + + Args: + x (Tensor): Input tensor (B, C, F, T). + + Returns: + Tensor: Interpolated tensor (B, C, F * y_scale, T * x_scale), + + """ + return F.interpolate( + x, scale_factor=(self.y_scale, self.x_scale), mode=self.mode) + + +class Conv2d(torch.nn.Conv2d): + """Conv2d module with customized initialization.""" + + def __init__(self, *args, **kwargs): + """Initialize Conv2d module.""" + super(Conv2d, self).__init__(*args, **kwargs) + + def reset_parameters(self): + """Reset parameters.""" + self.weight.data.fill_(1. / np.prod(self.kernel_size)) + if self.bias is not None: + torch.nn.init.constant_(self.bias, 0.0) + + +class UpsampleNetwork(torch.nn.Module): + """Upsampling network module.""" + + def __init__(self, + upsample_scales, + nonlinear_activation=None, + nonlinear_activation_params={}, + interpolate_mode="nearest", + freq_axis_kernel_size=1, + use_causal_conv=False, + ): + """Initialize upsampling network module. + + Args: + upsample_scales (list): List of upsampling scales. + nonlinear_activation (str): Activation function name. + nonlinear_activation_params (dict): Arguments for specified activation function. + interpolate_mode (str): Interpolation mode. + freq_axis_kernel_size (int): Kernel size in the direction of frequency axis. + + """ + super(UpsampleNetwork, self).__init__() + self.use_causal_conv = use_causal_conv + self.up_layers = torch.nn.ModuleList() + for scale in upsample_scales: + # interpolation layer + stretch = Stretch2d(scale, 1, interpolate_mode) + self.up_layers += [stretch] + + # conv layer + assert (freq_axis_kernel_size - 1) % 2 == 0, "Not support even number freq axis kernel size." + freq_axis_padding = (freq_axis_kernel_size - 1) // 2 + kernel_size = (freq_axis_kernel_size, scale * 2 + 1) + if use_causal_conv: + padding = (freq_axis_padding, scale * 2) + else: + padding = (freq_axis_padding, scale) + conv = Conv2d(1, 1, kernel_size=kernel_size, padding=padding, bias=False) + self.up_layers += [conv] + + # nonlinear + if nonlinear_activation is not None: + nonlinear = getattr(torch.nn, nonlinear_activation)(**nonlinear_activation_params) + self.up_layers += [nonlinear] + + def forward(self, c): + """Calculate forward propagation. + + Args: + c : Input tensor (B, C, T). + + Returns: + Tensor: Upsampled tensor (B, C, T'), where T' = T * prod(upsample_scales). 
+ + """ + c = c.unsqueeze(1) # (B, 1, C, T) + for f in self.up_layers: + if self.use_causal_conv and isinstance(f, Conv2d): + c = f(c)[..., :c.size(-1)] + else: + c = f(c) + return c.squeeze(1) # (B, C, T') + + +class ConvInUpsampleNetwork(torch.nn.Module): + """Convolution + upsampling network module.""" + + def __init__(self, + upsample_scales, + nonlinear_activation=None, + nonlinear_activation_params={}, + interpolate_mode="nearest", + freq_axis_kernel_size=1, + aux_channels=80, + aux_context_window=0, + use_causal_conv=False + ): + """Initialize convolution + upsampling network module. + + Args: + upsample_scales (list): List of upsampling scales. + nonlinear_activation (str): Activation function name. + nonlinear_activation_params (dict): Arguments for specified activation function. + mode (str): Interpolation mode. + freq_axis_kernel_size (int): Kernel size in the direction of frequency axis. + aux_channels (int): Number of channels of pre-convolutional layer. + aux_context_window (int): Context window size of the pre-convolutional layer. + use_causal_conv (bool): Whether to use causal structure. + + """ + super(ConvInUpsampleNetwork, self).__init__() + self.aux_context_window = aux_context_window + self.use_causal_conv = use_causal_conv and aux_context_window > 0 + # To capture wide-context information in conditional features + kernel_size = aux_context_window + 1 if use_causal_conv else 2 * aux_context_window + 1 + # NOTE(kan-bayashi): Here do not use padding because the input is already padded + self.conv_in = Conv1d(aux_channels, aux_channels, kernel_size=kernel_size, bias=False) + self.upsample = UpsampleNetwork( + upsample_scales=upsample_scales, + nonlinear_activation=nonlinear_activation, + nonlinear_activation_params=nonlinear_activation_params, + interpolate_mode=interpolate_mode, + freq_axis_kernel_size=freq_axis_kernel_size, + use_causal_conv=use_causal_conv, + ) + + def forward(self, c): + """Calculate forward propagation. + + Args: + c : Input tensor (B, C, T'). + + Returns: + Tensor: Upsampled tensor (B, C, T), + where T = (T' - aux_context_window * 2) * prod(upsample_scales). + + Note: + The length of inputs considers the context window size. + + """ + c_ = self.conv_in(c) + c = c_[:, :, :-self.aux_context_window] if self.use_causal_conv else c_ + return self.upsample(c) diff --git a/modules/parallel_wavegan/losses/__init__.py b/modules/parallel_wavegan/losses/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b03080a907cb5cb4b316ceb74866ddbc406b33bf --- /dev/null +++ b/modules/parallel_wavegan/losses/__init__.py @@ -0,0 +1 @@ +from .stft_loss import * # NOQA diff --git a/modules/parallel_wavegan/losses/stft_loss.py b/modules/parallel_wavegan/losses/stft_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..74d2aa21ad30ba094c406366e652067462f49cd2 --- /dev/null +++ b/modules/parallel_wavegan/losses/stft_loss.py @@ -0,0 +1,153 @@ +# -*- coding: utf-8 -*- + +# Copyright 2019 Tomoki Hayashi +# MIT License (https://opensource.org/licenses/MIT) + +"""STFT-based Loss modules.""" + +import torch +import torch.nn.functional as F + + +def stft(x, fft_size, hop_size, win_length, window): + """Perform STFT and convert to magnitude spectrogram. + + Args: + x (Tensor): Input signal tensor (B, T). + fft_size (int): FFT size. + hop_size (int): Hop size. + win_length (int): Window length. + window (str): Window function type. + + Returns: + Tensor: Magnitude spectrogram (B, #frames, fft_size // 2 + 1). 
+ + """ + x_stft = torch.stft(x, fft_size, hop_size, win_length, window) + real = x_stft[..., 0] + imag = x_stft[..., 1] + + # NOTE(kan-bayashi): clamp is needed to avoid nan or inf + return torch.sqrt(torch.clamp(real ** 2 + imag ** 2, min=1e-7)).transpose(2, 1) + + +class SpectralConvergengeLoss(torch.nn.Module): + """Spectral convergence loss module.""" + + def __init__(self): + """Initilize spectral convergence loss module.""" + super(SpectralConvergengeLoss, self).__init__() + + def forward(self, x_mag, y_mag): + """Calculate forward propagation. + + Args: + x_mag (Tensor): Magnitude spectrogram of predicted signal (B, #frames, #freq_bins). + y_mag (Tensor): Magnitude spectrogram of groundtruth signal (B, #frames, #freq_bins). + + Returns: + Tensor: Spectral convergence loss value. + + """ + return torch.norm(y_mag - x_mag, p="fro") / torch.norm(y_mag, p="fro") + + +class LogSTFTMagnitudeLoss(torch.nn.Module): + """Log STFT magnitude loss module.""" + + def __init__(self): + """Initilize los STFT magnitude loss module.""" + super(LogSTFTMagnitudeLoss, self).__init__() + + def forward(self, x_mag, y_mag): + """Calculate forward propagation. + + Args: + x_mag (Tensor): Magnitude spectrogram of predicted signal (B, #frames, #freq_bins). + y_mag (Tensor): Magnitude spectrogram of groundtruth signal (B, #frames, #freq_bins). + + Returns: + Tensor: Log STFT magnitude loss value. + + """ + return F.l1_loss(torch.log(y_mag), torch.log(x_mag)) + + +class STFTLoss(torch.nn.Module): + """STFT loss module.""" + + def __init__(self, fft_size=1024, shift_size=120, win_length=600, window="hann_window"): + """Initialize STFT loss module.""" + super(STFTLoss, self).__init__() + self.fft_size = fft_size + self.shift_size = shift_size + self.win_length = win_length + self.window = getattr(torch, window)(win_length) + self.spectral_convergenge_loss = SpectralConvergengeLoss() + self.log_stft_magnitude_loss = LogSTFTMagnitudeLoss() + + def forward(self, x, y): + """Calculate forward propagation. + + Args: + x (Tensor): Predicted signal (B, T). + y (Tensor): Groundtruth signal (B, T). + + Returns: + Tensor: Spectral convergence loss value. + Tensor: Log STFT magnitude loss value. + + """ + x_mag = stft(x, self.fft_size, self.shift_size, self.win_length, self.window) + y_mag = stft(y, self.fft_size, self.shift_size, self.win_length, self.window) + sc_loss = self.spectral_convergenge_loss(x_mag, y_mag) + mag_loss = self.log_stft_magnitude_loss(x_mag, y_mag) + + return sc_loss, mag_loss + + +class MultiResolutionSTFTLoss(torch.nn.Module): + """Multi resolution STFT loss module.""" + + def __init__(self, + fft_sizes=[1024, 2048, 512], + hop_sizes=[120, 240, 50], + win_lengths=[600, 1200, 240], + window="hann_window"): + """Initialize Multi resolution STFT loss module. + + Args: + fft_sizes (list): List of FFT sizes. + hop_sizes (list): List of hop sizes. + win_lengths (list): List of window lengths. + window (str): Window function type. + + """ + super(MultiResolutionSTFTLoss, self).__init__() + assert len(fft_sizes) == len(hop_sizes) == len(win_lengths) + self.stft_losses = torch.nn.ModuleList() + for fs, ss, wl in zip(fft_sizes, hop_sizes, win_lengths): + self.stft_losses += [STFTLoss(fs, ss, wl, window)] + + def forward(self, x, y): + """Calculate forward propagation. + + Args: + x (Tensor): Predicted signal (B, T). + y (Tensor): Groundtruth signal (B, T). + + Returns: + Tensor: Multi resolution spectral convergence loss value. + Tensor: Multi resolution log STFT magnitude loss value. 
+ + """ + sc_loss = 0.0 + mag_loss = 0.0 + for f in self.stft_losses: + sc_l, mag_l = f(x, y) + sc_loss += sc_l + mag_loss += mag_l + sc_loss /= len(self.stft_losses) + mag_loss /= len(self.stft_losses) + + return sc_loss, mag_loss diff --git a/modules/parallel_wavegan/models/__init__.py b/modules/parallel_wavegan/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4803ba6b2a0afc8022e756ae5b3f4c7403c3c1bd --- /dev/null +++ b/modules/parallel_wavegan/models/__init__.py @@ -0,0 +1,2 @@ +from .melgan import * # NOQA +from .parallel_wavegan import * # NOQA diff --git a/modules/parallel_wavegan/models/__pycache__/__init__.cpython-38.pyc b/modules/parallel_wavegan/models/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..79b6711d154a174aaad515b78a793617933b0181 Binary files /dev/null and b/modules/parallel_wavegan/models/__pycache__/__init__.cpython-38.pyc differ diff --git a/modules/parallel_wavegan/models/__pycache__/melgan.cpython-38.pyc b/modules/parallel_wavegan/models/__pycache__/melgan.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2b72851f34a348346b89619928b640c57d3a8fe1 Binary files /dev/null and b/modules/parallel_wavegan/models/__pycache__/melgan.cpython-38.pyc differ diff --git a/modules/parallel_wavegan/models/__pycache__/parallel_wavegan.cpython-38.pyc b/modules/parallel_wavegan/models/__pycache__/parallel_wavegan.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2c979d6fcb27ebf7fd73becb75f311f55f27851c Binary files /dev/null and b/modules/parallel_wavegan/models/__pycache__/parallel_wavegan.cpython-38.pyc differ diff --git a/modules/parallel_wavegan/models/__pycache__/source.cpython-38.pyc b/modules/parallel_wavegan/models/__pycache__/source.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..684d62eb5adb24bae290bac8ba022da850a287f4 Binary files /dev/null and b/modules/parallel_wavegan/models/__pycache__/source.cpython-38.pyc differ diff --git a/modules/parallel_wavegan/models/melgan.py b/modules/parallel_wavegan/models/melgan.py new file mode 100644 index 0000000000000000000000000000000000000000..e021ae4817a8c1c97338e61b00b230c881836fd8 --- /dev/null +++ b/modules/parallel_wavegan/models/melgan.py @@ -0,0 +1,427 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Tomoki Hayashi +# MIT License (https://opensource.org/licenses/MIT) + +"""MelGAN Modules.""" + +import logging + +import numpy as np +import torch + +from modules.parallel_wavegan.layers import CausalConv1d +from modules.parallel_wavegan.layers import CausalConvTranspose1d +from modules.parallel_wavegan.layers import ResidualStack + + +class MelGANGenerator(torch.nn.Module): + """MelGAN generator module.""" + + def __init__(self, + in_channels=80, + out_channels=1, + kernel_size=7, + channels=512, + bias=True, + upsample_scales=[8, 8, 2, 2], + stack_kernel_size=3, + stacks=3, + nonlinear_activation="LeakyReLU", + nonlinear_activation_params={"negative_slope": 0.2}, + pad="ReflectionPad1d", + pad_params={}, + use_final_nonlinear_activation=True, + use_weight_norm=True, + use_causal_conv=False, + ): + """Initialize MelGANGenerator module. + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + kernel_size (int): Kernel size of initial and final conv layer. + channels (int): Initial number of channels for conv layer. + bias (bool): Whether to add bias parameter in convolution layers. 
+ upsample_scales (list): List of upsampling scales. + stack_kernel_size (int): Kernel size of dilated conv layers in residual stack. + stacks (int): Number of stacks in a single residual stack. + nonlinear_activation (str): Activation function module name. + nonlinear_activation_params (dict): Hyperparameters for activation function. + pad (str): Padding function module name before dilated convolution layer. + pad_params (dict): Hyperparameters for padding function. + use_final_nonlinear_activation (torch.nn.Module): Activation function for the final layer. + use_weight_norm (bool): Whether to use weight norm. + If set to true, it will be applied to all of the conv layers. + use_causal_conv (bool): Whether to use causal convolution. + + """ + super(MelGANGenerator, self).__init__() + + # check hyper parameters is valid + assert channels >= np.prod(upsample_scales) + assert channels % (2 ** len(upsample_scales)) == 0 + if not use_causal_conv: + assert (kernel_size - 1) % 2 == 0, "Not support even number kernel size." + + # add initial layer + layers = [] + if not use_causal_conv: + layers += [ + getattr(torch.nn, pad)((kernel_size - 1) // 2, **pad_params), + torch.nn.Conv1d(in_channels, channels, kernel_size, bias=bias), + ] + else: + layers += [ + CausalConv1d(in_channels, channels, kernel_size, + bias=bias, pad=pad, pad_params=pad_params), + ] + + for i, upsample_scale in enumerate(upsample_scales): + # add upsampling layer + layers += [getattr(torch.nn, nonlinear_activation)(**nonlinear_activation_params)] + if not use_causal_conv: + layers += [ + torch.nn.ConvTranspose1d( + channels // (2 ** i), + channels // (2 ** (i + 1)), + upsample_scale * 2, + stride=upsample_scale, + padding=upsample_scale // 2 + upsample_scale % 2, + output_padding=upsample_scale % 2, + bias=bias, + ) + ] + else: + layers += [ + CausalConvTranspose1d( + channels // (2 ** i), + channels // (2 ** (i + 1)), + upsample_scale * 2, + stride=upsample_scale, + bias=bias, + ) + ] + + # add residual stack + for j in range(stacks): + layers += [ + ResidualStack( + kernel_size=stack_kernel_size, + channels=channels // (2 ** (i + 1)), + dilation=stack_kernel_size ** j, + bias=bias, + nonlinear_activation=nonlinear_activation, + nonlinear_activation_params=nonlinear_activation_params, + pad=pad, + pad_params=pad_params, + use_causal_conv=use_causal_conv, + ) + ] + + # add final layer + layers += [getattr(torch.nn, nonlinear_activation)(**nonlinear_activation_params)] + if not use_causal_conv: + layers += [ + getattr(torch.nn, pad)((kernel_size - 1) // 2, **pad_params), + torch.nn.Conv1d(channels // (2 ** (i + 1)), out_channels, kernel_size, bias=bias), + ] + else: + layers += [ + CausalConv1d(channels // (2 ** (i + 1)), out_channels, kernel_size, + bias=bias, pad=pad, pad_params=pad_params), + ] + if use_final_nonlinear_activation: + layers += [torch.nn.Tanh()] + + # define the model as a single function + self.melgan = torch.nn.Sequential(*layers) + + # apply weight norm + if use_weight_norm: + self.apply_weight_norm() + + # reset parameters + self.reset_parameters() + + def forward(self, c): + """Calculate forward propagation. + + Args: + c (Tensor): Input tensor (B, channels, T). + + Returns: + Tensor: Output tensor (B, 1, T ** prod(upsample_scales)). 
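+
+        Example:
+            Shape sketch with the default ``upsample_scales=[8, 8, 2, 2]``
+            (illustrative only; the effective hop size is their product, 256):
+
+            >>> import torch
+            >>> g = MelGANGenerator()
+            >>> mel = torch.randn(1, 80, 100)   # (B, in_channels, T)
+            >>> g(mel).shape                    # T * 256
+            torch.Size([1, 1, 25600])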
+ + """ + return self.melgan(c) + + def remove_weight_norm(self): + """Remove weight normalization module from all of the layers.""" + def _remove_weight_norm(m): + try: + logging.debug(f"Weight norm is removed from {m}.") + torch.nn.utils.remove_weight_norm(m) + except ValueError: # this module didn't have weight norm + return + + self.apply(_remove_weight_norm) + + def apply_weight_norm(self): + """Apply weight normalization module from all of the layers.""" + def _apply_weight_norm(m): + if isinstance(m, torch.nn.Conv1d) or isinstance(m, torch.nn.ConvTranspose1d): + torch.nn.utils.weight_norm(m) + logging.debug(f"Weight norm is applied to {m}.") + + self.apply(_apply_weight_norm) + + def reset_parameters(self): + """Reset parameters. + + This initialization follows official implementation manner. + https://github.com/descriptinc/melgan-neurips/blob/master/spec2wav/modules.py + + """ + def _reset_parameters(m): + if isinstance(m, torch.nn.Conv1d) or isinstance(m, torch.nn.ConvTranspose1d): + m.weight.data.normal_(0.0, 0.02) + logging.debug(f"Reset parameters in {m}.") + + self.apply(_reset_parameters) + + +class MelGANDiscriminator(torch.nn.Module): + """MelGAN discriminator module.""" + + def __init__(self, + in_channels=1, + out_channels=1, + kernel_sizes=[5, 3], + channels=16, + max_downsample_channels=1024, + bias=True, + downsample_scales=[4, 4, 4, 4], + nonlinear_activation="LeakyReLU", + nonlinear_activation_params={"negative_slope": 0.2}, + pad="ReflectionPad1d", + pad_params={}, + ): + """Initilize MelGAN discriminator module. + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + kernel_sizes (list): List of two kernel sizes. The prod will be used for the first conv layer, + and the first and the second kernel sizes will be used for the last two layers. + For example if kernel_sizes = [5, 3], the first layer kernel size will be 5 * 3 = 15, + the last two layers' kernel size will be 5 and 3, respectively. + channels (int): Initial number of channels for conv layer. + max_downsample_channels (int): Maximum number of channels for downsampling layers. + bias (bool): Whether to add bias parameter in convolution layers. + downsample_scales (list): List of downsampling scales. + nonlinear_activation (str): Activation function module name. + nonlinear_activation_params (dict): Hyperparameters for activation function. + pad (str): Padding function module name before dilated convolution layer. + pad_params (dict): Hyperparameters for padding function. 
+ + """ + super(MelGANDiscriminator, self).__init__() + self.layers = torch.nn.ModuleList() + + # check kernel size is valid + assert len(kernel_sizes) == 2 + assert kernel_sizes[0] % 2 == 1 + assert kernel_sizes[1] % 2 == 1 + + # add first layer + self.layers += [ + torch.nn.Sequential( + getattr(torch.nn, pad)((np.prod(kernel_sizes) - 1) // 2, **pad_params), + torch.nn.Conv1d(in_channels, channels, np.prod(kernel_sizes), bias=bias), + getattr(torch.nn, nonlinear_activation)(**nonlinear_activation_params), + ) + ] + + # add downsample layers + in_chs = channels + for downsample_scale in downsample_scales: + out_chs = min(in_chs * downsample_scale, max_downsample_channels) + self.layers += [ + torch.nn.Sequential( + torch.nn.Conv1d( + in_chs, out_chs, + kernel_size=downsample_scale * 10 + 1, + stride=downsample_scale, + padding=downsample_scale * 5, + groups=in_chs // 4, + bias=bias, + ), + getattr(torch.nn, nonlinear_activation)(**nonlinear_activation_params), + ) + ] + in_chs = out_chs + + # add final layers + out_chs = min(in_chs * 2, max_downsample_channels) + self.layers += [ + torch.nn.Sequential( + torch.nn.Conv1d( + in_chs, out_chs, kernel_sizes[0], + padding=(kernel_sizes[0] - 1) // 2, + bias=bias, + ), + getattr(torch.nn, nonlinear_activation)(**nonlinear_activation_params), + ) + ] + self.layers += [ + torch.nn.Conv1d( + out_chs, out_channels, kernel_sizes[1], + padding=(kernel_sizes[1] - 1) // 2, + bias=bias, + ), + ] + + def forward(self, x): + """Calculate forward propagation. + + Args: + x (Tensor): Input noise signal (B, 1, T). + + Returns: + List: List of output tensors of each layer. + + """ + outs = [] + for f in self.layers: + x = f(x) + outs += [x] + + return outs + + +class MelGANMultiScaleDiscriminator(torch.nn.Module): + """MelGAN multi-scale discriminator module.""" + + def __init__(self, + in_channels=1, + out_channels=1, + scales=3, + downsample_pooling="AvgPool1d", + # follow the official implementation setting + downsample_pooling_params={ + "kernel_size": 4, + "stride": 2, + "padding": 1, + "count_include_pad": False, + }, + kernel_sizes=[5, 3], + channels=16, + max_downsample_channels=1024, + bias=True, + downsample_scales=[4, 4, 4, 4], + nonlinear_activation="LeakyReLU", + nonlinear_activation_params={"negative_slope": 0.2}, + pad="ReflectionPad1d", + pad_params={}, + use_weight_norm=True, + ): + """Initilize MelGAN multi-scale discriminator module. + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + downsample_pooling (str): Pooling module name for downsampling of the inputs. + downsample_pooling_params (dict): Parameters for the above pooling module. + kernel_sizes (list): List of two kernel sizes. The sum will be used for the first conv layer, + and the first and the second kernel sizes will be used for the last two layers. + channels (int): Initial number of channels for conv layer. + max_downsample_channels (int): Maximum number of channels for downsampling layers. + bias (bool): Whether to add bias parameter in convolution layers. + downsample_scales (list): List of downsampling scales. + nonlinear_activation (str): Activation function module name. + nonlinear_activation_params (dict): Hyperparameters for activation function. + pad (str): Padding function module name before dilated convolution layer. + pad_params (dict): Hyperparameters for padding function. + use_causal_conv (bool): Whether to use causal convolution. 
+ + """ + super(MelGANMultiScaleDiscriminator, self).__init__() + self.discriminators = torch.nn.ModuleList() + + # add discriminators + for _ in range(scales): + self.discriminators += [ + MelGANDiscriminator( + in_channels=in_channels, + out_channels=out_channels, + kernel_sizes=kernel_sizes, + channels=channels, + max_downsample_channels=max_downsample_channels, + bias=bias, + downsample_scales=downsample_scales, + nonlinear_activation=nonlinear_activation, + nonlinear_activation_params=nonlinear_activation_params, + pad=pad, + pad_params=pad_params, + ) + ] + self.pooling = getattr(torch.nn, downsample_pooling)(**downsample_pooling_params) + + # apply weight norm + if use_weight_norm: + self.apply_weight_norm() + + # reset parameters + self.reset_parameters() + + def forward(self, x): + """Calculate forward propagation. + + Args: + x (Tensor): Input noise signal (B, 1, T). + + Returns: + List: List of list of each discriminator outputs, which consists of each layer output tensors. + + """ + outs = [] + for f in self.discriminators: + outs += [f(x)] + x = self.pooling(x) + + return outs + + def remove_weight_norm(self): + """Remove weight normalization module from all of the layers.""" + def _remove_weight_norm(m): + try: + logging.debug(f"Weight norm is removed from {m}.") + torch.nn.utils.remove_weight_norm(m) + except ValueError: # this module didn't have weight norm + return + + self.apply(_remove_weight_norm) + + def apply_weight_norm(self): + """Apply weight normalization module from all of the layers.""" + def _apply_weight_norm(m): + if isinstance(m, torch.nn.Conv1d) or isinstance(m, torch.nn.ConvTranspose1d): + torch.nn.utils.weight_norm(m) + logging.debug(f"Weight norm is applied to {m}.") + + self.apply(_apply_weight_norm) + + def reset_parameters(self): + """Reset parameters. + + This initialization follows official implementation manner. 
+ https://github.com/descriptinc/melgan-neurips/blob/master/spec2wav/modules.py + + """ + def _reset_parameters(m): + if isinstance(m, torch.nn.Conv1d) or isinstance(m, torch.nn.ConvTranspose1d): + m.weight.data.normal_(0.0, 0.02) + logging.debug(f"Reset parameters in {m}.") + + self.apply(_reset_parameters) diff --git a/modules/parallel_wavegan/models/parallel_wavegan.py b/modules/parallel_wavegan/models/parallel_wavegan.py new file mode 100644 index 0000000000000000000000000000000000000000..c63b59f67aa48342179415c1d1beac68574a5498 --- /dev/null +++ b/modules/parallel_wavegan/models/parallel_wavegan.py @@ -0,0 +1,434 @@ +# -*- coding: utf-8 -*- + +# Copyright 2019 Tomoki Hayashi +# MIT License (https://opensource.org/licenses/MIT) + +"""Parallel WaveGAN Modules.""" + +import logging +import math + +import torch +from torch import nn + +from modules.parallel_wavegan.layers import Conv1d +from modules.parallel_wavegan.layers import Conv1d1x1 +from modules.parallel_wavegan.layers import ResidualBlock +from modules.parallel_wavegan.layers import upsample +from modules.parallel_wavegan import models + + +class ParallelWaveGANGenerator(torch.nn.Module): + """Parallel WaveGAN Generator module.""" + + def __init__(self, + in_channels=1, + out_channels=1, + kernel_size=3, + layers=30, + stacks=3, + residual_channels=64, + gate_channels=128, + skip_channels=64, + aux_channels=80, + aux_context_window=2, + dropout=0.0, + bias=True, + use_weight_norm=True, + use_causal_conv=False, + upsample_conditional_features=True, + upsample_net="ConvInUpsampleNetwork", + upsample_params={"upsample_scales": [4, 4, 4, 4]}, + use_pitch_embed=False, + ): + """Initialize Parallel WaveGAN Generator module. + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + kernel_size (int): Kernel size of dilated convolution. + layers (int): Number of residual block layers. + stacks (int): Number of stacks i.e., dilation cycles. + residual_channels (int): Number of channels in residual conv. + gate_channels (int): Number of channels in gated conv. + skip_channels (int): Number of channels in skip conv. + aux_channels (int): Number of channels for auxiliary feature conv. + aux_context_window (int): Context window size for auxiliary feature. + dropout (float): Dropout rate. 0.0 means no dropout applied. + bias (bool): Whether to use bias parameter in conv layer. + use_weight_norm (bool): Whether to use weight norm. + If set to true, it will be applied to all of the conv layers. + use_causal_conv (bool): Whether to use causal structure. + upsample_conditional_features (bool): Whether to use upsampling network. + upsample_net (str): Upsampling network architecture. + upsample_params (dict): Upsampling network parameters. 
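+
+        Example:
+            Rough inference-style call (illustrative; the noise length must
+            already match the upsampled conditioning, i.e.
+            T = (T' - 2 * aux_context_window) * prod(upsample_scales)):
+
+            >>> import torch
+            >>> g = ParallelWaveGANGenerator()
+            >>> c = torch.randn(1, 80, 104)             # mel frames, T' = 104
+            >>> z = torch.randn(1, 1, (104 - 4) * 256)  # noise (B, 1, T)
+            >>> g(z, c=c).shape
+            torch.Size([1, 1, 25600])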
+ + """ + super(ParallelWaveGANGenerator, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.aux_channels = aux_channels + self.layers = layers + self.stacks = stacks + self.kernel_size = kernel_size + + # check the number of layers and stacks + assert layers % stacks == 0 + layers_per_stack = layers // stacks + + # define first convolution + self.first_conv = Conv1d1x1(in_channels, residual_channels, bias=True) + + # define conv + upsampling network + if upsample_conditional_features: + upsample_params.update({ + "use_causal_conv": use_causal_conv, + }) + if upsample_net == "MelGANGenerator": + assert aux_context_window == 0 + upsample_params.update({ + "use_weight_norm": False, # not to apply twice + "use_final_nonlinear_activation": False, + }) + self.upsample_net = getattr(models, upsample_net)(**upsample_params) + else: + if upsample_net == "ConvInUpsampleNetwork": + upsample_params.update({ + "aux_channels": aux_channels, + "aux_context_window": aux_context_window, + }) + self.upsample_net = getattr(upsample, upsample_net)(**upsample_params) + else: + self.upsample_net = None + + # define residual blocks + self.conv_layers = torch.nn.ModuleList() + for layer in range(layers): + dilation = 2 ** (layer % layers_per_stack) + conv = ResidualBlock( + kernel_size=kernel_size, + residual_channels=residual_channels, + gate_channels=gate_channels, + skip_channels=skip_channels, + aux_channels=aux_channels, + dilation=dilation, + dropout=dropout, + bias=bias, + use_causal_conv=use_causal_conv, + ) + self.conv_layers += [conv] + + # define output layers + self.last_conv_layers = torch.nn.ModuleList([ + torch.nn.ReLU(inplace=True), + Conv1d1x1(skip_channels, skip_channels, bias=True), + torch.nn.ReLU(inplace=True), + Conv1d1x1(skip_channels, out_channels, bias=True), + ]) + + self.use_pitch_embed = use_pitch_embed + if use_pitch_embed: + self.pitch_embed = nn.Embedding(300, aux_channels, 0) + self.c_proj = nn.Linear(2 * aux_channels, aux_channels) + + # apply weight norm + if use_weight_norm: + self.apply_weight_norm() + + def forward(self, x, c=None, pitch=None, **kwargs): + """Calculate forward propagation. + + Args: + x (Tensor): Input noise signal (B, C_in, T). + c (Tensor): Local conditioning auxiliary features (B, C ,T'). + pitch (Tensor): Local conditioning pitch (B, T'). 
+ + Returns: + Tensor: Output tensor (B, C_out, T) + + """ + # perform upsampling + if c is not None and self.upsample_net is not None: + if self.use_pitch_embed: + p = self.pitch_embed(pitch) + c = self.c_proj(torch.cat([c.transpose(1, 2), p], -1)).transpose(1, 2) + c = self.upsample_net(c) + assert c.size(-1) == x.size(-1), (c.size(-1), x.size(-1)) + + # encode to hidden representation + x = self.first_conv(x) + skips = 0 + for f in self.conv_layers: + x, h = f(x, c) + skips += h + skips *= math.sqrt(1.0 / len(self.conv_layers)) + + # apply final layers + x = skips + for f in self.last_conv_layers: + x = f(x) + + return x + + def remove_weight_norm(self): + """Remove weight normalization module from all of the layers.""" + def _remove_weight_norm(m): + try: + logging.debug(f"Weight norm is removed from {m}.") + torch.nn.utils.remove_weight_norm(m) + except ValueError: # this module didn't have weight norm + return + + self.apply(_remove_weight_norm) + + def apply_weight_norm(self): + """Apply weight normalization module from all of the layers.""" + def _apply_weight_norm(m): + if isinstance(m, torch.nn.Conv1d) or isinstance(m, torch.nn.Conv2d): + torch.nn.utils.weight_norm(m) + logging.debug(f"Weight norm is applied to {m}.") + + self.apply(_apply_weight_norm) + + @staticmethod + def _get_receptive_field_size(layers, stacks, kernel_size, + dilation=lambda x: 2 ** x): + assert layers % stacks == 0 + layers_per_cycle = layers // stacks + dilations = [dilation(i % layers_per_cycle) for i in range(layers)] + return (kernel_size - 1) * sum(dilations) + 1 + + @property + def receptive_field_size(self): + """Return receptive field size.""" + return self._get_receptive_field_size(self.layers, self.stacks, self.kernel_size) + + +class ParallelWaveGANDiscriminator(torch.nn.Module): + """Parallel WaveGAN Discriminator module.""" + + def __init__(self, + in_channels=1, + out_channels=1, + kernel_size=3, + layers=10, + conv_channels=64, + dilation_factor=1, + nonlinear_activation="LeakyReLU", + nonlinear_activation_params={"negative_slope": 0.2}, + bias=True, + use_weight_norm=True, + ): + """Initialize Parallel WaveGAN Discriminator module. + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + kernel_size (int): Number of output channels. + layers (int): Number of conv layers. + conv_channels (int): Number of chnn layers. + dilation_factor (int): Dilation factor. For example, if dilation_factor = 2, + the dilation will be 2, 4, 8, ..., and so on. + nonlinear_activation (str): Nonlinear function after each conv. + nonlinear_activation_params (dict): Nonlinear function parameters + bias (bool): Whether to use bias parameter in conv. + use_weight_norm (bool) Whether to use weight norm. + If set to true, it will be applied to all of the conv layers. + + """ + super(ParallelWaveGANDiscriminator, self).__init__() + assert (kernel_size - 1) % 2 == 0, "Not support even number kernel size." + assert dilation_factor > 0, "Dilation factor must be > 0." 
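+        # NOTE: with dilation_factor == 1 the dilations grow linearly
+        # (1, 1, 2, 3, ..., layers - 2); otherwise they grow geometrically
+        # (1, dilation_factor, dilation_factor ** 2, ...).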
+ self.conv_layers = torch.nn.ModuleList() + conv_in_channels = in_channels + for i in range(layers - 1): + if i == 0: + dilation = 1 + else: + dilation = i if dilation_factor == 1 else dilation_factor ** i + conv_in_channels = conv_channels + padding = (kernel_size - 1) // 2 * dilation + conv_layer = [ + Conv1d(conv_in_channels, conv_channels, + kernel_size=kernel_size, padding=padding, + dilation=dilation, bias=bias), + getattr(torch.nn, nonlinear_activation)(inplace=True, **nonlinear_activation_params) + ] + self.conv_layers += conv_layer + padding = (kernel_size - 1) // 2 + last_conv_layer = Conv1d( + conv_in_channels, out_channels, + kernel_size=kernel_size, padding=padding, bias=bias) + self.conv_layers += [last_conv_layer] + + # apply weight norm + if use_weight_norm: + self.apply_weight_norm() + + def forward(self, x): + """Calculate forward propagation. + + Args: + x (Tensor): Input noise signal (B, 1, T). + + Returns: + Tensor: Output tensor (B, 1, T) + + """ + for f in self.conv_layers: + x = f(x) + return x + + def apply_weight_norm(self): + """Apply weight normalization module from all of the layers.""" + def _apply_weight_norm(m): + if isinstance(m, torch.nn.Conv1d) or isinstance(m, torch.nn.Conv2d): + torch.nn.utils.weight_norm(m) + logging.debug(f"Weight norm is applied to {m}.") + + self.apply(_apply_weight_norm) + + def remove_weight_norm(self): + """Remove weight normalization module from all of the layers.""" + def _remove_weight_norm(m): + try: + logging.debug(f"Weight norm is removed from {m}.") + torch.nn.utils.remove_weight_norm(m) + except ValueError: # this module didn't have weight norm + return + + self.apply(_remove_weight_norm) + + +class ResidualParallelWaveGANDiscriminator(torch.nn.Module): + """Parallel WaveGAN Discriminator module.""" + + def __init__(self, + in_channels=1, + out_channels=1, + kernel_size=3, + layers=30, + stacks=3, + residual_channels=64, + gate_channels=128, + skip_channels=64, + dropout=0.0, + bias=True, + use_weight_norm=True, + use_causal_conv=False, + nonlinear_activation="LeakyReLU", + nonlinear_activation_params={"negative_slope": 0.2}, + ): + """Initialize Parallel WaveGAN Discriminator module. + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + kernel_size (int): Kernel size of dilated convolution. + layers (int): Number of residual block layers. + stacks (int): Number of stacks i.e., dilation cycles. + residual_channels (int): Number of channels in residual conv. + gate_channels (int): Number of channels in gated conv. + skip_channels (int): Number of channels in skip conv. + dropout (float): Dropout rate. 0.0 means no dropout applied. + bias (bool): Whether to use bias parameter in conv. + use_weight_norm (bool): Whether to use weight norm. + If set to true, it will be applied to all of the conv layers. + use_causal_conv (bool): Whether to use causal structure. + nonlinear_activation_params (dict): Nonlinear function parameters + + """ + super(ResidualParallelWaveGANDiscriminator, self).__init__() + assert (kernel_size - 1) % 2 == 0, "Not support even number kernel size." 
+ + self.in_channels = in_channels + self.out_channels = out_channels + self.layers = layers + self.stacks = stacks + self.kernel_size = kernel_size + + # check the number of layers and stacks + assert layers % stacks == 0 + layers_per_stack = layers // stacks + + # define first convolution + self.first_conv = torch.nn.Sequential( + Conv1d1x1(in_channels, residual_channels, bias=True), + getattr(torch.nn, nonlinear_activation)( + inplace=True, **nonlinear_activation_params), + ) + + # define residual blocks + self.conv_layers = torch.nn.ModuleList() + for layer in range(layers): + dilation = 2 ** (layer % layers_per_stack) + conv = ResidualBlock( + kernel_size=kernel_size, + residual_channels=residual_channels, + gate_channels=gate_channels, + skip_channels=skip_channels, + aux_channels=-1, + dilation=dilation, + dropout=dropout, + bias=bias, + use_causal_conv=use_causal_conv, + ) + self.conv_layers += [conv] + + # define output layers + self.last_conv_layers = torch.nn.ModuleList([ + getattr(torch.nn, nonlinear_activation)( + inplace=True, **nonlinear_activation_params), + Conv1d1x1(skip_channels, skip_channels, bias=True), + getattr(torch.nn, nonlinear_activation)( + inplace=True, **nonlinear_activation_params), + Conv1d1x1(skip_channels, out_channels, bias=True), + ]) + + # apply weight norm + if use_weight_norm: + self.apply_weight_norm() + + def forward(self, x): + """Calculate forward propagation. + + Args: + x (Tensor): Input noise signal (B, 1, T). + + Returns: + Tensor: Output tensor (B, 1, T) + + """ + x = self.first_conv(x) + + skips = 0 + for f in self.conv_layers: + x, h = f(x, None) + skips += h + skips *= math.sqrt(1.0 / len(self.conv_layers)) + + # apply final layers + x = skips + for f in self.last_conv_layers: + x = f(x) + return x + + def apply_weight_norm(self): + """Apply weight normalization module from all of the layers.""" + def _apply_weight_norm(m): + if isinstance(m, torch.nn.Conv1d) or isinstance(m, torch.nn.Conv2d): + torch.nn.utils.weight_norm(m) + logging.debug(f"Weight norm is applied to {m}.") + + self.apply(_apply_weight_norm) + + def remove_weight_norm(self): + """Remove weight normalization module from all of the layers.""" + def _remove_weight_norm(m): + try: + logging.debug(f"Weight norm is removed from {m}.") + torch.nn.utils.remove_weight_norm(m) + except ValueError: # this module didn't have weight norm + return + + self.apply(_remove_weight_norm) diff --git a/modules/parallel_wavegan/models/source.py b/modules/parallel_wavegan/models/source.py new file mode 100644 index 0000000000000000000000000000000000000000..f2a006e53c0e2194036fd08ea9d6ed4d9a10d6cf --- /dev/null +++ b/modules/parallel_wavegan/models/source.py @@ -0,0 +1,538 @@ +import torch +import numpy as np +import sys +import torch.nn.functional as torch_nn_func + + +class SineGen(torch.nn.Module): + """ Definition of sine generator + SineGen(samp_rate, harmonic_num = 0, + sine_amp = 0.1, noise_std = 0.003, + voiced_threshold = 0, + flag_for_pulse=False) + + samp_rate: sampling rate in Hz + harmonic_num: number of harmonic overtones (default 0) + sine_amp: amplitude of sine-wavefrom (default 0.1) + noise_std: std of Gaussian noise (default 0.003) + voiced_thoreshold: F0 threshold for U/V classification (default 0) + flag_for_pulse: this SinGen is used inside PulseGen (default False) + + Note: when flag_for_pulse is True, the first time step of a voiced + segment is always sin(np.pi) or cos(0) + """ + + def __init__(self, samp_rate, harmonic_num=0, + sine_amp=0.1, noise_std=0.003, + 
voiced_threshold=0, + flag_for_pulse=False): + super(SineGen, self).__init__() + self.sine_amp = sine_amp + self.noise_std = noise_std + self.harmonic_num = harmonic_num + self.dim = self.harmonic_num + 1 + self.sampling_rate = samp_rate + self.voiced_threshold = voiced_threshold + self.flag_for_pulse = flag_for_pulse + + def _f02uv(self, f0): + # generate uv signal + uv = torch.ones_like(f0) + uv = uv * (f0 > self.voiced_threshold) + return uv + + def _f02sine(self, f0_values): + """ f0_values: (batchsize, length, dim) + where dim indicates fundamental tone and overtones + """ + # convert to F0 in rad. The interger part n can be ignored + # because 2 * np.pi * n doesn't affect phase + rad_values = (f0_values / self.sampling_rate) % 1 + + # initial phase noise (no noise for fundamental component) + rand_ini = torch.rand(f0_values.shape[0], f0_values.shape[2], \ + device=f0_values.device) + rand_ini[:, 0] = 0 + rad_values[:, 0, :] = rad_values[:, 0, :] + rand_ini + + # instantanouse phase sine[t] = sin(2*pi \sum_i=1 ^{t} rad) + if not self.flag_for_pulse: + # for normal case + + # To prevent torch.cumsum numerical overflow, + # it is necessary to add -1 whenever \sum_k=1^n rad_value_k > 1. + # Buffer tmp_over_one_idx indicates the time step to add -1. + # This will not change F0 of sine because (x-1) * 2*pi = x * 2*pi + tmp_over_one = torch.cumsum(rad_values, 1) % 1 + tmp_over_one_idx = (tmp_over_one[:, 1:, :] - + tmp_over_one[:, :-1, :]) < 0 + cumsum_shift = torch.zeros_like(rad_values) + cumsum_shift[:, 1:, :] = tmp_over_one_idx * -1.0 + + sines = torch.sin(torch.cumsum(rad_values + cumsum_shift, dim=1) + * 2 * np.pi) + else: + # If necessary, make sure that the first time step of every + # voiced segments is sin(pi) or cos(0) + # This is used for pulse-train generation + + # identify the last time step in unvoiced segments + uv = self._f02uv(f0_values) + uv_1 = torch.roll(uv, shifts=-1, dims=1) + uv_1[:, -1, :] = 1 + u_loc = (uv < 1) * (uv_1 > 0) + + # get the instantanouse phase + tmp_cumsum = torch.cumsum(rad_values, dim=1) + # different batch needs to be processed differently + for idx in range(f0_values.shape[0]): + temp_sum = tmp_cumsum[idx, u_loc[idx, :, 0], :] + temp_sum[1:, :] = temp_sum[1:, :] - temp_sum[0:-1, :] + # stores the accumulation of i.phase within + # each voiced segments + tmp_cumsum[idx, :, :] = 0 + tmp_cumsum[idx, u_loc[idx, :, 0], :] = temp_sum + + # rad_values - tmp_cumsum: remove the accumulation of i.phase + # within the previous voiced segment. + i_phase = torch.cumsum(rad_values - tmp_cumsum, dim=1) + + # get the sines + sines = torch.cos(i_phase * 2 * np.pi) + return sines + + def forward(self, f0): + """ sine_tensor, uv = forward(f0) + input F0: tensor(batchsize=1, length, dim=1) + f0 for unvoiced steps should be 0 + output sine_tensor: tensor(batchsize=1, length, dim) + output uv: tensor(batchsize=1, length, 1) + """ + with torch.no_grad(): + f0_buf = torch.zeros(f0.shape[0], f0.shape[1], self.dim, + device=f0.device) + # fundamental component + f0_buf[:, :, 0] = f0[:, :, 0] + for idx in np.arange(self.harmonic_num): + # idx + 2: the (idx+1)-th overtone, (idx+2)-th harmonic + f0_buf[:, :, idx + 1] = f0_buf[:, :, 0] * (idx + 2) + + # generate sine waveforms + sine_waves = self._f02sine(f0_buf) * self.sine_amp + + # generate uv signal + # uv = torch.ones(f0.shape) + # uv = uv * (f0 > self.voiced_threshold) + uv = self._f02uv(f0) + + # noise: for unvoiced should be similar to sine_amp + # std = self.sine_amp/3 -> max value ~ self.sine_amp + # . 
for voiced regions is self.noise_std + noise_amp = uv * self.noise_std + (1 - uv) * self.sine_amp / 3 + noise = noise_amp * torch.randn_like(sine_waves) + + # first: set the unvoiced part to 0 by uv + # then: additive noise + sine_waves = sine_waves * uv + noise + return sine_waves, uv, noise + + +class PulseGen(torch.nn.Module): + """ Definition of Pulse train generator + + There are many ways to implement pulse generator. + Here, PulseGen is based on SinGen. For a perfect + """ + def __init__(self, samp_rate, pulse_amp = 0.1, + noise_std = 0.003, voiced_threshold = 0): + super(PulseGen, self).__init__() + self.pulse_amp = pulse_amp + self.sampling_rate = samp_rate + self.voiced_threshold = voiced_threshold + self.noise_std = noise_std + self.l_sinegen = SineGen(self.sampling_rate, harmonic_num=0, \ + sine_amp=self.pulse_amp, noise_std=0, \ + voiced_threshold=self.voiced_threshold, \ + flag_for_pulse=True) + + def forward(self, f0): + """ Pulse train generator + pulse_train, uv = forward(f0) + input F0: tensor(batchsize=1, length, dim=1) + f0 for unvoiced steps should be 0 + output pulse_train: tensor(batchsize=1, length, dim) + output uv: tensor(batchsize=1, length, 1) + + Note: self.l_sine doesn't make sure that the initial phase of + a voiced segment is np.pi, the first pulse in a voiced segment + may not be at the first time step within a voiced segment + """ + with torch.no_grad(): + sine_wav, uv, noise = self.l_sinegen(f0) + + # sine without additive noise + pure_sine = sine_wav - noise + + # step t corresponds to a pulse if + # sine[t] > sine[t+1] & sine[t] > sine[t-1] + # & sine[t-1], sine[t+1], and sine[t] are voiced + # or + # sine[t] is voiced, sine[t-1] is unvoiced + # we use torch.roll to simulate sine[t+1] and sine[t-1] + sine_1 = torch.roll(pure_sine, shifts=1, dims=1) + uv_1 = torch.roll(uv, shifts=1, dims=1) + uv_1[:, 0, :] = 0 + sine_2 = torch.roll(pure_sine, shifts=-1, dims=1) + uv_2 = torch.roll(uv, shifts=-1, dims=1) + uv_2[:, -1, :] = 0 + + loc = (pure_sine > sine_1) * (pure_sine > sine_2) \ + * (uv_1 > 0) * (uv_2 > 0) * (uv > 0) \ + + (uv_1 < 1) * (uv > 0) + + # pulse train without noise + pulse_train = pure_sine * loc + + # additive noise to pulse train + # note that noise from sinegen is zero in voiced regions + pulse_noise = torch.randn_like(pure_sine) * self.noise_std + + # with additive noise on pulse, and unvoiced regions + pulse_train += pulse_noise * loc + pulse_noise * (1 - uv) + return pulse_train, sine_wav, uv, pulse_noise + + +class SignalsConv1d(torch.nn.Module): + """ Filtering input signal with time invariant filter + Note: FIRFilter conducted filtering given fixed FIR weight + SignalsConv1d convolves two signals + Note: this is based on torch.nn.functional.conv1d + + """ + + def __init__(self): + super(SignalsConv1d, self).__init__() + + def forward(self, signal, system_ir): + """ output = forward(signal, system_ir) + + signal: (batchsize, length1, dim) + system_ir: (length2, dim) + + output: (batchsize, length1, dim) + """ + if signal.shape[-1] != system_ir.shape[-1]: + print("Error: SignalsConv1d expects shape:") + print("signal (batchsize, length1, dim)") + print("system_id (batchsize, length2, dim)") + print("But received signal: {:s}".format(str(signal.shape))) + print(" system_ir: {:s}".format(str(system_ir.shape))) + sys.exit(1) + padding_length = system_ir.shape[0] - 1 + groups = signal.shape[-1] + + # pad signal on the left + signal_pad = torch_nn_func.pad(signal.permute(0, 2, 1), \ + (padding_length, 0)) + # prepare system impulse response 
as (dim, 1, length2) + # also flip the impulse response + ir = torch.flip(system_ir.unsqueeze(1).permute(2, 1, 0), \ + dims=[2]) + # convolute + output = torch_nn_func.conv1d(signal_pad, ir, groups=groups) + return output.permute(0, 2, 1) + + +class CyclicNoiseGen_v1(torch.nn.Module): + """ CyclicnoiseGen_v1 + Cyclic noise with a single parameter of beta. + Pytorch v1 implementation assumes f_t is also fixed + """ + + def __init__(self, samp_rate, + noise_std=0.003, voiced_threshold=0): + super(CyclicNoiseGen_v1, self).__init__() + self.samp_rate = samp_rate + self.noise_std = noise_std + self.voiced_threshold = voiced_threshold + + self.l_pulse = PulseGen(samp_rate, pulse_amp=1.0, + noise_std=noise_std, + voiced_threshold=voiced_threshold) + self.l_conv = SignalsConv1d() + + def noise_decay(self, beta, f0mean): + """ decayed_noise = noise_decay(beta, f0mean) + decayed_noise = n[t]exp(-t * f_mean / beta / samp_rate) + + beta: (dim=1) or (batchsize=1, 1, dim=1) + f0mean (batchsize=1, 1, dim=1) + + decayed_noise (batchsize=1, length, dim=1) + """ + with torch.no_grad(): + # exp(-1.0 n / T) < 0.01 => n > -log(0.01)*T = 4.60*T + # truncate the noise when decayed by -40 dB + length = 4.6 * self.samp_rate / f0mean + length = length.int() + time_idx = torch.arange(0, length, device=beta.device) + time_idx = time_idx.unsqueeze(0).unsqueeze(2) + time_idx = time_idx.repeat(beta.shape[0], 1, beta.shape[2]) + + noise = torch.randn(time_idx.shape, device=beta.device) + + # due to Pytorch implementation, use f0_mean as the f0 factor + decay = torch.exp(-time_idx * f0mean / beta / self.samp_rate) + return noise * self.noise_std * decay + + def forward(self, f0s, beta): + """ Producde cyclic-noise + """ + # pulse train + pulse_train, sine_wav, uv, noise = self.l_pulse(f0s) + pure_pulse = pulse_train - noise + + # decayed_noise (length, dim=1) + if (uv < 1).all(): + # all unvoiced + cyc_noise = torch.zeros_like(sine_wav) + else: + f0mean = f0s[uv > 0].mean() + + decayed_noise = self.noise_decay(beta, f0mean)[0, :, :] + # convolute + cyc_noise = self.l_conv(pure_pulse, decayed_noise) + + # add noise in invoiced segments + cyc_noise = cyc_noise + noise * (1.0 - uv) + return cyc_noise, pulse_train, sine_wav, uv, noise + + +class SineGen(torch.nn.Module): + """ Definition of sine generator + SineGen(samp_rate, harmonic_num = 0, + sine_amp = 0.1, noise_std = 0.003, + voiced_threshold = 0, + flag_for_pulse=False) + + samp_rate: sampling rate in Hz + harmonic_num: number of harmonic overtones (default 0) + sine_amp: amplitude of sine-wavefrom (default 0.1) + noise_std: std of Gaussian noise (default 0.003) + voiced_thoreshold: F0 threshold for U/V classification (default 0) + flag_for_pulse: this SinGen is used inside PulseGen (default False) + + Note: when flag_for_pulse is True, the first time step of a voiced + segment is always sin(np.pi) or cos(0) + """ + + def __init__(self, samp_rate, harmonic_num=0, + sine_amp=0.1, noise_std=0.003, + voiced_threshold=0, + flag_for_pulse=False): + super(SineGen, self).__init__() + self.sine_amp = sine_amp + self.noise_std = noise_std + self.harmonic_num = harmonic_num + self.dim = self.harmonic_num + 1 + self.sampling_rate = samp_rate + self.voiced_threshold = voiced_threshold + self.flag_for_pulse = flag_for_pulse + + def _f02uv(self, f0): + # generate uv signal + uv = torch.ones_like(f0) + uv = uv * (f0 > self.voiced_threshold) + return uv + + def _f02sine(self, f0_values): + """ f0_values: (batchsize, length, dim) + where dim indicates fundamental tone and overtones + 
""" + # convert to F0 in rad. The interger part n can be ignored + # because 2 * np.pi * n doesn't affect phase + rad_values = (f0_values / self.sampling_rate) % 1 + + # initial phase noise (no noise for fundamental component) + rand_ini = torch.rand(f0_values.shape[0], f0_values.shape[2], \ + device=f0_values.device) + rand_ini[:, 0] = 0 + rad_values[:, 0, :] = rad_values[:, 0, :] + rand_ini + + # instantanouse phase sine[t] = sin(2*pi \sum_i=1 ^{t} rad) + if not self.flag_for_pulse: + # for normal case + + # To prevent torch.cumsum numerical overflow, + # it is necessary to add -1 whenever \sum_k=1^n rad_value_k > 1. + # Buffer tmp_over_one_idx indicates the time step to add -1. + # This will not change F0 of sine because (x-1) * 2*pi = x * 2*pi + tmp_over_one = torch.cumsum(rad_values, 1) % 1 + tmp_over_one_idx = (tmp_over_one[:, 1:, :] - + tmp_over_one[:, :-1, :]) < 0 + cumsum_shift = torch.zeros_like(rad_values) + cumsum_shift[:, 1:, :] = tmp_over_one_idx * -1.0 + + sines = torch.sin(torch.cumsum(rad_values + cumsum_shift, dim=1) + * 2 * np.pi) + else: + # If necessary, make sure that the first time step of every + # voiced segments is sin(pi) or cos(0) + # This is used for pulse-train generation + + # identify the last time step in unvoiced segments + uv = self._f02uv(f0_values) + uv_1 = torch.roll(uv, shifts=-1, dims=1) + uv_1[:, -1, :] = 1 + u_loc = (uv < 1) * (uv_1 > 0) + + # get the instantanouse phase + tmp_cumsum = torch.cumsum(rad_values, dim=1) + # different batch needs to be processed differently + for idx in range(f0_values.shape[0]): + temp_sum = tmp_cumsum[idx, u_loc[idx, :, 0], :] + temp_sum[1:, :] = temp_sum[1:, :] - temp_sum[0:-1, :] + # stores the accumulation of i.phase within + # each voiced segments + tmp_cumsum[idx, :, :] = 0 + tmp_cumsum[idx, u_loc[idx, :, 0], :] = temp_sum + + # rad_values - tmp_cumsum: remove the accumulation of i.phase + # within the previous voiced segment. + i_phase = torch.cumsum(rad_values - tmp_cumsum, dim=1) + + # get the sines + sines = torch.cos(i_phase * 2 * np.pi) + return sines + + def forward(self, f0): + """ sine_tensor, uv = forward(f0) + input F0: tensor(batchsize=1, length, dim=1) + f0 for unvoiced steps should be 0 + output sine_tensor: tensor(batchsize=1, length, dim) + output uv: tensor(batchsize=1, length, 1) + """ + with torch.no_grad(): + f0_buf = torch.zeros(f0.shape[0], f0.shape[1], self.dim, \ + device=f0.device) + # fundamental component + f0_buf[:, :, 0] = f0[:, :, 0] + for idx in np.arange(self.harmonic_num): + # idx + 2: the (idx+1)-th overtone, (idx+2)-th harmonic + f0_buf[:, :, idx + 1] = f0_buf[:, :, 0] * (idx + 2) + + # generate sine waveforms + sine_waves = self._f02sine(f0_buf) * self.sine_amp + + # generate uv signal + # uv = torch.ones(f0.shape) + # uv = uv * (f0 > self.voiced_threshold) + uv = self._f02uv(f0) + + # noise: for unvoiced should be similar to sine_amp + # std = self.sine_amp/3 -> max value ~ self.sine_amp + # . 
for voiced regions is self.noise_std + noise_amp = uv * self.noise_std + (1 - uv) * self.sine_amp / 3 + noise = noise_amp * torch.randn_like(sine_waves) + + # first: set the unvoiced part to 0 by uv + # then: additive noise + sine_waves = sine_waves * uv + noise + return sine_waves, uv, noise + + +class SourceModuleCycNoise_v1(torch.nn.Module): + """ SourceModuleCycNoise_v1 + SourceModule(sampling_rate, noise_std=0.003, voiced_threshod=0) + sampling_rate: sampling_rate in Hz + + noise_std: std of Gaussian noise (default: 0.003) + voiced_threshold: threshold to set U/V given F0 (default: 0) + + cyc, noise, uv = SourceModuleCycNoise_v1(F0_upsampled, beta) + F0_upsampled (batchsize, length, 1) + beta (1) + cyc (batchsize, length, 1) + noise (batchsize, length, 1) + uv (batchsize, length, 1) + """ + + def __init__(self, sampling_rate, noise_std=0.003, voiced_threshod=0): + super(SourceModuleCycNoise_v1, self).__init__() + self.sampling_rate = sampling_rate + self.noise_std = noise_std + self.l_cyc_gen = CyclicNoiseGen_v1(sampling_rate, noise_std, + voiced_threshod) + + def forward(self, f0_upsamped, beta): + """ + cyc, noise, uv = SourceModuleCycNoise_v1(F0, beta) + F0_upsampled (batchsize, length, 1) + beta (1) + cyc (batchsize, length, 1) + noise (batchsize, length, 1) + uv (batchsize, length, 1) + """ + # source for harmonic branch + cyc, pulse, sine, uv, add_noi = self.l_cyc_gen(f0_upsamped, beta) + + # source for noise branch, in the same shape as uv + noise = torch.randn_like(uv) * self.noise_std / 3 + return cyc, noise, uv + + +class SourceModuleHnNSF(torch.nn.Module): + """ SourceModule for hn-nsf + SourceModule(sampling_rate, harmonic_num=0, sine_amp=0.1, + add_noise_std=0.003, voiced_threshod=0) + sampling_rate: sampling_rate in Hz + harmonic_num: number of harmonic above F0 (default: 0) + sine_amp: amplitude of sine source signal (default: 0.1) + add_noise_std: std of additive Gaussian noise (default: 0.003) + note that amplitude of noise in unvoiced is decided + by sine_amp + voiced_threshold: threhold to set U/V given F0 (default: 0) + + Sine_source, noise_source = SourceModuleHnNSF(F0_sampled) + F0_sampled (batchsize, length, 1) + Sine_source (batchsize, length, 1) + noise_source (batchsize, length 1) + uv (batchsize, length, 1) + """ + + def __init__(self, sampling_rate, harmonic_num=0, sine_amp=0.1, + add_noise_std=0.003, voiced_threshod=0): + super(SourceModuleHnNSF, self).__init__() + + self.sine_amp = sine_amp + self.noise_std = add_noise_std + + # to produce sine waveforms + self.l_sin_gen = SineGen(sampling_rate, harmonic_num, + sine_amp, add_noise_std, voiced_threshod) + + # to merge source harmonics into a single excitation + self.l_linear = torch.nn.Linear(harmonic_num + 1, 1) + self.l_tanh = torch.nn.Tanh() + + def forward(self, x): + """ + Sine_source, noise_source = SourceModuleHnNSF(F0_sampled) + F0_sampled (batchsize, length, 1) + Sine_source (batchsize, length, 1) + noise_source (batchsize, length 1) + """ + # source for harmonic branch + sine_wavs, uv, _ = self.l_sin_gen(x) + sine_merge = self.l_tanh(self.l_linear(sine_wavs)) + + # source for noise branch, in the same shape as uv + noise = torch.randn_like(uv) * self.sine_amp / 3 + return sine_merge, noise, uv + + +if __name__ == '__main__': + source = SourceModuleCycNoise_v1(24000) + x = torch.randn(16, 25600, 1) + + diff --git a/modules/parallel_wavegan/optimizers/__init__.py b/modules/parallel_wavegan/optimizers/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..a0e0c5932838281e912079e5784d84d43444a61a --- /dev/null +++ b/modules/parallel_wavegan/optimizers/__init__.py @@ -0,0 +1,2 @@ +from torch.optim import * # NOQA +from .radam import * # NOQA diff --git a/modules/parallel_wavegan/optimizers/radam.py b/modules/parallel_wavegan/optimizers/radam.py new file mode 100644 index 0000000000000000000000000000000000000000..e805d7e34921bee436e1e7fd9e1f753c7609186b --- /dev/null +++ b/modules/parallel_wavegan/optimizers/radam.py @@ -0,0 +1,91 @@ +# -*- coding: utf-8 -*- + +"""RAdam optimizer. + +This code is drived from https://github.com/LiyuanLucasLiu/RAdam. +""" + +import math +import torch + +from torch.optim.optimizer import Optimizer + + +class RAdam(Optimizer): + """Rectified Adam optimizer.""" + + def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0): + """Initilize RAdam optimizer.""" + defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay) + self.buffer = [[None, None, None] for ind in range(10)] + super(RAdam, self).__init__(params, defaults) + + def __setstate__(self, state): + """Set state.""" + super(RAdam, self).__setstate__(state) + + def step(self, closure=None): + """Run one step.""" + loss = None + if closure is not None: + loss = closure() + + for group in self.param_groups: + + for p in group['params']: + if p.grad is None: + continue + grad = p.grad.data.float() + if grad.is_sparse: + raise RuntimeError('RAdam does not support sparse gradients') + + p_data_fp32 = p.data.float() + + state = self.state[p] + + if len(state) == 0: + state['step'] = 0 + state['exp_avg'] = torch.zeros_like(p_data_fp32) + state['exp_avg_sq'] = torch.zeros_like(p_data_fp32) + else: + state['exp_avg'] = state['exp_avg'].type_as(p_data_fp32) + state['exp_avg_sq'] = state['exp_avg_sq'].type_as(p_data_fp32) + + exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] + beta1, beta2 = group['betas'] + + exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) + exp_avg.mul_(beta1).add_(1 - beta1, grad) + + state['step'] += 1 + buffered = self.buffer[int(state['step'] % 10)] + if state['step'] == buffered[0]: + N_sma, step_size = buffered[1], buffered[2] + else: + buffered[0] = state['step'] + beta2_t = beta2 ** state['step'] + N_sma_max = 2 / (1 - beta2) - 1 + N_sma = N_sma_max - 2 * state['step'] * beta2_t / (1 - beta2_t) + buffered[1] = N_sma + + # more conservative since it's an approximated value + if N_sma >= 5: + step_size = math.sqrt( + (1 - beta2_t) * (N_sma - 4) / (N_sma_max - 4) * (N_sma - 2) / N_sma * N_sma_max / (N_sma_max - 2)) / (1 - beta1 ** state['step']) # NOQA + else: + step_size = 1.0 / (1 - beta1 ** state['step']) + buffered[2] = step_size + + if group['weight_decay'] != 0: + p_data_fp32.add_(-group['weight_decay'] * group['lr'], p_data_fp32) + + # more conservative since it's an approximated value + if N_sma >= 5: + denom = exp_avg_sq.sqrt().add_(group['eps']) + p_data_fp32.addcdiv_(-step_size * group['lr'], exp_avg, denom) + else: + p_data_fp32.add_(-step_size * group['lr'], exp_avg) + + p.data.copy_(p_data_fp32) + + return loss diff --git a/modules/parallel_wavegan/stft_loss.py b/modules/parallel_wavegan/stft_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..229e6c777dc9ec7f710842d1e648dba1189ec8b4 --- /dev/null +++ b/modules/parallel_wavegan/stft_loss.py @@ -0,0 +1,100 @@ +# -*- coding: utf-8 -*- + +# Copyright 2019 Tomoki Hayashi +# MIT License (https://opensource.org/licenses/MIT) + +"""STFT-based Loss modules.""" +import 
librosa +import torch + +from modules.parallel_wavegan.losses import LogSTFTMagnitudeLoss, SpectralConvergengeLoss, stft + + +class STFTLoss(torch.nn.Module): + """STFT loss module.""" + + def __init__(self, fft_size=1024, shift_size=120, win_length=600, window="hann_window", + use_mel_loss=False): + """Initialize STFT loss module.""" + super(STFTLoss, self).__init__() + self.fft_size = fft_size + self.shift_size = shift_size + self.win_length = win_length + self.window = getattr(torch, window)(win_length) + self.spectral_convergenge_loss = SpectralConvergengeLoss() + self.log_stft_magnitude_loss = LogSTFTMagnitudeLoss() + self.use_mel_loss = use_mel_loss + self.mel_basis = None + + def forward(self, x, y): + """Calculate forward propagation. + + Args: + x (Tensor): Predicted signal (B, T). + y (Tensor): Groundtruth signal (B, T). + + Returns: + Tensor: Spectral convergence loss value. + Tensor: Log STFT magnitude loss value. + + """ + x_mag = stft(x, self.fft_size, self.shift_size, self.win_length, self.window) + y_mag = stft(y, self.fft_size, self.shift_size, self.win_length, self.window) + if self.use_mel_loss: + if self.mel_basis is None: + self.mel_basis = torch.from_numpy(librosa.filters.mel(22050, self.fft_size, 80)).cuda().T + x_mag = x_mag @ self.mel_basis + y_mag = y_mag @ self.mel_basis + + sc_loss = self.spectral_convergenge_loss(x_mag, y_mag) + mag_loss = self.log_stft_magnitude_loss(x_mag, y_mag) + + return sc_loss, mag_loss + + +class MultiResolutionSTFTLoss(torch.nn.Module): + """Multi resolution STFT loss module.""" + + def __init__(self, + fft_sizes=[1024, 2048, 512], + hop_sizes=[120, 240, 50], + win_lengths=[600, 1200, 240], + window="hann_window", + use_mel_loss=False): + """Initialize Multi resolution STFT loss module. + + Args: + fft_sizes (list): List of FFT sizes. + hop_sizes (list): List of hop sizes. + win_lengths (list): List of window lengths. + window (str): Window function type. + + """ + super(MultiResolutionSTFTLoss, self).__init__() + assert len(fft_sizes) == len(hop_sizes) == len(win_lengths) + self.stft_losses = torch.nn.ModuleList() + for fs, ss, wl in zip(fft_sizes, hop_sizes, win_lengths): + self.stft_losses += [STFTLoss(fs, ss, wl, window, use_mel_loss)] + + def forward(self, x, y): + """Calculate forward propagation. + + Args: + x (Tensor): Predicted signal (B, T). + y (Tensor): Groundtruth signal (B, T). + + Returns: + Tensor: Multi resolution spectral convergence loss value. + Tensor: Multi resolution log STFT magnitude loss value. 
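+
+        Example:
+            Same interface as the loss in ``losses/stft_loss.py``, plus the
+            optional mel-warped variant (sketch only; ``use_mel_loss=True``
+            builds a 22.05 kHz mel basis and calls ``.cuda()`` as written):
+
+            >>> import torch
+            >>> criterion = MultiResolutionSTFTLoss(use_mel_loss=False)
+            >>> sc, mag = criterion(torch.randn(2, 8192), torch.randn(2, 8192))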
+ + """ + sc_loss = 0.0 + mag_loss = 0.0 + for f in self.stft_losses: + sc_l, mag_l = f(x, y) + sc_loss += sc_l + mag_loss += mag_l + sc_loss /= len(self.stft_losses) + mag_loss /= len(self.stft_losses) + + return sc_loss, mag_loss diff --git a/modules/parallel_wavegan/utils/__init__.py b/modules/parallel_wavegan/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e8fa95a020706b5412c3959fbf6e5980019c0d5f --- /dev/null +++ b/modules/parallel_wavegan/utils/__init__.py @@ -0,0 +1 @@ +from .utils import * # NOQA diff --git a/modules/parallel_wavegan/utils/__pycache__/__init__.cpython-38.pyc b/modules/parallel_wavegan/utils/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..856a1203a49a5f131e2e1f110f701ee988cc1b5f Binary files /dev/null and b/modules/parallel_wavegan/utils/__pycache__/__init__.cpython-38.pyc differ diff --git a/modules/parallel_wavegan/utils/__pycache__/utils.cpython-38.pyc b/modules/parallel_wavegan/utils/__pycache__/utils.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1f3736354ac834dab8860599354f3a0d2726b613 Binary files /dev/null and b/modules/parallel_wavegan/utils/__pycache__/utils.cpython-38.pyc differ diff --git a/modules/parallel_wavegan/utils/utils.py b/modules/parallel_wavegan/utils/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..6a30e803723d224d9825a753baf4cd91c94c1677 --- /dev/null +++ b/modules/parallel_wavegan/utils/utils.py @@ -0,0 +1,169 @@ +# -*- coding: utf-8 -*- + +# Copyright 2019 Tomoki Hayashi +# MIT License (https://opensource.org/licenses/MIT) + +"""Utility functions.""" + +import fnmatch +import logging +import os +import sys + +import h5py +import numpy as np + + +def find_files(root_dir, query="*.wav", include_root_dir=True): + """Find files recursively. + + Args: + root_dir (str): Root root_dir to find. + query (str): Query to find. + include_root_dir (bool): If False, root_dir name is not included. + + Returns: + list: List of found filenames. + + """ + files = [] + for root, dirnames, filenames in os.walk(root_dir, followlinks=True): + for filename in fnmatch.filter(filenames, query): + files.append(os.path.join(root, filename)) + if not include_root_dir: + files = [file_.replace(root_dir + "/", "") for file_ in files] + + return files + + +def read_hdf5(hdf5_name, hdf5_path): + """Read hdf5 dataset. + + Args: + hdf5_name (str): Filename of hdf5 file. + hdf5_path (str): Dataset name in hdf5 file. + + Return: + any: Dataset values. + + """ + if not os.path.exists(hdf5_name): + logging.error(f"There is no such a hdf5 file ({hdf5_name}).") + sys.exit(1) + + hdf5_file = h5py.File(hdf5_name, "r") + + if hdf5_path not in hdf5_file: + logging.error(f"There is no such a data in hdf5 file. ({hdf5_path})") + sys.exit(1) + + hdf5_data = hdf5_file[hdf5_path][()] + hdf5_file.close() + + return hdf5_data + + +def write_hdf5(hdf5_name, hdf5_path, write_data, is_overwrite=True): + """Write dataset to hdf5. + + Args: + hdf5_name (str): Hdf5 dataset filename. + hdf5_path (str): Dataset path in hdf5. + write_data (ndarray): Data to write. + is_overwrite (bool): Whether to overwrite dataset. 
+ + """ + # convert to numpy array + write_data = np.array(write_data) + + # check folder existence + folder_name, _ = os.path.split(hdf5_name) + if not os.path.exists(folder_name) and len(folder_name) != 0: + os.makedirs(folder_name) + + # check hdf5 existence + if os.path.exists(hdf5_name): + # if already exists, open with r+ mode + hdf5_file = h5py.File(hdf5_name, "r+") + # check dataset existence + if hdf5_path in hdf5_file: + if is_overwrite: + logging.warning("Dataset in hdf5 file already exists. " + "recreate dataset in hdf5.") + hdf5_file.__delitem__(hdf5_path) + else: + logging.error("Dataset in hdf5 file already exists. " + "if you want to overwrite, please set is_overwrite = True.") + hdf5_file.close() + sys.exit(1) + else: + # if not exists, open with w mode + hdf5_file = h5py.File(hdf5_name, "w") + + # write data to hdf5 + hdf5_file.create_dataset(hdf5_path, data=write_data) + hdf5_file.flush() + hdf5_file.close() + + +class HDF5ScpLoader(object): + """Loader class for a fests.scp file of hdf5 file. + + Examples: + key1 /some/path/a.h5:feats + key2 /some/path/b.h5:feats + key3 /some/path/c.h5:feats + key4 /some/path/d.h5:feats + ... + >>> loader = HDF5ScpLoader("hdf5.scp") + >>> array = loader["key1"] + + key1 /some/path/a.h5 + key2 /some/path/b.h5 + key3 /some/path/c.h5 + key4 /some/path/d.h5 + ... + >>> loader = HDF5ScpLoader("hdf5.scp", "feats") + >>> array = loader["key1"] + + """ + + def __init__(self, feats_scp, default_hdf5_path="feats"): + """Initialize HDF5 scp loader. + + Args: + feats_scp (str): Kaldi-style feats.scp file with hdf5 format. + default_hdf5_path (str): Path in hdf5 file. If the scp contain the info, not used. + + """ + self.default_hdf5_path = default_hdf5_path + with open(feats_scp, encoding='utf-8') as f: + lines = [line.replace("\n", "") for line in f.readlines()] + self.data = {} + for line in lines: + key, value = line.split() + self.data[key] = value + + def get_path(self, key): + """Get hdf5 file path for a given key.""" + return self.data[key] + + def __getitem__(self, key): + """Get ndarray for a given key.""" + p = self.data[key] + if ":" in p: + return read_hdf5(*p.split(":")) + else: + return read_hdf5(p, self.default_hdf5_path) + + def __len__(self): + """Return the length of the scp file.""" + return len(self.data) + + def __iter__(self): + """Return the iterator of the scp file.""" + return iter(self.data) + + def keys(self): + """Return the keys of the scp file.""" + return self.data.keys() diff --git a/network/diff/__pycache__/candidate_decoder.cpython-38.pyc b/network/diff/__pycache__/candidate_decoder.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..23ffbe728d4ac88fd1970a2f0d6d517ee2d86000 Binary files /dev/null and b/network/diff/__pycache__/candidate_decoder.cpython-38.pyc differ diff --git a/network/diff/__pycache__/diffusion.cpython-38.pyc b/network/diff/__pycache__/diffusion.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cfaeb03a6cda242ad7e94ef2a5a757000f702308 Binary files /dev/null and b/network/diff/__pycache__/diffusion.cpython-38.pyc differ diff --git a/network/diff/__pycache__/net.cpython-38.pyc b/network/diff/__pycache__/net.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9e44334bf72eae2fedc62291a535a5c5ac4f0be7 Binary files /dev/null and b/network/diff/__pycache__/net.cpython-38.pyc differ diff --git a/network/diff/candidate_decoder.py b/network/diff/candidate_decoder.py new file mode 100644 index 
0000000000000000000000000000000000000000..bccb47aad0285cab1e7aaca759294c8a1270849c --- /dev/null +++ b/network/diff/candidate_decoder.py @@ -0,0 +1,98 @@ +from modules.fastspeech.tts_modules import FastspeechDecoder +# from modules.fastspeech.fast_tacotron import DecoderRNN +# from modules.fastspeech.speedy_speech.speedy_speech import ConvBlocks +# from modules.fastspeech.conformer.conformer import ConformerDecoder +import torch +from torch.nn import functional as F +import torch.nn as nn +import math +from utils.hparams import hparams +from modules.commons.common_layers import Mish +Linear = nn.Linear + +class SinusoidalPosEmb(nn.Module): + def __init__(self, dim): + super().__init__() + self.dim = dim + + def forward(self, x): + device = x.device + half_dim = self.dim // 2 + emb = math.log(10000) / (half_dim - 1) + emb = torch.exp(torch.arange(half_dim, device=device) * -emb) + emb = x[:, None] * emb[None, :] + emb = torch.cat((emb.sin(), emb.cos()), dim=-1) + return emb + + +def Conv1d(*args, **kwargs): + layer = nn.Conv1d(*args, **kwargs) + nn.init.kaiming_normal_(layer.weight) + return layer + + +class FFT(FastspeechDecoder): # unused, because DiffSinger only uses FastspeechEncoder + # NOTE: this part of script is *isolated* from other scripts, which means + # it may not be compatible with the current version. + + def __init__(self, hidden_size=None, num_layers=None, kernel_size=None, num_heads=None): + super().__init__(hidden_size, num_layers, kernel_size, num_heads=num_heads) + dim = hparams['residual_channels'] + self.input_projection = Conv1d(hparams['audio_num_mel_bins'], dim, 1) + self.diffusion_embedding = SinusoidalPosEmb(dim) + self.mlp = nn.Sequential( + nn.Linear(dim, dim * 4), + Mish(), + nn.Linear(dim * 4, dim) + ) + self.get_mel_out = Linear(hparams['hidden_size'], 80, bias=True) + self.get_decode_inp = Linear(hparams['hidden_size'] + dim + dim, + hparams['hidden_size']) # hs + dim + 80 -> hs + + def forward(self, spec, diffusion_step, cond, padding_mask=None, attn_mask=None, return_hiddens=False): + """ + :param spec: [B, 1, 80, T] + :param diffusion_step: [B, 1] + :param cond: [B, M, T] + :return: + """ + x = spec[:, 0] + x = self.input_projection(x).permute([0, 2, 1]) # [B, T, residual_channel] + diffusion_step = self.diffusion_embedding(diffusion_step) + diffusion_step = self.mlp(diffusion_step) # [B, dim] + cond = cond.permute([0, 2, 1]) # [B, T, M] + + seq_len = cond.shape[1] # [T_mel] + time_embed = diffusion_step[:, None, :] # [B, 1, dim] + time_embed = time_embed.repeat([1, seq_len, 1]) # # [B, T, dim] + + decoder_inp = torch.cat([x, cond, time_embed], dim=-1) # [B, T, dim + H + dim] + decoder_inp = self.get_decode_inp(decoder_inp) # [B, T, H] + x = decoder_inp + + ''' + Required x: [B, T, C] + :return: [B, T, C] or [L, B, T, C] + ''' + padding_mask = x.abs().sum(-1).eq(0).data if padding_mask is None else padding_mask + nonpadding_mask_TB = 1 - padding_mask.transpose(0, 1).float()[:, :, None] # [T, B, 1] + if self.use_pos_embed: + positions = self.pos_embed_alpha * self.embed_positions(x[..., 0]) + x = x + positions + x = F.dropout(x, p=self.dropout, training=self.training) + # B x T x C -> T x B x C + x = x.transpose(0, 1) * nonpadding_mask_TB + hiddens = [] + for layer in self.layers: + x = layer(x, encoder_padding_mask=padding_mask, attn_mask=attn_mask) * nonpadding_mask_TB + hiddens.append(x) + if self.use_last_norm: + x = self.layer_norm(x) * nonpadding_mask_TB + if return_hiddens: + x = torch.stack(hiddens, 0) # [L, T, B, C] + x = x.transpose(1, 2) # [L, 
B, T, C] + else: + x = x.transpose(0, 1) # [B, T, C] + + x = self.get_mel_out(x).permute([0, 2, 1]) # [B, 80, T] + return x[:, None, :, :] \ No newline at end of file diff --git a/network/diff/diffusion.py b/network/diff/diffusion.py new file mode 100644 index 0000000000000000000000000000000000000000..0ff05212929a970974a59735735d704be83ecd3c --- /dev/null +++ b/network/diff/diffusion.py @@ -0,0 +1,332 @@ +from collections import deque +from functools import partial +from inspect import isfunction +import numpy as np +import torch +import torch.nn.functional as F +from torch import nn +from tqdm import tqdm + +from modules.fastspeech.fs2 import FastSpeech2 +# from modules.diffsinger_midi.fs2 import FastSpeech2MIDI +from utils.hparams import hparams +from training.train_pipeline import Batch2Loss + + +def exists(x): + return x is not None + + +def default(val, d): + if exists(val): + return val + return d() if isfunction(d) else d + + +# gaussian diffusion trainer class + +def extract(a, t, x_shape): + b, *_ = t.shape + out = a.gather(-1, t) + return out.reshape(b, *((1,) * (len(x_shape) - 1))) + + +def noise_like(shape, device, repeat=False): + repeat_noise = lambda: torch.randn((1, *shape[1:]), device=device).repeat(shape[0], *((1,) * (len(shape) - 1))) + noise = lambda: torch.randn(shape, device=device) + return repeat_noise() if repeat else noise() + + +def linear_beta_schedule(timesteps, max_beta=hparams.get('max_beta', 0.01)): + """ + linear schedule + """ + betas = np.linspace(1e-4, max_beta, timesteps) + return betas + + +def cosine_beta_schedule(timesteps, s=0.008): + """ + cosine schedule + as proposed in https://openreview.net/forum?id=-NEXDKk8gZ + """ + steps = timesteps + 1 + x = np.linspace(0, steps, steps) + alphas_cumprod = np.cos(((x / steps) + s) / (1 + s) * np.pi * 0.5) ** 2 + alphas_cumprod = alphas_cumprod / alphas_cumprod[0] + betas = 1 - (alphas_cumprod[1:] / alphas_cumprod[:-1]) + return np.clip(betas, a_min=0, a_max=0.999) + + +beta_schedule = { + "cosine": cosine_beta_schedule, + "linear": linear_beta_schedule, +} + + +class GaussianDiffusion(nn.Module): + def __init__(self, phone_encoder, out_dims, denoise_fn, + timesteps=1000, K_step=1000, loss_type=hparams.get('diff_loss_type', 'l1'), betas=None, spec_min=None, + spec_max=None): + super().__init__() + self.denoise_fn = denoise_fn + # if hparams.get('use_midi') is not None and hparams['use_midi']: + # self.fs2 = FastSpeech2MIDI(phone_encoder, out_dims) + # else: + self.fs2 = FastSpeech2(phone_encoder, out_dims) + self.mel_bins = out_dims + + if exists(betas): + betas = betas.detach().cpu().numpy() if isinstance(betas, torch.Tensor) else betas + else: + if 'schedule_type' in hparams.keys(): + betas = beta_schedule[hparams['schedule_type']](timesteps) + else: + betas = cosine_beta_schedule(timesteps) + + alphas = 1. 
- betas + alphas_cumprod = np.cumprod(alphas, axis=0) + alphas_cumprod_prev = np.append(1., alphas_cumprod[:-1]) + + timesteps, = betas.shape + self.num_timesteps = int(timesteps) + self.K_step = K_step + self.loss_type = loss_type + + self.noise_list = deque(maxlen=4) + + to_torch = partial(torch.tensor, dtype=torch.float32) + + self.register_buffer('betas', to_torch(betas)) + self.register_buffer('alphas_cumprod', to_torch(alphas_cumprod)) + self.register_buffer('alphas_cumprod_prev', to_torch(alphas_cumprod_prev)) + + # calculations for diffusion q(x_t | x_{t-1}) and others + self.register_buffer('sqrt_alphas_cumprod', to_torch(np.sqrt(alphas_cumprod))) + self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1. - alphas_cumprod))) + self.register_buffer('log_one_minus_alphas_cumprod', to_torch(np.log(1. - alphas_cumprod))) + self.register_buffer('sqrt_recip_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod))) + self.register_buffer('sqrt_recipm1_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod - 1))) + + # calculations for posterior q(x_{t-1} | x_t, x_0) + posterior_variance = betas * (1. - alphas_cumprod_prev) / (1. - alphas_cumprod) + # above: equal to 1. / (1. / (1. - alpha_cumprod_tm1) + alpha_t / beta_t) + self.register_buffer('posterior_variance', to_torch(posterior_variance)) + # below: log calculation clipped because the posterior variance is 0 at the beginning of the diffusion chain + self.register_buffer('posterior_log_variance_clipped', to_torch(np.log(np.maximum(posterior_variance, 1e-20)))) + self.register_buffer('posterior_mean_coef1', to_torch( + betas * np.sqrt(alphas_cumprod_prev) / (1. - alphas_cumprod))) + self.register_buffer('posterior_mean_coef2', to_torch( + (1. - alphas_cumprod_prev) * np.sqrt(alphas) / (1. - alphas_cumprod))) + + self.register_buffer('spec_min', torch.FloatTensor(spec_min)[None, None, :hparams['keep_bins']]) + self.register_buffer('spec_max', torch.FloatTensor(spec_max)[None, None, :hparams['keep_bins']]) + + def q_mean_variance(self, x_start, t): + mean = extract(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start + variance = extract(1. - self.alphas_cumprod, t, x_start.shape) + log_variance = extract(self.log_one_minus_alphas_cumprod, t, x_start.shape) + return mean, variance, log_variance + + def predict_start_from_noise(self, x_t, t, noise): + return ( + extract(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t - + extract(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) * noise + ) + + def q_posterior(self, x_start, x_t, t): + posterior_mean = ( + extract(self.posterior_mean_coef1, t, x_t.shape) * x_start + + extract(self.posterior_mean_coef2, t, x_t.shape) * x_t + ) + posterior_variance = extract(self.posterior_variance, t, x_t.shape) + posterior_log_variance_clipped = extract(self.posterior_log_variance_clipped, t, x_t.shape) + return posterior_mean, posterior_variance, posterior_log_variance_clipped + + def p_mean_variance(self, x, t, cond, clip_denoised: bool): + noise_pred = self.denoise_fn(x, t, cond=cond) + x_recon = self.predict_start_from_noise(x, t=t, noise=noise_pred) + + if clip_denoised: + x_recon.clamp_(-1., 1.) 
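+        # x_recon is the estimate of x_0 recovered from the predicted noise; clamping keeps it
+        # inside the normalized spectrogram range [-1, 1]. The posterior q(x_{t-1} | x_t, x_0)
+        # computed next provides the mean/variance that p_sample() draws from.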
+ + model_mean, posterior_variance, posterior_log_variance = self.q_posterior(x_start=x_recon, x_t=x, t=t) + return model_mean, posterior_variance, posterior_log_variance + + @torch.no_grad() + def p_sample(self, x, t, cond, clip_denoised=True, repeat_noise=False): + b, *_, device = *x.shape, x.device + model_mean, _, model_log_variance = self.p_mean_variance(x=x, t=t, cond=cond, clip_denoised=clip_denoised) + noise = noise_like(x.shape, device, repeat_noise) + # no noise when t == 0 + nonzero_mask = (1 - (t == 0).float()).reshape(b, *((1,) * (len(x.shape) - 1))) + return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise + + @torch.no_grad() + def p_sample_plms(self, x, t, interval, cond, clip_denoised=True, repeat_noise=False): + """ + Use the PLMS method from [Pseudo Numerical Methods for Diffusion Models on Manifolds](https://arxiv.org/abs/2202.09778). + """ + + def get_x_pred(x, noise_t, t): + a_t = extract(self.alphas_cumprod, t, x.shape) + a_prev = extract(self.alphas_cumprod, torch.max(t-interval, torch.zeros_like(t)), x.shape) + a_t_sq, a_prev_sq = a_t.sqrt(), a_prev.sqrt() + + x_delta = (a_prev - a_t) * ((1 / (a_t_sq * (a_t_sq + a_prev_sq))) * x - 1 / (a_t_sq * (((1 - a_prev) * a_t).sqrt() + ((1 - a_t) * a_prev).sqrt())) * noise_t) + x_pred = x + x_delta + + return x_pred + + noise_list = self.noise_list + noise_pred = self.denoise_fn(x, t, cond=cond) + + if len(noise_list) == 0: + x_pred = get_x_pred(x, noise_pred, t) + noise_pred_prev = self.denoise_fn(x_pred, max(t-interval, 0), cond=cond) + noise_pred_prime = (noise_pred + noise_pred_prev) / 2 + elif len(noise_list) == 1: + noise_pred_prime = (3 * noise_pred - noise_list[-1]) / 2 + elif len(noise_list) == 2: + noise_pred_prime = (23 * noise_pred - 16 * noise_list[-1] + 5 * noise_list[-2]) / 12 + elif len(noise_list) >= 3: + noise_pred_prime = (55 * noise_pred - 59 * noise_list[-1] + 37 * noise_list[-2] - 9 * noise_list[-3]) / 24 + + x_prev = get_x_pred(x, noise_pred_prime, t) + noise_list.append(noise_pred) + + return x_prev + + def q_sample(self, x_start, t, noise=None): + noise = default(noise, lambda: torch.randn_like(x_start)) + return ( + extract(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start + + extract(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise + ) + + def p_losses(self, x_start, t, cond, noise=None, nonpadding=None): + noise = default(noise, lambda: torch.randn_like(x_start)) + + x_noisy = self.q_sample(x_start=x_start, t=t, noise=noise) + x_recon = self.denoise_fn(x_noisy, t, cond) + + if self.loss_type == 'l1': + if nonpadding is not None: + loss = ((noise - x_recon).abs() * nonpadding.unsqueeze(1)).mean() + else: + # print('are you sure w/o nonpadding?') + loss = (noise - x_recon).abs().mean() + + elif self.loss_type == 'l2': + loss = F.mse_loss(noise, x_recon) + else: + raise NotImplementedError() + + return loss + + def forward(self, hubert, mel2ph=None, spk_embed=None, + ref_mels=None, f0=None, uv=None, energy=None, infer=False, **kwargs): + ''' + conditioning diffusion, use fastspeech2 encoder output as the condition + ''' + ret = self.fs2(hubert, mel2ph, spk_embed, None, f0, uv, energy, + skip_decoder=True, infer=infer, **kwargs) + cond = ret['decoder_inp'].transpose(1, 2) + b, *_, device = *hubert.shape, hubert.device + + if not infer: + Batch2Loss.module4( + self.p_losses, + self.norm_spec(ref_mels), cond, ret, self.K_step, b, device + ) + else: + ''' + ret['fs2_mel'] = ret['mel_out'] + fs2_mels = ret['mel_out'] + t = self.K_step + fs2_mels = 
self.norm_spec(fs2_mels) + fs2_mels = fs2_mels.transpose(1, 2)[:, None, :, :] + x = self.q_sample(x_start=fs2_mels, t=torch.tensor([t - 1], device=device).long()) + if hparams.get('gaussian_start') is not None and hparams['gaussian_start']: + print('===> gaussion start.') + shape = (cond.shape[0], 1, self.mel_bins, cond.shape[2]) + x = torch.randn(shape, device=device) + ''' + if 'use_gt_mel' in kwargs.keys() and kwargs['use_gt_mel']: + t =kwargs['add_noise_step'] + print('===>using ground truth mel as start, please make sure parameter "key==0" !') + fs2_mels = ref_mels + fs2_mels = self.norm_spec(fs2_mels) + fs2_mels = fs2_mels.transpose(1, 2)[:, None, :, :] + x = self.q_sample(x_start=fs2_mels, t=torch.tensor([t - 1], device=device).long()) + # for i in tqdm(reversed(range(0, t)), desc='sample time step', total=t): + # x = self.p_sample(x, torch.full((b,), i, device=device, dtype=torch.long), cond) + else: + t = self.K_step + #print('===> gaussion start.') + shape = (cond.shape[0], 1, self.mel_bins, cond.shape[2]) + x = torch.randn(shape, device=device) + if hparams.get('pndm_speedup') and hparams['pndm_speedup'] > 1: + self.noise_list = deque(maxlen=4) + iteration_interval =hparams['pndm_speedup'] + for i in tqdm(reversed(range(0, t, iteration_interval)), desc='sample time step', + total=t // iteration_interval): + x = self.p_sample_plms(x, torch.full((b,), i, device=device, dtype=torch.long), iteration_interval, + cond) + else: + for i in tqdm(reversed(range(0, t)), desc='sample time step', total=t): + x = self.p_sample(x, torch.full((b,), i, device=device, dtype=torch.long), cond) + x = x[:, 0].transpose(1, 2) + if mel2ph is not None: # for singing + ret['mel_out'] = self.denorm_spec(x) * ((mel2ph > 0).float()[:, :, None]) + else: + ret['mel_out'] = self.denorm_spec(x) + return ret + + def norm_spec(self, x): + return (x - self.spec_min) / (self.spec_max - self.spec_min) * 2 - 1 + + def denorm_spec(self, x): + return (x + 1) / 2 * (self.spec_max - self.spec_min) + self.spec_min + + def cwt2f0_norm(self, cwt_spec, mean, std, mel2ph): + return self.fs2.cwt2f0_norm(cwt_spec, mean, std, mel2ph) + + def out2mel(self, x): + return x + + +class OfflineGaussianDiffusion(GaussianDiffusion): + def forward(self, txt_tokens, mel2ph=None, spk_embed=None, + ref_mels=None, f0=None, uv=None, energy=None, infer=False, **kwargs): + b, *_, device = *txt_tokens.shape, txt_tokens.device + + ret = self.fs2(txt_tokens, mel2ph, spk_embed, ref_mels, f0, uv, energy, + skip_decoder=True, infer=True, **kwargs) + cond = ret['decoder_inp'].transpose(1, 2) + fs2_mels = ref_mels[1] + ref_mels = ref_mels[0] + + if not infer: + t = torch.randint(0, self.K_step, (b,), device=device).long() + x = ref_mels + x = self.norm_spec(x) + x = x.transpose(1, 2)[:, None, :, :] # [B, 1, M, T] + ret['diff_loss'] = self.p_losses(x, t, cond) + else: + t = self.K_step + fs2_mels = self.norm_spec(fs2_mels) + fs2_mels = fs2_mels.transpose(1, 2)[:, None, :, :] + + x = self.q_sample(x_start=fs2_mels, t=torch.tensor([t - 1], device=device).long()) + + if hparams.get('gaussian_start') is not None and hparams['gaussian_start']: + print('===> gaussion start.') + shape = (cond.shape[0], 1, self.mel_bins, cond.shape[2]) + x = torch.randn(shape, device=device) + for i in tqdm(reversed(range(0, t)), desc='sample time step', total=t): + x = self.p_sample(x, torch.full((b,), i, device=device, dtype=torch.long), cond) + x = x[:, 0].transpose(1, 2) + ret['mel_out'] = self.denorm_spec(x) + + return ret \ No newline at end of file diff --git 
a/network/diff/net.py b/network/diff/net.py new file mode 100644 index 0000000000000000000000000000000000000000..df46b54cbd545b99e13b7f3802b7fc18f388e0c8 --- /dev/null +++ b/network/diff/net.py @@ -0,0 +1,135 @@ +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from math import sqrt + +from utils.hparams import hparams +from modules.commons.common_layers import Mish + +Linear = nn.Linear +ConvTranspose2d = nn.ConvTranspose2d + + +class AttrDict(dict): + def __init__(self, *args, **kwargs): + super(AttrDict, self).__init__(*args, **kwargs) + self.__dict__ = self + + def override(self, attrs): + if isinstance(attrs, dict): + self.__dict__.update(**attrs) + elif isinstance(attrs, (list, tuple, set)): + for attr in attrs: + self.override(attr) + elif attrs is not None: + raise NotImplementedError + return self + + +class SinusoidalPosEmb(nn.Module): + def __init__(self, dim): + super().__init__() + self.dim = dim + + def forward(self, x): + device = x.device + half_dim = self.dim // 2 + emb = math.log(10000) / (half_dim - 1) + emb = torch.exp(torch.arange(half_dim, device=device) * -emb) + emb = x[:, None] * emb[None, :] + emb = torch.cat((emb.sin(), emb.cos()), dim=-1) + return emb + + +def Conv1d(*args, **kwargs): + layer = nn.Conv1d(*args, **kwargs) + nn.init.kaiming_normal_(layer.weight) + return layer + + +@torch.jit.script +def silu(x): + return x * torch.sigmoid(x) + + +class ResidualBlock(nn.Module): + def __init__(self, encoder_hidden, residual_channels, dilation): + super().__init__() + self.dilated_conv = Conv1d(residual_channels, 2 * residual_channels, 3, padding=dilation, dilation=dilation) + self.diffusion_projection = Linear(residual_channels, residual_channels) + self.conditioner_projection = Conv1d(encoder_hidden, 2 * residual_channels, 1) + self.output_projection = Conv1d(residual_channels, 2 * residual_channels, 1) + + def forward(self, x, conditioner, diffusion_step): + diffusion_step = self.diffusion_projection(diffusion_step).unsqueeze(-1) + conditioner = self.conditioner_projection(conditioner) + y = x + diffusion_step + + y = self.dilated_conv(y) + conditioner + + gate, filter = torch.chunk(y, 2, dim=1) + # Using torch.split instead of torch.chunk to avoid using onnx::Slice + # gate, filter = torch.split(y, torch.div(y.shape[1], 2), dim=1) + + y = torch.sigmoid(gate) * torch.tanh(filter) + + y = self.output_projection(y) + residual, skip = torch.chunk(y, 2, dim=1) + # Using torch.split instead of torch.chunk to avoid using onnx::Slice + # residual, skip = torch.split(y, torch.div(y.shape[1], 2), dim=1) + + return (x + residual) / sqrt(2.0), skip + +class DiffNet(nn.Module): + def __init__(self, in_dims=80): + super().__init__() + self.params = params = AttrDict( + # Model params + encoder_hidden=hparams['hidden_size'], + residual_layers=hparams['residual_layers'], + residual_channels=hparams['residual_channels'], + dilation_cycle_length=hparams['dilation_cycle_length'], + ) + self.input_projection = Conv1d(in_dims, params.residual_channels, 1) + self.diffusion_embedding = SinusoidalPosEmb(params.residual_channels) + dim = params.residual_channels + self.mlp = nn.Sequential( + nn.Linear(dim, dim * 4), + Mish(), + nn.Linear(dim * 4, dim) + ) + self.residual_layers = nn.ModuleList([ + ResidualBlock(params.encoder_hidden, params.residual_channels, 2 ** (i % params.dilation_cycle_length)) + for i in range(params.residual_layers) + ]) + self.skip_projection = Conv1d(params.residual_channels, params.residual_channels, 1) + 
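# 1x1 conv mapping the summed skip connections back to the mel dimension;
+        # its weight is zero-initialized below so the initial noise prediction starts near zero
+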
self.output_projection = Conv1d(params.residual_channels, in_dims, 1) + nn.init.zeros_(self.output_projection.weight) + + def forward(self, spec, diffusion_step, cond): + """ + + :param spec: [B, 1, M, T] + :param diffusion_step: [B, 1] + :param cond: [B, M, T] + :return: + """ + x = spec[:, 0] + x = self.input_projection(x) # x [B, residual_channel, T] + + x = F.relu(x) + diffusion_step = self.diffusion_embedding(diffusion_step) + diffusion_step = self.mlp(diffusion_step) + skip = [] + for layer_id, layer in enumerate(self.residual_layers): + x, skip_connection = layer(x, cond, diffusion_step) + skip.append(skip_connection) + + x = torch.sum(torch.stack(skip), dim=0) / sqrt(len(self.residual_layers)) + x = self.skip_projection(x) + x = F.relu(x) + x = self.output_projection(x) # [B, 80, T] + return x[:, None, :, :] diff --git a/network/hubert/__pycache__/hubert_model.cpython-38.pyc b/network/hubert/__pycache__/hubert_model.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..877626aac6c044efc0b362a07328a14903be4ae7 Binary files /dev/null and b/network/hubert/__pycache__/hubert_model.cpython-38.pyc differ diff --git a/network/hubert/__pycache__/vec_model.cpython-38.pyc b/network/hubert/__pycache__/vec_model.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..13cb994a010702d266eb9925006e4971a7092732 Binary files /dev/null and b/network/hubert/__pycache__/vec_model.cpython-38.pyc differ diff --git a/network/hubert/hubert_model.py b/network/hubert/hubert_model.py new file mode 100644 index 0000000000000000000000000000000000000000..54379f3d47383810c545c53e042d806734e9f8de --- /dev/null +++ b/network/hubert/hubert_model.py @@ -0,0 +1,276 @@ +import copy +import os +import random +from typing import Optional, Tuple + +import librosa +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as t_func +from torch.nn.modules.utils import consume_prefix_in_state_dict_if_present + +from utils import hparams + + +class Hubert(nn.Module): + def __init__(self, num_label_embeddings: int = 100, mask: bool = True): + super().__init__() + self._mask = mask + self.feature_extractor = FeatureExtractor() + self.feature_projection = FeatureProjection() + self.positional_embedding = PositionalConvEmbedding() + self.norm = nn.LayerNorm(768) + self.dropout = nn.Dropout(0.1) + self.encoder = TransformerEncoder( + nn.TransformerEncoderLayer( + 768, 12, 3072, activation="gelu", batch_first=True + ), + 12, + ) + self.proj = nn.Linear(768, 256) + + self.masked_spec_embed = nn.Parameter(torch.FloatTensor(768).uniform_()) + self.label_embedding = nn.Embedding(num_label_embeddings, 256) + + def mask(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + mask = None + if self.training and self._mask: + mask = _compute_mask((x.size(0), x.size(1)), 0.8, 10, x.device, 2) + x[mask] = self.masked_spec_embed.to(x.dtype) + return x, mask + + def encode( + self, x: torch.Tensor, layer: Optional[int] = None + ) -> Tuple[torch.Tensor, torch.Tensor]: + x = self.feature_extractor(x) + x = self.feature_projection(x.transpose(1, 2)) + x, mask = self.mask(x) + x = x + self.positional_embedding(x) + x = self.dropout(self.norm(x)) + x = self.encoder(x, output_layer=layer) + return x, mask + + def logits(self, x: torch.Tensor) -> torch.Tensor: + logits = torch.cosine_similarity( + x.unsqueeze(2), + self.label_embedding.weight.unsqueeze(0).unsqueeze(0), + dim=-1, + ) + return logits / 0.1 + + def forward(self, x: torch.Tensor) -> 
Tuple[torch.Tensor, torch.Tensor]: + x, mask = self.encode(x) + x = self.proj(x) + logits = self.logits(x) + return logits, mask + + +class HubertSoft(Hubert): + def __init__(self): + super().__init__() + + # @torch.inference_mode() + def units(self, wav: torch.Tensor) -> torch.Tensor: + wav = torch.nn.functional.pad(wav, ((400 - 320) // 2, (400 - 320) // 2)) + x, _ = self.encode(wav) + return self.proj(x) + + def forward(self, wav: torch.Tensor): + return self.units(wav) + + +class FeatureExtractor(nn.Module): + def __init__(self): + super().__init__() + self.conv0 = nn.Conv1d(1, 512, 10, 5, bias=False) + self.norm0 = nn.GroupNorm(512, 512) + self.conv1 = nn.Conv1d(512, 512, 3, 2, bias=False) + self.conv2 = nn.Conv1d(512, 512, 3, 2, bias=False) + self.conv3 = nn.Conv1d(512, 512, 3, 2, bias=False) + self.conv4 = nn.Conv1d(512, 512, 3, 2, bias=False) + self.conv5 = nn.Conv1d(512, 512, 2, 2, bias=False) + self.conv6 = nn.Conv1d(512, 512, 2, 2, bias=False) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x = t_func.gelu(self.norm0(self.conv0(x))) + x = t_func.gelu(self.conv1(x)) + x = t_func.gelu(self.conv2(x)) + x = t_func.gelu(self.conv3(x)) + x = t_func.gelu(self.conv4(x)) + x = t_func.gelu(self.conv5(x)) + x = t_func.gelu(self.conv6(x)) + return x + + +class FeatureProjection(nn.Module): + def __init__(self): + super().__init__() + self.norm = nn.LayerNorm(512) + self.projection = nn.Linear(512, 768) + self.dropout = nn.Dropout(0.1) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x = self.norm(x) + x = self.projection(x) + x = self.dropout(x) + return x + + +class PositionalConvEmbedding(nn.Module): + def __init__(self): + super().__init__() + self.conv = nn.Conv1d( + 768, + 768, + kernel_size=128, + padding=128 // 2, + groups=16, + ) + self.conv = nn.utils.weight_norm(self.conv, name="weight", dim=2) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x = self.conv(x.transpose(1, 2)) + x = t_func.gelu(x[:, :, :-1]) + return x.transpose(1, 2) + + +class TransformerEncoder(nn.Module): + def __init__( + self, encoder_layer: nn.TransformerEncoderLayer, num_layers: int + ) -> None: + super(TransformerEncoder, self).__init__() + self.layers = nn.ModuleList( + [copy.deepcopy(encoder_layer) for _ in range(num_layers)] + ) + self.num_layers = num_layers + + def forward( + self, + src: torch.Tensor, + mask: torch.Tensor = None, + src_key_padding_mask: torch.Tensor = None, + output_layer: Optional[int] = None, + ) -> torch.Tensor: + output = src + for layer in self.layers[:output_layer]: + output = layer( + output, src_mask=mask, src_key_padding_mask=src_key_padding_mask + ) + return output + + +def _compute_mask( + shape: Tuple[int, int], + mask_prob: float, + mask_length: int, + device: torch.device, + min_masks: int = 0, +) -> torch.Tensor: + batch_size, sequence_length = shape + + if mask_length < 1: + raise ValueError("`mask_length` has to be bigger than 0.") + + if mask_length > sequence_length: + raise ValueError( + f"`mask_length` has to be smaller than `sequence_length`, but got `mask_length`: {mask_length} and `sequence_length`: {sequence_length}`" + ) + + # compute number of masked spans in batch + num_masked_spans = int(mask_prob * sequence_length / mask_length + random.random()) + num_masked_spans = max(num_masked_spans, min_masks) + + # make sure num masked indices <= sequence_length + if num_masked_spans * mask_length > sequence_length: + num_masked_spans = sequence_length // mask_length + + # SpecAugment mask to fill + mask = torch.zeros((batch_size, 
sequence_length), device=device, dtype=torch.bool) + + # uniform distribution to sample from, make sure that offset samples are < sequence_length + uniform_dist = torch.ones( + (batch_size, sequence_length - (mask_length - 1)), device=device + ) + + # get random indices to mask + mask_indices = torch.multinomial(uniform_dist, num_masked_spans) + + # expand masked indices to masked spans + mask_indices = ( + mask_indices.unsqueeze(dim=-1) + .expand((batch_size, num_masked_spans, mask_length)) + .reshape(batch_size, num_masked_spans * mask_length) + ) + offsets = ( + torch.arange(mask_length, device=device)[None, None, :] + .expand((batch_size, num_masked_spans, mask_length)) + .reshape(batch_size, num_masked_spans * mask_length) + ) + mask_idxs = mask_indices + offsets + + # scatter indices to mask + mask = mask.scatter(1, mask_idxs, True) + + return mask + + +def hubert_soft( + path: str +) -> HubertSoft: + r"""HuBERT-Soft from `"A Comparison of Discrete and Soft Speech Units for Improved Voice Conversion"`. + Args: + path (str): path of a pretrained model + """ + dev = torch.device("cuda" if torch.cuda.is_available() else "cpu") + hubert = HubertSoft() + checkpoint = torch.load(path) + consume_prefix_in_state_dict_if_present(checkpoint, "module.") + hubert.load_state_dict(checkpoint) + hubert.eval().to(dev) + return hubert + + +def get_units(hbt_soft, raw_wav_path, dev=torch.device('cuda')): + wav, sr = librosa.load(raw_wav_path, sr=None) + assert (sr >= 16000) + if len(wav.shape) > 1: + wav = librosa.to_mono(wav) + if sr != 16000: + wav16 = librosa.resample(wav, sr, 16000) + else: + wav16 = wav + dev = torch.device("cuda" if (dev == torch.device('cuda') and torch.cuda.is_available()) else "cpu") + torch.cuda.is_available() and torch.cuda.empty_cache() + with torch.inference_mode(): + units = hbt_soft.units(torch.FloatTensor(wav16.astype(float)).unsqueeze(0).unsqueeze(0).to(dev)) + return units + + +def get_end_file(dir_path, end): + file_list = [] + for root, dirs, files in os.walk(dir_path): + files = [f for f in files if f[0] != '.'] + dirs[:] = [d for d in dirs if d[0] != '.'] + for f_file in files: + if f_file.endswith(end): + file_list.append(os.path.join(root, f_file).replace("\\", "/")) + return file_list + + +if __name__ == '__main__': + from pathlib import Path + + dev = torch.device("cuda" if torch.cuda.is_available() else "cpu") + # hubert的模型路径 + hbt_model = hubert_soft(str(list(Path(hparams['hubert_path']).home().rglob('*.pt'))[0])) + # 这个不用改,自动在根目录下所有wav的同文件夹生成其对应的npy + file_lists = list(Path(hparams['raw_data_dir']).rglob('*.wav')) + nums = len(file_lists) + count = 0 + for wav_path in file_lists: + npy_path = wav_path.with_suffix(".npy") + npy_content = get_units(hbt_model, wav_path).cpu().numpy()[0] + np.save(str(npy_path), npy_content) + count += 1 + print(f"hubert process:{round(count * 100 / nums, 2)}%") diff --git a/network/hubert/vec_model.py b/network/hubert/vec_model.py new file mode 100644 index 0000000000000000000000000000000000000000..ee4b7a152ab9f299d43d6f519564caca25d836a5 --- /dev/null +++ b/network/hubert/vec_model.py @@ -0,0 +1,60 @@ +from pathlib import Path + +import librosa +import numpy as np +import torch + + + +def load_model(vec_path): + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + print("load model(s) from {}".format(vec_path)) + from fairseq import checkpoint_utils + models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task( + [vec_path], + suffix="", + ) + model = models[0] + model = model.to(device) + 
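# switch to eval mode so dropout and similar layers are disabled during feature extraction
+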
model.eval() + return model + + +def get_vec_units(con_model, audio_path, dev): + audio, sampling_rate = librosa.load(audio_path) + if len(audio.shape) > 1: + audio = librosa.to_mono(audio.transpose(1, 0)) + if sampling_rate != 16000: + audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000) + + feats = torch.from_numpy(audio).float() + if feats.dim() == 2: # double channels + feats = feats.mean(-1) + assert feats.dim() == 1, feats.dim() + feats = feats.view(1, -1) + padding_mask = torch.BoolTensor(feats.shape).fill_(False) + inputs = { + "source": feats.to(dev), + "padding_mask": padding_mask.to(dev), + "output_layer": 9, # layer 9 + } + with torch.no_grad(): + logits = con_model.extract_features(**inputs) + feats = con_model.final_proj(logits[0]) + return feats + + +if __name__ == '__main__': + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + model_path = "../../checkpoints/checkpoint_best_legacy_500.pt" # checkpoint_best_legacy_500.pt + vec_model = load_model(model_path) + # 这个不用改,自动在根目录下所有wav的同文件夹生成其对应的npy + file_lists = list(Path("../../data/vecfox").rglob('*.wav')) + nums = len(file_lists) + count = 0 + for wav_path in file_lists: + npy_path = wav_path.with_suffix(".npy") + npy_content = get_vec_units(vec_model, str(wav_path), device).cpu().numpy()[0] + np.save(str(npy_path), npy_content) + count += 1 + print(f"hubert process:{round(count * 100 / nums, 2)}%") diff --git a/network/vocoders/__init__.py b/network/vocoders/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6631bafa406a3e3add4903f3e7a11957d416a78f --- /dev/null +++ b/network/vocoders/__init__.py @@ -0,0 +1,2 @@ +from network.vocoders import hifigan +from network.vocoders import nsf_hifigan diff --git a/network/vocoders/__pycache__/__init__.cpython-38.pyc b/network/vocoders/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..34f3ccdc52637b18428ae7335d6e1063f5e12fda Binary files /dev/null and b/network/vocoders/__pycache__/__init__.cpython-38.pyc differ diff --git a/network/vocoders/__pycache__/base_vocoder.cpython-38.pyc b/network/vocoders/__pycache__/base_vocoder.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bf6210242ee7ca268e80f2c2e55f5c04119ce306 Binary files /dev/null and b/network/vocoders/__pycache__/base_vocoder.cpython-38.pyc differ diff --git a/network/vocoders/__pycache__/hifigan.cpython-38.pyc b/network/vocoders/__pycache__/hifigan.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fdacbba6320cbc032beb7b70c742db8c0c98d3b0 Binary files /dev/null and b/network/vocoders/__pycache__/hifigan.cpython-38.pyc differ diff --git a/network/vocoders/__pycache__/nsf_hifigan.cpython-38.pyc b/network/vocoders/__pycache__/nsf_hifigan.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2260b176a210d5fec3269ea0d3f1b618b5a77753 Binary files /dev/null and b/network/vocoders/__pycache__/nsf_hifigan.cpython-38.pyc differ diff --git a/network/vocoders/__pycache__/pwg.cpython-38.pyc b/network/vocoders/__pycache__/pwg.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..def508d0e8f28d332075ffb6098bfacf79f612fa Binary files /dev/null and b/network/vocoders/__pycache__/pwg.cpython-38.pyc differ diff --git a/network/vocoders/__pycache__/vocoder_utils.cpython-38.pyc b/network/vocoders/__pycache__/vocoder_utils.cpython-38.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..c5d1324a12266ff142ca0e9a727d5bec5be8ae1f Binary files /dev/null and b/network/vocoders/__pycache__/vocoder_utils.cpython-38.pyc differ diff --git a/network/vocoders/base_vocoder.py b/network/vocoders/base_vocoder.py new file mode 100644 index 0000000000000000000000000000000000000000..fe49a9e4f790ecdc5e76d60a23f96602b59fc48d --- /dev/null +++ b/network/vocoders/base_vocoder.py @@ -0,0 +1,39 @@ +import importlib +VOCODERS = {} + + +def register_vocoder(cls): + VOCODERS[cls.__name__.lower()] = cls + VOCODERS[cls.__name__] = cls + return cls + + +def get_vocoder_cls(hparams): + if hparams['vocoder'] in VOCODERS: + return VOCODERS[hparams['vocoder']] + else: + vocoder_cls = hparams['vocoder'] + pkg = ".".join(vocoder_cls.split(".")[:-1]) + cls_name = vocoder_cls.split(".")[-1] + vocoder_cls = getattr(importlib.import_module(pkg), cls_name) + return vocoder_cls + + +class BaseVocoder: + def spec2wav(self, mel): + """ + + :param mel: [T, 80] + :return: wav: [T'] + """ + + raise NotImplementedError + + @staticmethod + def wav2spec(wav_fn): + """ + + :param wav_fn: str + :return: wav, mel: [T, 80] + """ + raise NotImplementedError diff --git a/network/vocoders/hifigan.py b/network/vocoders/hifigan.py new file mode 100644 index 0000000000000000000000000000000000000000..8838b9316e9c5608dba799381c52276710136d4a --- /dev/null +++ b/network/vocoders/hifigan.py @@ -0,0 +1,83 @@ +import glob +import json +import os +import re + +import librosa +import torch + +import utils +from modules.hifigan.hifigan import HifiGanGenerator +from utils.hparams import hparams, set_hparams +from network.vocoders.base_vocoder import register_vocoder +from network.vocoders.pwg import PWG +from network.vocoders.vocoder_utils import denoise + + +def load_model(config_path, file_path): + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + ext = os.path.splitext(file_path)[-1] + if ext == '.pth': + if '.yaml' in config_path: + config = set_hparams(config_path, global_hparams=False) + elif '.json' in config_path: + config = json.load(open(config_path, 'r', encoding='utf-8')) + model = torch.load(file_path, map_location="cpu") + elif ext == '.ckpt': + ckpt_dict = torch.load(file_path, map_location="cpu") + if '.yaml' in config_path: + config = set_hparams(config_path, global_hparams=False) + state = ckpt_dict["state_dict"]["model_gen"] + elif '.json' in config_path: + config = json.load(open(config_path, 'r', encoding='utf-8')) + state = ckpt_dict["generator"] + model = HifiGanGenerator(config) + model.load_state_dict(state, strict=True) + model.remove_weight_norm() + model = model.eval().to(device) + print(f"| Loaded model parameters from {file_path}.") + print(f"| HifiGAN device: {device}.") + return model, config, device + + +total_time = 0 + + +@register_vocoder +class HifiGAN(PWG): + def __init__(self): + base_dir = hparams['vocoder_ckpt'] + config_path = f'{base_dir}/config.yaml' + if os.path.exists(config_path): + file_path = sorted(glob.glob(f'{base_dir}/model_ckpt_steps_*.*'), key= + lambda x: int(re.findall(f'{base_dir}/model_ckpt_steps_(\d+).*', x.replace('\\','/'))[0]))[-1] + print('| load HifiGAN: ', file_path) + self.model, self.config, self.device = load_model(config_path=config_path, file_path=file_path) + else: + config_path = f'{base_dir}/config.json' + ckpt = f'{base_dir}/generator_v1' + if os.path.exists(config_path): + self.model, self.config, self.device = load_model(config_path=config_path, file_path=file_path) + + def spec2wav(self, mel, 
**kwargs): + device = self.device + with torch.no_grad(): + c = torch.FloatTensor(mel).unsqueeze(0).transpose(2, 1).to(device) + with utils.Timer('hifigan', print_time=hparams['profile_infer']): + f0 = kwargs.get('f0') + if f0 is not None and hparams.get('use_nsf'): + f0 = torch.FloatTensor(f0[None, :]).to(device) + y = self.model(c, f0).view(-1) + else: + y = self.model(c).view(-1) + wav_out = y.cpu().numpy() + if hparams.get('vocoder_denoise_c', 0.0) > 0: + wav_out = denoise(wav_out, v=hparams['vocoder_denoise_c']) + return wav_out + + # @staticmethod + # def wav2spec(wav_fn, **kwargs): + # wav, _ = librosa.core.load(wav_fn, sr=hparams['audio_sample_rate']) + # wav_torch = torch.FloatTensor(wav)[None, :] + # mel = mel_spectrogram(wav_torch, hparams).numpy()[0] + # return wav, mel.T diff --git a/network/vocoders/nsf_hifigan.py b/network/vocoders/nsf_hifigan.py new file mode 100644 index 0000000000000000000000000000000000000000..93975546a7acff64279b3fc84b4edd0a7d292714 --- /dev/null +++ b/network/vocoders/nsf_hifigan.py @@ -0,0 +1,92 @@ +import os +import torch +from modules.nsf_hifigan.models import load_model, Generator +from modules.nsf_hifigan.nvSTFT import load_wav_to_torch, STFT +from utils.hparams import hparams +from network.vocoders.base_vocoder import BaseVocoder, register_vocoder + +@register_vocoder +class NsfHifiGAN(BaseVocoder): + def __init__(self, device=None): + if device is None: + device = 'cuda' if torch.cuda.is_available() else 'cpu' + self.device = device + model_path = hparams['vocoder_ckpt'] + if os.path.exists(model_path): + print('| Load HifiGAN: ', model_path) + self.model, self.h = load_model(model_path, device=self.device) + else: + print('Error: HifiGAN model file is not found!') + + def spec2wav_torch(self, mel, **kwargs): # mel: [B, T, bins] + if self.h.sampling_rate != hparams['audio_sample_rate']: + print('Mismatch parameters: hparams[\'audio_sample_rate\']=',hparams['audio_sample_rate'],'!=',self.h.sampling_rate,'(vocoder)') + if self.h.num_mels != hparams['audio_num_mel_bins']: + print('Mismatch parameters: hparams[\'audio_num_mel_bins\']=',hparams['audio_num_mel_bins'],'!=',self.h.num_mels,'(vocoder)') + if self.h.n_fft != hparams['fft_size']: + print('Mismatch parameters: hparams[\'fft_size\']=',hparams['fft_size'],'!=',self.h.n_fft,'(vocoder)') + if self.h.win_size != hparams['win_size']: + print('Mismatch parameters: hparams[\'win_size\']=',hparams['win_size'],'!=',self.h.win_size,'(vocoder)') + if self.h.hop_size != hparams['hop_size']: + print('Mismatch parameters: hparams[\'hop_size\']=',hparams['hop_size'],'!=',self.h.hop_size,'(vocoder)') + if self.h.fmin != hparams['fmin']: + print('Mismatch parameters: hparams[\'fmin\']=',hparams['fmin'],'!=',self.h.fmin,'(vocoder)') + if self.h.fmax != hparams['fmax']: + print('Mismatch parameters: hparams[\'fmax\']=',hparams['fmax'],'!=',self.h.fmax,'(vocoder)') + with torch.no_grad(): + c = mel.transpose(2, 1) #[B, T, bins] + #log10 to log mel + c = 2.30259 * c + f0 = kwargs.get('f0') #[B, T] + if f0 is not None and hparams.get('use_nsf'): + y = self.model(c, f0).view(-1) + else: + y = self.model(c).view(-1) + return y + + def spec2wav(self, mel, **kwargs): + if self.h.sampling_rate != hparams['audio_sample_rate']: + print('Mismatch parameters: hparams[\'audio_sample_rate\']=',hparams['audio_sample_rate'],'!=',self.h.sampling_rate,'(vocoder)') + if self.h.num_mels != hparams['audio_num_mel_bins']: + print('Mismatch parameters: 
hparams[\'audio_num_mel_bins\']=',hparams['audio_num_mel_bins'],'!=',self.h.num_mels,'(vocoder)') + if self.h.n_fft != hparams['fft_size']: + print('Mismatch parameters: hparams[\'fft_size\']=',hparams['fft_size'],'!=',self.h.n_fft,'(vocoder)') + if self.h.win_size != hparams['win_size']: + print('Mismatch parameters: hparams[\'win_size\']=',hparams['win_size'],'!=',self.h.win_size,'(vocoder)') + if self.h.hop_size != hparams['hop_size']: + print('Mismatch parameters: hparams[\'hop_size\']=',hparams['hop_size'],'!=',self.h.hop_size,'(vocoder)') + if self.h.fmin != hparams['fmin']: + print('Mismatch parameters: hparams[\'fmin\']=',hparams['fmin'],'!=',self.h.fmin,'(vocoder)') + if self.h.fmax != hparams['fmax']: + print('Mismatch parameters: hparams[\'fmax\']=',hparams['fmax'],'!=',self.h.fmax,'(vocoder)') + with torch.no_grad(): + c = torch.FloatTensor(mel).unsqueeze(0).transpose(2, 1).to(self.device) + #log10 to log mel + c = 2.30259 * c + f0 = kwargs.get('f0') + if f0 is not None and hparams.get('use_nsf'): + f0 = torch.FloatTensor(f0[None, :]).to(self.device) + y = self.model(c, f0).view(-1) + else: + y = self.model(c).view(-1) + wav_out = y.cpu().numpy() + return wav_out + + @staticmethod + def wav2spec(inp_path, device=None): + if device is None: + device = 'cuda' if torch.cuda.is_available() else 'cpu' + sampling_rate = hparams['audio_sample_rate'] + num_mels = hparams['audio_num_mel_bins'] + n_fft = hparams['fft_size'] + win_size =hparams['win_size'] + hop_size = hparams['hop_size'] + fmin = hparams['fmin'] + fmax = hparams['fmax'] + stft = STFT(sampling_rate, num_mels, n_fft, win_size, hop_size, fmin, fmax) + with torch.no_grad(): + wav_torch, _ = load_wav_to_torch(inp_path, target_sr=stft.target_sr) + mel_torch = stft.get_mel(wav_torch.unsqueeze(0).to(device)).squeeze(0).T + #log mel to log10 mel + mel_torch = 0.434294 * mel_torch + return wav_torch.cpu().numpy(), mel_torch.cpu().numpy() \ No newline at end of file diff --git a/network/vocoders/pwg.py b/network/vocoders/pwg.py new file mode 100644 index 0000000000000000000000000000000000000000..cf2de16f271b66c308c604e52e9ab89242d5663e --- /dev/null +++ b/network/vocoders/pwg.py @@ -0,0 +1,137 @@ +import glob +import re +import librosa +import torch +import yaml +from sklearn.preprocessing import StandardScaler +from torch import nn +from modules.parallel_wavegan.models import ParallelWaveGANGenerator +from modules.parallel_wavegan.utils import read_hdf5 +from utils.hparams import hparams +from utils.pitch_utils import f0_to_coarse +from network.vocoders.base_vocoder import BaseVocoder, register_vocoder +import numpy as np + + +def load_pwg_model(config_path, checkpoint_path, stats_path): + # load config + with open(config_path, encoding='utf-8') as f: + config = yaml.load(f, Loader=yaml.Loader) + + # setup + if torch.cuda.is_available(): + device = torch.device("cuda") + else: + device = torch.device("cpu") + model = ParallelWaveGANGenerator(**config["generator_params"]) + + ckpt_dict = torch.load(checkpoint_path, map_location="cpu") + if 'state_dict' not in ckpt_dict: # official vocoder + model.load_state_dict(torch.load(checkpoint_path, map_location="cpu")["model"]["generator"]) + scaler = StandardScaler() + if config["format"] == "hdf5": + scaler.mean_ = read_hdf5(stats_path, "mean") + scaler.scale_ = read_hdf5(stats_path, "scale") + elif config["format"] == "npy": + scaler.mean_ = np.load(stats_path)[0] + scaler.scale_ = np.load(stats_path)[1] + else: + raise ValueError("support only hdf5 or npy format.") + else: # custom PWG 
vocoder + fake_task = nn.Module() + fake_task.model_gen = model + fake_task.load_state_dict(torch.load(checkpoint_path, map_location="cpu")["state_dict"], strict=False) + scaler = None + + model.remove_weight_norm() + model = model.eval().to(device) + print(f"| Loaded model parameters from {checkpoint_path}.") + print(f"| PWG device: {device}.") + return model, scaler, config, device + + +@register_vocoder +class PWG(BaseVocoder): + def __init__(self): + if hparams['vocoder_ckpt'] == '': # load LJSpeech PWG pretrained model + base_dir = 'wavegan_pretrained' + ckpts = glob.glob(f'{base_dir}/checkpoint-*steps.pkl') + ckpt = sorted(ckpts, key= + lambda x: int(re.findall(f'{base_dir}/checkpoint-(\d+)steps.pkl', x)[0]))[-1] + config_path = f'{base_dir}/config.yaml' + print('| load PWG: ', ckpt) + self.model, self.scaler, self.config, self.device = load_pwg_model( + config_path=config_path, + checkpoint_path=ckpt, + stats_path=f'{base_dir}/stats.h5', + ) + else: + base_dir = hparams['vocoder_ckpt'] + print(base_dir) + config_path = f'{base_dir}/config.yaml' + ckpt = sorted(glob.glob(f'{base_dir}/model_ckpt_steps_*.ckpt'), key= + lambda x: int(re.findall(f'{base_dir}/model_ckpt_steps_(\d+).ckpt', x)[0]))[-1] + print('| load PWG: ', ckpt) + self.scaler = None + self.model, _, self.config, self.device = load_pwg_model( + config_path=config_path, + checkpoint_path=ckpt, + stats_path=f'{base_dir}/stats.h5', + ) + + def spec2wav(self, mel, **kwargs): + # start generation + config = self.config + device = self.device + pad_size = (config["generator_params"]["aux_context_window"], + config["generator_params"]["aux_context_window"]) + c = mel + if self.scaler is not None: + c = self.scaler.transform(c) + + with torch.no_grad(): + z = torch.randn(1, 1, c.shape[0] * config["hop_size"]).to(device) + c = np.pad(c, (pad_size, (0, 0)), "edge") + c = torch.FloatTensor(c).unsqueeze(0).transpose(2, 1).to(device) + p = kwargs.get('f0') + if p is not None: + p = f0_to_coarse(p) + p = np.pad(p, (pad_size,), "edge") + p = torch.LongTensor(p[None, :]).to(device) + y = self.model(z, c, p).view(-1) + wav_out = y.cpu().numpy() + return wav_out + + @staticmethod + def wav2spec(wav_fn, return_linear=False): + from preprocessing.data_gen_utils import process_utterance + res = process_utterance( + wav_fn, fft_size=hparams['fft_size'], + hop_size=hparams['hop_size'], + win_length=hparams['win_size'], + num_mels=hparams['audio_num_mel_bins'], + fmin=hparams['fmin'], + fmax=hparams['fmax'], + sample_rate=hparams['audio_sample_rate'], + loud_norm=hparams['loud_norm'], + min_level_db=hparams['min_level_db'], + return_linear=return_linear, vocoder='pwg', eps=float(hparams.get('wav2spec_eps', 1e-10))) + if return_linear: + return res[0], res[1].T, res[2].T # [T, 80], [T, n_fft] + else: + return res[0], res[1].T + + @staticmethod + def wav2mfcc(wav_fn): + fft_size = hparams['fft_size'] + hop_size = hparams['hop_size'] + win_length = hparams['win_size'] + sample_rate = hparams['audio_sample_rate'] + wav, _ = librosa.core.load(wav_fn, sr=sample_rate) + mfcc = librosa.feature.mfcc(y=wav, sr=sample_rate, n_mfcc=13, + n_fft=fft_size, hop_length=hop_size, + win_length=win_length, pad_mode="constant", power=1.0) + mfcc_delta = librosa.feature.delta(mfcc, order=1) + mfcc_delta_delta = librosa.feature.delta(mfcc, order=2) + mfcc = np.concatenate([mfcc, mfcc_delta, mfcc_delta_delta]).T + return mfcc diff --git a/network/vocoders/vocoder_utils.py b/network/vocoders/vocoder_utils.py new file mode 100644 index 
0000000000000000000000000000000000000000..db5d5ca1765928e4b047db04435a8a39b52592ca --- /dev/null +++ b/network/vocoders/vocoder_utils.py @@ -0,0 +1,15 @@ +import librosa + +from utils.hparams import hparams +import numpy as np + + +def denoise(wav, v=0.1): + spec = librosa.stft(y=wav, n_fft=hparams['fft_size'], hop_length=hparams['hop_size'], + win_length=hparams['win_size'], pad_mode='constant') + spec_m = np.abs(spec) + spec_m = np.clip(spec_m - v, a_min=0, a_max=None) + spec_a = np.angle(spec) + + return librosa.istft(spec_m * np.exp(1j * spec_a), hop_length=hparams['hop_size'], + win_length=hparams['win_size']) diff --git a/preprocessing/SVCpre.py b/preprocessing/SVCpre.py new file mode 100644 index 0000000000000000000000000000000000000000..2faa0737fb5d61f6bdb4ac1fb959711c50311d0e --- /dev/null +++ b/preprocessing/SVCpre.py @@ -0,0 +1,63 @@ +''' + + item: one piece of data + item_name: data id + wavfn: wave file path + txt: lyrics + ph: phoneme + tgfn: text grid file path (unused) + spk: dataset name + wdb: word boundary + ph_durs: phoneme durations + midi: pitch as midi notes + midi_dur: midi duration + is_slur: keep singing upon note changes +''' + + +from copy import deepcopy + +import logging + +from preprocessing.process_pipeline import File2Batch +from utils.hparams import hparams +from preprocessing.base_binarizer import BaseBinarizer + +SVCSINGING_ITEM_ATTRIBUTES = ['wav_fn', 'spk_id'] +class SVCBinarizer(BaseBinarizer): + def __init__(self, item_attributes=SVCSINGING_ITEM_ATTRIBUTES): + super().__init__(item_attributes) + print('spkers: ', set(item['spk_id'] for item in self.items.values())) + self.item_names = sorted(list(self.items.keys())) + self._train_item_names, self._test_item_names = self.split_train_test_set(self.item_names) + # self._valid_item_names=[] + + def split_train_test_set(self, item_names): + item_names = deepcopy(item_names) + if hparams['choose_test_manually']: + test_item_names = [x for x in item_names if any([x.startswith(ts) for ts in hparams['test_prefixes']])] + else: + test_item_names = item_names[-5:] + train_item_names = [x for x in item_names if x not in set(test_item_names)] + logging.info("train {}".format(len(train_item_names))) + logging.info("test {}".format(len(test_item_names))) + return train_item_names, test_item_names + + @property + def train_item_names(self): + return self._train_item_names + + @property + def valid_item_names(self): + return self._test_item_names + + @property + def test_item_names(self): + return self._test_item_names + + def load_meta_data(self): + self.items = File2Batch.file2temporary_dict() + + def _phone_encoder(self): + from preprocessing.hubertinfer import Hubertencoder + return Hubertencoder(hparams['hubert_path']) \ No newline at end of file diff --git a/preprocessing/__pycache__/SVCpre.cpython-38.pyc b/preprocessing/__pycache__/SVCpre.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..19cea41fd198bdd01d2234f1c6ad698aaa29ec7f Binary files /dev/null and b/preprocessing/__pycache__/SVCpre.cpython-38.pyc differ diff --git a/preprocessing/__pycache__/base_binarizer.cpython-38.pyc b/preprocessing/__pycache__/base_binarizer.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d6de402f6ee5607ab3388fb88f5ea13271ce4cbb Binary files /dev/null and b/preprocessing/__pycache__/base_binarizer.cpython-38.pyc differ diff --git a/preprocessing/__pycache__/data_gen_utils.cpython-38.pyc b/preprocessing/__pycache__/data_gen_utils.cpython-38.pyc new file mode 
100644 index 0000000000000000000000000000000000000000..43e5877963f70434da67019bfd6eb3be1783592d Binary files /dev/null and b/preprocessing/__pycache__/data_gen_utils.cpython-38.pyc differ diff --git a/preprocessing/__pycache__/hubertinfer.cpython-38.pyc b/preprocessing/__pycache__/hubertinfer.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0af487a6e5a0b61b9a9ac2954201302891054d4e Binary files /dev/null and b/preprocessing/__pycache__/hubertinfer.cpython-38.pyc differ diff --git a/preprocessing/__pycache__/process_pipeline.cpython-38.pyc b/preprocessing/__pycache__/process_pipeline.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ac59fa750d10bcaeda1676bdda471a427a6242a8 Binary files /dev/null and b/preprocessing/__pycache__/process_pipeline.cpython-38.pyc differ diff --git a/preprocessing/base_binarizer.py b/preprocessing/base_binarizer.py new file mode 100644 index 0000000000000000000000000000000000000000..f070584c60c091c3fa4bc1da377733698b0165b6 --- /dev/null +++ b/preprocessing/base_binarizer.py @@ -0,0 +1,237 @@ +import os +from webbrowser import get +os.environ["OMP_NUM_THREADS"] = "1" +import yaml +from utils.multiprocess_utils import chunked_multiprocess_run +import random +import json +# from resemblyzer import VoiceEncoder +from tqdm import tqdm +from preprocessing.data_gen_utils import get_mel2ph, get_pitch_parselmouth, build_phone_encoder,get_pitch_crepe +from utils.hparams import set_hparams, hparams +import numpy as np +from utils.indexed_datasets import IndexedDatasetBuilder + + +class BinarizationError(Exception): + pass + +BASE_ITEM_ATTRIBUTES = ['txt', 'ph', 'wav_fn', 'tg_fn', 'spk_id'] + +class BaseBinarizer: + ''' + Base class for data processing. + 1. *process* and *process_data_split*: + process entire data, generate the train-test split (support parallel processing); + 2. *process_item*: + process singe piece of data; + 3. *get_pitch*: + infer the pitch using some algorithm; + 4. *get_align*: + get the alignment using 'mel2ph' format (see https://arxiv.org/abs/1905.09263). + 5. phoneme encoder, voice encoder, etc. + + Subclasses should define: + 1. *load_metadata*: + how to read multiple datasets from files; + 2. *train_item_names*, *valid_item_names*, *test_item_names*: + how to split the dataset; + 3. load_ph_set: + the phoneme set. 
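+
+    For example, SVCBinarizer (preprocessing/SVCpre.py) implements *load_meta_data* via
+    File2Batch.file2temporary_dict() and returns Hubertencoder(hparams['hubert_path'])
+    from *_phone_encoder*.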
+ ''' + def __init__(self, item_attributes=BASE_ITEM_ATTRIBUTES): + self.binarization_args = hparams['binarization_args'] + #self.pre_align_args = hparams['pre_align_args'] + + self.items = {} + # every item in self.items has some attributes + self.item_attributes = item_attributes + + self.load_meta_data() + # check program correctness 检查itemdict的key只能在给定的列表中取值 + assert all([attr in self.item_attributes for attr in list(self.items.values())[0].keys()]) + self.item_names = sorted(list(self.items.keys())) + + if self.binarization_args['shuffle']: + random.seed(1234) + random.shuffle(self.item_names) + + # set default get_pitch algorithm + if hparams['use_crepe']: + self.get_pitch_algorithm = get_pitch_crepe + else: + self.get_pitch_algorithm = get_pitch_parselmouth + + def load_meta_data(self): + raise NotImplementedError + + @property + def train_item_names(self): + raise NotImplementedError + + @property + def valid_item_names(self): + raise NotImplementedError + + @property + def test_item_names(self): + raise NotImplementedError + + def build_spk_map(self): + spk_map = set() + for item_name in self.item_names: + spk_name = self.items[item_name]['spk_id'] + spk_map.add(spk_name) + spk_map = {x: i for i, x in enumerate(sorted(list(spk_map)))} + assert len(spk_map) == 0 or len(spk_map) <= hparams['num_spk'], len(spk_map) + return spk_map + + def item_name2spk_id(self, item_name): + return self.spk_map[self.items[item_name]['spk_id']] + + def _phone_encoder(self): + ''' + use hubert encoder + ''' + raise NotImplementedError + ''' + create 'phone_set.json' file if it doesn't exist + ''' + ph_set_fn = f"{hparams['binary_data_dir']}/phone_set.json" + ph_set = [] + if hparams['reset_phone_dict'] or not os.path.exists(ph_set_fn): + self.load_ph_set(ph_set) + ph_set = sorted(set(ph_set)) + json.dump(ph_set, open(ph_set_fn, 'w', encoding='utf-8')) + print("| Build phone set: ", ph_set) + else: + ph_set = json.load(open(ph_set_fn, 'r', encoding='utf-8')) + print("| Load phone set: ", ph_set) + return build_phone_encoder(hparams['binary_data_dir']) + + + def load_ph_set(self, ph_set): + raise NotImplementedError + + def meta_data_iterator(self, prefix): + if prefix == 'valid': + item_names = self.valid_item_names + elif prefix == 'test': + item_names = self.test_item_names + else: + item_names = self.train_item_names + for item_name in item_names: + meta_data = self.items[item_name] + yield item_name, meta_data + + def process(self): + os.makedirs(hparams['binary_data_dir'], exist_ok=True) + self.spk_map = self.build_spk_map() + print("| spk_map: ", self.spk_map) + spk_map_fn = f"{hparams['binary_data_dir']}/spk_map.json" + json.dump(self.spk_map, open(spk_map_fn, 'w', encoding='utf-8')) + + self.phone_encoder =self._phone_encoder() + self.process_data_split('valid') + self.process_data_split('test') + self.process_data_split('train') + + def process_data_split(self, prefix): + data_dir = hparams['binary_data_dir'] + args = [] + builder = IndexedDatasetBuilder(f'{data_dir}/{prefix}') + lengths = [] + f0s = [] + total_sec = 0 + # if self.binarization_args['with_spk_embed']: + # voice_encoder = VoiceEncoder().cuda() + + for item_name, meta_data in self.meta_data_iterator(prefix): + args.append([item_name, meta_data, self.binarization_args]) + spec_min=[] + spec_max=[] + # code for single cpu processing + for i in tqdm(reversed(range(len(args))), total=len(args)): + a = args[i] + item = self.process_item(*a) + if item is None: + continue + spec_min.append(item['spec_min']) + 
spec_max.append(item['spec_max']) + # item['spk_embe'] = voice_encoder.embed_utterance(item['wav']) \ + # if self.binardization_args['with_spk_embed'] else None + if not self.binarization_args['with_wav'] and 'wav' in item: + if hparams['debug']: + print("del wav") + del item['wav'] + if(hparams['debug']): + print(item) + builder.add_item(item) + lengths.append(item['len']) + total_sec += item['sec'] + # if item.get('f0') is not None: + # f0s.append(item['f0']) + if prefix=='train': + spec_max=np.max(spec_max,0) + spec_min=np.min(spec_min,0) + print(spec_max.shape) + with open(hparams['config_path'], encoding='utf-8') as f: + _hparams=yaml.safe_load(f) + _hparams['spec_max']=spec_max.tolist() + _hparams['spec_min']=spec_min.tolist() + with open(hparams['config_path'], 'w', encoding='utf-8') as f: + yaml.safe_dump(_hparams,f) + builder.finalize() + np.save(f'{data_dir}/{prefix}_lengths.npy', lengths) + if len(f0s) > 0: + f0s = np.concatenate(f0s, 0) + f0s = f0s[f0s != 0] + np.save(f'{data_dir}/{prefix}_f0s_mean_std.npy', [np.mean(f0s).item(), np.std(f0s).item()]) + print(f"| {prefix} total duration: {total_sec:.3f}s") + + def process_item(self, item_name, meta_data, binarization_args): + from preprocessing.process_pipeline import File2Batch + return File2Batch.temporary_dict2processed_input(item_name, meta_data, self.phone_encoder, binarization_args) + + def get_align(self, meta_data, mel, phone_encoded, res): + raise NotImplementedError + + def get_align_from_textgrid(self, meta_data, mel, phone_encoded, res): + ''' + NOTE: this part of script is *isolated* from other scripts, which means + it may not be compatible with the current version. + ''' + return + tg_fn, ph = meta_data['tg_fn'], meta_data['ph'] + if tg_fn is not None and os.path.exists(tg_fn): + mel2ph, dur = get_mel2ph(tg_fn, ph, mel, hparams) + else: + raise BinarizationError(f"Align not found") + if mel2ph.max() - 1 >= len(phone_encoded): + raise BinarizationError( + f"Align does not match: mel2ph.max() - 1: {mel2ph.max() - 1}, len(phone_encoded): {len(phone_encoded)}") + res['mel2ph'] = mel2ph + res['dur'] = dur + + def get_f0cwt(self, f0, res): + ''' + NOTE: this part of script is *isolated* from other scripts, which means + it may not be compatible with the current version. 
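+        From the code below: the continuous log-f0 contour is normalized as (lf0 - mean) / std
+        before the continuous wavelet transform, and the mean / std are stored in res['f0_mean']
+        and res['f0_std'] so the contour can be de-normalized later.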
+ ''' + return + from utils.cwt import get_cont_lf0, get_lf0_cwt + uv, cont_lf0_lpf = get_cont_lf0(f0) + logf0s_mean_org, logf0s_std_org = np.mean(cont_lf0_lpf), np.std(cont_lf0_lpf) + cont_lf0_lpf_norm = (cont_lf0_lpf - logf0s_mean_org) / logf0s_std_org + Wavelet_lf0, scales = get_lf0_cwt(cont_lf0_lpf_norm) + if np.any(np.isnan(Wavelet_lf0)): + raise BinarizationError("NaN CWT") + res['cwt_spec'] = Wavelet_lf0 + res['cwt_scales'] = scales + res['f0_mean'] = logf0s_mean_org + res['f0_std'] = logf0s_std_org + + +if __name__ == "__main__": + set_hparams() + BaseBinarizer().process() diff --git a/preprocessing/binarize.py b/preprocessing/binarize.py new file mode 100644 index 0000000000000000000000000000000000000000..df3bff078132d5c1e031af449855fe9c2ba998a1 --- /dev/null +++ b/preprocessing/binarize.py @@ -0,0 +1,20 @@ +import os + +os.environ["OMP_NUM_THREADS"] = "1" + +import importlib +from utils.hparams import set_hparams, hparams + + +def binarize(): + binarizer_cls = hparams.get("binarizer_cls", 'basics.base_binarizer.BaseBinarizer') + pkg = ".".join(binarizer_cls.split(".")[:-1]) + cls_name = binarizer_cls.split(".")[-1] + binarizer_cls = getattr(importlib.import_module(pkg), cls_name) + print("| Binarizer: ", binarizer_cls) + binarizer_cls().process() + + +if __name__ == '__main__': + set_hparams() + binarize() diff --git a/preprocessing/data_gen_utils.py b/preprocessing/data_gen_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..68b9f981144329698da77a463222a73618e6007b --- /dev/null +++ b/preprocessing/data_gen_utils.py @@ -0,0 +1,426 @@ +from io import BytesIO +import json +import os +import re +import struct +import warnings +from collections import OrderedDict + +import librosa +import numpy as np +import parselmouth +import pyloudnorm as pyln +import resampy +import torch +import torchcrepe +import webrtcvad +from scipy.ndimage.morphology import binary_dilation +from skimage.transform import resize +import pyworld as world + +from utils import audio +from utils.pitch_utils import f0_to_coarse +from utils.text_encoder import TokenTextEncoder + +warnings.filterwarnings("ignore") +PUNCS = '!,.?;:' + +int16_max = (2 ** 15) - 1 + + +def trim_long_silences(path, sr=None, return_raw_wav=False, norm=True, vad_max_silence_length=12): + """ + Ensures that segments without voice in the waveform remain no longer than a + threshold determined by the VAD parameters in params.py. + :param wav: the raw waveform as a numpy array of floats + :param vad_max_silence_length: Maximum number of consecutive silent frames a segment can have. + :return: the same waveform with silences trimmed away (length <= original wav length) + """ + + ## Voice Activation Detection + # Window size of the VAD. Must be either 10, 20 or 30 milliseconds. + # This sets the granularity of the VAD. Should not need to be changed. + sampling_rate = 16000 + wav_raw, sr = librosa.core.load(path, sr=sr) + + if norm: + meter = pyln.Meter(sr) # create BS.1770 meter + loudness = meter.integrated_loudness(wav_raw) + wav_raw = pyln.normalize.loudness(wav_raw, loudness, -20.0) + if np.abs(wav_raw).max() > 1.0: + wav_raw = wav_raw / np.abs(wav_raw).max() + + wav = librosa.resample(wav_raw, sr, sampling_rate, res_type='kaiser_best') + + vad_window_length = 30 # In milliseconds + # Number of frames to average together when performing the moving average smoothing. + # The larger this value, the larger the VAD variations must be to not get smoothed out. 
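+    # e.g. with the 30 ms windows above, a width of 8 smooths the VAD decisions over roughly 8 * 30 ms = 240 ms of audio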
+ vad_moving_average_width = 8 + + # Compute the voice detection window size + samples_per_window = (vad_window_length * sampling_rate) // 1000 + + # Trim the end of the audio to have a multiple of the window size + wav = wav[:len(wav) - (len(wav) % samples_per_window)] + + # Convert the float waveform to 16-bit mono PCM + pcm_wave = struct.pack("%dh" % len(wav), *(np.round(wav * int16_max)).astype(np.int16)) + + # Perform voice activation detection + voice_flags = [] + vad = webrtcvad.Vad(mode=3) + for window_start in range(0, len(wav), samples_per_window): + window_end = window_start + samples_per_window + voice_flags.append(vad.is_speech(pcm_wave[window_start * 2:window_end * 2], + sample_rate=sampling_rate)) + voice_flags = np.array(voice_flags) + + # Smooth the voice detection with a moving average + def moving_average(array, width): + array_padded = np.concatenate((np.zeros((width - 1) // 2), array, np.zeros(width // 2))) + ret = np.cumsum(array_padded, dtype=float) + ret[width:] = ret[width:] - ret[:-width] + return ret[width - 1:] / width + + audio_mask = moving_average(voice_flags, vad_moving_average_width) + audio_mask = np.round(audio_mask).astype(np.bool) + + # Dilate the voiced regions + audio_mask = binary_dilation(audio_mask, np.ones(vad_max_silence_length + 1)) + audio_mask = np.repeat(audio_mask, samples_per_window) + audio_mask = resize(audio_mask, (len(wav_raw),)) > 0 + if return_raw_wav: + return wav_raw, audio_mask, sr + return wav_raw[audio_mask], audio_mask, sr + + +def process_utterance(wav_path, + fft_size=1024, + hop_size=256, + win_length=1024, + window="hann", + num_mels=80, + fmin=80, + fmax=7600, + eps=1e-6, + sample_rate=22050, + loud_norm=False, + min_level_db=-100, + return_linear=False, + trim_long_sil=False, vocoder='pwg'): + if isinstance(wav_path, str) or isinstance(wav_path, BytesIO): + if trim_long_sil: + wav, _, _ = trim_long_silences(wav_path, sample_rate) + else: + wav, _ = librosa.core.load(wav_path, sr=sample_rate) + else: + wav = wav_path + if loud_norm: + meter = pyln.Meter(sample_rate) # create BS.1770 meter + loudness = meter.integrated_loudness(wav) + wav = pyln.normalize.loudness(wav, loudness, -22.0) + if np.abs(wav).max() > 1: + wav = wav / np.abs(wav).max() + + # get amplitude spectrogram + x_stft = librosa.stft(wav, n_fft=fft_size, hop_length=hop_size, + win_length=win_length, window=window, pad_mode="constant") + spc = np.abs(x_stft) # (n_bins, T) + + # get mel basis + fmin = 0 if fmin == -1 else fmin + fmax = sample_rate / 2 if fmax == -1 else fmax + mel_basis = librosa.filters.mel(sample_rate, fft_size, num_mels, fmin, fmax) + mel = mel_basis @ spc + + if vocoder == 'pwg': + mel = np.log10(np.maximum(eps, mel)) # (n_mel_bins, T) + else: + assert False, f'"{vocoder}" is not in ["pwg"].' 
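+    # Pad, then truncate the waveform so that len(wav) == mel.shape[1] * hop_size,
+    # keeping audio and mel frame-aligned (e.g. 100 mel frames * 256 hop = 25600 samples).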
+ + l_pad, r_pad = audio.librosa_pad_lr(wav, fft_size, hop_size, 1) + wav = np.pad(wav, (l_pad, r_pad), mode='constant', constant_values=0.0) + wav = wav[:mel.shape[1] * hop_size] + + if not return_linear: + return wav, mel + else: + spc = audio.amp_to_db(spc) + spc = audio.normalize(spc, {'min_level_db': min_level_db}) + return wav, mel, spc + + +def get_pitch_parselmouth(wav_data, mel, hparams): + """ + + :param wav_data: [T] + :param mel: [T, 80] + :param hparams: + :return: + """ + # time_step = hparams['hop_size'] / hparams['audio_sample_rate'] + # f0_min = hparams['f0_min'] + # f0_max = hparams['f0_max'] + + # if hparams['hop_size'] == 128: + # pad_size = 4 + # elif hparams['hop_size'] == 256: + # pad_size = 2 + # else: + # assert False + + # f0 = parselmouth.Sound(wav_data, hparams['audio_sample_rate']).to_pitch_ac( + # time_step=time_step, voicing_threshold=0.6, + # pitch_floor=f0_min, pitch_ceiling=f0_max).selected_array['frequency'] + # lpad = pad_size * 2 + # rpad = len(mel) - len(f0) - lpad + # f0 = np.pad(f0, [[lpad, rpad]], mode='constant') + # # mel and f0 are extracted by 2 different libraries. we should force them to have the same length. + # # Attention: we find that new version of some libraries could cause ``rpad'' to be a negetive value... + # # Just to be sure, we recommend users to set up the same environments as them in requirements_auto.txt (by Anaconda) + # delta_l = len(mel) - len(f0) + # assert np.abs(delta_l) <= 8 + # if delta_l > 0: + # f0 = np.concatenate([f0, [f0[-1]] * delta_l], 0) + # f0 = f0[:len(mel)] + # pad_size=(int(len(wav_data) // hparams['hop_size']) - len(f0) + 1) // 2 + # f0 = np.pad(f0,[[pad_size,len(mel) - len(f0) - pad_size]], mode='constant') + # pitch_coarse = f0_to_coarse(f0, hparams) + # return f0, pitch_coarse + + # Bye bye Parselmouth ! + return get_pitch_world(wav_data, mel, hparams) + +def get_pitch_world(wav_data, mel, hparams): + """ + + :param wav_data: [T] + :param mel: [T, 80] + :param hparams: + :return: + """ + time_step = 1000 * hparams['hop_size'] / hparams['audio_sample_rate'] + f0_min = hparams['f0_min'] + f0_max = hparams['f0_max'] + + # Here's to hoping it uses numpy stuff ! 
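+    # pyworld.harvest expects frame_period in milliseconds, so time_step above is
+    # e.g. 1000 * 128 / 24000 ≈ 5.33 ms per frame with the 24 kHz / hop 128 config.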
+ f0, _ = world.harvest(wav_data.astype(np.double), hparams['audio_sample_rate'], f0_min, f0_max, time_step) + + # Change padding + len_diff = len(mel) - len(f0) + if len_diff > 0: + pad_len = (len_diff + 1) // 2 + f0 = np.pad(f0, [[pad_len, len_diff - pad_len]]) + else: + pad_len = (1 - len_diff) // 2 + rpad = pad_len + len_diff + if rpad != 0: + f0 = f0[pad_len:rpad] + f0 = f0[pad_len:] + pitch_coarse = f0_to_coarse(f0, hparams) + return f0, pitch_coarse + + +def get_pitch_crepe(wav_data, mel, hparams, threshold=0.05): + # device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + device = torch.device("cuda") + # crepe only supports a 16 kHz sample rate, so resample first + wav16k = resampy.resample(wav_data, hparams['audio_sample_rate'], 16000) + wav16k_torch = torch.FloatTensor(wav16k).unsqueeze(0).to(device) + + # frequency range + f0_min = hparams['f0_min'] + f0_max = hparams['f0_max'] + + # after resampling, analyze f0 with hop_size=80, i.e. one frame every 5 ms + f0, pd = torchcrepe.predict(wav16k_torch, 16000, 80, f0_min, f0_max, pad=True, model='full', batch_size=1024, + device=device, return_periodicity=True) + + # filter, remove silence and apply the uv threshold; see the original torchcrepe readme + pd = torchcrepe.filter.median(pd, 3) + pd = torchcrepe.threshold.Silence(-60.)(pd, wav16k_torch, 16000, 80) + f0 = torchcrepe.threshold.At(threshold)(f0, pd) + f0 = torchcrepe.filter.mean(f0, 3) + + # convert NaN frequencies (the unvoiced parts) to 0 Hz + f0 = torch.where(torch.isnan(f0), torch.full_like(f0, 0), f0) + + ''' + np.savetxt('问棋-crepe.csv',np.array([0.005*np.arange(len(f0[0])),f0[0].cpu().numpy()]).transpose(),delimiter=',') + ''' + + # drop zero frequencies and linearly interpolate over the gaps + nzindex = torch.nonzero(f0[0]).squeeze() + f0 = torch.index_select(f0[0], dim=0, index=nzindex).cpu().numpy() + time_org = 0.005 * nzindex.cpu().numpy() + time_frame = np.arange(len(mel)) * hparams['hop_size'] / hparams['audio_sample_rate'] + if f0.shape[0] == 0: + f0 = torch.FloatTensor(time_frame.shape[0]).fill_(0) + print('f0 all zero!') + else: + f0 = np.interp(time_frame, time_org, f0, left=f0[0], right=f0[-1]) + pitch_coarse = f0_to_coarse(f0, hparams) + return f0, pitch_coarse + + +def remove_empty_lines(text): + """remove empty lines""" + assert (len(text) > 0) + assert (isinstance(text, list)) + text = [t.strip() for t in text] + if "" in text: + text.remove("") + return text + + +class TextGrid(object): + def __init__(self, text): + text = remove_empty_lines(text) + self.text = text + self.line_count = 0 + self._get_type() + self._get_time_intval() + self._get_size() + self.tier_list = [] + self._get_item_list() + + def _extract_pattern(self, pattern, inc): + """ + Parameters + ---------- + pattern : regex to extract pattern + inc : increment of line count after extraction + Returns + ------- + group : extracted info + """ + try: + group = re.match(pattern, self.text[self.line_count]).group(1) + self.line_count += inc + except AttributeError: + raise ValueError("File format error at line %d:%s" % (self.line_count, self.text[self.line_count])) + return group + + def _get_type(self): + self.file_type = self._extract_pattern(r"File type = \"(.*)\"", 2) + + def _get_time_intval(self): + self.xmin = self._extract_pattern(r"xmin = (.*)", 1) + self.xmax = self._extract_pattern(r"xmax = (.*)", 2) + + def _get_size(self): + self.size = int(self._extract_pattern(r"size = (.*)", 2)) + + def _get_item_list(self): + """Only supports IntervalTier currently""" + for itemIdx in range(1, self.size + 1): + tier = OrderedDict() + item_list = [] + tier_idx = self._extract_pattern(r"item \[(.*)\]:", 1) + tier_class = self._extract_pattern(r"class = \"(.*)\"", 1) + if tier_class != "IntervalTier": +
raise NotImplementedError("Only IntervalTier class is supported currently") + tier_name = self._extract_pattern(r"name = \"(.*)\"", 1) + tier_xmin = self._extract_pattern(r"xmin = (.*)", 1) + tier_xmax = self._extract_pattern(r"xmax = (.*)", 1) + tier_size = self._extract_pattern(r"intervals: size = (.*)", 1) + for i in range(int(tier_size)): + item = OrderedDict() + item["idx"] = self._extract_pattern(r"intervals \[(.*)\]", 1) + item["xmin"] = self._extract_pattern(r"xmin = (.*)", 1) + item["xmax"] = self._extract_pattern(r"xmax = (.*)", 1) + item["text"] = self._extract_pattern(r"text = \"(.*)\"", 1) + item_list.append(item) + tier["idx"] = tier_idx + tier["class"] = tier_class + tier["name"] = tier_name + tier["xmin"] = tier_xmin + tier["xmax"] = tier_xmax + tier["size"] = tier_size + tier["items"] = item_list + self.tier_list.append(tier) + + def toJson(self): + _json = OrderedDict() + _json["file_type"] = self.file_type + _json["xmin"] = self.xmin + _json["xmax"] = self.xmax + _json["size"] = self.size + _json["tiers"] = self.tier_list + return json.dumps(_json, ensure_ascii=False, indent=2) + + +def get_mel2ph(tg_fn, ph, mel, hparams): + ph_list = ph.split(" ") + with open(tg_fn, "r", encoding='utf-8') as f: + tg = f.readlines() + tg = remove_empty_lines(tg) + tg = TextGrid(tg) + tg = json.loads(tg.toJson()) + split = np.ones(len(ph_list) + 1, np.float) * -1 + tg_idx = 0 + ph_idx = 0 + tg_align = [x for x in tg['tiers'][-1]['items']] + tg_align_ = [] + for x in tg_align: + x['xmin'] = float(x['xmin']) + x['xmax'] = float(x['xmax']) + if x['text'] in ['sil', 'sp', '', 'SIL', 'PUNC']: + x['text'] = '' + if len(tg_align_) > 0 and tg_align_[-1]['text'] == '': + tg_align_[-1]['xmax'] = x['xmax'] + continue + tg_align_.append(x) + tg_align = tg_align_ + tg_len = len([x for x in tg_align if x['text'] != '']) + ph_len = len([x for x in ph_list if not is_sil_phoneme(x)]) + assert tg_len == ph_len, (tg_len, ph_len, tg_align, ph_list, tg_fn) + while tg_idx < len(tg_align) or ph_idx < len(ph_list): + if tg_idx == len(tg_align) and is_sil_phoneme(ph_list[ph_idx]): + split[ph_idx] = 1e8 + ph_idx += 1 + continue + x = tg_align[tg_idx] + if x['text'] == '' and ph_idx == len(ph_list): + tg_idx += 1 + continue + assert ph_idx < len(ph_list), (tg_len, ph_len, tg_align, ph_list, tg_fn) + ph = ph_list[ph_idx] + if x['text'] == '' and not is_sil_phoneme(ph): + assert False, (ph_list, tg_align) + if x['text'] != '' and is_sil_phoneme(ph): + ph_idx += 1 + else: + assert (x['text'] == '' and is_sil_phoneme(ph)) \ + or x['text'].lower() == ph.lower() \ + or x['text'].lower() == 'sil', (x['text'], ph) + split[ph_idx] = x['xmin'] + if ph_idx > 0 and split[ph_idx - 1] == -1 and is_sil_phoneme(ph_list[ph_idx - 1]): + split[ph_idx - 1] = split[ph_idx] + ph_idx += 1 + tg_idx += 1 + assert tg_idx == len(tg_align), (tg_idx, [x['text'] for x in tg_align]) + assert ph_idx >= len(ph_list) - 1, (ph_idx, ph_list, len(ph_list), [x['text'] for x in tg_align], tg_fn) + mel2ph = np.zeros([mel.shape[0]], np.int) + split[0] = 0 + split[-1] = 1e8 + for i in range(len(split) - 1): + assert split[i] != -1 and split[i] <= split[i + 1], (split[:-1],) + split = [int(s * hparams['audio_sample_rate'] / hparams['hop_size'] + 0.5) for s in split] + for ph_idx in range(len(ph_list)): + mel2ph[split[ph_idx]:split[ph_idx + 1]] = ph_idx + 1 + mel2ph_torch = torch.from_numpy(mel2ph) + T_t = len(ph_list) + dur = mel2ph_torch.new_zeros([T_t + 1]).scatter_add(0, mel2ph_torch, torch.ones_like(mel2ph_torch)) + dur = dur[1:].numpy() + return 
mel2ph, dur + + +def build_phone_encoder(data_dir): + phone_list_file = os.path.join(data_dir, 'phone_set.json') + phone_list = json.load(open(phone_list_file, encoding='utf-8')) + return TokenTextEncoder(None, vocab_list=phone_list, replace_oov=',') + + +def is_sil_phoneme(p): + return not p[0].isalpha() diff --git a/preprocessing/hubertinfer.py b/preprocessing/hubertinfer.py new file mode 100644 index 0000000000000000000000000000000000000000..1922e284be17feac2a8a75db507b5d34c54891e9 --- /dev/null +++ b/preprocessing/hubertinfer.py @@ -0,0 +1,42 @@ +import os.path +from io import BytesIO +from pathlib import Path + +import numpy as np +import torch + +from network.hubert.hubert_model import hubert_soft, get_units +from network.hubert.vec_model import load_model, get_vec_units +from utils.hparams import hparams + + +class Hubertencoder(): + def __init__(self, pt_path='checkpoints/hubert/hubert_soft.pt'): + if not 'use_vec' in hparams.keys(): + hparams['use_vec'] = False + if hparams['use_vec']: + pt_path = "checkpoints/vec/checkpoint_best_legacy_500.pt" + self.dev = torch.device("cuda") + self.hbt_model = load_model(pt_path) + else: + pt_path = list(Path(pt_path).parent.rglob('*.pt'))[0] + if 'hubert_gpu' in hparams.keys(): + self.use_gpu = hparams['hubert_gpu'] + else: + self.use_gpu = True + self.dev = torch.device("cuda" if self.use_gpu and torch.cuda.is_available() else "cpu") + self.hbt_model = hubert_soft(str(pt_path)).to(self.dev) + + def encode(self, wav_path): + if isinstance(wav_path, BytesIO): + npy_path = "" + wav_path.seek(0) + else: + npy_path = Path(wav_path).with_suffix('.npy') + if os.path.exists(npy_path): + units = np.load(str(npy_path)) + elif hparams['use_vec']: + units = get_vec_units(self.hbt_model, wav_path, self.dev).cpu().numpy()[0] + else: + units = get_units(self.hbt_model, wav_path, self.dev).cpu().numpy()[0] + return units # [T,256] diff --git a/preprocessing/process_pipeline.py b/preprocessing/process_pipeline.py new file mode 100644 index 0000000000000000000000000000000000000000..12dd11b1621bd321c46443cf4de277974db390bd --- /dev/null +++ b/preprocessing/process_pipeline.py @@ -0,0 +1,199 @@ +''' + file -> temporary_dict -> processed_input -> batch +''' +from utils.hparams import hparams +from network.vocoders.base_vocoder import VOCODERS +import numpy as np +import traceback +from pathlib import Path +from .data_gen_utils import get_pitch_parselmouth,get_pitch_crepe +from .base_binarizer import BinarizationError +import torch +import utils + +class File2Batch: + ''' + pipeline: file -> temporary_dict -> processed_input -> batch + ''' + + @staticmethod + def file2temporary_dict(): + ''' + read from file, store data in temporary dicts + ''' + raw_data_dir = Path(hparams['raw_data_dir']) + # meta_midi = json.load(open(os.path.join(raw_data_dir, 'meta.json'))) # [list of dict] + + # if hparams['perform_enhance'] and not hparams['infer']: + # vocoder=get_vocoder_cls(hparams)() + # raw_files = list(raw_data_dir.rglob(f"*.wav")) + # dic=[] + # time_step = hparams['hop_size'] / hparams['audio_sample_rate'] + # f0_min = hparams['f0_min'] + # f0_max = hparams['f0_max'] + # for file in raw_files: + # y, sr = librosa.load(file, sr=hparams['audio_sample_rate']) + # f0 = parselmouth.Sound(y, hparams['audio_sample_rate']).to_pitch_ac( + # time_step=time_step , voicing_threshold=0.6, + # pitch_floor=f0_min, pitch_ceiling=f0_max).selected_array['frequency'] + # f0_mean=np.mean(f0[f0>0]) + # dic.append(f0_mean) + # for idx in np.where(dic>np.percentile(dic, 80))[0]: + # 
file=raw_files[idx] + # wav,mel=vocoder.wav2spec(str(file)) + # f0,_=get_pitch_parselmouth(wav,mel,hparams) + # f0[f0>0]=f0[f0>0]*(2**(2/12)) + # wav_pred=vocoder.spec2wav(torch.FloatTensor(mel),f0=torch.FloatTensor(f0)) + # sf.write(file.with_name(file.name[:-4]+'_high.wav'), wav_pred, 24000, 'PCM_16') + utterance_labels =[] + utterance_labels.extend(list(raw_data_dir.rglob(f"*.wav"))) + utterance_labels.extend(list(raw_data_dir.rglob(f"*.ogg"))) + #open(os.path.join(raw_data_dir, 'transcriptions.txt'), encoding='utf-8').readlines() + + all_temp_dict = {} + for utterance_label in utterance_labels: + #song_info = utterance_label.split('|') + item_name =str(utterance_label)#raw_item_name = song_info[0] + # print(item_name) + temp_dict = {} + temp_dict['wav_fn'] =str(utterance_label)#f'{raw_data_dir}/wavs/{item_name}.wav' + # temp_dict['txt'] = song_info[1] + + # temp_dict['ph'] = song_info[2] + # # self.item2wdb[item_name] = list(np.nonzero([1 if x in ALL_YUNMU + ['AP', 'SP'] else 0 for x in song_info[2].split()])[0]) + # temp_dict['word_boundary'] = np.array([1 if x in ALL_YUNMU + ['AP', 'SP'] else 0 for x in song_info[2].split()]) + # temp_dict['ph_durs'] = [float(x) for x in song_info[5].split(" ")] + + # temp_dict['pitch_midi'] = np.array([note_to_midi(x.split("/")[0]) if x != 'rest' else 0 + # for x in song_info[3].split(" ")]) + # temp_dict['midi_dur'] = np.array([float(x) for x in song_info[4].split(" ")]) + # temp_dict['is_slur'] = np.array([int(x) for x in song_info[6].split(" ")]) + temp_dict['spk_id'] = hparams['speaker_id'] + # assert temp_dict['pitch_midi'].shape == temp_dict['midi_dur'].shape == temp_dict['is_slur'].shape, \ + # (temp_dict['pitch_midi'].shape, temp_dict['midi_dur'].shape, temp_dict['is_slur'].shape) + + all_temp_dict[item_name] = temp_dict + + return all_temp_dict + + @staticmethod + def temporary_dict2processed_input(item_name, temp_dict, encoder, binarization_args): + ''' + process data in temporary_dicts + ''' + def get_pitch(wav, mel): + # get ground truth f0 by self.get_pitch_algorithm + if hparams['use_crepe']: + gt_f0, gt_pitch_coarse = get_pitch_crepe(wav, mel, hparams) + else: + gt_f0, gt_pitch_coarse = get_pitch_parselmouth(wav, mel, hparams) + if sum(gt_f0) == 0: + raise BinarizationError("Empty **gt** f0") + processed_input['f0'] = gt_f0 + processed_input['pitch'] = gt_pitch_coarse + + def get_align(meta_data, mel, phone_encoded, hop_size=hparams['hop_size'], audio_sample_rate=hparams['audio_sample_rate']): + mel2ph = np.zeros([mel.shape[0]], int) + start_frame=0 + ph_durs = mel.shape[0]/phone_encoded.shape[0] + if hparams['debug']: + print(mel.shape,phone_encoded.shape,mel.shape[0]/phone_encoded.shape[0]) + for i_ph in range(phone_encoded.shape[0]): + + end_frame = int(i_ph*ph_durs +ph_durs+ 0.5) + mel2ph[start_frame:end_frame+1] = i_ph + 1 + start_frame = end_frame+1 + + processed_input['mel2ph'] = mel2ph + + if hparams['vocoder'] in VOCODERS: + wav, mel = VOCODERS[hparams['vocoder']].wav2spec(temp_dict['wav_fn']) + else: + wav, mel = VOCODERS[hparams['vocoder'].split('.')[-1]].wav2spec(temp_dict['wav_fn']) + processed_input = { + 'item_name': item_name, 'mel': mel, 'wav': wav, + 'sec': len(wav) / hparams['audio_sample_rate'], 'len': mel.shape[0] + } + processed_input = {**temp_dict, **processed_input} # merge two dicts + processed_input['spec_min']=np.min(mel,axis=0) + processed_input['spec_max']=np.max(mel,axis=0) + #(processed_input['spec_min'].shape) + try: + if binarization_args['with_f0']: + get_pitch(wav, mel) + if 
binarization_args['with_hubert']: + try: + hubert_encoded = processed_input['hubert'] = encoder.encode(temp_dict['wav_fn']) + except: + traceback.print_exc() + raise Exception(f"hubert encode error") + if binarization_args['with_align']: + get_align(temp_dict, mel, hubert_encoded) + except Exception as e: + print(f"| Skip item ({e}). item_name: {item_name}, wav_fn: {temp_dict['wav_fn']}") + return None + return processed_input + + @staticmethod + def processed_input2batch(samples): + ''' + Args: + samples: one batch of processed_input + NOTE: + the batch size is controlled by hparams['max_sentences'] + ''' + if len(samples) == 0: + return {} + id = torch.LongTensor([s['id'] for s in samples]) + item_names = [s['item_name'] for s in samples] + #text = [s['text'] for s in samples] + #txt_tokens = utils.collate_1d([s['txt_token'] for s in samples], 0) + hubert = utils.collate_2d([s['hubert'] for s in samples], 0.0) + f0 = utils.collate_1d([s['f0'] for s in samples], 0.0) + pitch = utils.collate_1d([s['pitch'] for s in samples]) + uv = utils.collate_1d([s['uv'] for s in samples]) + energy = utils.collate_1d([s['energy'] for s in samples], 0.0) + mel2ph = utils.collate_1d([s['mel2ph'] for s in samples], 0.0) \ + if samples[0]['mel2ph'] is not None else None + mels = utils.collate_2d([s['mel'] for s in samples], 0.0) + #txt_lengths = torch.LongTensor([s['txt_token'].numel() for s in samples]) + hubert_lengths = torch.LongTensor([s['hubert'].shape[0] for s in samples]) + mel_lengths = torch.LongTensor([s['mel'].shape[0] for s in samples]) + + batch = { + 'id': id, + 'item_name': item_names, + 'nsamples': len(samples), + # 'text': text, + # 'txt_tokens': txt_tokens, + # 'txt_lengths': txt_lengths, + 'hubert':hubert, + 'mels': mels, + 'mel_lengths': mel_lengths, + 'mel2ph': mel2ph, + 'energy': energy, + 'pitch': pitch, + 'f0': f0, + 'uv': uv, + } + #========not used================= + # if hparams['use_spk_embed']: + # spk_embed = torch.stack([s['spk_embed'] for s in samples]) + # batch['spk_embed'] = spk_embed + # if hparams['use_spk_id']: + # spk_ids = torch.LongTensor([s['spk_id'] for s in samples]) + # batch['spk_ids'] = spk_ids + # if hparams['pitch_type'] == 'cwt': + # cwt_spec = utils.collate_2d([s['cwt_spec'] for s in samples]) + # f0_mean = torch.Tensor([s['f0_mean'] for s in samples]) + # f0_std = torch.Tensor([s['f0_std'] for s in samples]) + # batch.update({'cwt_spec': cwt_spec, 'f0_mean': f0_mean, 'f0_std': f0_std}) + # elif hparams['pitch_type'] == 'ph': + # batch['f0'] = utils.collate_1d([s['f0_ph'] for s in samples]) + + # batch['pitch_midi'] = utils.collate_1d([s['pitch_midi'] for s in samples], 0) + # batch['midi_dur'] = utils.collate_1d([s['midi_dur'] for s in samples], 0) + # batch['is_slur'] = utils.collate_1d([s['is_slur'] for s in samples], 0) + # batch['word_boundary'] = utils.collate_1d([s['word_boundary'] for s in samples], 0) + + return batch \ No newline at end of file diff --git a/raw/test_input.wav b/raw/test_input.wav new file mode 100644 index 0000000000000000000000000000000000000000..b04148a941b60c6b9ff8db693c83b1ff8d07ffa1 Binary files /dev/null and b/raw/test_input.wav differ diff --git a/requirements.png b/requirements.png new file mode 100644 index 0000000000000000000000000000000000000000..7c4e35bfb664f6a997c8cf69c0e536c01653d655 Binary files /dev/null and b/requirements.png differ diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..8cb6168f7d7e42d744d5cd3493dd88c37c887ed5 --- /dev/null +++ 
b/requirements.txt @@ -0,0 +1,105 @@ +absl-py==1.3.0 +aiohttp==3.8.3 +aiosignal==1.2.0 +appdirs==1.4.4 +asttokens==2.0.8 +async-timeout==4.0.2 +attrs==22.1.0 +audioread==3.0.0 +backcall==0.2.0 +cachetools==5.2.0 +cffi==1.15.1 +charset-normalizer==2.1.1 +colorama==0.4.6 +contourpy==1.0.5 +cycler==0.11.0 +debugpy==1.6.3 +decorator==5.1.1 +einops==0.5.0 +entrypoints==0.4 +executing==1.1.1 +fonttools==4.38.0 +frozenlist==1.3.1 +fsspec==2022.10.0 +future==0.18.2 +google-auth==2.13.0 +google-auth-oauthlib==0.4.6 +grpcio==1.50.0 +h5py==3.7.0 +idna==3.4 +imageio==2.22.2 +importlib-metadata==5.0.0 +ipykernel==6.16.2 +ipython==8.5.0 +jedi==0.18.1 +joblib==1.2.0 +jupyter_client==7.4.4 +jupyter_core==4.11.2 +kiwisolver==1.4.4 +librosa==0.9.1 +llvmlite==0.39.1 +Markdown==3.4.1 +MarkupSafe==2.1.1 +matplotlib==3.6.1 +matplotlib-inline==0.1.6 +multidict==6.0.2 +nest-asyncio==1.5.6 +networkx==2.8.7 +numba==0.56.3 +numpy==1.23.4 +oauthlib==3.2.2 +packaging==21.3 +parso==0.8.3 +pickleshare==0.7.5 +Pillow==9.2.0 +pooch==1.6.0 +praat-parselmouth==0.4.1 +prompt-toolkit==3.0.31 +protobuf==3.19.6 +psutil==5.9.3 +pure-eval==0.2.2 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pycparser==2.21 +pycwt==0.3.0a22 +pyDeprecate==0.3.0 +Pygments==2.13.0 +pyloudnorm==0.1.0 +pyparsing==3.0.9 +python-dateutil==2.8.2 +pytorch-lightning==1.3.3 +PyWavelets==1.4.1 +pywin32==304 +PyYAML==5.4.1 +pyzmq==24.0.1 +requests==2.28.1 +requests-oauthlib==1.3.1 +resampy==0.4.2 +rsa==4.9 +scikit-image==0.19.3 +scikit-learn==1.1.3 +scipy==1.9.3 +six==1.16.0 +soundfile==0.11.0 +stack-data==0.5.1 +tensorboard==2.10.1 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.1 +threadpoolctl==3.1.0 +tifffile==2022.10.10 +torch==1.12.1+cu113 +torchaudio==0.12.1+cu113 +torchcrepe==0.0.17 +torchmetrics==0.5.0 +torchvision==0.13.1+cu113 +tornado==6.2 +tqdm==4.64.1 +traitlets==5.5.0 +typing_extensions==4.4.0 +urllib3==1.26.12 +wcwidth==0.2.5 +webrtcvad==2.0.10 +Werkzeug==2.2.2 +wincertstore==0.2 +yarl==1.8.1 +zipp==3.10.0 diff --git a/requirements_short.txt b/requirements_short.txt new file mode 100644 index 0000000000000000000000000000000000000000..20841a6b859fc857d56762823eb59ace6d8b2fab --- /dev/null +++ b/requirements_short.txt @@ -0,0 +1,13 @@ +torchcrepe +praat-parselmouth==0.4.1 +scikit-image +ipython +ipykernel +pyloudnorm +webrtcvad +h5py +einops +pycwt +torchmetrics==0.5 +pytorch_lightning==1.3.3 +pyworld \ No newline at end of file diff --git a/results/test_output.wav b/results/test_output.wav new file mode 100644 index 0000000000000000000000000000000000000000..7f279f967e04deec498b68324b349d801aa2e9ac --- /dev/null +++ b/results/test_output.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:944a26fb0f3bf8d6194d8ec42496b6bc89ca17572d56fd9e65df0cc22f54e8be +size 1086508 diff --git a/run.py b/run.py new file mode 100644 index 0000000000000000000000000000000000000000..40d119f4be35d5631e283dca414bb6f407ea71b5 --- /dev/null +++ b/run.py @@ -0,0 +1,16 @@ +import importlib +import os +from utils.hparams import set_hparams, hparams +set_hparams(print_hparams=False) + +def run_task(): + assert hparams['task_cls'] != '' + pkg = ".".join(hparams["task_cls"].split(".")[:-1]) + cls_name = hparams["task_cls"].split(".")[-1] + task_cls = getattr(importlib.import_module(pkg), cls_name) + task_cls.start() + + +if __name__ == '__main__': + run_task() + diff --git a/simplify.py b/simplify.py new file mode 100644 index 0000000000000000000000000000000000000000..75c187cc77e41b26a4bfd6bbfe3dad85e434ea59 --- /dev/null +++ b/simplify.py @@ -0,0 
+1,28 @@ +from argparse import ArgumentParser + +import torch + + +def simplify_pth(pth_name, project_name): + model_path = f'./checkpoints/{project_name}' + checkpoint_dict = torch.load(f'{model_path}/{pth_name}') + torch.save({'epoch': checkpoint_dict['epoch'], + 'state_dict': checkpoint_dict['state_dict'], + 'global_step': None, + 'checkpoint_callback_best': None, + 'optimizer_states': None, + 'lr_schedulers': None + }, f'./clean_{pth_name}') + + +def main(): + parser = ArgumentParser() + parser.add_argument('--proj', type=str) + parser.add_argument('--steps', type=str) + args = parser.parse_args() + model_name = f"model_ckpt_steps_{args.steps}.ckpt" + simplify_pth(model_name, args.proj) + + +if __name__ == '__main__': + main() diff --git a/test_output.wav b/test_output.wav new file mode 100644 index 0000000000000000000000000000000000000000..15294e495c8682104a176a62052be45e1c033985 --- /dev/null +++ b/test_output.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd08a3539ce72ae787f25062dd2404d0d229306c897e76323ce9e365ab36094d +size 1086516 diff --git a/training/__pycache__/train_pipeline.cpython-38.pyc b/training/__pycache__/train_pipeline.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3d3bb1542d0d0d46272bf572feabe0ac6b796e59 Binary files /dev/null and b/training/__pycache__/train_pipeline.cpython-38.pyc differ diff --git a/training/config.yaml b/training/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d49f0ecb2e118cc255d97e6077eb8e62046f4a05 --- /dev/null +++ b/training/config.yaml @@ -0,0 +1,349 @@ +K_step: 1000 +accumulate_grad_batches: 1 +audio_num_mel_bins: 80 +audio_sample_rate: 24000 +binarization_args: + shuffle: false + with_align: true + with_f0: true + with_hubert: true + with_spk_embed: false + with_wav: false +binarizer_cls: preprocessing.SVCpre.SVCBinarizer +binary_data_dir: data/binary/atri +check_val_every_n_epoch: 10 +choose_test_manually: false +clip_grad_norm: 1 +config_path: training/config.yaml +content_cond_steps: [] +cwt_add_f0_loss: false +cwt_hidden_size: 128 +cwt_layers: 2 +cwt_loss: l1 +cwt_std_scale: 0.8 +datasets: +- opencpop +debug: false +dec_ffn_kernel_size: 9 +dec_layers: 4 +decay_steps: 30000 +decoder_type: fft +dict_dir: '' +diff_decoder_type: wavenet +diff_loss_type: l2 +dilation_cycle_length: 4 +dropout: 0.1 +ds_workers: 4 +dur_enc_hidden_stride_kernel: +- 0,2,3 +- 0,2,3 +- 0,1,3 +dur_loss: mse +dur_predictor_kernel: 3 +dur_predictor_layers: 5 +enc_ffn_kernel_size: 9 +enc_layers: 4 +encoder_K: 8 +encoder_type: fft +endless_ds: False +f0_bin: 256 +f0_max: 1100.0 +f0_min: 50.0 +ffn_act: gelu +ffn_padding: SAME +fft_size: 512 +fmax: 12000 +fmin: 30 +fs2_ckpt: '' +gaussian_start: true +gen_dir_name: '' +gen_tgt_spk_id: -1 +hidden_size: 256 +hop_size: 128 +hubert_gpu: true +hubert_path: checkpoints/hubert/hubert_soft.pt +infer: false +keep_bins: 80 +lambda_commit: 0.25 +lambda_energy: 0.0 +lambda_f0: 1.0 +lambda_ph_dur: 0.3 +lambda_sent_dur: 1.0 +lambda_uv: 1.0 +lambda_word_dur: 1.0 +load_ckpt: '' +log_interval: 100 +loud_norm: false +lr: 5.0e-05 +max_beta: 0.02 +max_epochs: 3000 +max_eval_sentences: 1 +max_eval_tokens: 60000 +max_frames: 42000 +max_input_tokens: 60000 +max_sentences: 24 +max_tokens: 128000 +max_updates: 1000000 +mel_loss: ssim:0.5|l1:0.5 +mel_vmax: 1.5 +mel_vmin: -6.0 +min_level_db: -120 +norm_type: gn +num_ckpt_keep: 10 +num_heads: 2 +num_sanity_val_steps: 1 +num_spk: 1 +num_test_samples: 0 +num_valid_plots: 10 +optimizer_adam_beta1: 0.9 
+optimizer_adam_beta2: 0.98 +out_wav_norm: false +pe_ckpt: checkpoints/0102_xiaoma_pe/model_ckpt_steps_60000.ckpt +pe_enable: false +perform_enhance: true +pitch_ar: false +pitch_enc_hidden_stride_kernel: +- 0,2,5 +- 0,2,5 +- 0,2,5 +pitch_extractor: parselmouth +pitch_loss: l2 +pitch_norm: log +pitch_type: frame +pndm_speedup: 10 +pre_align_args: + allow_no_txt: false + denoise: false + forced_align: mfa + txt_processor: zh_g2pM + use_sox: true + use_tone: false +pre_align_cls: data_gen.singing.pre_align.SingingPreAlign +predictor_dropout: 0.5 +predictor_grad: 0.1 +predictor_hidden: -1 +predictor_kernel: 5 +predictor_layers: 5 +prenet_dropout: 0.5 +prenet_hidden_size: 256 +pretrain_fs_ckpt: pretrain/nyaru/model_ckpt_steps_60000.ckpt +processed_data_dir: xxx +profile_infer: false +raw_data_dir: data/raw/atri +ref_norm_layer: bn +rel_pos: true +reset_phone_dict: true +residual_channels: 256 +residual_layers: 20 +save_best: false +save_ckpt: true +save_codes: +- configs +- modules +- src +- utils +save_f0: true +save_gt: false +schedule_type: linear +seed: 1234 +sort_by_len: true +speaker_id: atri +spec_max: +- 0.2987259328365326 +- 0.29721200466156006 +- 0.23978209495544434 +- 0.208412766456604 +- 0.25777050852775574 +- 0.2514476478099823 +- 0.1129382848739624 +- 0.03415697440505028 +- 0.09860049188137054 +- 0.10637332499027252 +- 0.13287633657455444 +- 0.19744250178337097 +- 0.10040587931871414 +- 0.13735432922840118 +- 0.15107455849647522 +- 0.17196381092071533 +- 0.08298977464437485 +- 0.0632769986987114 +- 0.02723858878016472 +- -0.001819317927584052 +- -0.029565516859292984 +- -0.023574354127049446 +- -0.01633293740451336 +- 0.07143621146678925 +- 0.021580500528216362 +- 0.07257916033267975 +- -0.024349519982933998 +- -0.06165708228945732 +- -0.10486568510532379 +- -0.1363687664270401 +- -0.13333871960639954 +- -0.13955898582935333 +- -0.16613495349884033 +- -0.17636367678642273 +- -0.2786925733089447 +- -0.22967253625392914 +- -0.31897130608558655 +- -0.18007366359233856 +- -0.29366692900657654 +- -0.2871025800704956 +- -0.36748355627059937 +- -0.46071451902389526 +- -0.5464922189712524 +- -0.5719417333602905 +- -0.6020897626876831 +- -0.6239874958992004 +- -0.5653440952301025 +- -0.6508013606071472 +- -0.628247857093811 +- -0.6809687614440918 +- -0.569259762763977 +- -0.5423558354377747 +- -0.5811785459518433 +- -0.5359002351760864 +- -0.6565515398979187 +- -0.7143737077713013 +- -0.8502675890922546 +- -0.7979224920272827 +- -0.7110578417778015 +- -0.763409435749054 +- -0.7984790802001953 +- -0.6927220821380615 +- -0.658117413520813 +- -0.7486468553543091 +- -0.5949879884719849 +- -0.7494576573371887 +- -0.7400822639465332 +- -0.6822793483734131 +- -0.7773582339286804 +- -0.661201536655426 +- -0.791329026222229 +- -0.8982341885566711 +- -0.8736728429794312 +- -0.7701027393341064 +- -0.8490535616874695 +- -0.7479292154312134 +- -0.9320166110992432 +- -1.2862414121627808 +- -2.8936190605163574 +- -2.924229860305786 +spec_min: +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -6.0 +- -5.999454021453857 +- 
-5.8822431564331055 +- -5.892064571380615 +- -5.882402420043945 +- -5.786972522735596 +- -5.746835231781006 +- -5.8594512939453125 +- -5.7389445304870605 +- -5.718059539794922 +- -5.779720306396484 +- -5.801984786987305 +- -6.0 +- -6.0 +spk_cond_steps: [] +stop_token_weight: 5.0 +task_cls: training.task.SVC_task.SVCTask +test_ids: [] +test_input_dir: '' +test_num: 0 +test_prefixes: +- test +test_set_name: test +timesteps: 1000 +train_set_name: train +use_crepe: true +use_denoise: false +use_energy_embed: false +use_gt_dur: false +use_gt_f0: false +use_midi: false +use_nsf: true +use_pitch_embed: true +use_pos_embed: true +use_spk_embed: false +use_spk_id: false +use_split_spk_id: false +use_uv: false +use_var_enc: false +use_vec: false +val_check_interval: 2000 +valid_num: 0 +valid_set_name: valid +vocoder: network.vocoders.hifigan.HifiGAN +vocoder_ckpt: checkpoints/0109_hifigan_bigpopcs_hop128 +warmup_updates: 2000 +wav2spec_eps: 1e-6 +weight_decay: 0 +win_size: 512 +work_dir: checkpoints/atri +no_fs2: false \ No newline at end of file diff --git a/training/config_nsf.yaml b/training/config_nsf.yaml new file mode 100644 index 0000000000000000000000000000000000000000..37beccc14e0b8978e065a79bc7319c2c057bf1e5 --- /dev/null +++ b/training/config_nsf.yaml @@ -0,0 +1,445 @@ +K_step: 1000 +accumulate_grad_batches: 1 +audio_num_mel_bins: 128 +audio_sample_rate: 44100 +binarization_args: + shuffle: false + with_align: true + with_f0: true + with_hubert: true + with_spk_embed: false + with_wav: false +binarizer_cls: preprocessing.SVCpre.SVCBinarizer +binary_data_dir: data/binary/Meiko +check_val_every_n_epoch: 10 +choose_test_manually: false +clip_grad_norm: 1 +config_path: training/config_nsf.yaml +content_cond_steps: [] +cwt_add_f0_loss: false +cwt_hidden_size: 128 +cwt_layers: 2 +cwt_loss: l1 +cwt_std_scale: 0.8 +datasets: +- opencpop +debug: false +dec_ffn_kernel_size: 9 +dec_layers: 4 +decay_steps: 50000 +decoder_type: fft +dict_dir: '' +diff_decoder_type: wavenet +diff_loss_type: l2 +dilation_cycle_length: 4 +dropout: 0.1 +ds_workers: 4 +dur_enc_hidden_stride_kernel: +- 0,2,3 +- 0,2,3 +- 0,1,3 +dur_loss: mse +dur_predictor_kernel: 3 +dur_predictor_layers: 5 +enc_ffn_kernel_size: 9 +enc_layers: 4 +encoder_K: 8 +encoder_type: fft +endless_ds: false +f0_bin: 256 +f0_max: 1100.0 +f0_min: 40.0 +ffn_act: gelu +ffn_padding: SAME +fft_size: 2048 +fmax: 16000 +fmin: 40 +fs2_ckpt: '' +gaussian_start: true +gen_dir_name: '' +gen_tgt_spk_id: -1 +hidden_size: 256 +hop_size: 512 +hubert_gpu: true +hubert_path: checkpoints/hubert/hubert_soft.pt +infer: false +keep_bins: 128 +lambda_commit: 0.25 +lambda_energy: 0.0 +lambda_f0: 1.0 +lambda_ph_dur: 0.3 +lambda_sent_dur: 1.0 +lambda_uv: 1.0 +lambda_word_dur: 1.0 +load_ckpt: '' +log_interval: 100 +loud_norm: false +lr: 0.0006 +max_beta: 0.02 +max_epochs: 3000 +max_eval_sentences: 1 +max_eval_tokens: 60000 +max_frames: 42000 +max_input_tokens: 60000 +max_sentences: 11 +max_tokens: 128000 +max_updates: 1000000 +mel_loss: ssim:0.5|l1:0.5 +mel_vmax: 1.5 +mel_vmin: -6.0 +min_level_db: -120 +no_fs2: true +norm_type: gn +num_ckpt_keep: 10 +num_heads: 2 +num_sanity_val_steps: 1 +num_spk: 1 +num_test_samples: 0 +num_valid_plots: 10 +optimizer_adam_beta1: 0.9 +optimizer_adam_beta2: 0.98 +out_wav_norm: false +pe_ckpt: checkpoints/0102_xiaoma_pe/model_ckpt_steps_60000.ckpt +pe_enable: false +perform_enhance: true +pitch_ar: false +pitch_enc_hidden_stride_kernel: +- 0,2,5 +- 0,2,5 +- 0,2,5 +pitch_extractor: parselmouth +pitch_loss: l2 +pitch_norm: log +pitch_type: frame 
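+# (assumed meaning) pndm_speedup is the PLMS/PNDM sampling acceleration factor:
+# with timesteps: 1000 and pndm_speedup: 10, inference runs roughly 1000 / 10 = 100 denoising steps.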
+pndm_speedup: 10 +pre_align_args: + allow_no_txt: false + denoise: false + forced_align: mfa + txt_processor: zh_g2pM + use_sox: true + use_tone: false +pre_align_cls: data_gen.singing.pre_align.SingingPreAlign +predictor_dropout: 0.5 +predictor_grad: 0.1 +predictor_hidden: -1 +predictor_kernel: 5 +predictor_layers: 5 +prenet_dropout: 0.5 +prenet_hidden_size: 256 +pretrain_fs_ckpt: '' +processed_data_dir: xxx +profile_infer: false +raw_data_dir: data/raw/Meiko +ref_norm_layer: bn +rel_pos: true +reset_phone_dict: true +residual_channels: 384 +residual_layers: 20 +save_best: true +save_ckpt: true +save_codes: +- configs +- modules +- src +- utils +save_f0: true +save_gt: false +schedule_type: linear +seed: 1234 +sort_by_len: true +speaker_id: Meiko +spec_max: +- 0.11616316437721252 +- 0.009597139433026314 +- 0.28568679094314575 +- 0.5713539123535156 +- 0.6507775187492371 +- 0.6846900582313538 +- 0.7684511542320251 +- 0.7574314475059509 +- 0.7267094254493713 +- 0.8298212289810181 +- 0.6814215183258057 +- 0.7774385213851929 +- 0.7883802056312561 +- 0.7771736979484558 +- 0.7607403993606567 +- 0.8505979180335999 +- 0.7654092311859131 +- 0.7792922258377075 +- 0.814899206161499 +- 0.8058286905288696 +- 0.839918851852417 +- 0.8406909108161926 +- 0.8339935541152954 +- 0.9287465810775757 +- 0.8166532516479492 +- 0.8449192047119141 +- 0.7643511891365051 +- 0.8175668716430664 +- 1.0239852666854858 +- 0.920753002166748 +- 0.8153243660926819 +- 0.7587951421737671 +- 0.7698416113853455 +- 0.7247377634048462 +- 0.6954795122146606 +- 0.6807010173797607 +- 0.8715915679931641 +- 0.8993064761161804 +- 0.90997314453125 +- 0.7913641333580017 +- 0.7065826058387756 +- 0.6068118810653687 +- 0.6278789639472961 +- 0.6242763996124268 +- 0.5978773236274719 +- 0.651780366897583 +- 0.7780635952949524 +- 0.7565146684646606 +- 0.5729265213012695 +- 0.5707721710205078 +- 0.5281876921653748 +- 0.5579817891120911 +- 0.6407540440559387 +- 0.7233482003211975 +- 0.5677092671394348 +- 0.40926626324653625 +- 0.4460923373699188 +- 0.4058813750743866 +- 0.4390961229801178 +- 0.5553078055381775 +- 0.5349165201187134 +- 0.43830350041389465 +- 0.4032619595527649 +- 0.3253237009048462 +- 0.30613574385643005 +- 0.44174280762672424 +- 0.3622792959213257 +- 0.45337533950805664 +- 0.3313130736351013 +- 0.36956584453582764 +- 0.4998202919960022 +- 0.42133796215057373 +- 0.28050243854522705 +- 0.26571735739707947 +- 0.20871540904045105 +- 0.3416949510574341 +- 0.3328045904636383 +- 0.332925409078598 +- 0.3000032603740692 +- 0.08743463456630707 +- 0.20726755261421204 +- 0.1583203673362732 +- 0.13275942206382751 +- 0.066913902759552 +- 0.1054723709821701 +- -0.08983375877141953 +- -0.12505969405174255 +- -0.03509913384914398 +- -0.11556489020586014 +- -0.2324075847864151 +- -0.06187695264816284 +- 0.020108096301555634 +- -0.009129349142313004 +- -0.044059865176677704 +- 0.0343453511595726 +- 0.030609752982854843 +- 0.11592991650104523 +- 0.04611678794026375 +- 0.016514429822564125 +- -0.10608740150928497 +- -0.18119606375694275 +- -0.0764162689447403 +- -0.005786585621535778 +- -0.16699059307575226 +- -0.1254500299692154 +- -0.09370455145835876 +- 0.015143157914280891 +- 0.07289116084575653 +- -0.006812357809394598 +- -0.0280735082924366 +- -0.0021705669350922108 +- -0.1115487739443779 +- -0.2423458993434906 +- -0.116642065346241 +- -0.1487213373184204 +- -0.16707029938697815 +- -0.25437667965888977 +- -0.32499101758003235 +- -0.2704009413719177 +- -0.29621294140815735 +- -0.42674311995506287 +- -0.4650932848453522 +- -0.5842434763908386 +- 
-0.6859109401702881 +- -0.9532108902931213 +- -0.9863560199737549 +- -1.220953106880188 +- -1.3163429498672485 +spec_min: +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.999994277954102 +- -4.942144870758057 +- -4.772783279418945 +- -4.7206244468688965 +- -4.5759992599487305 +- -4.509932518005371 +spk_cond_steps: [] +stop_token_weight: 5.0 +task_cls: training.task.SVC_task.SVCTask +test_ids: [] +test_input_dir: '' +test_num: 0 +test_prefixes: +- test +test_set_name: test +timesteps: 1000 +train_set_name: train +use_crepe: false +use_denoise: false +use_energy_embed: false +use_gt_dur: false +use_gt_f0: false +use_midi: false +use_nsf: true +use_pitch_embed: true +use_pos_embed: true +use_spk_embed: false +use_spk_id: false +use_split_spk_id: false +use_uv: false +use_var_enc: false +use_vec: false +val_check_interval: 1000 +valid_num: 0 +valid_set_name: valid +vocoder: 
network.vocoders.nsf_hifigan.NsfHifiGAN +vocoder_ckpt: checkpoints/nsf_hifigan/model +warmup_updates: 2000 +wav2spec_eps: 1e-6 +weight_decay: 0 +win_size: 2048 +work_dir: checkpoints/Meiko diff --git a/training/dataset/__pycache__/base_dataset.cpython-38.pyc b/training/dataset/__pycache__/base_dataset.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..eb751fa101b07c244ec4327c8daa71c767c84889 Binary files /dev/null and b/training/dataset/__pycache__/base_dataset.cpython-38.pyc differ diff --git a/training/dataset/__pycache__/fs2_utils.cpython-38.pyc b/training/dataset/__pycache__/fs2_utils.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..95fde02cfa553720d97934e331342e0a8d2c8442 Binary files /dev/null and b/training/dataset/__pycache__/fs2_utils.cpython-38.pyc differ diff --git a/training/dataset/base_dataset.py b/training/dataset/base_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..c7d96e0fab43788757bd67c1737b9eab77b98c4d --- /dev/null +++ b/training/dataset/base_dataset.py @@ -0,0 +1,66 @@ +import torch +from utils.hparams import hparams +import numpy as np +import os + +class BaseDataset(torch.utils.data.Dataset): + ''' + Base class for datasets. + 1. *ordered_indices*: + if self.shuffle == True, shuffle the indices; + if self.sort_by_len == True, sort data by length; + 2. *sizes*: + clipped length if "max_frames" is set; + 3. *num_tokens*: + unclipped length. + + Subclasses should define: + 1. *collate*: + take the longest data, pad other data to the same length; + 2. *__getitem__*: + the index function. + ''' + def __init__(self, shuffle): + super().__init__() + self.hparams = hparams + self.shuffle = shuffle + self.sort_by_len = hparams['sort_by_len'] + self.sizes = None + + @property + def _sizes(self): + return self.sizes + + def __getitem__(self, index): + raise NotImplementedError + + def collater(self, samples): + raise NotImplementedError + + def __len__(self): + return len(self._sizes) + + def num_tokens(self, index): + return self.size(index) + + def size(self, index): + """Return an example's size as a float or tuple. This value is used when + filtering a dataset with ``--max-positions``.""" + size = min(self._sizes[index], hparams['max_frames']) + return size + + def ordered_indices(self): + """Return an ordered list of indices. Batches will be constructed based + on this order.""" + if self.shuffle: + indices = np.random.permutation(len(self)) + if self.sort_by_len: + indices = indices[np.argsort(np.array(self._sizes)[indices], kind='mergesort')] + # 先random, 然后稳定排序, 保证排序后同长度的数据顺序是依照random permutation的 (被其随机打乱). 
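+                # i.e. shuffle first, then stable-sort by length: items of equal length keep the order from the random permutation.
+                # Illustrative example: sizes [300, 120, 300] with permutation [2, 0, 1] stable-sorts to [1, 2, 0],
+                # so the short item comes first while the two equal-length items stay in their randomly permuted order.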
+ else: + indices = np.arange(len(self)) + return indices + + @property + def num_workers(self): + return int(os.getenv('NUM_WORKERS', hparams['ds_workers'])) diff --git a/training/dataset/fs2_utils.py b/training/dataset/fs2_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..23833faa9b2858f86d31dadc77930cb78b456509 --- /dev/null +++ b/training/dataset/fs2_utils.py @@ -0,0 +1,178 @@ +import matplotlib + +matplotlib.use('Agg') + +import glob +import importlib +from utils.cwt import get_lf0_cwt +import os +import torch.optim +import torch.utils.data +from utils.indexed_datasets import IndexedDataset +from utils.pitch_utils import norm_interp_f0 +import numpy as np +from training.dataset.base_dataset import BaseDataset +import torch +import torch.optim +import torch.utils.data +import utils +import torch.distributions +from utils.hparams import hparams + + +class FastSpeechDataset(BaseDataset): + def __init__(self, prefix, shuffle=False): + super().__init__(shuffle) + self.data_dir = hparams['binary_data_dir'] + self.prefix = prefix + self.hparams = hparams + self.sizes = np.load(f'{self.data_dir}/{self.prefix}_lengths.npy') + self.indexed_ds = None + # self.name2spk_id={} + + # pitch stats + f0_stats_fn = f'{self.data_dir}/train_f0s_mean_std.npy' + if os.path.exists(f0_stats_fn): + hparams['f0_mean'], hparams['f0_std'] = self.f0_mean, self.f0_std = np.load(f0_stats_fn) + hparams['f0_mean'] = float(hparams['f0_mean']) + hparams['f0_std'] = float(hparams['f0_std']) + else: + hparams['f0_mean'], hparams['f0_std'] = self.f0_mean, self.f0_std = None, None + + if prefix == 'test': + if hparams['test_input_dir'] != '': + self.indexed_ds, self.sizes = self.load_test_inputs(hparams['test_input_dir']) + else: + if hparams['num_test_samples'] > 0: + self.avail_idxs = list(range(hparams['num_test_samples'])) + hparams['test_ids'] + self.sizes = [self.sizes[i] for i in self.avail_idxs] + + if hparams['pitch_type'] == 'cwt': + _, hparams['cwt_scales'] = get_lf0_cwt(np.ones(10)) + + def _get_item(self, index): + if hasattr(self, 'avail_idxs') and self.avail_idxs is not None: + index = self.avail_idxs[index] + if self.indexed_ds is None: + self.indexed_ds = IndexedDataset(f'{self.data_dir}/{self.prefix}') + return self.indexed_ds[index] + + def __getitem__(self, index): + hparams = self.hparams + item = self._get_item(index) + max_frames = hparams['max_frames'] + spec = torch.Tensor(item['mel'])[:max_frames] + energy = (spec.exp() ** 2).sum(-1).sqrt() + mel2ph = torch.LongTensor(item['mel2ph'])[:max_frames] if 'mel2ph' in item else None + f0, uv = norm_interp_f0(item["f0"][:max_frames], hparams) + #phone = torch.LongTensor(item['phone'][:hparams['max_input_tokens']]) + hubert=torch.Tensor(item['hubert'][:hparams['max_input_tokens']]) + pitch = torch.LongTensor(item.get("pitch"))[:max_frames] + # print(item.keys(), item['mel'].shape, spec.shape) + sample = { + "id": index, + "item_name": item['item_name'], + # "text": item['txt'], + # "txt_token": phone, + "hubert":hubert, + "mel": spec, + "pitch": pitch, + "energy": energy, + "f0": f0, + "uv": uv, + "mel2ph": mel2ph, + "mel_nonpadding": spec.abs().sum(-1) > 0, + } + if self.hparams['use_spk_embed']: + sample["spk_embed"] = torch.Tensor(item['spk_embed']) + if self.hparams['use_spk_id']: + sample["spk_id"] = item['spk_id'] + # sample['spk_id'] = 0 + # for key in self.name2spk_id.keys(): + # if key in item['item_name']: + # sample['spk_id'] = self.name2spk_id[key] + # break + #======not used========== + # if 
self.hparams['pitch_type'] == 'cwt': + # cwt_spec = torch.Tensor(item['cwt_spec'])[:max_frames] + # f0_mean = item.get('f0_mean', item.get('cwt_mean')) + # f0_std = item.get('f0_std', item.get('cwt_std')) + # sample.update({"cwt_spec": cwt_spec, "f0_mean": f0_mean, "f0_std": f0_std}) + # elif self.hparams['pitch_type'] == 'ph': + # f0_phlevel_sum = torch.zeros_like(phone).float().scatter_add(0, mel2ph - 1, f0) + # f0_phlevel_num = torch.zeros_like(phone).float().scatter_add( + # 0, mel2ph - 1, torch.ones_like(f0)).clamp_min(1) + # sample["f0_ph"] = f0_phlevel_sum / f0_phlevel_num + return sample + + def collater(self, samples): + if len(samples) == 0: + return {} + id = torch.LongTensor([s['id'] for s in samples]) + item_names = [s['item_name'] for s in samples] + text = [s['text'] for s in samples] + txt_tokens = utils.collate_1d([s['txt_token'] for s in samples], 0) + f0 = utils.collate_1d([s['f0'] for s in samples], 0.0) + pitch = utils.collate_1d([s['pitch'] for s in samples],1) + uv = utils.collate_1d([s['uv'] for s in samples]) + energy = utils.collate_1d([s['energy'] for s in samples], 0.0) + mel2ph = utils.collate_1d([s['mel2ph'] for s in samples], 0.0) \ + if samples[0]['mel2ph'] is not None else None + mels = utils.collate_2d([s['mel'] for s in samples], 0.0) + txt_lengths = torch.LongTensor([s['txt_token'].numel() for s in samples]) + mel_lengths = torch.LongTensor([s['mel'].shape[0] for s in samples]) + + batch = { + 'id': id, + 'item_name': item_names, + 'nsamples': len(samples), + 'text': text, + 'txt_tokens': txt_tokens, + 'txt_lengths': txt_lengths, + 'mels': mels, + 'mel_lengths': mel_lengths, + 'mel2ph': mel2ph, + 'energy': energy, + 'pitch': pitch, + 'f0': f0, + 'uv': uv, + } + + if self.hparams['use_spk_embed']: + spk_embed = torch.stack([s['spk_embed'] for s in samples]) + batch['spk_embed'] = spk_embed + if self.hparams['use_spk_id']: + spk_ids = torch.LongTensor([s['spk_id'] for s in samples]) + batch['spk_ids'] = spk_ids + if self.hparams['pitch_type'] == 'cwt': + cwt_spec = utils.collate_2d([s['cwt_spec'] for s in samples]) + f0_mean = torch.Tensor([s['f0_mean'] for s in samples]) + f0_std = torch.Tensor([s['f0_std'] for s in samples]) + batch.update({'cwt_spec': cwt_spec, 'f0_mean': f0_mean, 'f0_std': f0_std}) + elif self.hparams['pitch_type'] == 'ph': + batch['f0'] = utils.collate_1d([s['f0_ph'] for s in samples]) + + return batch + + def load_test_inputs(self, test_input_dir, spk_id=0): + inp_wav_paths = glob.glob(f'{test_input_dir}/*.wav') + glob.glob(f'{test_input_dir}/*.mp3') + sizes = [] + items = [] + + binarizer_cls = hparams.get("binarizer_cls", 'basics.base_binarizer.BaseBinarizer') + pkg = ".".join(binarizer_cls.split(".")[:-1]) + cls_name = binarizer_cls.split(".")[-1] + binarizer_cls = getattr(importlib.import_module(pkg), cls_name) + binarization_args = hparams['binarization_args'] + from preprocessing.hubertinfer import Hubertencoder + for wav_fn in inp_wav_paths: + item_name = os.path.basename(wav_fn) + ph = txt = tg_fn = '' + wav_fn = wav_fn + encoder = Hubertencoder(hparams['hubert_path']) + + item = binarizer_cls.process_item(item_name, {'wav_fn':wav_fn}, encoder, binarization_args) + print(item) + items.append(item) + sizes.append(item['len']) + return items, sizes diff --git a/training/from huggingface_hub import Repository.py b/training/from huggingface_hub import Repository.py new file mode 100644 index 0000000000000000000000000000000000000000..5c3c8ded414d2af662eb49404e608a8a15462e9a --- /dev/null +++ b/training/from huggingface_hub import 
Repository.py @@ -0,0 +1,2 @@ +from huggingface_hub import Repository +repo = Repository(local_dir="w2v2", clone_from="facebook/wav2vec2-large-960h-lv60") \ No newline at end of file diff --git a/training/pe.py b/training/pe.py new file mode 100644 index 0000000000000000000000000000000000000000..584a518649ba8465ad0c7690b51cce9762592da5 --- /dev/null +++ b/training/pe.py @@ -0,0 +1,155 @@ +import matplotlib +matplotlib.use('Agg') + +import torch +import numpy as np +import os + +from training.dataset.base_dataset import BaseDataset +from training.task.fs2 import FastSpeech2Task +from modules.fastspeech.pe import PitchExtractor +import utils +from utils.indexed_datasets import IndexedDataset +from utils.hparams import hparams +from utils.plot import f0_to_figure +from utils.pitch_utils import norm_interp_f0, denorm_f0 + + +class PeDataset(BaseDataset): + def __init__(self, prefix, shuffle=False): + super().__init__(shuffle) + self.data_dir = hparams['binary_data_dir'] + self.prefix = prefix + self.hparams = hparams + self.sizes = np.load(f'{self.data_dir}/{self.prefix}_lengths.npy') + self.indexed_ds = None + + # pitch stats + f0_stats_fn = f'{self.data_dir}/train_f0s_mean_std.npy' + if os.path.exists(f0_stats_fn): + hparams['f0_mean'], hparams['f0_std'] = self.f0_mean, self.f0_std = np.load(f0_stats_fn) + hparams['f0_mean'] = float(hparams['f0_mean']) + hparams['f0_std'] = float(hparams['f0_std']) + else: + hparams['f0_mean'], hparams['f0_std'] = self.f0_mean, self.f0_std = None, None + + if prefix == 'test': + if hparams['num_test_samples'] > 0: + self.avail_idxs = list(range(hparams['num_test_samples'])) + hparams['test_ids'] + self.sizes = [self.sizes[i] for i in self.avail_idxs] + + def _get_item(self, index): + if hasattr(self, 'avail_idxs') and self.avail_idxs is not None: + index = self.avail_idxs[index] + if self.indexed_ds is None: + self.indexed_ds = IndexedDataset(f'{self.data_dir}/{self.prefix}') + return self.indexed_ds[index] + + def __getitem__(self, index): + hparams = self.hparams + item = self._get_item(index) + max_frames = hparams['max_frames'] + spec = torch.Tensor(item['mel'])[:max_frames] + # mel2ph = torch.LongTensor(item['mel2ph'])[:max_frames] if 'mel2ph' in item else None + f0, uv = norm_interp_f0(item["f0"][:max_frames], hparams) + pitch = torch.LongTensor(item.get("pitch"))[:max_frames] + # print(item.keys(), item['mel'].shape, spec.shape) + sample = { + "id": index, + "item_name": item['item_name'], + "text": item['txt'], + "mel": spec, + "pitch": pitch, + "f0": f0, + "uv": uv, + # "mel2ph": mel2ph, + # "mel_nonpadding": spec.abs().sum(-1) > 0, + } + return sample + + def collater(self, samples): + if len(samples) == 0: + return {} + id = torch.LongTensor([s['id'] for s in samples]) + item_names = [s['item_name'] for s in samples] + text = [s['text'] for s in samples] + f0 = utils.collate_1d([s['f0'] for s in samples], 0.0) + pitch = utils.collate_1d([s['pitch'] for s in samples]) + uv = utils.collate_1d([s['uv'] for s in samples]) + mels = utils.collate_2d([s['mel'] for s in samples], 0.0) + mel_lengths = torch.LongTensor([s['mel'].shape[0] for s in samples]) + # mel2ph = utils.collate_1d([s['mel2ph'] for s in samples], 0.0) \ + # if samples[0]['mel2ph'] is not None else None + # mel_nonpaddings = utils.collate_1d([s['mel_nonpadding'].float() for s in samples], 0.0) + + batch = { + 'id': id, + 'item_name': item_names, + 'nsamples': len(samples), + 'text': text, + 'mels': mels, + 'mel_lengths': mel_lengths, + 'pitch': pitch, + # 'mel2ph': mel2ph, + # 
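For readers without the repo at hand, a hedged sketch of what `utils.collate_1d` / `utils.collate_2d` (used in the collaters above) are assumed to do: pad variable-length tensors up to the batch maximum. The `_sketch` names are illustrative stand-ins, not the real implementations.

import torch

def collate_1d_sketch(tensors, pad_value=0.0):
    # list of [T_i] tensors -> [B, T_max], shorter rows padded with pad_value
    out = tensors[0].new_full((len(tensors), max(t.size(0) for t in tensors)), pad_value)
    for i, t in enumerate(tensors):
        out[i, :t.size(0)] = t
    return out

def collate_2d_sketch(tensors, pad_value=0.0):
    # list of [T_i, C] tensors -> [B, T_max, C]
    out = tensors[0].new_full((len(tensors), max(t.size(0) for t in tensors), tensors[0].size(1)), pad_value)
    for i, t in enumerate(tensors):
        out[i, :t.size(0)] = t
    return out

collate_1d_sketch([torch.ones(3), torch.ones(5)])   # -> tensor of shape [2, 5]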
'mel_nonpaddings': mel_nonpaddings, + 'f0': f0, + 'uv': uv, + } + return batch + + +class PitchExtractionTask(FastSpeech2Task): + def __init__(self): + super().__init__() + self.dataset_cls = PeDataset + + def build_tts_model(self): + self.model = PitchExtractor(conv_layers=hparams['pitch_extractor_conv_layers']) + + # def build_scheduler(self, optimizer): + # return torch.optim.lr_scheduler.StepLR(optimizer, hparams['decay_steps'], gamma=0.5) + def _training_step(self, sample, batch_idx, _): + loss_output = self.run_model(self.model, sample) + total_loss = sum([v for v in loss_output.values() if isinstance(v, torch.Tensor) and v.requires_grad]) + loss_output['batch_size'] = sample['mels'].size()[0] + return total_loss, loss_output + + def validation_step(self, sample, batch_idx): + outputs = {} + outputs['losses'] = {} + outputs['losses'], model_out = self.run_model(self.model, sample, return_output=True, infer=True) + outputs['total_loss'] = sum(outputs['losses'].values()) + outputs['nsamples'] = sample['nsamples'] + outputs = utils.tensors_to_scalars(outputs) + if batch_idx < hparams['num_valid_plots']: + self.plot_pitch(batch_idx, model_out, sample) + return outputs + + def run_model(self, model, sample, return_output=False, infer=False): + f0 = sample['f0'] + uv = sample['uv'] + output = model(sample['mels']) + losses = {} + self.add_pitch_loss(output, sample, losses) + if not return_output: + return losses + else: + return losses, output + + def plot_pitch(self, batch_idx, model_out, sample): + gt_f0 = denorm_f0(sample['f0'], sample['uv'], hparams) + self.logger.experiment.add_figure( + f'f0_{batch_idx}', + f0_to_figure(gt_f0[0], None, model_out['f0_denorm_pred'][0]), + self.global_step) + + def add_pitch_loss(self, output, sample, losses): + # mel2ph = sample['mel2ph'] # [B, T_s] + mel = sample['mels'] + f0 = sample['f0'] + uv = sample['uv'] + # nonpadding = (mel2ph != 0).float() if hparams['pitch_type'] == 'frame' \ + # else (sample['txt_tokens'] != 0).float() + nonpadding = (mel.abs().sum(-1) > 0).float() # sample['mel_nonpaddings'] + # print(nonpadding[0][-8:], nonpadding.shape) + self.add_f0_loss(output['pitch_pred'], f0, uv, losses, nonpadding=nonpadding) \ No newline at end of file diff --git a/training/rgsdgsd.py b/training/rgsdgsd.py new file mode 100644 index 0000000000000000000000000000000000000000..527fd3fc836518096a691c37a2d9a9a578441413 --- /dev/null +++ b/training/rgsdgsd.py @@ -0,0 +1,107 @@ +import pygame +import mido +from gtts import gTTS +from pygame.locals import MOUSEBUTTONDOWN, KEYDOWN, K_RETURN +import time + +# Initialize the click count and the time of the last click +click_count = 0 +last_click_time = 0 + +# Initialize Pygame and create a window for the MIDI editor +pygame.init() +screen = pygame.display.set_mode((640, 480)) + +class MidiNote: + def __init__(self, note, velocity, time, x, y, width, height): + self.note = note + self.velocity = velocity + self.time = time + self.x = x + self.y = y + self.width = width + self.height = height + + +# Create a new MIDI file +mid = mido.MidiFile() + +# Create a new MIDI track +track = mido.MidiTrack() + +# Add the MIDI track to the file +mid.tracks.append(track) + +# Create a function to add lyrics to a specific MIDI note +def add_lyrics(note, lyrics): + # Add the lyrics to the MIDI note + note.text = lyrics + # Update the MIDI file with the new lyrics + mid.save("song.mid") + +# Create a function to get the MIDI note that was clicked on +def get_clicked_note(pos): + # Iterate through the MIDI notes in the file + 
for track in mid.tracks: + for note in track: + if isinstance(note, mido.Message): + # Check if the mouse position is within the bounds of the MIDI note + if pos[0] > note.x and pos[0] < note.x + note.width: + if pos[1] > note.y and pos[1] < note.y + note.height: + return note + return None + +# Create a function to convert the lyrics from Japanese to speech +def speak_lyrics(lyrics): + tts = gTTS(lyrics, lang='ja') + tts.save('lyrics.mp3') + pygame.mixer.music.load('lyrics.mp3') + pygame.mixer.music.play() + + +# Main loop +while True: + for event in pygame.event.get(): + if event.type == MOUSEBUTTONDOWN: + # Increment the click count + click_count += 1 + # Check if the user double-clicked on a MIDI note + if time.time() - last_click_time < 0.5: + # Get the MIDI note that was clicked on + note = get_clicked_note(event.pos) + # Add the lyrics to the MIDI note + add_lyrics(note, lyrics) + # Reset the click count + click_count = 0 + # Update the time of the last click + last_click_time = time.time() + if event.type == KEYDOWN: + if event.key == K_RETURN: + # Get the lyrics from the input field + lyrics = input_field.get_text() + # Convert the lyrics to speech and play them + speak_lyrics(lyrics) + # If the click count is not reset, it means that the user has single-clicked + if click_count == 1: + # Get the position of the single click + pos = pygame.mouse.get_pos() + # Create a new MIDI note with the specified position and length + note = MidiNote(60, 64, 0, event.pos[0], event.pos[1], 100, 100) + note.x = pos[0] + note.y = pos[1] + note.width = 100 + note.height = 100 + # Add the MIDI note to the track + track.append(note) + mid.save("song.mid") + # Reset the click count + click_count = 0 + lyrics = "" + + input_field = pygame.font.Font(None, 32).render(lyrics, True, (0, 0, 0)) + + # Display the input field on the window + screen.blit(input_field, (10, 10)) + pygame.display.flip() + + diff --git a/training/song.mid b/training/song.mid new file mode 100644 index 0000000000000000000000000000000000000000..2e11450ce1dede01b45d22381102e424236fc679 Binary files /dev/null and b/training/song.mid differ diff --git a/training/task/SVC_task.py b/training/task/SVC_task.py new file mode 100644 index 0000000000000000000000000000000000000000..56c66751af4a279adeb76089b45d0ab93b29e6ad --- /dev/null +++ b/training/task/SVC_task.py @@ -0,0 +1,223 @@ +import torch + +import utils +from utils.hparams import hparams +from network.diff.net import DiffNet +from network.diff.diffusion import GaussianDiffusion, OfflineGaussianDiffusion +from training.task.fs2 import FastSpeech2Task +from network.vocoders.base_vocoder import get_vocoder_cls, BaseVocoder +from modules.fastspeech.tts_modules import mel2ph_to_dur + +from network.diff.candidate_decoder import FFT +from utils.pitch_utils import denorm_f0 +from training.dataset.fs2_utils import FastSpeechDataset + +import numpy as np +import os +import torch.nn.functional as F + +DIFF_DECODERS = { + 'wavenet': lambda hp: DiffNet(hp['audio_num_mel_bins']), + 'fft': lambda hp: FFT( + hp['hidden_size'], hp['dec_layers'], hp['dec_ffn_kernel_size'], hp['num_heads']), +} + + +class SVCDataset(FastSpeechDataset): + def collater(self, samples): + from preprocessing.process_pipeline import File2Batch + return File2Batch.processed_input2batch(samples) + + +class SVCTask(FastSpeech2Task): + def __init__(self): + super(SVCTask, self).__init__() + self.dataset_cls = SVCDataset + self.vocoder: BaseVocoder = get_vocoder_cls(hparams)() + + def build_tts_model(self): + # import torch + # 
from tqdm import tqdm + # v_min = torch.ones([80]) * 100 + # v_max = torch.ones([80]) * -100 + # for i, ds in enumerate(tqdm(self.dataset_cls('train'))): + # v_max = torch.max(torch.max(ds['mel'].reshape(-1, 80), 0)[0], v_max) + # v_min = torch.min(torch.min(ds['mel'].reshape(-1, 80), 0)[0], v_min) + # if i % 100 == 0: + # print(i, v_min, v_max) + # print('final', v_min, v_max) + mel_bins = hparams['audio_num_mel_bins'] + self.model = GaussianDiffusion( + phone_encoder=self.phone_encoder, + out_dims=mel_bins, denoise_fn=DIFF_DECODERS[hparams['diff_decoder_type']](hparams), + timesteps=hparams['timesteps'], + K_step=hparams['K_step'], + loss_type=hparams['diff_loss_type'], + spec_min=hparams['spec_min'], spec_max=hparams['spec_max'], + ) + + + def build_optimizer(self, model): + self.optimizer = optimizer = torch.optim.AdamW( + filter(lambda p: p.requires_grad, model.parameters()), + lr=hparams['lr'], + betas=(hparams['optimizer_adam_beta1'], hparams['optimizer_adam_beta2']), + weight_decay=hparams['weight_decay']) + return optimizer + + def run_model(self, model, sample, return_output=False, infer=False): + ''' + steps: + 1. run the full model, calc the main loss + 2. calculate loss for dur_predictor, pitch_predictor, energy_predictor + ''' + hubert = sample['hubert'] # [B, T_t,H] + target = sample['mels'] # [B, T_s, 80] + mel2ph = sample['mel2ph'] # [B, T_s] + f0 = sample['f0'] + uv = sample['uv'] + energy = sample['energy'] + + spk_embed = sample.get('spk_embed') if not hparams['use_spk_id'] else sample.get('spk_ids') + if hparams['pitch_type'] == 'cwt': + # NOTE: this part of script is *isolated* from other scripts, which means + # it may not be compatible with the current version. + pass + # cwt_spec = sample[f'cwt_spec'] + # f0_mean = sample['f0_mean'] + # f0_std = sample['f0_std'] + # sample['f0_cwt'] = f0 = model.cwt2f0_norm(cwt_spec, f0_mean, f0_std, mel2ph) + + # output == ret + # model == src.diff.diffusion.GaussianDiffusion + output = model(hubert, mel2ph=mel2ph, spk_embed=spk_embed, + ref_mels=target, f0=f0, uv=uv, energy=energy, infer=infer) + + losses = {} + if 'diff_loss' in output: + losses['mel'] = output['diff_loss'] + #self.add_dur_loss(output['dur'], mel2ph, txt_tokens, sample['word_boundary'], losses=losses) + # if hparams['use_pitch_embed']: + # self.add_pitch_loss(output, sample, losses) + # if hparams['use_energy_embed']: + # self.add_energy_loss(output['energy_pred'], energy, losses) + if not return_output: + return losses + else: + return losses, output + + def _training_step(self, sample, batch_idx, _): + log_outputs = self.run_model(self.model, sample) + total_loss = sum([v for v in log_outputs.values() if isinstance(v, torch.Tensor) and v.requires_grad]) + log_outputs['batch_size'] = sample['hubert'].size()[0] + log_outputs['lr'] = self.scheduler.get_lr()[0] + return total_loss, log_outputs + + def build_scheduler(self, optimizer): + return torch.optim.lr_scheduler.StepLR(optimizer, hparams['decay_steps'], gamma=0.5) + + def optimizer_step(self, epoch, batch_idx, optimizer, optimizer_idx): + if optimizer is None: + return + optimizer.step() + optimizer.zero_grad() + if self.scheduler is not None: + self.scheduler.step(self.global_step // hparams['accumulate_grad_batches']) + + def validation_step(self, sample, batch_idx): + outputs = {} + hubert = sample['hubert'] # [B, T_t] + + target = sample['mels'] # [B, T_s, 80] + energy = sample['energy'] + # fs2_mel = sample['fs2_mels'] + spk_embed = sample.get('spk_embed') if not hparams['use_spk_id'] else 
sample.get('spk_ids') + mel2ph = sample['mel2ph'] + + outputs['losses'] = {} + + outputs['losses'], model_out = self.run_model(self.model, sample, return_output=True, infer=False) + + outputs['total_loss'] = sum(outputs['losses'].values()) + outputs['nsamples'] = sample['nsamples'] + outputs = utils.tensors_to_scalars(outputs) + if batch_idx < hparams['num_valid_plots']: + model_out = self.model( + hubert, spk_embed=spk_embed, mel2ph=mel2ph, f0=sample['f0'], uv=sample['uv'], energy=energy, ref_mels=None, infer=True + ) + + if hparams.get('pe_enable') is not None and hparams['pe_enable']: + gt_f0 = self.pe(sample['mels'])['f0_denorm_pred'] # pe predict from GT mel + pred_f0 = self.pe(model_out['mel_out'])['f0_denorm_pred'] # pe predict from Pred mel + else: + gt_f0 = denorm_f0(sample['f0'], sample['uv'], hparams) + pred_f0 = model_out.get('f0_denorm') + self.plot_wav(batch_idx, sample['mels'], model_out['mel_out'], is_mel=True, gt_f0=gt_f0, f0=pred_f0) + self.plot_mel(batch_idx, sample['mels'], model_out['mel_out'], name=f'diffmel_{batch_idx}') + #self.plot_mel(batch_idx, sample['mels'], model_out['fs2_mel'], name=f'fs2mel_{batch_idx}') + if hparams['use_pitch_embed']: + self.plot_pitch(batch_idx, sample, model_out) + return outputs + + def add_dur_loss(self, dur_pred, mel2ph, txt_tokens, wdb, losses=None): + """ + the effect of each loss component: + hparams['dur_loss'] : align each phoneme + hparams['lambda_word_dur']: align each word + hparams['lambda_sent_dur']: align each sentence + + :param dur_pred: [B, T], float, log scale + :param mel2ph: [B, T] + :param txt_tokens: [B, T] + :param losses: + :return: + """ + B, T = txt_tokens.shape + nonpadding = (txt_tokens != 0).float() + dur_gt = mel2ph_to_dur(mel2ph, T).float() * nonpadding + is_sil = torch.zeros_like(txt_tokens).bool() + for p in self.sil_ph: + is_sil = is_sil | (txt_tokens == self.phone_encoder.encode(p)[0]) + is_sil = is_sil.float() # [B, T_txt] + + # phone duration loss + if hparams['dur_loss'] == 'mse': + losses['pdur'] = F.mse_loss(dur_pred, (dur_gt + 1).log(), reduction='none') + losses['pdur'] = (losses['pdur'] * nonpadding).sum() / nonpadding.sum() + losses['pdur'] = losses['pdur'] * hparams['lambda_ph_dur'] + dur_pred = (dur_pred.exp() - 1).clamp(min=0) + else: + raise NotImplementedError + + # use linear scale for sent and word duration + if hparams['lambda_word_dur'] > 0: + #idx = F.pad(wdb.cumsum(axis=1), (1, 0))[:, :-1] + idx = wdb.cumsum(axis=1) + # word_dur_g = dur_gt.new_zeros([B, idx.max() + 1]).scatter_(1, idx, midi_dur) # midi_dur can be implied by add gt-ph_dur + word_dur_p = dur_pred.new_zeros([B, idx.max() + 1]).scatter_add(1, idx, dur_pred) + word_dur_g = dur_gt.new_zeros([B, idx.max() + 1]).scatter_add(1, idx, dur_gt) + wdur_loss = F.mse_loss((word_dur_p + 1).log(), (word_dur_g + 1).log(), reduction='none') + word_nonpadding = (word_dur_g > 0).float() + wdur_loss = (wdur_loss * word_nonpadding).sum() / word_nonpadding.sum() + losses['wdur'] = wdur_loss * hparams['lambda_word_dur'] + if hparams['lambda_sent_dur'] > 0: + sent_dur_p = dur_pred.sum(-1) + sent_dur_g = dur_gt.sum(-1) + sdur_loss = F.mse_loss((sent_dur_p + 1).log(), (sent_dur_g + 1).log(), reduction='mean') + losses['sdur'] = sdur_loss.mean() * hparams['lambda_sent_dur'] + + ############ + # validation plots + ############ + def plot_wav(self, batch_idx, gt_wav, wav_out, is_mel=False, gt_f0=None, f0=None, name=None): + gt_wav = gt_wav[0].cpu().numpy() + wav_out = wav_out[0].cpu().numpy() + gt_f0 = gt_f0[0].cpu().numpy() + f0 = 
f0[0].cpu().numpy() + if is_mel: + gt_wav = self.vocoder.spec2wav(gt_wav, f0=gt_f0) + wav_out = self.vocoder.spec2wav(wav_out, f0=f0) + self.logger.experiment.add_audio(f'gt_{batch_idx}', gt_wav, sample_rate=hparams['audio_sample_rate'], global_step=self.global_step) + self.logger.experiment.add_audio(f'wav_{batch_idx}', wav_out, sample_rate=hparams['audio_sample_rate'], global_step=self.global_step) + + diff --git a/training/task/__pycache__/SVC_task.cpython-38.pyc b/training/task/__pycache__/SVC_task.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6765b70454177192d8f4d3da60bf3bedc96b2ce9 Binary files /dev/null and b/training/task/__pycache__/SVC_task.cpython-38.pyc differ diff --git a/training/task/__pycache__/base_task.cpython-38.pyc b/training/task/__pycache__/base_task.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..080fb85834185c5f92f7c71f7c94ac576bd272c9 Binary files /dev/null and b/training/task/__pycache__/base_task.cpython-38.pyc differ diff --git a/training/task/__pycache__/fs2.cpython-38.pyc b/training/task/__pycache__/fs2.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f430f42481655afd814db339415b6962a6da1650 Binary files /dev/null and b/training/task/__pycache__/fs2.cpython-38.pyc differ diff --git a/training/task/__pycache__/tts.cpython-38.pyc b/training/task/__pycache__/tts.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4afe7b2dd3854a00ca49544b163578c5d4f49781 Binary files /dev/null and b/training/task/__pycache__/tts.cpython-38.pyc differ diff --git a/training/task/base_task.py b/training/task/base_task.py new file mode 100644 index 0000000000000000000000000000000000000000..369bd18da4aeceaf45da0f913be6bc3a1948aa4b --- /dev/null +++ b/training/task/base_task.py @@ -0,0 +1,337 @@ +from datetime import datetime +import shutil + +import matplotlib + +matplotlib.use('Agg') + +from utils.hparams import hparams, set_hparams +import random +import sys +import numpy as np +import torch.distributed as dist +from pytorch_lightning.loggers import TensorBoardLogger +from utils.pl_utils import LatestModelCheckpoint, BaseTrainer, data_loader, DDP +from torch import nn +import torch.utils.data +import utils +import logging +import os + +torch.multiprocessing.set_sharing_strategy(os.getenv('TORCH_SHARE_STRATEGY', 'file_system')) + +log_format = '%(asctime)s %(message)s' +logging.basicConfig(stream=sys.stdout, level=logging.INFO, + format=log_format, datefmt='%m/%d %I:%M:%S %p') + +class BaseTask(nn.Module): + ''' + Base class for training tasks. + 1. *load_ckpt*: + load checkpoint; + 2. *training_step*: + record and log the loss; + 3. *optimizer_step*: + run backwards step; + 4. *start*: + load training configs, backup code, log to tensorboard, start training; + 5. *configure_ddp* and *init_ddp_connection*: + start parallel training. + + Subclasses should define: + 1. *build_model*, *build_optimizer*, *build_scheduler*: + how to build the model, the optimizer and the training scheduler; + 2. *_training_step*: + one training step of the model; + 3. *validation_end* and *_validation_end*: + postprocess the validation output. 
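    A minimal illustrative subclass might look like this (ToyTask is hypothetical, not a task in this repo; validation_step / _validation_end are omitted for brevity):

        class ToyTask(BaseTask):
            def build_model(self):
                self.model = torch.nn.Linear(80, 80)
                return self.model

            def build_optimizer(self, model):
                return torch.optim.AdamW(model.parameters(), lr=hparams['lr'])

            def build_scheduler(self, optimizer):
                return torch.optim.lr_scheduler.StepLR(optimizer, 50000, gamma=0.5)

            def _training_step(self, sample, batch_idx, _):
                loss = self.model(sample['mels']).abs().mean()
                return loss, {'toy_loss': loss}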
+ ''' + def __init__(self, *args, **kwargs): + # dataset configs + super(BaseTask, self).__init__(*args, **kwargs) + self.current_epoch = 0 + self.global_step = 0 + self.loaded_optimizer_states_dict = {} + self.trainer = None + self.logger = None + self.on_gpu = False + self.use_dp = False + self.use_ddp = False + self.example_input_array = None + + self.max_tokens = hparams['max_tokens'] + self.max_sentences = hparams['max_sentences'] + self.max_eval_tokens = hparams['max_eval_tokens'] + if self.max_eval_tokens == -1: + hparams['max_eval_tokens'] = self.max_eval_tokens = self.max_tokens + self.max_eval_sentences = hparams['max_eval_sentences'] + if self.max_eval_sentences == -1: + hparams['max_eval_sentences'] = self.max_eval_sentences = self.max_sentences + + self.model = None + self.training_losses_meter = None + + ########### + # Training, validation and testing + ########### + def build_model(self): + raise NotImplementedError + + def load_ckpt(self, ckpt_base_dir, current_model_name=None, model_name='model', force=True, strict=True): + # This function is updated on 2021.12.13 + if current_model_name is None: + current_model_name = model_name + utils.load_ckpt(self.__getattr__(current_model_name), ckpt_base_dir, current_model_name, force, strict) + + def on_epoch_start(self): + self.training_losses_meter = {'total_loss': utils.AvgrageMeter()} + + def _training_step(self, sample, batch_idx, optimizer_idx): + """ + + :param sample: + :param batch_idx: + :return: total loss: torch.Tensor, loss_log: dict + """ + raise NotImplementedError + + def training_step(self, sample, batch_idx, optimizer_idx=-1): + loss_ret = self._training_step(sample, batch_idx, optimizer_idx) + self.opt_idx = optimizer_idx + if loss_ret is None: + return {'loss': None} + total_loss, log_outputs = loss_ret + log_outputs = utils.tensors_to_scalars(log_outputs) + for k, v in log_outputs.items(): + if k not in self.training_losses_meter: + self.training_losses_meter[k] = utils.AvgrageMeter() + if not np.isnan(v): + self.training_losses_meter[k].update(v) + self.training_losses_meter['total_loss'].update(total_loss.item()) + + try: + log_outputs['lr'] = self.scheduler.get_lr() + if isinstance(log_outputs['lr'], list): + log_outputs['lr'] = log_outputs['lr'][0] + except: + pass + + # log_outputs['all_loss'] = total_loss.item() + progress_bar_log = log_outputs + tb_log = {f'tr/{k}': v for k, v in log_outputs.items()} + return { + 'loss': total_loss, + 'progress_bar': progress_bar_log, + 'log': tb_log + } + + def optimizer_step(self, epoch, batch_idx, optimizer, optimizer_idx): + optimizer.step() + optimizer.zero_grad() + if self.scheduler is not None: + self.scheduler.step(self.global_step // hparams['accumulate_grad_batches']) + + def on_epoch_end(self): + loss_outputs = {k: round(v.avg, 4) for k, v in self.training_losses_meter.items()} + print(f"\n==============\n " + f"Epoch {self.current_epoch} ended. Steps: {self.global_step}. 
{loss_outputs}" + f"\n==============\n") + + def validation_step(self, sample, batch_idx): + """ + + :param sample: + :param batch_idx: + :return: output: dict + """ + raise NotImplementedError + + def _validation_end(self, outputs): + """ + + :param outputs: + :return: loss_output: dict + """ + raise NotImplementedError + + def validation_end(self, outputs): + loss_output = self._validation_end(outputs) + print(f"\n==============\n " + f"valid results: {loss_output}" + f"\n==============\n") + return { + 'log': {f'val/{k}': v for k, v in loss_output.items()}, + 'val_loss': loss_output['total_loss'] + } + + def build_scheduler(self, optimizer): + raise NotImplementedError + + def build_optimizer(self, model): + raise NotImplementedError + + def configure_optimizers(self): + optm = self.build_optimizer(self.model) + self.scheduler = self.build_scheduler(optm) + return [optm] + + def test_start(self): + pass + + def test_step(self, sample, batch_idx): + return self.validation_step(sample, batch_idx) + + def test_end(self, outputs): + return self.validation_end(outputs) + + ########### + # Running configuration + ########### + + @classmethod + def start(cls): + set_hparams() + os.environ['MASTER_PORT'] = str(random.randint(15000, 30000)) + random.seed(hparams['seed']) + np.random.seed(hparams['seed']) + task = cls() + work_dir = hparams['work_dir'] + trainer = BaseTrainer(checkpoint_callback=LatestModelCheckpoint( + filepath=work_dir, + verbose=True, + monitor='val_loss', + mode='min', + num_ckpt_keep=hparams['num_ckpt_keep'], + save_best=hparams['save_best'], + period=1 if hparams['save_ckpt'] else 100000 + ), + logger=TensorBoardLogger( + save_dir=work_dir, + name='lightning_logs', + version='lastest' + ), + gradient_clip_val=hparams['clip_grad_norm'], + val_check_interval=hparams['val_check_interval'], + row_log_interval=hparams['log_interval'], + max_updates=hparams['max_updates'], + num_sanity_val_steps=hparams['num_sanity_val_steps'] if not hparams[ + 'validate'] else 10000, + accumulate_grad_batches=hparams['accumulate_grad_batches']) + if not hparams['infer']: # train + # copy_code = input(f'{hparams["save_codes"]} code backup? 
y/n: ') == 'y' + # copy_code = True # backup code every time + # if copy_code: + # t = datetime.now().strftime('%Y%m%d%H%M%S') + # code_dir = f'{work_dir}/codes/{t}' + # # TODO: test filesystem calls + # os.makedirs(code_dir, exist_ok=True) + # # subprocess.check_call(f'mkdir "{code_dir}"', shell=True) + # for c in hparams['save_codes']: + # shutil.copytree(c, code_dir, dirs_exist_ok=True) + # # subprocess.check_call(f'xcopy "{c}" "{code_dir}/" /s /e /y', shell=True) + # print(f"| Copied codes to {code_dir}.") + trainer.checkpoint_callback.task = task + trainer.fit(task) + else: + trainer.test(task) + + def configure_ddp(self, model, device_ids): + model = DDP( + model, + device_ids=device_ids, + find_unused_parameters=True + ) + if dist.get_rank() != 0 and not hparams['debug']: + sys.stdout = open(os.devnull, "w") + sys.stderr = open(os.devnull, "w") + random.seed(hparams['seed']) + np.random.seed(hparams['seed']) + return model + + def training_end(self, *args, **kwargs): + return None + + def init_ddp_connection(self, proc_rank, world_size): + set_hparams(print_hparams=False) + # guarantees unique ports across jobs from same grid search + default_port = 12910 + # if user gave a port number, use that one instead + try: + default_port = os.environ['MASTER_PORT'] + except Exception: + os.environ['MASTER_PORT'] = str(default_port) + + # figure out the root node addr + root_node = '127.0.0.2' + root_node = self.trainer.resolve_root_node_address(root_node) + os.environ['MASTER_ADDR'] = root_node + dist.init_process_group('nccl', rank=proc_rank, world_size=world_size) + + @data_loader + def train_dataloader(self): + return None + + @data_loader + def test_dataloader(self): + return None + + @data_loader + def val_dataloader(self): + return None + + def on_load_checkpoint(self, checkpoint): + pass + + def on_save_checkpoint(self, checkpoint): + pass + + def on_sanity_check_start(self): + pass + + def on_train_start(self): + pass + + def on_train_end(self): + pass + + def on_batch_start(self, batch): + pass + + def on_batch_end(self): + pass + + def on_pre_performance_check(self): + pass + + def on_post_performance_check(self): + pass + + def on_before_zero_grad(self, optimizer): + pass + + def on_after_backward(self): + pass + + def backward(self, loss, optimizer): + loss.backward() + + def grad_norm(self, norm_type): + results = {} + total_norm = 0 + for name, p in self.named_parameters(): + if p.requires_grad: + try: + param_norm = p.grad.data.norm(norm_type) + total_norm += param_norm ** norm_type + norm = param_norm ** (1 / norm_type) + + grad = round(norm.data.cpu().numpy().flatten()[0], 3) + results['grad_{}_norm_{}'.format(norm_type, name)] = grad + except Exception: + # this param had no grad + pass + + total_norm = total_norm ** (1. 
/ norm_type) + grad = round(total_norm.data.cpu().numpy().flatten()[0], 3) + results['grad_{}_norm_total'.format(norm_type)] = grad + return results diff --git a/training/task/fs2.py b/training/task/fs2.py new file mode 100644 index 0000000000000000000000000000000000000000..4e1618297cf869721c283baffa2fd7bda1c89020 --- /dev/null +++ b/training/task/fs2.py @@ -0,0 +1,539 @@ +import matplotlib + +matplotlib.use('Agg') + +from utils import audio +import matplotlib.pyplot as plt +from preprocessing.data_gen_utils import get_pitch_parselmouth +from training.dataset.fs2_utils import FastSpeechDataset +from utils.cwt import cwt2f0 +from utils.pl_utils import data_loader +import os +from multiprocessing.pool import Pool +from tqdm import tqdm +from modules.fastspeech.tts_modules import mel2ph_to_dur +from utils.hparams import hparams +from utils.plot import spec_to_figure, dur_to_figure, f0_to_figure +from utils.pitch_utils import denorm_f0 +from modules.fastspeech.fs2 import FastSpeech2 +from training.task.tts import TtsTask +import torch +import torch.optim +import torch.utils.data +import torch.nn.functional as F +import utils +import torch.distributions +import numpy as np +from modules.commons.ssim import ssim + +class FastSpeech2Task(TtsTask): + def __init__(self): + super(FastSpeech2Task, self).__init__() + self.dataset_cls = FastSpeechDataset + self.mse_loss_fn = torch.nn.MSELoss() + mel_losses = hparams['mel_loss'].split("|") + self.loss_and_lambda = {} + for i, l in enumerate(mel_losses): + if l == '': + continue + if ':' in l: + l, lbd = l.split(":") + lbd = float(lbd) + else: + lbd = 1.0 + self.loss_and_lambda[l] = lbd + print("| Mel losses:", self.loss_and_lambda) + #self.sil_ph = self.phone_encoder.sil_phonemes() + + @data_loader + def train_dataloader(self): + train_dataset = self.dataset_cls(hparams['train_set_name'], shuffle=True) + return self.build_dataloader(train_dataset, True, self.max_tokens, self.max_sentences, + endless=hparams['endless_ds']) + + @data_loader + def val_dataloader(self): + valid_dataset = self.dataset_cls(hparams['valid_set_name'], shuffle=False) + return self.build_dataloader(valid_dataset, False, self.max_eval_tokens, self.max_eval_sentences) + + @data_loader + def test_dataloader(self): + test_dataset = self.dataset_cls(hparams['test_set_name'], shuffle=False) + return self.build_dataloader(test_dataset, False, self.max_eval_tokens, + self.max_eval_sentences, batch_by_size=False) + + def build_tts_model(self): + ''' + rewrite + ''' + return + # self.model = FastSpeech2(self.phone_encoder) + + def build_model(self): + self.build_tts_model() + if hparams['load_ckpt'] != '': + self.load_ckpt(hparams['load_ckpt'], strict=True) + utils.print_arch(self.model) + return self.model + + def _training_step(self, sample, batch_idx, _): + ''' + rewrite + ''' + return + # loss_output = self.run_model(self.model, sample) + # total_loss = sum([v for v in loss_output.values() if isinstance(v, torch.Tensor) and v.requires_grad]) + # loss_output['batch_size'] = sample['txt_tokens'].size()[0] + # return total_loss, loss_output + + def validation_step(self, sample, batch_idx): + ''' + rewrite + ''' + return + # outputs = {} + # outputs['losses'] = {} + # outputs['losses'], model_out = self.run_model(self.model, sample, return_output=True) + # outputs['total_loss'] = sum(outputs['losses'].values()) + # outputs['nsamples'] = sample['nsamples'] + # mel_out = self.model.out2mel(model_out['mel_out']) + # outputs = utils.tensors_to_scalars(outputs) + # if batch_idx < 
hparams['num_valid_plots']: + # self.plot_mel(batch_idx, sample['mels'], mel_out) + # self.plot_dur(batch_idx, sample, model_out) + # if hparams['use_pitch_embed']: + # self.plot_pitch(batch_idx, sample, model_out) + # return outputs + + def _validation_end(self, outputs): + all_losses_meter = { + 'total_loss': utils.AvgrageMeter(), + } + for output in outputs: + n = output['nsamples'] + for k, v in output['losses'].items(): + if k not in all_losses_meter: + all_losses_meter[k] = utils.AvgrageMeter() + all_losses_meter[k].update(v, n) + all_losses_meter['total_loss'].update(output['total_loss'], n) + return {k: round(v.avg, 4) for k, v in all_losses_meter.items()} + + def run_model(self, model, sample, return_output=False): + ''' + rewrite + ''' + return + txt_tokens = sample['txt_tokens'] # [B, T_t] + target = sample['mels'] # [B, T_s, 80] + mel2ph = sample['mel2ph'] # [B, T_s] + f0 = sample['f0'] + uv = sample['uv'] + energy = sample['energy'] + spk_embed = sample.get('spk_embed') if not hparams['use_spk_id'] else sample.get('spk_ids') + if hparams['pitch_type'] == 'cwt': + cwt_spec = sample[f'cwt_spec'] + f0_mean = sample['f0_mean'] + f0_std = sample['f0_std'] + sample['f0_cwt'] = f0 = model.cwt2f0_norm(cwt_spec, f0_mean, f0_std, mel2ph) + + output = model(txt_tokens, mel2ph=mel2ph, spk_embed=spk_embed, + ref_mels=target, f0=f0, uv=uv, energy=energy, infer=False) + + losses = {} + self.add_mel_loss(output['mel_out'], target, losses) + self.add_dur_loss(output['dur'], mel2ph, txt_tokens, losses=losses) + if hparams['use_pitch_embed']: + self.add_pitch_loss(output, sample, losses) + if hparams['use_energy_embed']: + self.add_energy_loss(output['energy_pred'], energy, losses) + if not return_output: + return losses + else: + return losses, output + + ############ + # losses + ############ + def add_mel_loss(self, mel_out, target, losses, postfix='', mel_mix_loss=None): + if mel_mix_loss is None: + for loss_name, lbd in self.loss_and_lambda.items(): + if 'l1' == loss_name: + l = self.l1_loss(mel_out, target) + elif 'mse' == loss_name: + raise NotImplementedError + elif 'ssim' == loss_name: + l = self.ssim_loss(mel_out, target) + elif 'gdl' == loss_name: + raise NotImplementedError + losses[f'{loss_name}{postfix}'] = l * lbd + else: + raise NotImplementedError + + def l1_loss(self, decoder_output, target): + # decoder_output : B x T x n_mel + # target : B x T x n_mel + l1_loss = F.l1_loss(decoder_output, target, reduction='none') + weights = self.weights_nonzero_speech(target) + l1_loss = (l1_loss * weights).sum() / weights.sum() + return l1_loss + + def ssim_loss(self, decoder_output, target, bias=6.0): + # decoder_output : B x T x n_mel + # target : B x T x n_mel + assert decoder_output.shape == target.shape + weights = self.weights_nonzero_speech(target) + decoder_output = decoder_output[:, None] + bias + target = target[:, None] + bias + ssim_loss = 1 - ssim(decoder_output, target, size_average=False) + ssim_loss = (ssim_loss * weights).sum() / weights.sum() + return ssim_loss + + def add_dur_loss(self, dur_pred, mel2ph, txt_tokens, losses=None): + """ + + :param dur_pred: [B, T], float, log scale + :param mel2ph: [B, T] + :param txt_tokens: [B, T] + :param losses: + :return: + """ + B, T = txt_tokens.shape + nonpadding = (txt_tokens != 0).float() + dur_gt = mel2ph_to_dur(mel2ph, T).float() * nonpadding + is_sil = torch.zeros_like(txt_tokens).bool() + for p in self.sil_ph: + is_sil = is_sil | (txt_tokens == self.phone_encoder.encode(p)[0]) + is_sil = is_sil.float() # [B, T_txt] + + # 
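A compact, runnable sketch of the masked mel L1 computed by l1_loss together with weights_nonzero_speech above (padded, all-zero frames get weight 0 so they do not dilute the loss); the function name is illustrative:

import torch
import torch.nn.functional as F

def masked_l1_sketch(decoder_output, target):
    weights = target.abs().sum(-1, keepdim=True).ne(0).float()    # [B, T, 1]: 0 on padded frames
    l1 = F.l1_loss(decoder_output, target, reduction='none')      # [B, T, n_mel]
    return (l1 * weights).sum() / (weights.sum() * target.size(-1))

masked_l1_sketch(torch.randn(2, 10, 80), torch.randn(2, 10, 80))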
phone duration loss + if hparams['dur_loss'] == 'mse': + losses['pdur'] = F.mse_loss(dur_pred, (dur_gt + 1).log(), reduction='none') + losses['pdur'] = (losses['pdur'] * nonpadding).sum() / nonpadding.sum() + dur_pred = (dur_pred.exp() - 1).clamp(min=0) + elif hparams['dur_loss'] == 'mog': + return NotImplementedError + elif hparams['dur_loss'] == 'crf': + losses['pdur'] = -self.model.dur_predictor.crf( + dur_pred, dur_gt.long().clamp(min=0, max=31), mask=nonpadding > 0, reduction='mean') + losses['pdur'] = losses['pdur'] * hparams['lambda_ph_dur'] + + # use linear scale for sent and word duration + if hparams['lambda_word_dur'] > 0: + word_id = (is_sil.cumsum(-1) * (1 - is_sil)).long() + word_dur_p = dur_pred.new_zeros([B, word_id.max() + 1]).scatter_add(1, word_id, dur_pred)[:, 1:] + word_dur_g = dur_gt.new_zeros([B, word_id.max() + 1]).scatter_add(1, word_id, dur_gt)[:, 1:] + wdur_loss = F.mse_loss((word_dur_p + 1).log(), (word_dur_g + 1).log(), reduction='none') + word_nonpadding = (word_dur_g > 0).float() + wdur_loss = (wdur_loss * word_nonpadding).sum() / word_nonpadding.sum() + losses['wdur'] = wdur_loss * hparams['lambda_word_dur'] + if hparams['lambda_sent_dur'] > 0: + sent_dur_p = dur_pred.sum(-1) + sent_dur_g = dur_gt.sum(-1) + sdur_loss = F.mse_loss((sent_dur_p + 1).log(), (sent_dur_g + 1).log(), reduction='mean') + losses['sdur'] = sdur_loss.mean() * hparams['lambda_sent_dur'] + + def add_pitch_loss(self, output, sample, losses): + if hparams['pitch_type'] == 'ph': + nonpadding = (sample['txt_tokens'] != 0).float() + pitch_loss_fn = F.l1_loss if hparams['pitch_loss'] == 'l1' else F.mse_loss + losses['f0'] = (pitch_loss_fn(output['pitch_pred'][:, :, 0], sample['f0'], + reduction='none') * nonpadding).sum() \ + / nonpadding.sum() * hparams['lambda_f0'] + return + mel2ph = sample['mel2ph'] # [B, T_s] + f0 = sample['f0'] + uv = sample['uv'] + nonpadding = (mel2ph != 0).float() + if hparams['pitch_type'] == 'cwt': + cwt_spec = sample[f'cwt_spec'] + f0_mean = sample['f0_mean'] + f0_std = sample['f0_std'] + cwt_pred = output['cwt'][:, :, :10] + f0_mean_pred = output['f0_mean'] + f0_std_pred = output['f0_std'] + losses['C'] = self.cwt_loss(cwt_pred, cwt_spec) * hparams['lambda_f0'] + if hparams['use_uv']: + assert output['cwt'].shape[-1] == 11 + uv_pred = output['cwt'][:, :, -1] + losses['uv'] = (F.binary_cross_entropy_with_logits(uv_pred, uv, reduction='none') * nonpadding) \ + .sum() / nonpadding.sum() * hparams['lambda_uv'] + losses['f0_mean'] = F.l1_loss(f0_mean_pred, f0_mean) * hparams['lambda_f0'] + losses['f0_std'] = F.l1_loss(f0_std_pred, f0_std) * hparams['lambda_f0'] + if hparams['cwt_add_f0_loss']: + f0_cwt_ = self.model.cwt2f0_norm(cwt_pred, f0_mean_pred, f0_std_pred, mel2ph) + self.add_f0_loss(f0_cwt_[:, :, None], f0, uv, losses, nonpadding=nonpadding) + elif hparams['pitch_type'] == 'frame': + self.add_f0_loss(output['pitch_pred'], f0, uv, losses, nonpadding=nonpadding) + + def add_f0_loss(self, p_pred, f0, uv, losses, nonpadding): + assert p_pred[..., 0].shape == f0.shape + if hparams['use_uv']: + assert p_pred[..., 1].shape == uv.shape + losses['uv'] = (F.binary_cross_entropy_with_logits( + p_pred[:, :, 1], uv, reduction='none') * nonpadding).sum() \ + / nonpadding.sum() * hparams['lambda_uv'] + nonpadding = nonpadding * (uv == 0).float() + + f0_pred = p_pred[:, :, 0] + if hparams['pitch_loss'] in ['l1', 'l2']: + pitch_loss_fn = F.l1_loss if hparams['pitch_loss'] == 'l1' else F.mse_loss + losses['f0'] = (pitch_loss_fn(f0_pred, f0, reduction='none') * nonpadding).sum() 
\ + / nonpadding.sum() * hparams['lambda_f0'] + elif hparams['pitch_loss'] == 'ssim': + return NotImplementedError + + def cwt_loss(self, cwt_p, cwt_g): + if hparams['cwt_loss'] == 'l1': + return F.l1_loss(cwt_p, cwt_g) + if hparams['cwt_loss'] == 'l2': + return F.mse_loss(cwt_p, cwt_g) + if hparams['cwt_loss'] == 'ssim': + return self.ssim_loss(cwt_p, cwt_g, 20) + + def add_energy_loss(self, energy_pred, energy, losses): + nonpadding = (energy != 0).float() + loss = (F.mse_loss(energy_pred, energy, reduction='none') * nonpadding).sum() / nonpadding.sum() + loss = loss * hparams['lambda_energy'] + losses['e'] = loss + + + ############ + # validation plots + ############ + def plot_mel(self, batch_idx, spec, spec_out, name=None): + spec_cat = torch.cat([spec, spec_out], -1) + name = f'mel_{batch_idx}' if name is None else name + vmin = hparams['mel_vmin'] + vmax = hparams['mel_vmax'] + self.logger.experiment.add_figure(name, spec_to_figure(spec_cat[0], vmin, vmax), self.global_step) + + def plot_dur(self, batch_idx, sample, model_out): + T_txt = sample['txt_tokens'].shape[1] + dur_gt = mel2ph_to_dur(sample['mel2ph'], T_txt)[0] + dur_pred = self.model.dur_predictor.out2dur(model_out['dur']).float() + txt = self.phone_encoder.decode(sample['txt_tokens'][0].cpu().numpy()) + txt = txt.split(" ") + self.logger.experiment.add_figure( + f'dur_{batch_idx}', dur_to_figure(dur_gt, dur_pred, txt), self.global_step) + + def plot_pitch(self, batch_idx, sample, model_out): + f0 = sample['f0'] + if hparams['pitch_type'] == 'ph': + mel2ph = sample['mel2ph'] + f0 = self.expand_f0_ph(f0, mel2ph) + f0_pred = self.expand_f0_ph(model_out['pitch_pred'][:, :, 0], mel2ph) + self.logger.experiment.add_figure( + f'f0_{batch_idx}', f0_to_figure(f0[0], None, f0_pred[0]), self.global_step) + return + f0 = denorm_f0(f0, sample['uv'], hparams) + if hparams['pitch_type'] == 'cwt': + # cwt + cwt_out = model_out['cwt'] + cwt_spec = cwt_out[:, :, :10] + cwt = torch.cat([cwt_spec, sample['cwt_spec']], -1) + self.logger.experiment.add_figure(f'cwt_{batch_idx}', spec_to_figure(cwt[0]), self.global_step) + # f0 + f0_pred = cwt2f0(cwt_spec, model_out['f0_mean'], model_out['f0_std'], hparams['cwt_scales']) + if hparams['use_uv']: + assert cwt_out.shape[-1] == 11 + uv_pred = cwt_out[:, :, -1] > 0 + f0_pred[uv_pred > 0] = 0 + f0_cwt = denorm_f0(sample['f0_cwt'], sample['uv'], hparams) + self.logger.experiment.add_figure( + f'f0_{batch_idx}', f0_to_figure(f0[0], f0_cwt[0], f0_pred[0]), self.global_step) + elif hparams['pitch_type'] == 'frame': + # f0 + #uv_pred = model_out['pitch_pred'][:, :, 0] > 0 + pitch_pred = denorm_f0(model_out['pitch_pred'][:, :, 0], sample['uv'], hparams) + self.logger.experiment.add_figure( + f'f0_{batch_idx}', f0_to_figure(f0[0], None, pitch_pred[0]), self.global_step) + + ############ + # infer + ############ + def test_step(self, sample, batch_idx): + spk_embed = sample.get('spk_embed') if not hparams['use_spk_id'] else sample.get('spk_ids') + hubert = sample['hubert'] + mel2ph, uv, f0 = None, None, None + ref_mels = None + if hparams['profile_infer']: + pass + else: + # if hparams['use_gt_dur']: + mel2ph = sample['mel2ph'] + #if hparams['use_gt_f0']: + f0 = sample['f0'] + uv = sample['uv'] + #print('Here using gt f0!!') + if hparams.get('use_midi') is not None and hparams['use_midi']: + outputs = self.model( + hubert, spk_embed=spk_embed, mel2ph=mel2ph, f0=f0, uv=uv, ref_mels=ref_mels, infer=True) + else: + outputs = self.model( + hubert, spk_embed=spk_embed, mel2ph=mel2ph, f0=f0, uv=uv, 
ref_mels=ref_mels, infer=True) + sample['outputs'] = self.model.out2mel(outputs['mel_out']) + sample['mel2ph_pred'] = outputs['mel2ph'] + if hparams.get('pe_enable') is not None and hparams['pe_enable']: + sample['f0'] = self.pe(sample['mels'])['f0_denorm_pred'] # pe predict from GT mel + sample['f0_pred'] = self.pe(sample['outputs'])['f0_denorm_pred'] # pe predict from Pred mel + else: + sample['f0'] = denorm_f0(sample['f0'], sample['uv'], hparams) + sample['f0_pred'] = outputs.get('f0_denorm') + return self.after_infer(sample) + + def after_infer(self, predictions): + if self.saving_result_pool is None and not hparams['profile_infer']: + self.saving_result_pool = Pool(min(int(os.getenv('N_PROC', os.cpu_count())), 16)) + self.saving_results_futures = [] + predictions = utils.unpack_dict_to_list(predictions) + t = tqdm(predictions) + for num_predictions, prediction in enumerate(t): + for k, v in prediction.items(): + if type(v) is torch.Tensor: + prediction[k] = v.cpu().numpy() + + item_name = prediction.get('item_name') + #text = prediction.get('text').replace(":", "%3A")[:80] + + # remove paddings + mel_gt = prediction["mels"] + mel_gt_mask = np.abs(mel_gt).sum(-1) > 0 + mel_gt = mel_gt[mel_gt_mask] + mel2ph_gt = prediction.get("mel2ph") + mel2ph_gt = mel2ph_gt[mel_gt_mask] if mel2ph_gt is not None else None + mel_pred = prediction["outputs"] + mel_pred_mask = np.abs(mel_pred).sum(-1) > 0 + mel_pred = mel_pred[mel_pred_mask] + mel_gt = np.clip(mel_gt, hparams['mel_vmin'], hparams['mel_vmax']) + mel_pred = np.clip(mel_pred, hparams['mel_vmin'], hparams['mel_vmax']) + + mel2ph_pred = prediction.get("mel2ph_pred") + if mel2ph_pred is not None: + if len(mel2ph_pred) > len(mel_pred_mask): + mel2ph_pred = mel2ph_pred[:len(mel_pred_mask)] + mel2ph_pred = mel2ph_pred[mel_pred_mask] + + f0_gt = prediction.get("f0") + f0_pred = f0_gt#prediction.get("f0_pred") + if f0_pred is not None: + f0_gt = f0_gt[mel_gt_mask] + if len(f0_pred) > len(mel_pred_mask): + f0_pred = f0_pred[:len(mel_pred_mask)] + f0_pred = f0_pred[mel_pred_mask] + text=None + str_phs = None + # if self.phone_encoder is not None and 'txt_tokens' in prediction: + # str_phs = self.phone_encoder.decode(prediction['txt_tokens'], strip_padding=True) + # def resize2d(source, target_len): + # source[source<0.001] = np.nan + # target = np.interp(np.linspace(0, len(source)-1, num=target_len,endpoint=True), np.arange(0, len(source)), source) + # return np.nan_to_num(target) + # def resize3d(source, target_len): + # newsource=[] + # for i in range(source.shape[1]): + # newsource.append(resize2d(source[:,i],target_len)) + # return np.array(newsource).transpose() + # print(mel_pred.shape) + # print(f0_pred.shape) + # mel_pred=resize3d(mel_pred,int(mel_pred.shape[0]/44100*24000)) + # f0_pred=resize2d(f0_pred,int(f0_pred.shape[0]/44100*24000)) + # print(mel_pred.shape) + # print(f0_pred.shape) + gen_dir = os.path.join(hparams['work_dir'], + f'generated_{self.trainer.global_step}_{hparams["gen_dir_name"]}') + wav_pred = self.vocoder.spec2wav(mel_pred, f0=f0_pred) + if not hparams['profile_infer']: + os.makedirs(gen_dir, exist_ok=True) + os.makedirs(f'{gen_dir}/wavs', exist_ok=True) + os.makedirs(f'{gen_dir}/plot', exist_ok=True) + os.makedirs(os.path.join(hparams['work_dir'], 'P_mels_npy'), exist_ok=True) + os.makedirs(os.path.join(hparams['work_dir'], 'G_mels_npy'), exist_ok=True) + self.saving_results_futures.append( + self.saving_result_pool.apply_async(self.save_result, args=[ + wav_pred, mel_pred, 'P', item_name, text, gen_dir, str_phs, 
mel2ph_pred, f0_gt, f0_pred])) + + if mel_gt is not None and hparams['save_gt']: + wav_gt = self.vocoder.spec2wav(mel_gt, f0=f0_gt) + self.saving_results_futures.append( + self.saving_result_pool.apply_async(self.save_result, args=[ + wav_gt, mel_gt, 'G', item_name, text, gen_dir, str_phs, mel2ph_gt, f0_gt, f0_pred])) + if hparams['save_f0']: + import matplotlib.pyplot as plt + # f0_pred_, _ = get_pitch(wav_pred, mel_pred, hparams) + f0_pred_ = f0_pred + f0_gt_, _ = get_pitch_parselmouth(wav_gt, mel_gt, hparams) + fig = plt.figure() + plt.plot(f0_pred_, label=r'$f0_P$') + plt.plot(f0_gt_, label=r'$f0_G$') + if hparams.get('pe_enable') is not None and hparams['pe_enable']: + # f0_midi = prediction.get("f0_midi") + # f0_midi = f0_midi[mel_gt_mask] + # plt.plot(f0_midi, label=r'$f0_M$') + pass + plt.legend() + plt.tight_layout() + plt.savefig(f'{gen_dir}/plot/[F0][{item_name}]{text}.png', format='png') + plt.close(fig) + + t.set_description( + f"Pred_shape: {mel_pred.shape}, gt_shape: {mel_gt.shape}") + else: + if 'gen_wav_time' not in self.stats: + self.stats['gen_wav_time'] = 0 + self.stats['gen_wav_time'] += len(wav_pred) / hparams['audio_sample_rate'] + print('gen_wav_time: ', self.stats['gen_wav_time']) + + return {} + + @staticmethod + def save_result(wav_out, mel, prefix, item_name, text, gen_dir, str_phs=None, mel2ph=None, gt_f0=None, pred_f0=None): + item_name = item_name.replace('/', '-') + base_fn = f'[{item_name}][{prefix}]' + + if text is not None: + base_fn += text + base_fn += ('-' + hparams['exp_name']) + np.save(os.path.join(hparams['work_dir'], f'{prefix}_mels_npy', item_name), mel) + audio.save_wav(wav_out, f'{gen_dir}/wavs/{base_fn}.wav', 24000,#hparams['audio_sample_rate'], + norm=hparams['out_wav_norm']) + fig = plt.figure(figsize=(14, 10)) + spec_vmin = hparams['mel_vmin'] + spec_vmax = hparams['mel_vmax'] + heatmap = plt.pcolor(mel.T, vmin=spec_vmin, vmax=spec_vmax) + fig.colorbar(heatmap) + if hparams.get('pe_enable') is not None and hparams['pe_enable']: + gt_f0 = (gt_f0 - 100) / (800 - 100) * 80 * (gt_f0 > 0) + pred_f0 = (pred_f0 - 100) / (800 - 100) * 80 * (pred_f0 > 0) + plt.plot(pred_f0, c='white', linewidth=1, alpha=0.6) + plt.plot(gt_f0, c='red', linewidth=1, alpha=0.6) + else: + f0, _ = get_pitch_parselmouth(wav_out, mel, hparams) + f0 = (f0 - 100) / (800 - 100) * 80 * (f0 > 0) + plt.plot(f0, c='white', linewidth=1, alpha=0.6) + if mel2ph is not None and str_phs is not None: + decoded_txt = str_phs.split(" ") + dur = mel2ph_to_dur(torch.LongTensor(mel2ph)[None, :], len(decoded_txt))[0].numpy() + dur = [0] + list(np.cumsum(dur)) + for i in range(len(dur) - 1): + shift = (i % 20) + 1 + plt.text(dur[i], shift, decoded_txt[i]) + plt.hlines(shift, dur[i], dur[i + 1], colors='b' if decoded_txt[i] != '|' else 'black') + plt.vlines(dur[i], 0, 5, colors='b' if decoded_txt[i] != '|' else 'black', + alpha=1, linewidth=1) + plt.tight_layout() + plt.savefig(f'{gen_dir}/plot/{base_fn}.png', format='png', dpi=1000) + plt.close(fig) + + ############## + # utils + ############## + @staticmethod + def expand_f0_ph(f0, mel2ph): + f0 = denorm_f0(f0, None, hparams) + f0 = F.pad(f0, [1, 0]) + f0 = torch.gather(f0, 1, mel2ph) # [B, T_mel] + return f0 + + +if __name__ == '__main__': + FastSpeech2Task.start() diff --git a/training/task/tts.py b/training/task/tts.py new file mode 100644 index 0000000000000000000000000000000000000000..acf360a510a8cfc290da8f08269eda051cc77d0f --- /dev/null +++ b/training/task/tts.py @@ -0,0 +1,131 @@ +from multiprocessing.pool import Pool + +import 
matplotlib + +from utils.pl_utils import data_loader +from utils.training_utils import RSQRTSchedule +from network.vocoders.base_vocoder import get_vocoder_cls, BaseVocoder +from modules.fastspeech.pe import PitchExtractor + +matplotlib.use('Agg') +import os +import numpy as np +from tqdm import tqdm +import torch.distributed as dist + +from training.task.base_task import BaseTask +from utils.hparams import hparams +from utils.text_encoder import TokenTextEncoder +import json +from preprocessing.hubertinfer import Hubertencoder +import torch +import torch.optim +import torch.utils.data +import utils + + + +class TtsTask(BaseTask): + def __init__(self, *args, **kwargs): + self.vocoder = None + self.phone_encoder = Hubertencoder(hparams['hubert_path']) + # self.padding_idx = self.phone_encoder.pad() + # self.eos_idx = self.phone_encoder.eos() + # self.seg_idx = self.phone_encoder.seg() + self.saving_result_pool = None + self.saving_results_futures = None + self.stats = {} + super().__init__(*args, **kwargs) + + def build_scheduler(self, optimizer): + return RSQRTSchedule(optimizer) + + def build_optimizer(self, model): + self.optimizer = optimizer = torch.optim.AdamW( + model.parameters(), + lr=hparams['lr']) + return optimizer + + def build_dataloader(self, dataset, shuffle, max_tokens=None, max_sentences=None, + required_batch_size_multiple=-1, endless=False, batch_by_size=True): + devices_cnt = torch.cuda.device_count() + if devices_cnt == 0: + devices_cnt = 1 + if required_batch_size_multiple == -1: + required_batch_size_multiple = devices_cnt + + def shuffle_batches(batches): + np.random.shuffle(batches) + return batches + + if max_tokens is not None: + max_tokens *= devices_cnt + if max_sentences is not None: + max_sentences *= devices_cnt + indices = dataset.ordered_indices() + if batch_by_size: + batch_sampler = utils.batch_by_size( + indices, dataset.num_tokens, max_tokens=max_tokens, max_sentences=max_sentences, + required_batch_size_multiple=required_batch_size_multiple, + ) + else: + batch_sampler = [] + for i in range(0, len(indices), max_sentences): + batch_sampler.append(indices[i:i + max_sentences]) + + if shuffle: + batches = shuffle_batches(list(batch_sampler)) + if endless: + batches = [b for _ in range(1000) for b in shuffle_batches(list(batch_sampler))] + else: + batches = batch_sampler + if endless: + batches = [b for _ in range(1000) for b in batches] + num_workers = dataset.num_workers + if self.trainer.use_ddp: + num_replicas = dist.get_world_size() + rank = dist.get_rank() + batches = [x[rank::num_replicas] for x in batches if len(x) % num_replicas == 0] + return torch.utils.data.DataLoader(dataset, + collate_fn=dataset.collater, + batch_sampler=batches, + num_workers=num_workers, + pin_memory=False) + + # def build_phone_encoder(self, data_dir): + # phone_list_file = os.path.join(data_dir, 'phone_set.json') + + # phone_list = json.load(open(phone_list_file, encoding='utf-8')) + # return TokenTextEncoder(None, vocab_list=phone_list, replace_oov=',') + + def build_optimizer(self, model): + self.optimizer = optimizer = torch.optim.AdamW( + model.parameters(), + lr=hparams['lr']) + return optimizer + + def test_start(self): + self.saving_result_pool = Pool(8) + self.saving_results_futures = [] + self.vocoder: BaseVocoder = get_vocoder_cls(hparams)() + if hparams.get('pe_enable') is not None and hparams['pe_enable']: + self.pe = PitchExtractor().cuda() + utils.load_ckpt(self.pe, hparams['pe_ckpt'], 'model', strict=True) + self.pe.eval() + def test_end(self, outputs): + 
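A hedged sketch of the batching rule that `utils.batch_by_size` (called in build_dataloader above) is assumed to follow: walk the ordered indices and start a new batch whenever max_tokens (counted as longest-item length times batch size, i.e. including padding) or max_sentences would be exceeded; required_batch_size_multiple is ignored here for brevity.

def batch_by_size_sketch(indices, num_tokens_fn, max_tokens=None, max_sentences=None):
    batches, batch, batch_max_len = [], [], 0
    for idx in indices:
        longest = max(batch_max_len, num_tokens_fn(idx))
        over_tokens = max_tokens is not None and longest * (len(batch) + 1) > max_tokens
        over_sents = max_sentences is not None and len(batch) + 1 > max_sentences
        if batch and (over_tokens or over_sents):
            batches.append(batch)
            batch, longest = [], num_tokens_fn(idx)
        batch.append(idx)
        batch_max_len = longest
    if batch:
        batches.append(batch)
    return batches

batch_by_size_sketch(list(range(6)), lambda i: 100 * (i + 1), max_tokens=400)   # -> [[0, 1], [2], [3], [4], [5]]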
self.saving_result_pool.close() + [f.get() for f in tqdm(self.saving_results_futures)] + self.saving_result_pool.join() + return {} + + ########## + # utils + ########## + def weights_nonzero_speech(self, target): + # target : B x T x mel + # Assign weight 1.0 to all labels except for padding (id=0). + dim = target.size(-1) + return target.abs().sum(-1, keepdim=True).ne(0).float().repeat(1, 1, dim) + +if __name__ == '__main__': + TtsTask.start() diff --git a/training/train_pipeline.py b/training/train_pipeline.py new file mode 100644 index 0000000000000000000000000000000000000000..a7f9f99f64ed0dcff0001034a6932a2c623ce706 --- /dev/null +++ b/training/train_pipeline.py @@ -0,0 +1,238 @@ +from utils.hparams import hparams +import torch +from torch.nn import functional as F +from utils.pitch_utils import f0_to_coarse, denorm_f0, norm_f0 + +class Batch2Loss: + ''' + pipeline: batch -> insert1 -> module1 -> insert2 -> module2 -> insert3 -> module3 -> insert4 -> module4 -> loss + ''' + + @staticmethod + def insert1(pitch_midi, midi_dur, is_slur, # variables + midi_embed, midi_dur_layer, is_slur_embed): # modules + ''' + add embeddings for midi, midi_dur, slur + ''' + midi_embedding = midi_embed(pitch_midi) + midi_dur_embedding, slur_embedding = 0, 0 + if midi_dur is not None: + midi_dur_embedding = midi_dur_layer(midi_dur[:, :, None]) # [B, T, 1] -> [B, T, H] + if is_slur is not None: + slur_embedding = is_slur_embed(is_slur) + return midi_embedding, midi_dur_embedding, slur_embedding + + @staticmethod + def module1(fs2_encoder, # modules + txt_tokens, midi_embedding, midi_dur_embedding, slur_embedding): # variables + ''' + get *encoder_out* == fs2_encoder(*txt_tokens*, some embeddings) + ''' + encoder_out = fs2_encoder(txt_tokens, midi_embedding, midi_dur_embedding, slur_embedding) + return encoder_out + + @staticmethod + def insert2(encoder_out, spk_embed_id, spk_embed_dur_id, spk_embed_f0_id, src_nonpadding, # variables + spk_embed_proj): # modules + ''' + 1. add embeddings for pspk, spk_dur, sk_f0 + 2. get *dur_inp* ~= *encoder_out* + *spk_embed_dur* + ''' + # add ref style embed + # Not implemented + # variance encoder + var_embed = 0 + + # encoder_out_dur denotes encoder outputs for duration predictor + # in speech adaptation, duration predictor use old speaker embedding + if hparams['use_spk_embed']: + spk_embed_dur = spk_embed_f0 = spk_embed = spk_embed_proj(spk_embed_id)[:, None, :] + elif hparams['use_spk_id']: + if spk_embed_dur_id is None: + spk_embed_dur_id = spk_embed_id + if spk_embed_f0_id is None: + spk_embed_f0_id = spk_embed_id + spk_embed = spk_embed_proj(spk_embed_id)[:, None, :] + spk_embed_dur = spk_embed_f0 = spk_embed + if hparams['use_split_spk_id']: + spk_embed_dur = spk_embed_dur(spk_embed_dur_id)[:, None, :] + spk_embed_f0 = spk_embed_f0(spk_embed_f0_id)[:, None, :] + else: + spk_embed_dur = spk_embed_f0 = spk_embed = 0 + + # add dur + dur_inp = (encoder_out + var_embed + spk_embed_dur) * src_nonpadding + return var_embed, spk_embed, spk_embed_dur, spk_embed_f0, dur_inp + + @staticmethod + def module2(dur_predictor, length_regulator, # modules + dur_input, mel2ph, txt_tokens, all_vowel_tokens, ret, midi_dur=None): # variables + ''' + 1. get *dur* ~= dur_predictor(*dur_inp*) + 2. 
(mel2ph is None): get *mel2ph* ~= length_regulater(*dur*) + ''' + src_padding = (txt_tokens == 0) + dur_input = dur_input.detach() + hparams['predictor_grad'] * (dur_input - dur_input.detach()) + + if mel2ph is None: + dur, xs = dur_predictor.inference(dur_input, src_padding) + ret['dur'] = xs + dur = xs.squeeze(-1).exp() - 1.0 + for i in range(len(dur)): + for j in range(len(dur[i])): + if txt_tokens[i,j] in all_vowel_tokens: + if j < len(dur[i])-1 and txt_tokens[i,j+1] not in all_vowel_tokens: + dur[i,j] = midi_dur[i,j] - dur[i,j+1] + if dur[i,j] < 0: + dur[i,j] = 0 + dur[i,j+1] = midi_dur[i,j] + else: + dur[i,j]=midi_dur[i,j] + dur[:,0] = dur[:,0] + 0.5 + dur_acc = F.pad(torch.round(torch.cumsum(dur, axis=1)), (1,0)) + dur = torch.clamp(dur_acc[:,1:]-dur_acc[:,:-1], min=0).long() + ret['dur_choice'] = dur + mel2ph = length_regulator(dur, src_padding).detach() + else: + ret['dur'] = dur_predictor(dur_input, src_padding) + ret['mel2ph'] = mel2ph + + return mel2ph + + @staticmethod + def insert3(encoder_out, mel2ph, var_embed, spk_embed_f0, src_nonpadding, tgt_nonpadding): # variables + ''' + 1. get *decoder_inp* ~= gather *encoder_out* according to *mel2ph* + 2. get *pitch_inp* ~= *decoder_inp* + *spk_embed_f0* + 3. get *pitch_inp_ph* ~= *encoder_out* + *spk_embed_f0* + ''' + decoder_inp = F.pad(encoder_out, [0, 0, 1, 0]) + mel2ph_ = mel2ph[..., None].repeat([1, 1, encoder_out.shape[-1]]) + decoder_inp = decoder_inp_origin = torch.gather(decoder_inp, 1, mel2ph_) # [B, T, H] + + pitch_inp = (decoder_inp_origin + var_embed + spk_embed_f0) * tgt_nonpadding + pitch_inp_ph = (encoder_out + var_embed + spk_embed_f0) * src_nonpadding + return decoder_inp, pitch_inp, pitch_inp_ph + + @staticmethod + def module3(pitch_predictor, pitch_embed, energy_predictor, energy_embed, # modules + pitch_inp, pitch_inp_ph, f0, uv, energy, mel2ph, is_training, ret): # variables + ''' + 1. get *ret['pitch_pred']*, *ret['energy_pred']* ~= pitch_predictor(*pitch_inp*), energy_predictor(*pitch_inp*) + 2. get *pitch_embedding* ~= pitch_embed(f0_to_coarse(denorm_f0(*f0* or *pitch_pred*)) + 3. get *energy_embedding* ~= energy_embed(energy_to_coarse(*energy* or *energy_pred*)) + ''' + def add_pitch(decoder_inp, f0, uv, mel2ph, ret, encoder_out=None): + if hparams['pitch_type'] == 'ph': + pitch_pred_inp = encoder_out.detach() + hparams['predictor_grad'] * (encoder_out - encoder_out.detach()) + pitch_padding = (encoder_out.sum().abs() == 0) + ret['pitch_pred'] = pitch_pred = pitch_predictor(pitch_pred_inp) + if f0 is None: + f0 = pitch_pred[:, :, 0] + ret['f0_denorm'] = f0_denorm = denorm_f0(f0, None, hparams, pitch_padding=pitch_padding) + pitch = f0_to_coarse(f0_denorm) # start from 0 [B, T_txt] + pitch = F.pad(pitch, [1, 0]) + pitch = torch.gather(pitch, 1, mel2ph) # [B, T_mel] + pitch_embedding = pitch_embed(pitch) + return pitch_embedding + + decoder_inp = decoder_inp.detach() + hparams['predictor_grad'] * (decoder_inp - decoder_inp.detach()) + + pitch_padding = (mel2ph == 0) + + if hparams['pitch_type'] == 'cwt': + # NOTE: this part of script is *isolated* from other scripts, which means + # it may not be compatible with the current version. 
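+                # The commented-out branch below would reconstruct f0 from a continuous-
+                # wavelet (CWT) spectrogram; the underlying transform ships elsewhere in
+                # this diff (utils/cwt.py). Rough shape of that path (sketch only):
+                #   f0 = cwt2f0(cwt_spec, mean, std, scales)  # [B, T] in Hz, exp applied inside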
+ pass + # pitch_padding = None + # ret['cwt'] = cwt_out = self.cwt_predictor(decoder_inp) + # stats_out = self.cwt_stats_layers(encoder_out[:, 0, :]) # [B, 2] + # mean = ret['f0_mean'] = stats_out[:, 0] + # std = ret['f0_std'] = stats_out[:, 1] + # cwt_spec = cwt_out[:, :, :10] + # if f0 is None: + # std = std * hparams['cwt_std_scale'] + # f0 = self.cwt2f0_norm(cwt_spec, mean, std, mel2ph) + # if hparams['use_uv']: + # assert cwt_out.shape[-1] == 11 + # uv = cwt_out[:, :, -1] > 0 + elif hparams['pitch_ar']: + ret['pitch_pred'] = pitch_pred = pitch_predictor(decoder_inp, f0 if is_training else None) + if f0 is None: + f0 = pitch_pred[:, :, 0] + else: + ret['pitch_pred'] = pitch_pred = pitch_predictor(decoder_inp) + if f0 is None: + f0 = pitch_pred[:, :, 0] + if hparams['use_uv'] and uv is None: + uv = pitch_pred[:, :, 1] > 0 + ret['f0_denorm'] = f0_denorm = denorm_f0(f0, uv, hparams, pitch_padding=pitch_padding) + if pitch_padding is not None: + f0[pitch_padding] = 0 + + pitch = f0_to_coarse(f0_denorm) # start from 0 + pitch_embedding = pitch_embed(pitch) + return pitch_embedding + + def add_energy(decoder_inp, energy, ret): + decoder_inp = decoder_inp.detach() + hparams['predictor_grad'] * (decoder_inp - decoder_inp.detach()) + ret['energy_pred'] = energy_pred = energy_predictor(decoder_inp)[:, :, 0] + if energy is None: + energy = energy_pred + energy = torch.clamp(energy * 256 // 4, max=255).long() # energy_to_coarse + energy_embedding = energy_embed(energy) + return energy_embedding + + # add pitch and energy embed + nframes = mel2ph.size(1) + + pitch_embedding = 0 + if hparams['use_pitch_embed']: + if f0 is not None: + delta_l = nframes - f0.size(1) + if delta_l > 0: + f0 = torch.cat((f0,torch.FloatTensor([[x[-1]] * delta_l for x in f0]).to(f0.device)),1) + f0 = f0[:,:nframes] + if uv is not None: + delta_l = nframes - uv.size(1) + if delta_l > 0: + uv = torch.cat((uv,torch.FloatTensor([[x[-1]] * delta_l for x in uv]).to(uv.device)),1) + uv = uv[:,:nframes] + pitch_embedding = add_pitch(pitch_inp, f0, uv, mel2ph, ret, encoder_out=pitch_inp_ph) + + energy_embedding = 0 + if hparams['use_energy_embed']: + if energy is not None: + delta_l = nframes - energy.size(1) + if delta_l > 0: + energy = torch.cat((energy,torch.FloatTensor([[x[-1]] * delta_l for x in energy]).to(energy.device)),1) + energy = energy[:,:nframes] + energy_embedding = add_energy(pitch_inp, energy, ret) + + return pitch_embedding, energy_embedding + + @staticmethod + def insert4(decoder_inp, pitch_embedding, energy_embedding, spk_embed, ret, tgt_nonpadding): + ''' + *decoder_inp* ~= *decoder_inp* + embeddings for spk, pitch, energy + ''' + ret['decoder_inp'] = decoder_inp = (decoder_inp + pitch_embedding + energy_embedding + spk_embed) * tgt_nonpadding + return decoder_inp + + @staticmethod + def module4(diff_main_loss, # modules + norm_spec, decoder_inp_t, ret, K_step, batch_size, device): # variables + ''' + training diffusion using spec as input and decoder_inp as condition. 
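+        For each batch item a diffusion step t is sampled uniformly from [0, K_step);
+        the normalized spec is reshaped to [B, 1, M, T] and passed to diff_main_loss
+        with the transposed decoder output as condition.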
+ + Args: + norm_spec: (normalized) spec + decoder_inp_t: (transposed) decoder_inp + Returns: + ret['diff_loss'] + ''' + t = torch.randint(0, K_step, (batch_size,), device=device).long() + norm_spec = norm_spec.transpose(1, 2)[:, None, :, :] # [B, 1, M, T] + ret['diff_loss'] = diff_main_loss(norm_spec, t, cond=decoder_inp_t) + # nonpadding = (mel2ph != 0).float() + # ret['diff_loss'] = self.p_losses(x, t, cond, nonpadding=nonpadding) diff --git a/trans_key.py b/trans_key.py new file mode 100644 index 0000000000000000000000000000000000000000..c803a6acdbaa065cb75ce0a935b023780ab37026 --- /dev/null +++ b/trans_key.py @@ -0,0 +1,61 @@ +head_list = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"] + + +def trans_f0_seq(feature_pit, transform): + feature_pit = feature_pit * 2 ** (transform / 12) + return round(feature_pit, 1) + + +def move_key(raw_data, mv_key): + head = raw_data[:-1] + body = int(raw_data[-1]) + new_head_index = head_list.index(head) + mv_key + while new_head_index < 0: + body -= 1 + new_head_index += 12 + while new_head_index > 11: + body += 1 + new_head_index -= 12 + result_data = head_list[new_head_index] + str(body) + return result_data + + +def trans_key(raw_data, key): + for i in raw_data: + note_seq_list = i["note_seq"].split(" ") + new_note_seq_list = [] + for note_seq in note_seq_list: + if note_seq != "rest": + new_note_seq = move_key(note_seq, key) + new_note_seq_list.append(new_note_seq) + else: + new_note_seq_list.append(note_seq) + i["note_seq"] = " ".join(new_note_seq_list) + + f0_seq_list = i["f0_seq"].split(" ") + f0_seq_list = [float(x) for x in f0_seq_list] + new_f0_seq_list = [] + for f0_seq in f0_seq_list: + new_f0_seq = trans_f0_seq(f0_seq, key) + new_f0_seq_list.append(str(new_f0_seq)) + i["f0_seq"] = " ".join(new_f0_seq_list) + return raw_data + + +key = -6 +f_w = open("raw.txt", "w", encoding='utf-8') +with open("result.txt", "r", encoding='utf-8') as f: + raw_data = f.readlines() + for raw in raw_data: + raw_list = raw.split("|") + new_note_seq_list = [] + for note_seq in raw_list[3].split(" "): + if note_seq != "rest": + note_seq = note_seq.split("/")[0] if "/" in note_seq else note_seq + new_note_seq = move_key(note_seq, key) + new_note_seq_list.append(new_note_seq) + else: + new_note_seq_list.append(note_seq) + raw_list[3] = " ".join(new_note_seq_list) + f_w.write("|".join(raw_list)) +f_w.close() diff --git a/utils/__init__.py b/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..edd05b1cbcf86d489ce395ab90e50587c7bef4c6 --- /dev/null +++ b/utils/__init__.py @@ -0,0 +1,250 @@ +import glob +import logging +import re +import time +from collections import defaultdict +import os +import sys +import shutil +import types +import numpy as np +import torch +import torch.nn.functional as F +import torch.distributed as dist +from torch import nn + + +def tensors_to_scalars(metrics): + new_metrics = {} + for k, v in metrics.items(): + if isinstance(v, torch.Tensor): + v = v.item() + if type(v) is dict: + v = tensors_to_scalars(v) + new_metrics[k] = v + return new_metrics + + +class AvgrageMeter(object): + + def __init__(self): + self.reset() + + def reset(self): + self.avg = 0 + self.sum = 0 + self.cnt = 0 + + def update(self, val, n=1): + self.sum += val * n + self.cnt += n + self.avg = self.sum / self.cnt + + +def collate_1d(values, pad_idx=0, left_pad=False, shift_right=False, max_len=None, shift_id=1): + """Convert a list of 1d tensors into a padded 2d tensor.""" + size = max(v.size(0) for v in values) if 
max_len is None else max_len + res = values[0].new(len(values), size).fill_(pad_idx) + + def copy_tensor(src, dst): + assert dst.numel() == src.numel() + if shift_right: + dst[1:] = src[:-1] + dst[0] = shift_id + else: + dst.copy_(src) + + for i, v in enumerate(values): + copy_tensor(v, res[i][size - len(v):] if left_pad else res[i][:len(v)]) + return res + + +def collate_2d(values, pad_idx=0, left_pad=False, shift_right=False, max_len=None): + """Convert a list of 2d tensors into a padded 3d tensor.""" + size = max(v.size(0) for v in values) if max_len is None else max_len + res = values[0].new(len(values), size, values[0].shape[1]).fill_(pad_idx) + + def copy_tensor(src, dst): + assert dst.numel() == src.numel() + if shift_right: + dst[1:] = src[:-1] + else: + dst.copy_(src) + + for i, v in enumerate(values): + copy_tensor(v, res[i][size - len(v):] if left_pad else res[i][:len(v)]) + return res + + +def _is_batch_full(batch, num_tokens, max_tokens, max_sentences): + if len(batch) == 0: + return 0 + if len(batch) == max_sentences: + return 1 + if num_tokens > max_tokens: + return 1 + return 0 + + +def batch_by_size( + indices, num_tokens_fn, max_tokens=None, max_sentences=None, + required_batch_size_multiple=1, distributed=False +): + """ + Yield mini-batches of indices bucketed by size. Batches may contain + sequences of different lengths. + + Args: + indices (List[int]): ordered list of dataset indices + num_tokens_fn (callable): function that returns the number of tokens at + a given index + max_tokens (int, optional): max number of tokens in each batch + (default: None). + max_sentences (int, optional): max number of sentences in each + batch (default: None). + required_batch_size_multiple (int, optional): require batch size to + be a multiple of N (default: 1). + """ + max_tokens = max_tokens if max_tokens is not None else sys.maxsize + max_sentences = max_sentences if max_sentences is not None else sys.maxsize + bsz_mult = required_batch_size_multiple + + if isinstance(indices, types.GeneratorType): + indices = np.fromiter(indices, dtype=np.int64, count=-1) + + sample_len = 0 + sample_lens = [] + batch = [] + batches = [] + for i in range(len(indices)): + idx = indices[i] + num_tokens = num_tokens_fn(idx) + sample_lens.append(num_tokens) + sample_len = max(sample_len, num_tokens) + assert sample_len <= max_tokens, ( + "sentence at index {} of size {} exceeds max_tokens " + "limit of {}!".format(idx, sample_len, max_tokens) + ) + num_tokens = (len(batch) + 1) * sample_len + + if _is_batch_full(batch, num_tokens, max_tokens, max_sentences): + mod_len = max( + bsz_mult * (len(batch) // bsz_mult), + len(batch) % bsz_mult, + ) + batches.append(batch[:mod_len]) + batch = batch[mod_len:] + sample_lens = sample_lens[mod_len:] + sample_len = max(sample_lens) if len(sample_lens) > 0 else 0 + batch.append(idx) + if len(batch) > 0: + batches.append(batch) + return batches + + +def make_positions(tensor, padding_idx): + """Replace non-padding symbols with their position numbers. + + Position numbers begin at padding_idx+1. Padding symbols are ignored. + """ + # The series of casts and type-conversions here are carefully + # balanced to both work with ONNX export and XLA. In particular XLA + # prefers ints, cumsum defaults to output longs, and ONNX doesn't know + # how to handle the dtype kwarg in cumsum. 
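+    # Worked example (padding_idx = 0):
+    #   tensor = [[11, 12, 13,  0,  0]]
+    #   mask   = [[ 1,  1,  1,  0,  0]]
+    #   cumsum(mask) * mask + padding_idx = [[1, 2, 3, 0, 0]]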
+ mask = tensor.ne(padding_idx).int() + return ( + torch.cumsum(mask, dim=1).type_as(mask) * mask + ).long() + padding_idx + + +def softmax(x, dim): + return F.softmax(x, dim=dim, dtype=torch.float32) + + +def unpack_dict_to_list(samples): + samples_ = [] + bsz = samples.get('outputs').size(0) + for i in range(bsz): + res = {} + for k, v in samples.items(): + try: + res[k] = v[i] + except: + pass + samples_.append(res) + return samples_ + + +def load_ckpt(cur_model, ckpt_base_dir, prefix_in_ckpt='model', force=True, strict=True): + if os.path.isfile(ckpt_base_dir): + base_dir = os.path.dirname(ckpt_base_dir) + checkpoint_path = [ckpt_base_dir] + else: + base_dir = ckpt_base_dir + checkpoint_path = sorted(glob.glob(f'{base_dir}/model_ckpt_steps_*.ckpt'), key= + lambda x: int(re.findall(f'{base_dir}/model_ckpt_steps_(\d+).ckpt', x.replace('\\','/'))[0])) + if len(checkpoint_path) > 0: + checkpoint_path = checkpoint_path[-1] + state_dict = torch.load(checkpoint_path, map_location="cpu")["state_dict"] + state_dict = {k[len(prefix_in_ckpt) + 1:]: v for k, v in state_dict.items() + if k.startswith(f'{prefix_in_ckpt}.')} + if not strict: + cur_model_state_dict = cur_model.state_dict() + unmatched_keys = [] + for key, param in state_dict.items(): + if key in cur_model_state_dict: + new_param = cur_model_state_dict[key] + if new_param.shape != param.shape: + unmatched_keys.append(key) + print("| Unmatched keys: ", key, new_param.shape, param.shape) + for key in unmatched_keys: + del state_dict[key] + cur_model.load_state_dict(state_dict, strict=strict) + print(f"| load '{prefix_in_ckpt}' from '{checkpoint_path}'.") + else: + e_msg = f"| ckpt not found in {base_dir}." + if force: + assert False, e_msg + else: + print(e_msg) + + +def remove_padding(x, padding_idx=0): + if x is None: + return None + assert len(x.shape) in [1, 2] + if len(x.shape) == 2: # [T, H] + return x[np.abs(x).sum(-1) != padding_idx] + elif len(x.shape) == 1: # [T] + return x[x != padding_idx] + + +class Timer: + timer_map = {} + + def __init__(self, name, print_time=False): + if name not in Timer.timer_map: + Timer.timer_map[name] = 0 + self.name = name + self.print_time = print_time + + def __enter__(self): + self.t = time.time() + + def __exit__(self, exc_type, exc_val, exc_tb): + Timer.timer_map[self.name] += time.time() - self.t + if self.print_time: + print(self.name, Timer.timer_map[self.name]) + + +def print_arch(model, model_name='model'): + #print(f"| {model_name} Arch: ", model) + num_params(model, model_name=model_name) + + +def num_params(model, print_out=True, model_name="model"): + parameters = filter(lambda p: p.requires_grad, model.parameters()) + parameters = sum([np.prod(p.size()) for p in parameters]) / 1_000_000 + if print_out: + print(f'| {model_name} Trainable Parameters: %.3fM' % parameters) + return parameters diff --git a/utils/__pycache__/__init__.cpython-38.pyc b/utils/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c746ba9e45443d03816de9aa2b8a1059f9815a3c Binary files /dev/null and b/utils/__pycache__/__init__.cpython-38.pyc differ diff --git a/utils/__pycache__/audio.cpython-38.pyc b/utils/__pycache__/audio.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8afcff881dbb45eceea2a526c4a576105bcfc12c Binary files /dev/null and b/utils/__pycache__/audio.cpython-38.pyc differ diff --git a/utils/__pycache__/cwt.cpython-38.pyc b/utils/__pycache__/cwt.cpython-38.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..b3e274b26e8911ffb3b8f54a20ec8ff49f6c1495 Binary files /dev/null and b/utils/__pycache__/cwt.cpython-38.pyc differ diff --git a/utils/__pycache__/hparams.cpython-38.pyc b/utils/__pycache__/hparams.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8641c0974b9c8464ca765b4208ba46da46ea7126 Binary files /dev/null and b/utils/__pycache__/hparams.cpython-38.pyc differ diff --git a/utils/__pycache__/indexed_datasets.cpython-38.pyc b/utils/__pycache__/indexed_datasets.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b5af5e4925ba78cc44093198d69b905eb23b1cfd Binary files /dev/null and b/utils/__pycache__/indexed_datasets.cpython-38.pyc differ diff --git a/utils/__pycache__/multiprocess_utils.cpython-38.pyc b/utils/__pycache__/multiprocess_utils.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6dd62f9fd09bbe88a4c7eb52ef8a23c38a50cd4f Binary files /dev/null and b/utils/__pycache__/multiprocess_utils.cpython-38.pyc differ diff --git a/utils/__pycache__/pitch_utils.cpython-38.pyc b/utils/__pycache__/pitch_utils.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3aff420fe73cfa093f2008de847293d166ac72a8 Binary files /dev/null and b/utils/__pycache__/pitch_utils.cpython-38.pyc differ diff --git a/utils/__pycache__/pl_utils.cpython-38.pyc b/utils/__pycache__/pl_utils.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f96abece2fced97b71c8ec51558130d0adee8678 Binary files /dev/null and b/utils/__pycache__/pl_utils.cpython-38.pyc differ diff --git a/utils/__pycache__/plot.cpython-38.pyc b/utils/__pycache__/plot.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..be844867804ea2f6ddcf5d8e0c59aeb343d20aea Binary files /dev/null and b/utils/__pycache__/plot.cpython-38.pyc differ diff --git a/utils/__pycache__/text_encoder.cpython-38.pyc b/utils/__pycache__/text_encoder.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7dec50c4d7671deb03a4eba6969bcd909ef747b7 Binary files /dev/null and b/utils/__pycache__/text_encoder.cpython-38.pyc differ diff --git a/utils/__pycache__/training_utils.cpython-38.pyc b/utils/__pycache__/training_utils.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..290a0b8eb2bc967a3e9c87a564069db722a75810 Binary files /dev/null and b/utils/__pycache__/training_utils.cpython-38.pyc differ diff --git a/utils/audio.py b/utils/audio.py new file mode 100644 index 0000000000000000000000000000000000000000..aba7ab926cf793d085bbdc70c97f376001183fe1 --- /dev/null +++ b/utils/audio.py @@ -0,0 +1,56 @@ +import subprocess +import matplotlib + +matplotlib.use('Agg') +import librosa +import librosa.filters +import numpy as np +from scipy import signal +from scipy.io import wavfile + + +def save_wav(wav, path, sr, norm=False): + if norm: + wav = wav / np.abs(wav).max() + wav *= 32767 + # proposed by @dsmiller + wavfile.write(path, sr, wav.astype(np.int16)) + + +def get_hop_size(hparams): + hop_size = hparams['hop_size'] + if hop_size is None: + assert hparams['frame_shift_ms'] is not None + hop_size = int(hparams['frame_shift_ms'] / 1000 * hparams['audio_sample_rate']) + return hop_size + + +########################################################################################### +def _stft(y, hparams): + return librosa.stft(y=y, n_fft=hparams['fft_size'], hop_length=get_hop_size(hparams), + 
win_length=hparams['win_size'], pad_mode='constant') + + +def _istft(y, hparams): + return librosa.istft(y, hop_length=get_hop_size(hparams), win_length=hparams['win_size']) + + +def librosa_pad_lr(x, fsize, fshift, pad_sides=1): + '''compute right padding (final frame) or both sides padding (first and final frames) + ''' + assert pad_sides in (1, 2) + # return int(fsize // 2) + pad = (x.shape[0] // fshift + 1) * fshift - x.shape[0] + if pad_sides == 1: + return 0, pad + else: + return pad // 2, pad // 2 + pad % 2 + + +# Conversions +def amp_to_db(x): + return 20 * np.log10(np.maximum(1e-5, x)) + + +def normalize(S, hparams): + return (S - hparams['min_level_db']) / -hparams['min_level_db'] diff --git a/utils/cwt.py b/utils/cwt.py new file mode 100644 index 0000000000000000000000000000000000000000..1a08461b9e422aac614438e6240b7355b8e4bb2c --- /dev/null +++ b/utils/cwt.py @@ -0,0 +1,146 @@ +import librosa +import numpy as np +from pycwt import wavelet +from scipy.interpolate import interp1d + + +def load_wav(wav_file, sr): + wav, _ = librosa.load(wav_file, sr=sr, mono=True) + return wav + + +def convert_continuos_f0(f0): + '''CONVERT F0 TO CONTINUOUS F0 + Args: + f0 (ndarray): original f0 sequence with the shape (T) + Return: + (ndarray): continuous f0 with the shape (T) + ''' + # get uv information as binary + f0 = np.copy(f0) + uv = np.float32(f0 != 0) + + # get start and end of f0 + if (f0 == 0).all(): + print("| all of the f0 values are 0.") + return uv, f0 + start_f0 = f0[f0 != 0][0] + end_f0 = f0[f0 != 0][-1] + + # padding start and end of f0 sequence + start_idx = np.where(f0 == start_f0)[0][0] + end_idx = np.where(f0 == end_f0)[0][-1] + f0[:start_idx] = start_f0 + f0[end_idx:] = end_f0 + + # get non-zero frame index + nz_frames = np.where(f0 != 0)[0] + + # perform linear interpolation + f = interp1d(nz_frames, f0[nz_frames]) + cont_f0 = f(np.arange(0, f0.shape[0])) + + return uv, cont_f0 + + +def get_cont_lf0(f0, frame_period=5.0): + uv, cont_f0_lpf = convert_continuos_f0(f0) + # cont_f0_lpf = low_pass_filter(cont_f0_lpf, int(1.0 / (frame_period * 0.001)), cutoff=20) + cont_lf0_lpf = np.log(cont_f0_lpf) + return uv, cont_lf0_lpf + + +def get_lf0_cwt(lf0): + ''' + input: + signal of shape (N) + output: + Wavelet_lf0 of shape(10, N), scales of shape(10) + ''' + mother = wavelet.MexicanHat() + dt = 0.005 + dj = 1 + s0 = dt * 2 + J = 9 + + Wavelet_lf0, scales, _, _, _, _ = wavelet.cwt(np.squeeze(lf0), dt, dj, s0, J, mother) + # Wavelet.shape => (J + 1, len(lf0)) + Wavelet_lf0 = np.real(Wavelet_lf0).T + return Wavelet_lf0, scales + + +def norm_scale(Wavelet_lf0): + Wavelet_lf0_norm = np.zeros((Wavelet_lf0.shape[0], Wavelet_lf0.shape[1])) + mean = Wavelet_lf0.mean(0)[None, :] + std = Wavelet_lf0.std(0)[None, :] + Wavelet_lf0_norm = (Wavelet_lf0 - mean) / std + return Wavelet_lf0_norm, mean, std + + +def normalize_cwt_lf0(f0, mean, std): + uv, cont_lf0_lpf = get_cont_lf0(f0) + cont_lf0_norm = (cont_lf0_lpf - mean) / std + Wavelet_lf0, scales = get_lf0_cwt(cont_lf0_norm) + Wavelet_lf0_norm, _, _ = norm_scale(Wavelet_lf0) + + return Wavelet_lf0_norm + + +def get_lf0_cwt_norm(f0s, mean, std): + uvs = list() + cont_lf0_lpfs = list() + cont_lf0_lpf_norms = list() + Wavelet_lf0s = list() + Wavelet_lf0s_norm = list() + scaless = list() + + means = list() + stds = list() + for f0 in f0s: + uv, cont_lf0_lpf = get_cont_lf0(f0) + cont_lf0_lpf_norm = (cont_lf0_lpf - mean) / std + + Wavelet_lf0, scales = get_lf0_cwt(cont_lf0_lpf_norm) # [560,10] + Wavelet_lf0_norm, mean_scale, std_scale = 
norm_scale(Wavelet_lf0) # [560,10],[1,10],[1,10] + + Wavelet_lf0s_norm.append(Wavelet_lf0_norm) + uvs.append(uv) + cont_lf0_lpfs.append(cont_lf0_lpf) + cont_lf0_lpf_norms.append(cont_lf0_lpf_norm) + Wavelet_lf0s.append(Wavelet_lf0) + scaless.append(scales) + means.append(mean_scale) + stds.append(std_scale) + + return Wavelet_lf0s_norm, scaless, means, stds + + +def inverse_cwt_torch(Wavelet_lf0, scales): + import torch + b = ((torch.arange(0, len(scales)).float().to(Wavelet_lf0.device)[None, None, :] + 1 + 2.5) ** (-2.5)) + lf0_rec = Wavelet_lf0 * b + lf0_rec_sum = lf0_rec.sum(-1) + lf0_rec_sum = (lf0_rec_sum - lf0_rec_sum.mean(-1, keepdim=True)) / lf0_rec_sum.std(-1, keepdim=True) + return lf0_rec_sum + + +def inverse_cwt(Wavelet_lf0, scales): + b = ((np.arange(0, len(scales))[None, None, :] + 1 + 2.5) ** (-2.5)) + lf0_rec = Wavelet_lf0 * b + lf0_rec_sum = lf0_rec.sum(-1) + lf0_rec_sum = (lf0_rec_sum - lf0_rec_sum.mean(-1, keepdims=True)) / lf0_rec_sum.std(-1, keepdims=True) + return lf0_rec_sum + + +def cwt2f0(cwt_spec, mean, std, cwt_scales): + assert len(mean.shape) == 1 and len(std.shape) == 1 and len(cwt_spec.shape) == 3 + import torch + if isinstance(cwt_spec, torch.Tensor): + f0 = inverse_cwt_torch(cwt_spec, cwt_scales) + f0 = f0 * std[:, None] + mean[:, None] + f0 = f0.exp() # [B, T] + else: + f0 = inverse_cwt(cwt_spec, cwt_scales) + f0 = f0 * std[:, None] + mean[:, None] + f0 = np.exp(f0) # [B, T] + return f0 diff --git a/utils/hparams.py b/utils/hparams.py new file mode 100644 index 0000000000000000000000000000000000000000..6d5e6552d88c4609343f968239bfce1a1c177c8b --- /dev/null +++ b/utils/hparams.py @@ -0,0 +1,131 @@ +import argparse +import os +import yaml + +global_print_hparams = True +hparams = {} + + +class Args: + def __init__(self, **kwargs): + for k, v in kwargs.items(): + self.__setattr__(k, v) + + +def override_config(old_config: dict, new_config: dict): + for k, v in new_config.items(): + if isinstance(v, dict) and k in old_config: + override_config(old_config[k], new_config[k]) + else: + old_config[k] = v + + +def set_hparams(config='', exp_name='', hparams_str='', print_hparams=True, global_hparams=True,reset=True,infer=True): + ''' + Load hparams from multiple sources: + 1. config chain (i.e. first load base_config, then load config); + 2. if reset == True, load from the (auto-saved) complete config file ('config.yaml') + which contains all settings and do not rely on base_config; + 3. load from argument --hparams or hparams_str, as temporary modification. 
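+    Example call (sketch; the config path and overridden keys are illustrative):
+        hp = set_hparams(config='configs/base.yaml', exp_name='my_exp',
+                         hparams_str='lr=0.0002,max_sentences=12')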
+ ''' + if config == '': + parser = argparse.ArgumentParser(description='neural music') + parser.add_argument('--config', type=str, default='', + help='location of the data corpus') + parser.add_argument('--exp_name', type=str, default='', help='exp_name') + parser.add_argument('--hparams', type=str, default='', + help='location of the data corpus') + parser.add_argument('--infer', action='store_true', help='infer') + parser.add_argument('--validate', action='store_true', help='validate') + parser.add_argument('--reset', action='store_true', help='reset hparams') + parser.add_argument('--debug', action='store_true', help='debug') + args, unknown = parser.parse_known_args() + else: + args = Args(config=config, exp_name=exp_name, hparams=hparams_str, + infer=infer, validate=False, reset=reset, debug=False) + args_work_dir = '' + if args.exp_name != '': + args.work_dir = args.exp_name + args_work_dir = f'checkpoints/{args.work_dir}' + + config_chains = [] + loaded_config = set() + + def load_config(config_fn): # deep first + with open(config_fn, encoding='utf-8') as f: + hparams_ = yaml.safe_load(f) + loaded_config.add(config_fn) + if 'base_config' in hparams_: + ret_hparams = {} + if not isinstance(hparams_['base_config'], list): + hparams_['base_config'] = [hparams_['base_config']] + for c in hparams_['base_config']: + if c not in loaded_config: + if c.startswith('.'): + c = f'{os.path.dirname(config_fn)}/{c}' + c = os.path.normpath(c) + override_config(ret_hparams, load_config(c)) + override_config(ret_hparams, hparams_) + else: + ret_hparams = hparams_ + config_chains.append(config_fn) + return ret_hparams + + global hparams + assert args.config != '' or args_work_dir != '' + saved_hparams = {} + if args_work_dir != 'checkpoints/': + ckpt_config_path = f'{args_work_dir}/config.yaml' + if os.path.exists(ckpt_config_path): + try: + with open(ckpt_config_path, encoding='utf-8') as f: + saved_hparams.update(yaml.safe_load(f)) + except: + pass + if args.config == '': + args.config = ckpt_config_path + + hparams_ = {} + + hparams_.update(load_config(args.config)) + + if not args.reset: + hparams_.update(saved_hparams) + hparams_['work_dir'] = args_work_dir + + if args.hparams != "": + for new_hparam in args.hparams.split(","): + k, v = new_hparam.split("=") + if k not in hparams_: + hparams_[k] = eval(v) + if v in ['True', 'False'] or type(hparams_[k]) == bool: + hparams_[k] = eval(v) + else: + hparams_[k] = type(hparams_[k])(v) + + if args_work_dir != '' and (not os.path.exists(ckpt_config_path) or args.reset) and not args.infer: + os.makedirs(hparams_['work_dir'], exist_ok=True) + with open(ckpt_config_path, 'w', encoding='utf-8') as f: + yaml.safe_dump(hparams_, f) + + hparams_['infer'] = args.infer + hparams_['debug'] = args.debug + hparams_['validate'] = args.validate + global global_print_hparams + if global_hparams: + hparams.clear() + hparams.update(hparams_) + + if print_hparams and global_print_hparams and global_hparams: + print('| Hparams chains: ', config_chains) + print('| Hparams: ') + for i, (k, v) in enumerate(sorted(hparams_.items())): + print(f"\033[;33;m{k}\033[0m: {v}, ", end="\n" if i % 5 == 4 else "") + print("") + global_print_hparams = False + # print(hparams_.keys()) + if hparams.get('exp_name') is None: + hparams['exp_name'] = args.exp_name + if hparams_.get('exp_name') is None: + hparams_['exp_name'] = args.exp_name + return hparams_ diff --git a/utils/indexed_datasets.py b/utils/indexed_datasets.py new file mode 100644 index 
0000000000000000000000000000000000000000..e15632be30d6296a3c9aa80a1f351058003698b3 --- /dev/null +++ b/utils/indexed_datasets.py @@ -0,0 +1,71 @@ +import pickle +from copy import deepcopy + +import numpy as np + + +class IndexedDataset: + def __init__(self, path, num_cache=1): + super().__init__() + self.path = path + self.data_file = None + self.data_offsets = np.load(f"{path}.idx", allow_pickle=True).item()['offsets'] + self.data_file = open(f"{path}.data", 'rb', buffering=-1) + self.cache = [] + self.num_cache = num_cache + + def check_index(self, i): + if i < 0 or i >= len(self.data_offsets) - 1: + raise IndexError('index out of range') + + def __del__(self): + if self.data_file: + self.data_file.close() + + def __getitem__(self, i): + self.check_index(i) + if self.num_cache > 0: + for c in self.cache: + if c[0] == i: + return c[1] + self.data_file.seek(self.data_offsets[i]) + b = self.data_file.read(self.data_offsets[i + 1] - self.data_offsets[i]) + item = pickle.loads(b) + if self.num_cache > 0: + self.cache = [(i, deepcopy(item))] + self.cache[:-1] + return item + + def __len__(self): + return len(self.data_offsets) - 1 + +class IndexedDatasetBuilder: + def __init__(self, path): + self.path = path + self.out_file = open(f"{path}.data", 'wb') + self.byte_offsets = [0] + + def add_item(self, item): + s = pickle.dumps(item) + bytes = self.out_file.write(s) + self.byte_offsets.append(self.byte_offsets[-1] + bytes) + + def finalize(self): + self.out_file.close() + np.save(open(f"{self.path}.idx", 'wb'), {'offsets': self.byte_offsets}) + + +if __name__ == "__main__": + import random + from tqdm import tqdm + ds_path = '/tmp/indexed_ds_example' + size = 100 + items = [{"a": np.random.normal(size=[10000, 10]), + "b": np.random.normal(size=[10000, 10])} for i in range(size)] + builder = IndexedDatasetBuilder(ds_path) + for i in tqdm(range(size)): + builder.add_item(items[i]) + builder.finalize() + ds = IndexedDataset(ds_path) + for i in tqdm(range(10000)): + idx = random.randint(0, size - 1) + assert (ds[idx]['a'] == items[idx]['a']).all() diff --git a/utils/multiprocess_utils.py b/utils/multiprocess_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..24876c4ca777f09d1c1e1b75674cd7aaf37a75a6 --- /dev/null +++ b/utils/multiprocess_utils.py @@ -0,0 +1,47 @@ +import os +import traceback +from multiprocessing import Queue, Process + + +def chunked_worker(worker_id, map_func, args, results_queue=None, init_ctx_func=None): + ctx = init_ctx_func(worker_id) if init_ctx_func is not None else None + for job_idx, arg in args: + try: + if ctx is not None: + res = map_func(*arg, ctx=ctx) + else: + res = map_func(*arg) + results_queue.put((job_idx, res)) + except: + traceback.print_exc() + results_queue.put((job_idx, None)) + +def chunked_multiprocess_run(map_func, args, num_workers=None, ordered=True, init_ctx_func=None, q_max_size=1000): + args = zip(range(len(args)), args) + args = list(args) + n_jobs = len(args) + if num_workers is None: + num_workers = int(os.getenv('N_PROC', os.cpu_count())) + results_queues = [] + if ordered: + for i in range(num_workers): + results_queues.append(Queue(maxsize=q_max_size // num_workers)) + else: + results_queue = Queue(maxsize=q_max_size) + for i in range(num_workers): + results_queues.append(results_queue) + workers = [] + for i in range(num_workers): + args_worker = args[i::num_workers] + p = Process(target=chunked_worker, args=( + i, map_func, args_worker, results_queues[i], init_ctx_func), daemon=True) + workers.append(p) + p.start() + 
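+    # Results are pulled round-robin from the per-worker queues; since worker i handled
+    # args[i::num_workers], this yields results in the original job order when
+    # ordered=True. Usage sketch (map_func / job_args / handle_result are placeholders;
+    # each element of job_args is a tuple of positional arguments for map_func):
+    #   for res in chunked_multiprocess_run(map_func, job_args, num_workers=4):
+    #       handle_result(res)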
for n_finished in range(n_jobs): + results_queue = results_queues[n_finished % num_workers] + job_idx, res = results_queue.get() + assert job_idx == n_finished or not ordered, (job_idx, n_finished) + yield res + for w in workers: + w.join() + w.close() diff --git a/utils/pitch_utils.py b/utils/pitch_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..1767810e600c8f82e821ff4fc0a164daddaf7af4 --- /dev/null +++ b/utils/pitch_utils.py @@ -0,0 +1,76 @@ +######### +# world +########## +import librosa +import numpy as np +import torch + +# gamma = 0 +# mcepInput = 3 # 0 for dB, 3 for magnitude +# alpha = 0.45 +# en_floor = 10 ** (-80 / 20) +# FFT_SIZE = 2048 + + + + +def f0_to_coarse(f0,hparams): + f0_bin = hparams['f0_bin'] + f0_max = hparams['f0_max'] + f0_min = hparams['f0_min'] + is_torch = isinstance(f0, torch.Tensor) + f0_mel_min = 1127 * np.log(1 + f0_min / 700) + f0_mel_max = 1127 * np.log(1 + f0_max / 700) + f0_mel = 1127 * (1 + f0 / 700).log() if is_torch else 1127 * np.log(1 + f0 / 700) + f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - f0_mel_min) * (f0_bin - 2) / (f0_mel_max - f0_mel_min) + 1 + + f0_mel[f0_mel <= 1] = 1 + f0_mel[f0_mel > f0_bin - 1] = f0_bin - 1 + f0_coarse = (f0_mel + 0.5).long() if is_torch else np.rint(f0_mel).astype(int) + assert f0_coarse.max() <= 255 and f0_coarse.min() >= 1, (f0_coarse.max(), f0_coarse.min()) + return f0_coarse + + +def norm_f0(f0, uv, hparams): + is_torch = isinstance(f0, torch.Tensor) + if hparams['pitch_norm'] == 'standard': + f0 = (f0 - hparams['f0_mean']) / hparams['f0_std'] + if hparams['pitch_norm'] == 'log': + f0 = torch.log2(f0) if is_torch else np.log2(f0) + if uv is not None and hparams['use_uv']: + f0[uv > 0] = 0 + return f0 + + +def norm_interp_f0(f0, hparams): + is_torch = isinstance(f0, torch.Tensor) + if is_torch: + device = f0.device + f0 = f0.data.cpu().numpy() + uv = f0 == 0 + f0 = norm_f0(f0, uv, hparams) + if sum(uv) == len(f0): + f0[uv] = 0 + elif sum(uv) > 0: + f0[uv] = np.interp(np.where(uv)[0], np.where(~uv)[0], f0[~uv]) + uv = torch.FloatTensor(uv) + f0 = torch.FloatTensor(f0) + if is_torch: + f0 = f0.to(device) + return f0, uv + + +def denorm_f0(f0, uv, hparams, pitch_padding=None, min=None, max=None): + if hparams['pitch_norm'] == 'standard': + f0 = f0 * hparams['f0_std'] + hparams['f0_mean'] + if hparams['pitch_norm'] == 'log': + f0 = 2 ** f0 + if min is not None: + f0 = f0.clamp(min=min) + if max is not None: + f0 = f0.clamp(max=max) + if uv is not None and hparams['use_uv']: + f0[uv > 0] = 0 + if pitch_padding is not None: + f0[pitch_padding] = 0 + return f0 diff --git a/utils/pl_utils.py b/utils/pl_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..f375637e83f07b3406e788026da2971d510540e7 --- /dev/null +++ b/utils/pl_utils.py @@ -0,0 +1,1625 @@ +import matplotlib +from torch.nn import DataParallel +from torch.nn.parallel import DistributedDataParallel + +matplotlib.use('Agg') +import glob +import itertools +import subprocess +import threading +import traceback + +from pytorch_lightning.callbacks import GradientAccumulationScheduler +from pytorch_lightning.callbacks import ModelCheckpoint + +from functools import wraps +from torch.cuda._utils import _get_device_index +import numpy as np +import torch.optim +import torch.utils.data +import copy +import logging +import os +import re +import sys +import torch +import torch.distributed as dist +import torch.multiprocessing as mp +import tqdm +from torch.optim.optimizer import Optimizer + + +def get_a_var(obj): # pragma: 
no cover + if isinstance(obj, torch.Tensor): + return obj + + if isinstance(obj, list) or isinstance(obj, tuple): + for result in map(get_a_var, obj): + if isinstance(result, torch.Tensor): + return result + if isinstance(obj, dict): + for result in map(get_a_var, obj.items()): + if isinstance(result, torch.Tensor): + return result + return None + + +def data_loader(fn): + """ + Decorator to make any fx with this use the lazy property + :param fn: + :return: + """ + + wraps(fn) + attr_name = '_lazy_' + fn.__name__ + + def _get_data_loader(self): + try: + value = getattr(self, attr_name) + except AttributeError: + try: + value = fn(self) # Lazy evaluation, done only once. + if ( + value is not None and + not isinstance(value, list) and + fn.__name__ in ['test_dataloader', 'val_dataloader'] + ): + value = [value] + except AttributeError as e: + # Guard against AttributeError suppression. (Issue #142) + traceback.print_exc() + error = f'{fn.__name__}: An AttributeError was encountered: ' + str(e) + raise RuntimeError(error) from e + setattr(self, attr_name, value) # Memoize evaluation. + return value + + return _get_data_loader + + +def parallel_apply(modules, inputs, kwargs_tup=None, devices=None): # pragma: no cover + r"""Applies each `module` in :attr:`modules` in parallel on arguments + contained in :attr:`inputs` (positional) and :attr:`kwargs_tup` (keyword) + on each of :attr:`devices`. + + Args: + modules (Module): modules to be parallelized + inputs (tensor): inputs to the modules + devices (list of int or torch.device): CUDA devices + + :attr:`modules`, :attr:`inputs`, :attr:`kwargs_tup` (if given), and + :attr:`devices` (if given) should all have same length. Moreover, each + element of :attr:`inputs` can either be a single object as the only argument + to a module, or a collection of positional arguments. + """ + assert len(modules) == len(inputs) + if kwargs_tup is not None: + assert len(modules) == len(kwargs_tup) + else: + kwargs_tup = ({},) * len(modules) + if devices is not None: + assert len(modules) == len(devices) + else: + devices = [None] * len(modules) + devices = list(map(lambda x: _get_device_index(x, True), devices)) + lock = threading.Lock() + results = {} + grad_enabled = torch.is_grad_enabled() + + def _worker(i, module, input, kwargs, device=None): + torch.set_grad_enabled(grad_enabled) + if device is None: + device = get_a_var(input).get_device() + try: + with torch.cuda.device(device): + # this also avoids accidental slicing of `input` if it is a Tensor + if not isinstance(input, (list, tuple)): + input = (input,) + + # --------------- + # CHANGE + if module.training: + output = module.training_step(*input, **kwargs) + + elif module.testing: + output = module.test_step(*input, **kwargs) + + else: + output = module.validation_step(*input, **kwargs) + # --------------- + + with lock: + results[i] = output + except Exception as e: + with lock: + results[i] = e + + # make sure each module knows what training state it's in... 
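+    # (these flags decide whether _worker dispatches each replica to training_step,
+    #  test_step or validation_step, so an out-of-sync copy would run the wrong step)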
+ # fixes weird bug where copies are out of sync + root_m = modules[0] + for m in modules[1:]: + m.training = root_m.training + m.testing = root_m.testing + + if len(modules) > 1: + threads = [threading.Thread(target=_worker, + args=(i, module, input, kwargs, device)) + for i, (module, input, kwargs, device) in + enumerate(zip(modules, inputs, kwargs_tup, devices))] + + for thread in threads: + thread.start() + for thread in threads: + thread.join() + else: + _worker(0, modules[0], inputs[0], kwargs_tup[0], devices[0]) + + outputs = [] + for i in range(len(inputs)): + output = results[i] + if isinstance(output, Exception): + raise output + outputs.append(output) + return outputs + + +def _find_tensors(obj): # pragma: no cover + r""" + Recursively find all tensors contained in the specified object. + """ + if isinstance(obj, torch.Tensor): + return [obj] + if isinstance(obj, (list, tuple)): + return itertools.chain(*map(_find_tensors, obj)) + if isinstance(obj, dict): + return itertools.chain(*map(_find_tensors, obj.values())) + return [] + + +class DDP(DistributedDataParallel): + """ + Override the forward call in lightning so it goes to training and validation step respectively + """ + + def parallel_apply(self, replicas, inputs, kwargs): + return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)]) + + def forward(self, *inputs, **kwargs): # pragma: no cover + self._sync_params() + if self.device_ids: + inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) + if len(self.device_ids) == 1: + # -------------- + # LIGHTNING MOD + # -------------- + # normal + # output = self.module(*inputs[0], **kwargs[0]) + # lightning + if self.module.training: + output = self.module.training_step(*inputs[0], **kwargs[0]) + elif self.module.testing: + output = self.module.test_step(*inputs[0], **kwargs[0]) + else: + output = self.module.validation_step(*inputs[0], **kwargs[0]) + else: + outputs = self.parallel_apply(self._module_copies[:len(inputs)], inputs, kwargs) + output = self.gather(outputs, self.output_device) + else: + # normal + output = self.module(*inputs, **kwargs) + + if torch.is_grad_enabled(): + # We'll return the output object verbatim since it is a freeform + # object. We need to find any tensors in this object, though, + # because we need to figure out which parameters were used during + # this forward pass, to ensure we short circuit reduction for any + # unused parameters. Only if `find_unused_parameters` is set. 
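+            # e.g. an output dict like {'diff_loss': t1, 'aux': [t2, t3]} flattens to
+            # [t1, t2, t3] via _find_tensors before being handed to the reducer.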
+ if self.find_unused_parameters: + self.reducer.prepare_for_backward(list(_find_tensors(output))) + else: + self.reducer.prepare_for_backward([]) + return output + + +class DP(DataParallel): + """ + Override the forward call in lightning so it goes to training and validation step respectively + """ + + def forward(self, *inputs, **kwargs): + if not self.device_ids: + return self.module(*inputs, **kwargs) + + for t in itertools.chain(self.module.parameters(), self.module.buffers()): + if t.device != self.src_device_obj: + raise RuntimeError("module must have its parameters and buffers " + "on device {} (device_ids[0]) but found one of " + "them on device: {}".format(self.src_device_obj, t.device)) + + inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) + if len(self.device_ids) == 1: + # lightning + if self.module.training: + return self.module.training_step(*inputs[0], **kwargs[0]) + elif self.module.testing: + return self.module.test_step(*inputs[0], **kwargs[0]) + else: + return self.module.validation_step(*inputs[0], **kwargs[0]) + + replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) + outputs = self.parallel_apply(replicas, inputs, kwargs) + return self.gather(outputs, self.output_device) + + def parallel_apply(self, replicas, inputs, kwargs): + return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)]) + + +class GradientAccumulationScheduler: + def __init__(self, scheduling: dict): + if scheduling == {}: # empty dict error + raise TypeError("Empty dict cannot be interpreted correct") + + for key in scheduling.keys(): + if not isinstance(key, int) or not isinstance(scheduling[key], int): + raise TypeError("All epoches and accumulation factor must be integers") + + minimal_epoch = min(scheduling.keys()) + if minimal_epoch < 1: + msg = f"Epochs indexing from 1, epoch {minimal_epoch} cannot be interpreted correct" + raise IndexError(msg) + elif minimal_epoch != 1: # if user didnt define first epoch accumulation factor + scheduling.update({1: 1}) + + self.scheduling = scheduling + self.epochs = sorted(scheduling.keys()) + + def on_epoch_begin(self, epoch, trainer): + epoch += 1 # indexing epochs from 1 + for i in reversed(range(len(self.epochs))): + if epoch >= self.epochs[i]: + trainer.accumulate_grad_batches = self.scheduling.get(self.epochs[i]) + break + + +class LatestModelCheckpoint(ModelCheckpoint): + def __init__(self, filepath, monitor='val_loss', verbose=0, num_ckpt_keep=5, + save_weights_only=False, mode='auto', period=1, prefix='model', save_best=True): + super(ModelCheckpoint, self).__init__() + self.monitor = monitor + self.verbose = verbose + self.filepath = filepath + os.makedirs(filepath, exist_ok=True) + self.num_ckpt_keep = num_ckpt_keep + self.save_best = save_best + self.save_weights_only = save_weights_only + self.period = period + self.epochs_since_last_check = 0 + self.prefix = prefix + self.best_k_models = {} + # {filename: monitor} + self.kth_best_model = '' + self.save_top_k = 1 + self.task = None + if mode == 'min': + self.monitor_op = np.less + self.best = np.Inf + self.mode = 'min' + elif mode == 'max': + self.monitor_op = np.greater + self.best = -np.Inf + self.mode = 'max' + else: + if 'acc' in self.monitor or self.monitor.startswith('fmeasure'): + self.monitor_op = np.greater + self.best = -np.Inf + self.mode = 'max' + else: + self.monitor_op = np.less + self.best = np.Inf + self.mode = 'min' + if os.path.exists(f'{self.filepath}/best_valid.npy'): + self.best = 
np.load(f'{self.filepath}/best_valid.npy')[0] + + def get_all_ckpts(self): + return sorted(glob.glob(f'{self.filepath}/{self.prefix}_ckpt_steps_*.ckpt'), + key=lambda x: -int(re.findall('.*steps\_(\d+)\.ckpt', x)[0])) + + def on_epoch_end(self, epoch, logs=None): + logs = logs or {} + self.epochs_since_last_check += 1 + best_filepath = f'{self.filepath}/{self.prefix}_ckpt_best.pt' + if self.epochs_since_last_check >= self.period: + self.epochs_since_last_check = 0 + filepath = f'{self.filepath}/{self.prefix}_ckpt_steps_{self.task.global_step}.ckpt' + if self.verbose > 0: + logging.info(f'Epoch {epoch:05d}@{self.task.global_step}: saving model to {filepath}') + self._save_model(filepath) + for old_ckpt in self.get_all_ckpts()[self.num_ckpt_keep:]: + # TODO: test filesystem calls + os.remove(old_ckpt) + # subprocess.check_call(f'del "{old_ckpt}"', shell=True) + if self.verbose > 0: + logging.info(f'Delete ckpt: {os.path.basename(old_ckpt)}') + current = logs.get(self.monitor) + if current is not None and self.save_best: + if self.monitor_op(current, self.best): + self.best = current + if self.verbose > 0: + logging.info( + f'Epoch {epoch:05d}@{self.task.global_step}: {self.monitor} reached' + f' {current:0.5f} (best {self.best:0.5f}), saving model to' + f' {best_filepath} as top 1') + self._save_model(best_filepath) + np.save(f'{self.filepath}/best_valid.npy', [self.best]) + + def _save_model(self,path): + return self.save_function(path) + + + +class BaseTrainer: + def __init__( + self, + logger=True, + checkpoint_callback=True, + default_save_path=None, + gradient_clip_val=0, + process_position=0, + gpus=-1, + log_gpu_memory=None, + show_progress_bar=True, + track_grad_norm=-1, + check_val_every_n_epoch=1, + accumulate_grad_batches=1, + max_updates=1000, + min_epochs=1, + val_check_interval=1.0, + log_save_interval=100, + row_log_interval=10, + print_nan_grads=False, + weights_summary='full', + num_sanity_val_steps=5, + resume_from_checkpoint=None, + ): + self.log_gpu_memory = log_gpu_memory + self.gradient_clip_val = gradient_clip_val + self.check_val_every_n_epoch = check_val_every_n_epoch + self.track_grad_norm = track_grad_norm + self.on_gpu = True if (gpus and torch.cuda.is_available()) else False + self.process_position = process_position + self.weights_summary = weights_summary + self.max_updates = max_updates + self.min_epochs = min_epochs + self.num_sanity_val_steps = num_sanity_val_steps + self.print_nan_grads = print_nan_grads + self.resume_from_checkpoint = resume_from_checkpoint + self.default_save_path = default_save_path + + # training bookeeping + self.total_batch_idx = 0 + self.running_loss = [] + self.avg_loss = 0 + self.batch_idx = 0 + self.tqdm_metrics = {} + self.callback_metrics = {} + self.num_val_batches = 0 + self.num_training_batches = 0 + self.num_test_batches = 0 + self.get_train_dataloader = None + self.get_test_dataloaders = None + self.get_val_dataloaders = None + self.is_iterable_train_dataloader = False + + # training state + self.model = None + self.testing = False + self.disable_validation = False + self.lr_schedulers = [] + self.optimizers = None + self.global_step = 0 + self.current_epoch = 0 + self.total_batches = 0 + + # configure checkpoint callback + self.checkpoint_callback = checkpoint_callback + self.checkpoint_callback.save_function = self.save_checkpoint + self.weights_save_path = self.checkpoint_callback.filepath + + # accumulated grads + self.configure_accumulated_gradients(accumulate_grad_batches) + + # allow int, string and gpu list + 
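+        # Device ids are read from CUDA_VISIBLE_DEVICES rather than the `gpus` argument:
+        # e.g. CUDA_VISIBLE_DEVICES="0,1" -> [0, 1]; unset or empty -> CPU (root_gpu=None).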
self.data_parallel_device_ids = [ + int(x) for x in os.environ.get("CUDA_VISIBLE_DEVICES", "").split(",") if x != ''] + if len(self.data_parallel_device_ids) == 0: + self.root_gpu = None + self.on_gpu = False + else: + self.root_gpu = self.data_parallel_device_ids[0] + self.on_gpu = True + + # distributed backend choice + self.use_ddp = False + self.use_dp = False + self.single_gpu = False + self.distributed_backend = 'ddp' if self.num_gpus > 0 else 'dp' + self.set_distributed_mode(self.distributed_backend) + + self.proc_rank = 0 + self.world_size = 1 + self.node_rank = 0 + + # can't init progress bar here because starting a new process + # means the progress_bar won't survive pickling + self.show_progress_bar = show_progress_bar + + # logging + self.log_save_interval = log_save_interval + self.val_check_interval = val_check_interval + self.logger = logger + self.logger.rank = 0 + self.row_log_interval = row_log_interval + + @property + def num_gpus(self): + gpus = self.data_parallel_device_ids + if gpus is None: + return 0 + else: + return len(gpus) + + @property + def data_parallel(self): + return self.use_dp or self.use_ddp + + def get_model(self): + is_dp_module = isinstance(self.model, (DDP, DP)) + model = self.model.module if is_dp_module else self.model + return model + + # ----------------------------- + # MODEL TRAINING + # ----------------------------- + def fit(self, model): + if self.use_ddp: + mp.spawn(self.ddp_train, nprocs=self.num_gpus, args=(model,)) + else: + model.model = model.build_model() + if not self.testing: + self.optimizers, self.lr_schedulers = self.init_optimizers(model.configure_optimizers()) + if self.use_dp: + model.cuda(self.root_gpu) + model = DP(model, device_ids=self.data_parallel_device_ids) + elif self.single_gpu: + model.cuda(self.root_gpu) + self.run_pretrain_routine(model) + return 1 + + def init_optimizers(self, optimizers): + + # single optimizer + if isinstance(optimizers, Optimizer): + return [optimizers], [] + + # two lists + elif len(optimizers) == 2 and isinstance(optimizers[0], list): + optimizers, lr_schedulers = optimizers + return optimizers, lr_schedulers + + # single list or tuple + elif isinstance(optimizers, list) or isinstance(optimizers, tuple): + return optimizers, [] + + def run_pretrain_routine(self, model): + """Sanity check a few things before starting actual training. + + :param model: + """ + ref_model = model + if self.data_parallel: + ref_model = model.module + + # give model convenience properties + ref_model.trainer = self + + # set local properties on the model + self.copy_trainer_model_properties(ref_model) + + # link up experiment object + if self.logger is not None: + ref_model.logger = self.logger + self.logger.save() + + if self.use_ddp: + dist.barrier() + + # set up checkpoint callback + # self.configure_checkpoint_callback() + + # transfer data loaders from model + self.get_dataloaders(ref_model) + + # track model now. 
+ # if cluster resets state, the model will update with the saved weights + self.model = model + + # restore training and model before hpc call + self.restore_weights(model) + + # when testing requested only run test and return + if self.testing: + self.run_evaluation(test=True) + return + + # check if we should run validation during training + self.disable_validation = self.num_val_batches == 0 + + # run tiny validation (if validation defined) + # to make sure program won't crash during val + ref_model.on_sanity_check_start() + ref_model.on_train_start() + if not self.disable_validation and self.num_sanity_val_steps > 0: + # init progress bars for validation sanity check + pbar = tqdm.tqdm(desc='Validation sanity check', + total=self.num_sanity_val_steps * len(self.get_val_dataloaders()), + leave=False, position=2 * self.process_position, + disable=not self.show_progress_bar, dynamic_ncols=True, unit='batch') + self.main_progress_bar = pbar + # dummy validation progress bar + self.val_progress_bar = tqdm.tqdm(disable=True) + + self.evaluate(model, self.get_val_dataloaders(), self.num_sanity_val_steps, self.testing) + + # close progress bars + self.main_progress_bar.close() + self.val_progress_bar.close() + + # init progress bar + pbar = tqdm.tqdm(leave=True, position=2 * self.process_position, + disable=not self.show_progress_bar, dynamic_ncols=True, unit='batch', + file=sys.stdout) + self.main_progress_bar = pbar + + # clear cache before training + if self.on_gpu: + torch.cuda.empty_cache() + + # CORE TRAINING LOOP + self.train() + + def test(self, model): + self.testing = True + self.fit(model) + + @property + def training_tqdm_dict(self): + tqdm_dict = { + 'step': '{}'.format(self.global_step), + } + tqdm_dict.update(self.tqdm_metrics) + return tqdm_dict + + # -------------------- + # restore ckpt + # -------------------- + def restore_weights(self, model): + """ + To restore weights we have two cases. + First, attempt to restore hpc weights. If successful, don't restore + other weights. 
+ + Otherwise, try to restore actual weights + :param model: + :return: + """ + # clear cache before restore + if self.on_gpu: + torch.cuda.empty_cache() + + if self.resume_from_checkpoint is not None: + self.restore(self.resume_from_checkpoint, on_gpu=self.on_gpu) + else: + # restore weights if same exp version + self.restore_state_if_checkpoint_exists(model) + + # wait for all models to restore weights + if self.use_ddp: + # wait for all processes to catch up + dist.barrier() + + # clear cache after restore + if self.on_gpu: + torch.cuda.empty_cache() + + def restore_state_if_checkpoint_exists(self, model): + did_restore = False + + # do nothing if there's not dir or callback + no_ckpt_callback = (self.checkpoint_callback is None) or (not self.checkpoint_callback) + if no_ckpt_callback or not os.path.exists(self.checkpoint_callback.filepath): + return did_restore + + # restore trainer state and model if there is a weight for this experiment + last_steps = -1 + last_ckpt_name = None + + # find last epoch + checkpoints = os.listdir(self.checkpoint_callback.filepath) + for name in checkpoints: + if '.ckpt' in name and not name.endswith('part'): + if 'steps_' in name: + steps = name.split('steps_')[1] + steps = int(re.sub('[^0-9]', '', steps)) + + if steps > last_steps: + last_steps = steps + last_ckpt_name = name + + # restore last checkpoint + if last_ckpt_name is not None: + last_ckpt_path = os.path.join(self.checkpoint_callback.filepath, last_ckpt_name) + self.restore(last_ckpt_path, self.on_gpu) + logging.info(f'model and trainer restored from checkpoint: {last_ckpt_path}') + did_restore = True + + return did_restore + + def restore(self, checkpoint_path, on_gpu): + checkpoint = torch.load(checkpoint_path, map_location='cpu') + + # load model state + model = self.get_model() + + # load the state_dict on the model automatically + model.load_state_dict(checkpoint['state_dict'], strict=False) + if on_gpu: + model.cuda(self.root_gpu) + # load training state (affects trainer only) + self.restore_training_state(checkpoint) + model.global_step = self.global_step + del checkpoint + + try: + if dist.is_initialized() and dist.get_rank() > 0: + return + except Exception as e: + print(e) + return + + def restore_training_state(self, checkpoint): + """ + Restore trainer state. 
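restore_state_if_checkpoint_exists above picks the newest checkpoint purely from the step count embedded in the file name; a small standalone sketch of that selection, with hypothetical file names:

import re

# hypothetical contents of checkpoint_callback.filepath
names = ['model_ckpt_steps_20000.ckpt', 'model_ckpt_steps_160000.ckpt', 'model_ckpt_steps_180000.ckpt.part']

last_steps, last_ckpt_name = -1, None
for name in names:
    if '.ckpt' in name and not name.endswith('part') and 'steps_' in name:
        steps = int(re.sub('[^0-9]', '', name.split('steps_')[1]))  # '160000.ckpt' -> 160000
        if steps > last_steps:
            last_steps, last_ckpt_name = steps, name

# last_ckpt_name == 'model_ckpt_steps_160000.ckpt'; the unfinished '.part' file is skipped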
+ Model will get its change to update + :param checkpoint: + :return: + """ + if self.checkpoint_callback is not None and self.checkpoint_callback is not False: + # return allowing checkpoints with meta information (global_step, etc) + self.checkpoint_callback.best = checkpoint['checkpoint_callback_best'] + + self.global_step = checkpoint['global_step'] + self.current_epoch = checkpoint['epoch'] + + if self.testing: + return + + # restore the optimizers + optimizer_states = checkpoint['optimizer_states'] + for optimizer, opt_state in zip(self.optimizers, optimizer_states): + if optimizer is None: + return + optimizer.load_state_dict(opt_state) + + # move optimizer to GPU 1 weight at a time + # avoids OOM + if self.root_gpu is not None: + for state in optimizer.state.values(): + for k, v in state.items(): + if isinstance(v, torch.Tensor): + state[k] = v.cuda(self.root_gpu) + + # restore the lr schedulers + lr_schedulers = checkpoint['lr_schedulers'] + for scheduler, lrs_state in zip(self.lr_schedulers, lr_schedulers): + scheduler.load_state_dict(lrs_state) + + # -------------------- + # MODEL SAVE CHECKPOINT + # -------------------- + def _atomic_save(self, checkpoint, filepath): + """Saves a checkpoint atomically, avoiding the creation of incomplete checkpoints. + + This will create a temporary checkpoint with a suffix of ``.part``, then copy it to the final location once + saving is finished. + + Args: + checkpoint (object): The object to save. + Built to be used with the ``dump_checkpoint`` method, but can deal with anything which ``torch.save`` + accepts. + filepath (str|pathlib.Path): The path to which the checkpoint will be saved. + This points to the file that the checkpoint will be stored in. + """ + tmp_path = str(filepath) + ".part" + torch.save(checkpoint, tmp_path) + os.replace(tmp_path, filepath) + + def save_checkpoint(self, filepath): + checkpoint = self.dump_checkpoint() + self._atomic_save(checkpoint, filepath) + + def dump_checkpoint(self): + + checkpoint = { + 'epoch': self.current_epoch, + 'global_step': self.global_step + } + + if self.checkpoint_callback is not None and self.checkpoint_callback is not False: + checkpoint['checkpoint_callback_best'] = self.checkpoint_callback.best + + # save optimizers + optimizer_states = [] + for i, optimizer in enumerate(self.optimizers): + if optimizer is not None: + optimizer_states.append(optimizer.state_dict()) + + checkpoint['optimizer_states'] = optimizer_states + + # save lr schedulers + lr_schedulers = [] + for i, scheduler in enumerate(self.lr_schedulers): + lr_schedulers.append(scheduler.state_dict()) + + checkpoint['lr_schedulers'] = lr_schedulers + + # add the hparams and state_dict from the model + model = self.get_model() + checkpoint['state_dict'] = model.state_dict() + # give the model a chance to add a few things + model.on_save_checkpoint(checkpoint) + + return checkpoint + + def copy_trainer_model_properties(self, model): + if isinstance(model, DP): + ref_model = model.module + elif isinstance(model, DDP): + ref_model = model.module + else: + ref_model = model + + for m in [model, ref_model]: + m.trainer = self + m.on_gpu = self.on_gpu + m.use_dp = self.use_dp + m.use_ddp = self.use_ddp + m.testing = self.testing + m.single_gpu = self.single_gpu + + def transfer_batch_to_gpu(self, batch, gpu_id): + # base case: object can be directly moved using `cuda` or `to` + if callable(getattr(batch, 'cuda', None)): + return batch.cuda(gpu_id, non_blocking=True) + + elif callable(getattr(batch, 'to', None)): + return 
batch.to(torch.device('cuda', gpu_id), non_blocking=True) + + # when list + elif isinstance(batch, list): + for i, x in enumerate(batch): + batch[i] = self.transfer_batch_to_gpu(x, gpu_id) + return batch + + # when tuple + elif isinstance(batch, tuple): + batch = list(batch) + for i, x in enumerate(batch): + batch[i] = self.transfer_batch_to_gpu(x, gpu_id) + return tuple(batch) + + # when dict + elif isinstance(batch, dict): + for k, v in batch.items(): + batch[k] = self.transfer_batch_to_gpu(v, gpu_id) + + return batch + + # nothing matches, return the value as is without transform + return batch + + def set_distributed_mode(self, distributed_backend): + # skip for CPU + if self.num_gpus == 0: + return + + # single GPU case + # in single gpu case we allow ddp so we can train on multiple + # nodes, 1 gpu per node + elif self.num_gpus == 1: + self.single_gpu = True + self.use_dp = False + self.use_ddp = False + self.root_gpu = 0 + self.data_parallel_device_ids = [0] + else: + if distributed_backend is not None: + self.use_dp = distributed_backend == 'dp' + self.use_ddp = distributed_backend == 'ddp' + elif distributed_backend is None: + self.use_dp = True + self.use_ddp = False + + logging.info(f'gpu available: {torch.cuda.is_available()}, used: {self.on_gpu}') + + def ddp_train(self, gpu_idx, model): + """ + Entry point into a DP thread + :param gpu_idx: + :param model: + :param cluster_obj: + :return: + """ + # otherwise default to node rank 0 + self.node_rank = 0 + + # show progressbar only on progress_rank 0 + self.show_progress_bar = self.show_progress_bar and self.node_rank == 0 and gpu_idx == 0 + + # determine which process we are and world size + if self.use_ddp: + self.proc_rank = self.node_rank * self.num_gpus + gpu_idx + self.world_size = self.num_gpus + + # let the exp know the rank to avoid overwriting logs + if self.logger is not None: + self.logger.rank = self.proc_rank + + # set up server using proc 0's ip address + # try to init for 20 times at max in case ports are taken + # where to store ip_table + model.trainer = self + model.init_ddp_connection(self.proc_rank, self.world_size) + + # CHOOSE OPTIMIZER + # allow for lr schedulers as well + model.model = model.build_model() + if not self.testing: + self.optimizers, self.lr_schedulers = self.init_optimizers(model.configure_optimizers()) + + # MODEL + # copy model to each gpu + if self.distributed_backend == 'ddp': + torch.cuda.set_device(gpu_idx) + model.cuda(gpu_idx) + + # set model properties before going into wrapper + self.copy_trainer_model_properties(model) + + # override root GPU + self.root_gpu = gpu_idx + + if self.distributed_backend == 'ddp': + device_ids = [gpu_idx] + else: + device_ids = None + + # allow user to configure ddp + model = model.configure_ddp(model, device_ids) + + # continue training routine + self.run_pretrain_routine(model) + + def resolve_root_node_address(self, root_node): + if '[' in root_node: + name = root_node.split('[')[0] + number = root_node.split(',')[0] + if '-' in number: + number = number.split('-')[0] + + number = re.sub('[^0-9]', '', number) + root_node = name + number + + return root_node + + def log_metrics(self, metrics, grad_norm_dic, step=None): + """Logs the metric dict passed in. 
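transfer_batch_to_gpu above recurses through tensors, lists, tuples and dicts; a simplified standalone restatement of the same idea (the batch below is hypothetical):

import torch

def to_device(obj, device):
    # tensors (and anything exposing .to/.cuda) are moved; containers are walked element-wise
    if torch.is_tensor(obj):
        return obj.to(device, non_blocking=True)
    if isinstance(obj, list):
        return [to_device(x, device) for x in obj]
    if isinstance(obj, tuple):
        return tuple(to_device(x, device) for x in obj)
    if isinstance(obj, dict):
        return {k: to_device(v, device) for k, v in obj.items()}
    return obj  # anything else is returned unchanged

batch = {'mels': torch.zeros(2, 80, 100), 'lengths': [torch.tensor(100), torch.tensor(80)]}
batch = to_device(batch, 'cuda:0' if torch.cuda.is_available() else 'cpu')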
+ + :param metrics: + :param grad_norm_dic: + """ + # added metrics by Lightning for convenience + metrics['epoch'] = self.current_epoch + + # add norms + metrics.update(grad_norm_dic) + + # turn all tensors to scalars + scalar_metrics = self.metrics_to_scalars(metrics) + + step = step if step is not None else self.global_step + # log actual metrics + if self.proc_rank == 0 and self.logger is not None: + self.logger.log_metrics(scalar_metrics, step=step) + self.logger.save() + + def add_tqdm_metrics(self, metrics): + for k, v in metrics.items(): + if type(v) is torch.Tensor: + v = v.item() + + self.tqdm_metrics[k] = v + + def metrics_to_scalars(self, metrics): + new_metrics = {} + for k, v in metrics.items(): + if isinstance(v, torch.Tensor): + v = v.item() + + if type(v) is dict: + v = self.metrics_to_scalars(v) + + new_metrics[k] = v + + return new_metrics + + def process_output(self, output, train=False): + """Reduces output according to the training mode. + + Separates loss from logging and tqdm metrics + :param output: + :return: + """ + # --------------- + # EXTRACT CALLBACK KEYS + # --------------- + # all keys not progress_bar or log are candidates for callbacks + callback_metrics = {} + for k, v in output.items(): + if k not in ['progress_bar', 'log', 'hiddens']: + callback_metrics[k] = v + + if train and self.use_dp: + num_gpus = self.num_gpus + callback_metrics = self.reduce_distributed_output(callback_metrics, num_gpus) + + for k, v in callback_metrics.items(): + if isinstance(v, torch.Tensor): + callback_metrics[k] = v.item() + + # --------------- + # EXTRACT PROGRESS BAR KEYS + # --------------- + try: + progress_output = output['progress_bar'] + + # reduce progress metrics for tqdm when using dp + if train and self.use_dp: + num_gpus = self.num_gpus + progress_output = self.reduce_distributed_output(progress_output, num_gpus) + + progress_bar_metrics = progress_output + except Exception: + progress_bar_metrics = {} + + # --------------- + # EXTRACT LOGGING KEYS + # --------------- + # extract metrics to log to experiment + try: + log_output = output['log'] + + # reduce progress metrics for tqdm when using dp + if train and self.use_dp: + num_gpus = self.num_gpus + log_output = self.reduce_distributed_output(log_output, num_gpus) + + log_metrics = log_output + except Exception: + log_metrics = {} + + # --------------- + # EXTRACT LOSS + # --------------- + # if output dict doesn't have the keyword loss + # then assume the output=loss if scalar + loss = None + if train: + try: + loss = output['loss'] + except Exception: + if type(output) is torch.Tensor: + loss = output + else: + raise RuntimeError( + 'No `loss` value in the dictionary returned from `model.training_step()`.' 
+ ) + + # when using dp need to reduce the loss + if self.use_dp: + loss = self.reduce_distributed_output(loss, self.num_gpus) + + # --------------- + # EXTRACT HIDDEN + # --------------- + hiddens = output.get('hiddens') + + # use every metric passed in as a candidate for callback + callback_metrics.update(progress_bar_metrics) + callback_metrics.update(log_metrics) + + # convert tensors to numpy + for k, v in callback_metrics.items(): + if isinstance(v, torch.Tensor): + callback_metrics[k] = v.item() + + return loss, progress_bar_metrics, log_metrics, callback_metrics, hiddens + + def reduce_distributed_output(self, output, num_gpus): + if num_gpus <= 1: + return output + + # when using DP, we get one output per gpu + # average outputs and return + if type(output) is torch.Tensor: + return output.mean() + + for k, v in output.items(): + # recurse on nested dics + if isinstance(output[k], dict): + output[k] = self.reduce_distributed_output(output[k], num_gpus) + + # do nothing when there's a scalar + elif isinstance(output[k], torch.Tensor) and output[k].dim() == 0: + pass + + # reduce only metrics that have the same number of gpus + elif output[k].size(0) == num_gpus: + reduced = torch.mean(output[k]) + output[k] = reduced + return output + + def clip_gradients(self): + if self.gradient_clip_val > 0: + model = self.get_model() + torch.nn.utils.clip_grad_norm_(model.parameters(), self.gradient_clip_val) + + def print_nan_gradients(self): + model = self.get_model() + for param in model.parameters(): + if (param.grad is not None) and torch.isnan(param.grad.float()).any(): + logging.info(param, param.grad) + + def configure_accumulated_gradients(self, accumulate_grad_batches): + self.accumulate_grad_batches = None + + if isinstance(accumulate_grad_batches, dict): + self.accumulation_scheduler = GradientAccumulationScheduler(accumulate_grad_batches) + elif isinstance(accumulate_grad_batches, int): + schedule = {1: accumulate_grad_batches} + self.accumulation_scheduler = GradientAccumulationScheduler(schedule) + else: + raise TypeError("Gradient accumulation supports only int and dict types") + + def get_dataloaders(self, model): + if not self.testing: + self.init_train_dataloader(model) + self.init_val_dataloader(model) + else: + self.init_test_dataloader(model) + + if self.use_ddp: + dist.barrier() + if not self.testing: + self.get_train_dataloader() + self.get_val_dataloaders() + else: + self.get_test_dataloaders() + + def init_train_dataloader(self, model): + self.fisrt_epoch = True + self.get_train_dataloader = model.train_dataloader + if isinstance(self.get_train_dataloader(), torch.utils.data.DataLoader): + self.num_training_batches = len(self.get_train_dataloader()) + self.num_training_batches = int(self.num_training_batches) + else: + self.num_training_batches = float('inf') + self.is_iterable_train_dataloader = True + if isinstance(self.val_check_interval, int): + self.val_check_batch = self.val_check_interval + else: + self._percent_range_check('val_check_interval') + self.val_check_batch = int(self.num_training_batches * self.val_check_interval) + self.val_check_batch = max(1, self.val_check_batch) + + def init_val_dataloader(self, model): + self.get_val_dataloaders = model.val_dataloader + self.num_val_batches = 0 + if self.get_val_dataloaders() is not None: + if isinstance(self.get_val_dataloaders()[0], torch.utils.data.DataLoader): + self.num_val_batches = sum(len(dataloader) for dataloader in self.get_val_dataloaders()) + self.num_val_batches = int(self.num_val_batches) + else: 
+ self.num_val_batches = float('inf') + + def init_test_dataloader(self, model): + self.get_test_dataloaders = model.test_dataloader + if self.get_test_dataloaders() is not None: + if isinstance(self.get_test_dataloaders()[0], torch.utils.data.DataLoader): + self.num_test_batches = sum(len(dataloader) for dataloader in self.get_test_dataloaders()) + self.num_test_batches = int(self.num_test_batches) + else: + self.num_test_batches = float('inf') + + def evaluate(self, model, dataloaders, max_batches, test=False): + """Run evaluation code. + + :param model: PT model + :param dataloaders: list of PT dataloaders + :param max_batches: Scalar + :param test: boolean + :return: + """ + # enable eval mode + model.zero_grad() + model.eval() + + # copy properties for forward overrides + self.copy_trainer_model_properties(model) + + # disable gradients to save memory + torch.set_grad_enabled(False) + + if test: + self.get_model().test_start() + # bookkeeping + outputs = [] + + # run training + for dataloader_idx, dataloader in enumerate(dataloaders): + dl_outputs = [] + for batch_idx, batch in enumerate(dataloader): + + if batch is None: # pragma: no cover + continue + + # stop short when on fast_dev_run (sets max_batch=1) + if batch_idx >= max_batches: + break + + # ----------------- + # RUN EVALUATION STEP + # ----------------- + output = self.evaluation_forward(model, + batch, + batch_idx, + dataloader_idx, + test) + + # track outputs for collation + dl_outputs.append(output) + + # batch done + if test: + self.test_progress_bar.update(1) + else: + self.val_progress_bar.update(1) + outputs.append(dl_outputs) + + # with a single dataloader don't pass an array + if len(dataloaders) == 1: + outputs = outputs[0] + + # give model a chance to do something with the outputs (and method defined) + model = self.get_model() + if test: + eval_results_ = model.test_end(outputs) + else: + eval_results_ = model.validation_end(outputs) + eval_results = eval_results_ + + # enable train mode again + model.train() + + # enable gradients to save memory + torch.set_grad_enabled(True) + + return eval_results + + def run_evaluation(self, test=False): + # when testing make sure user defined a test step + model = self.get_model() + model.on_pre_performance_check() + + # select dataloaders + if test: + dataloaders = self.get_test_dataloaders() + max_batches = self.num_test_batches + else: + # val + dataloaders = self.get_val_dataloaders() + max_batches = self.num_val_batches + + # init validation or test progress bar + # main progress bar will already be closed when testing so initial position is free + position = 2 * self.process_position + (not test) + desc = 'Testing' if test else 'Validating' + pbar = tqdm.tqdm(desc=desc, total=max_batches, leave=test, position=position, + disable=not self.show_progress_bar, dynamic_ncols=True, + unit='batch', file=sys.stdout) + setattr(self, f'{"test" if test else "val"}_progress_bar', pbar) + + # run evaluation + eval_results = self.evaluate(self.model, + dataloaders, + max_batches, + test) + if eval_results is not None: + _, prog_bar_metrics, log_metrics, callback_metrics, _ = self.process_output( + eval_results) + + # add metrics to prog bar + self.add_tqdm_metrics(prog_bar_metrics) + + # log metrics + self.log_metrics(log_metrics, {}) + + # track metrics for callbacks + self.callback_metrics.update(callback_metrics) + + # hook + model.on_post_performance_check() + + # add model specific metrics + tqdm_metrics = self.training_tqdm_dict + if not test: + 
self.main_progress_bar.set_postfix(**tqdm_metrics) + + # close progress bar + if test: + self.test_progress_bar.close() + else: + self.val_progress_bar.close() + + # model checkpointing + if self.proc_rank == 0 and self.checkpoint_callback is not None and not test: + self.checkpoint_callback.on_epoch_end(epoch=self.current_epoch, + logs=self.callback_metrics) + + def evaluation_forward(self, model, batch, batch_idx, dataloader_idx, test=False): + # make dataloader_idx arg in validation_step optional + args = [batch, batch_idx] + # print(batch) + if test and len(self.get_test_dataloaders()) > 1: + args.append(dataloader_idx) + + elif not test and len(self.get_val_dataloaders()) > 1: + args.append(dataloader_idx) + + # handle DP, DDP forward + if self.use_ddp or self.use_dp: + output = model(*args) + return output + + # single GPU + if self.single_gpu: + # for single GPU put inputs on gpu manually + root_gpu = 0 + if isinstance(self.data_parallel_device_ids, list): + root_gpu = self.data_parallel_device_ids[0] + batch = self.transfer_batch_to_gpu(batch, root_gpu) + args[0] = batch + + # CPU + if test: + output = model.test_step(*args) + else: + output = model.validation_step(*args) + + return output + + def train(self): + model = self.get_model() + # run all epochs + for epoch in range(self.current_epoch, 1000000): + # set seed for distributed sampler (enables shuffling for each epoch) + if self.use_ddp and hasattr(self.get_train_dataloader().sampler, 'set_epoch'): + self.get_train_dataloader().sampler.set_epoch(epoch) + + # get model + model = self.get_model() + + # update training progress in trainer and model + model.current_epoch = epoch + self.current_epoch = epoch + + total_val_batches = 0 + if not self.disable_validation: + # val can be checked multiple times in epoch + is_val_epoch = (self.current_epoch + 1) % self.check_val_every_n_epoch == 0 + val_checks_per_epoch = self.num_training_batches // self.val_check_batch + val_checks_per_epoch = val_checks_per_epoch if is_val_epoch else 0 + total_val_batches = self.num_val_batches * val_checks_per_epoch + + # total batches includes multiple val checks + self.total_batches = self.num_training_batches + total_val_batches + self.batch_loss_value = 0 # accumulated grads + + if self.is_iterable_train_dataloader: + # for iterable train loader, the progress bar never ends + num_iterations = None + else: + num_iterations = self.total_batches + + # reset progress bar + # .reset() doesn't work on disabled progress bar so we should check + desc = f'Epoch {epoch + 1}' if not self.is_iterable_train_dataloader else '' + self.main_progress_bar.set_description(desc) + + # changing gradient according accumulation_scheduler + self.accumulation_scheduler.on_epoch_begin(epoch, self) + + # ----------------- + # RUN TNG EPOCH + # ----------------- + self.run_training_epoch() + + # update LR schedulers + if self.lr_schedulers is not None: + for lr_scheduler in self.lr_schedulers: + lr_scheduler.step(epoch=self.current_epoch) + + self.main_progress_bar.close() + + model.on_train_end() + + if self.logger is not None: + self.logger.finalize("success") + + def run_training_epoch(self): + # before epoch hook + if self.is_function_implemented('on_epoch_start'): + model = self.get_model() + model.on_epoch_start() + + # run epoch + for batch_idx, batch in enumerate(self.get_train_dataloader()): + # stop epoch if we limited the number of training batches + if batch_idx >= self.num_training_batches: + break + + self.batch_idx = batch_idx + + model = self.get_model() + 
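A worked example of the batch totals computed in train() and init_train_dataloader above, with made-up numbers and assuming check_val_every_n_epoch == 1:

num_training_batches = 1000
val_check_interval = 0.5                                                        # validate twice per epoch
val_check_batch = max(1, int(num_training_batches * val_check_interval))       # 500
num_val_batches = 50
val_checks_per_epoch = num_training_batches // val_check_batch                 # 2
total_batches = num_training_batches + num_val_batches * val_checks_per_epoch  # 1000 + 100 = 1100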
model.global_step = self.global_step + + # --------------- + # RUN TRAIN STEP + # --------------- + output = self.run_training_batch(batch, batch_idx) + batch_result, grad_norm_dic, batch_step_metrics = output + + # when returning -1 from train_step, we end epoch early + early_stop_epoch = batch_result == -1 + + # --------------- + # RUN VAL STEP + # --------------- + should_check_val = ( + not self.disable_validation and self.global_step % self.val_check_batch == 0 and not self.fisrt_epoch) + self.fisrt_epoch = False + + if should_check_val: + self.run_evaluation(test=self.testing) + + # when logs should be saved + should_save_log = (batch_idx + 1) % self.log_save_interval == 0 or early_stop_epoch + if should_save_log: + if self.proc_rank == 0 and self.logger is not None: + self.logger.save() + + # when metrics should be logged + should_log_metrics = batch_idx % self.row_log_interval == 0 or early_stop_epoch + if should_log_metrics: + # logs user requested information to logger + self.log_metrics(batch_step_metrics, grad_norm_dic) + + self.global_step += 1 + self.total_batch_idx += 1 + + # end epoch early + # stop when the flag is changed or we've gone past the amount + # requested in the batches + if early_stop_epoch: + break + if self.global_step > self.max_updates: + print("| Training end..") + exit() + + # epoch end hook + if self.is_function_implemented('on_epoch_end'): + model = self.get_model() + model.on_epoch_end() + + def run_training_batch(self, batch, batch_idx): + # track grad norms + grad_norm_dic = {} + + # track all metrics for callbacks + all_callback_metrics = [] + + # track metrics to log + all_log_metrics = [] + + if batch is None: + return 0, grad_norm_dic, {} + + # hook + if self.is_function_implemented('on_batch_start'): + model_ref = self.get_model() + response = model_ref.on_batch_start(batch) + + if response == -1: + return -1, grad_norm_dic, {} + + splits = [batch] + self.hiddens = None + for split_idx, split_batch in enumerate(splits): + self.split_idx = split_idx + + # call training_step once per optimizer + for opt_idx, optimizer in enumerate(self.optimizers): + if optimizer is None: + continue + # make sure only the gradients of the current optimizer's paramaters are calculated + # in the training step to prevent dangling gradients in multiple-optimizer setup. 
+ if len(self.optimizers) > 1: + for param in self.get_model().parameters(): + param.requires_grad = False + for group in optimizer.param_groups: + for param in group['params']: + param.requires_grad = True + + # wrap the forward step in a closure so second order methods work + def optimizer_closure(): + # forward pass + output = self.training_forward( + split_batch, batch_idx, opt_idx, self.hiddens) + + closure_loss = output[0] + progress_bar_metrics = output[1] + log_metrics = output[2] + callback_metrics = output[3] + self.hiddens = output[4] + if closure_loss is None: + return None + + # accumulate loss + # (if accumulate_grad_batches = 1 no effect) + closure_loss = closure_loss / self.accumulate_grad_batches + + # backward pass + model_ref = self.get_model() + if closure_loss.requires_grad: + model_ref.backward(closure_loss, optimizer) + + # track metrics for callbacks + all_callback_metrics.append(callback_metrics) + + # track progress bar metrics + self.add_tqdm_metrics(progress_bar_metrics) + all_log_metrics.append(log_metrics) + + # insert after step hook + if self.is_function_implemented('on_after_backward'): + model_ref = self.get_model() + model_ref.on_after_backward() + + return closure_loss + + # calculate loss + loss = optimizer_closure() + if loss is None: + continue + + # nan grads + if self.print_nan_grads: + self.print_nan_gradients() + + # track total loss for logging (avoid mem leaks) + self.batch_loss_value += loss.item() + + # gradient update with accumulated gradients + if (self.batch_idx + 1) % self.accumulate_grad_batches == 0: + + # track gradient norms when requested + if batch_idx % self.row_log_interval == 0: + if self.track_grad_norm > 0: + model = self.get_model() + grad_norm_dic = model.grad_norm( + self.track_grad_norm) + + # clip gradients + self.clip_gradients() + + # calls .step(), .zero_grad() + # override function to modify this behavior + model = self.get_model() + model.optimizer_step(self.current_epoch, batch_idx, optimizer, opt_idx) + + # calculate running loss for display + self.running_loss.append(self.batch_loss_value) + self.batch_loss_value = 0 + self.avg_loss = np.mean(self.running_loss[-100:]) + + # activate batch end hook + if self.is_function_implemented('on_batch_end'): + model = self.get_model() + model.on_batch_end() + + # update progress bar + self.main_progress_bar.update(1) + self.main_progress_bar.set_postfix(**self.training_tqdm_dict) + + # collapse all metrics into one dict + all_log_metrics = {k: v for d in all_log_metrics for k, v in d.items()} + + # track all metrics for callbacks + self.callback_metrics.update({k: v for d in all_callback_metrics for k, v in d.items()}) + + return 0, grad_norm_dic, all_log_metrics + + def training_forward(self, batch, batch_idx, opt_idx, hiddens): + """ + Handle forward for each training case (distributed, single gpu, etc...) 
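For reference, process_output above (called with train=True on the result of training_forward) consumes training_step / training_end style dictionaries; a minimal sketch of the expected shape, with hypothetical metric names:

import torch

output = {
    'loss': torch.tensor(1.23, requires_grad=True),   # reduced, scaled by grad accumulation, backpropagated
    'progress_bar': {'mel_loss': 1.0},                # shown in the tqdm postfix
    'log': {'mel_loss': 1.0, 'lr': 2e-4},             # written to the logger at row_log_interval
    'hiddens': None,                                  # optional, fed back on the next step
    'total_frames': 4800,                             # any other key becomes a callback metric
}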
+ :param batch: + :param batch_idx: + :return: + """ + # --------------- + # FORWARD + # --------------- + # enable not needing to add opt_idx to training_step + args = [batch, batch_idx, opt_idx] + + # distributed forward + if self.use_ddp or self.use_dp: + output = self.model(*args) + # single GPU forward + elif self.single_gpu: + gpu_id = 0 + if isinstance(self.data_parallel_device_ids, list): + gpu_id = self.data_parallel_device_ids[0] + batch = self.transfer_batch_to_gpu(copy.copy(batch), gpu_id) + args[0] = batch + output = self.model.training_step(*args) + # CPU forward + else: + output = self.model.training_step(*args) + + # allow any mode to define training_end + model_ref = self.get_model() + output_ = model_ref.training_end(output) + if output_ is not None: + output = output_ + + # format and reduce outputs accordingly + output = self.process_output(output, train=True) + + return output + + # --------------- + # Utils + # --------------- + def is_function_implemented(self, f_name): + model = self.get_model() + f_op = getattr(model, f_name, None) + return callable(f_op) + + def _percent_range_check(self, name): + value = getattr(self, name) + msg = f"`{name}` must lie in the range [0.0, 1.0], but got {value:.3f}." + if name == "val_check_interval": + msg += " If you want to disable validation set `val_percent_check` to 0.0 instead." + + if not 0. <= value <= 1.: + raise ValueError(msg) diff --git a/utils/plot.py b/utils/plot.py new file mode 100644 index 0000000000000000000000000000000000000000..bdca62a8cd80869c707890cd9febd39966cd3658 --- /dev/null +++ b/utils/plot.py @@ -0,0 +1,56 @@ +import matplotlib.pyplot as plt +import numpy as np +import torch + +LINE_COLORS = ['w', 'r', 'y', 'cyan', 'm', 'b', 'lime'] + + +def spec_to_figure(spec, vmin=None, vmax=None): + if isinstance(spec, torch.Tensor): + spec = spec.cpu().numpy() + fig = plt.figure(figsize=(12, 6)) + plt.pcolor(spec.T, vmin=vmin, vmax=vmax) + return fig + + +def spec_f0_to_figure(spec, f0s, figsize=None): + max_y = spec.shape[1] + if isinstance(spec, torch.Tensor): + spec = spec.detach().cpu().numpy() + f0s = {k: f0.detach().cpu().numpy() for k, f0 in f0s.items()} + f0s = {k: f0 / 10 for k, f0 in f0s.items()} + fig = plt.figure(figsize=(12, 6) if figsize is None else figsize) + plt.pcolor(spec.T) + for i, (k, f0) in enumerate(f0s.items()): + plt.plot(f0.clip(0, max_y), label=k, c=LINE_COLORS[i], linewidth=1, alpha=0.8) + plt.legend() + return fig + + +def dur_to_figure(dur_gt, dur_pred, txt): + dur_gt = dur_gt.long().cpu().numpy() + dur_pred = dur_pred.long().cpu().numpy() + dur_gt = np.cumsum(dur_gt) + dur_pred = np.cumsum(dur_pred) + fig = plt.figure(figsize=(12, 6)) + for i in range(len(dur_gt)): + shift = (i % 8) + 1 + plt.text(dur_gt[i], shift, txt[i]) + plt.text(dur_pred[i], 10 + shift, txt[i]) + plt.vlines(dur_gt[i], 0, 10, colors='b') # blue is gt + plt.vlines(dur_pred[i], 10, 20, colors='r') # red is pred + return fig + + +def f0_to_figure(f0_gt, f0_cwt=None, f0_pred=None): + fig = plt.figure() + f0_gt = f0_gt.cpu().numpy() + plt.plot(f0_gt, color='r', label='gt') + if f0_cwt is not None: + f0_cwt = f0_cwt.cpu().numpy() + plt.plot(f0_cwt, color='b', label='cwt') + if f0_pred is not None: + f0_pred = f0_pred.cpu().numpy() + plt.plot(f0_pred, color='green', label='pred') + plt.legend() + return fig diff --git a/utils/text_encoder.py b/utils/text_encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..d9e0758abc7b4e1f452481cba9715df08ceab543 --- /dev/null +++ b/utils/text_encoder.py @@ 
-0,0 +1,304 @@ +import re +import six +from six.moves import range # pylint: disable=redefined-builtin + +PAD = "" +EOS = "" +UNK = "" +SEG = "|" +RESERVED_TOKENS = [PAD, EOS, UNK] +NUM_RESERVED_TOKENS = len(RESERVED_TOKENS) +PAD_ID = RESERVED_TOKENS.index(PAD) # Normally 0 +EOS_ID = RESERVED_TOKENS.index(EOS) # Normally 1 +UNK_ID = RESERVED_TOKENS.index(UNK) # Normally 2 + +if six.PY2: + RESERVED_TOKENS_BYTES = RESERVED_TOKENS +else: + RESERVED_TOKENS_BYTES = [bytes(PAD, "ascii"), bytes(EOS, "ascii")] + +# Regular expression for unescaping token strings. +# '\u' is converted to '_' +# '\\' is converted to '\' +# '\213;' is converted to unichr(213) +_UNESCAPE_REGEX = re.compile(r"\\u|\\\\|\\([0-9]+);") +_ESCAPE_CHARS = set(u"\\_u;0123456789") + + +def strip_ids(ids, ids_to_strip): + """Strip ids_to_strip from the end ids.""" + ids = list(ids) + while ids and ids[-1] in ids_to_strip: + ids.pop() + return ids + + +class TextEncoder(object): + """Base class for converting from ints to/from human readable strings.""" + + def __init__(self, num_reserved_ids=NUM_RESERVED_TOKENS): + self._num_reserved_ids = num_reserved_ids + + @property + def num_reserved_ids(self): + return self._num_reserved_ids + + def encode(self, s): + """Transform a human-readable string into a sequence of int ids. + + The ids should be in the range [num_reserved_ids, vocab_size). Ids [0, + num_reserved_ids) are reserved. + + EOS is not appended. + + Args: + s: human-readable string to be converted. + + Returns: + ids: list of integers + """ + return [int(w) + self._num_reserved_ids for w in s.split()] + + def decode(self, ids, strip_extraneous=False): + """Transform a sequence of int ids into a human-readable string. + + EOS is not expected in ids. + + Args: + ids: list of integers to be converted. + strip_extraneous: bool, whether to strip off extraneous tokens + (EOS and PAD). + + Returns: + s: human-readable string. + """ + if strip_extraneous: + ids = strip_ids(ids, list(range(self._num_reserved_ids or 0))) + return " ".join(self.decode_list(ids)) + + def decode_list(self, ids): + """Transform a sequence of int ids into a their string versions. + + This method supports transforming individual input/output ids to their + string versions so that sequence to/from text conversions can be visualized + in a human readable format. + + Args: + ids: list of integers to be converted. + + Returns: + strs: list of human-readable string. + """ + decoded_ids = [] + for id_ in ids: + if 0 <= id_ < self._num_reserved_ids: + decoded_ids.append(RESERVED_TOKENS[int(id_)]) + else: + decoded_ids.append(id_ - self._num_reserved_ids) + return [str(d) for d in decoded_ids] + + @property + def vocab_size(self): + raise NotImplementedError() + + +class ByteTextEncoder(TextEncoder): + """Encodes each byte to an id. 
For 8-bit strings only.""" + + def encode(self, s): + numres = self._num_reserved_ids + if six.PY2: + if isinstance(s, unicode): + s = s.encode("utf-8") + return [ord(c) + numres for c in s] + # Python3: explicitly convert to UTF-8 + return [c + numres for c in s.encode("utf-8")] + + def decode(self, ids, strip_extraneous=False): + if strip_extraneous: + ids = strip_ids(ids, list(range(self._num_reserved_ids or 0))) + numres = self._num_reserved_ids + decoded_ids = [] + int2byte = six.int2byte + for id_ in ids: + if 0 <= id_ < numres: + decoded_ids.append(RESERVED_TOKENS_BYTES[int(id_)]) + else: + decoded_ids.append(int2byte(id_ - numres)) + if six.PY2: + return "".join(decoded_ids) + # Python3: join byte arrays and then decode string + return b"".join(decoded_ids).decode("utf-8", "replace") + + def decode_list(self, ids): + numres = self._num_reserved_ids + decoded_ids = [] + int2byte = six.int2byte + for id_ in ids: + if 0 <= id_ < numres: + decoded_ids.append(RESERVED_TOKENS_BYTES[int(id_)]) + else: + decoded_ids.append(int2byte(id_ - numres)) + # Python3: join byte arrays and then decode string + return decoded_ids + + @property + def vocab_size(self): + return 2**8 + self._num_reserved_ids + + +class ByteTextEncoderWithEos(ByteTextEncoder): + """Encodes each byte to an id and appends the EOS token.""" + + def encode(self, s): + return super(ByteTextEncoderWithEos, self).encode(s) + [EOS_ID] + + +class TokenTextEncoder(TextEncoder): + """Encoder based on a user-supplied vocabulary (file or list).""" + + def __init__(self, + vocab_filename, + reverse=False, + vocab_list=None, + replace_oov=None, + num_reserved_ids=NUM_RESERVED_TOKENS): + """Initialize from a file or list, one token per line. + + Handling of reserved tokens works as follows: + - When initializing from a list, we add reserved tokens to the vocab. + - When initializing from a file, we do not add reserved tokens to the vocab. + - When saving vocab files, we save reserved tokens to the file. + + Args: + vocab_filename: If not None, the full filename to read vocab from. If this + is not None, then vocab_list should be None. + reverse: Boolean indicating if tokens should be reversed during encoding + and decoding. + vocab_list: If not None, a list of elements of the vocabulary. If this is + not None, then vocab_filename should be None. + replace_oov: If not None, every out-of-vocabulary token seen when + encoding will be replaced by this string (which must be in vocab). + num_reserved_ids: Number of IDs to save for reserved tokens like . 
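A small usage sketch of ByteTextEncoder above; ids are UTF-8 byte values shifted by the number of reserved ids (3 by default):

enc = ByteTextEncoder()
ids = enc.encode('ab')                             # [100, 101]  (ord('a') + 3, ord('b') + 3)
text = enc.decode(ids)                             # 'ab'
with_eos = ByteTextEncoderWithEos().encode('ab')   # [100, 101] plus the EOS id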
+ """ + super(TokenTextEncoder, self).__init__(num_reserved_ids=num_reserved_ids) + self._reverse = reverse + self._replace_oov = replace_oov + if vocab_filename: + self._init_vocab_from_file(vocab_filename) + else: + assert vocab_list is not None + self._init_vocab_from_list(vocab_list) + self.pad_index = self._token_to_id[PAD] + self.eos_index = self._token_to_id[EOS] + self.unk_index = self._token_to_id[UNK] + self.seg_index = self._token_to_id[SEG] if SEG in self._token_to_id else self.eos_index + + def encode(self, s): + """Converts a space-separated string of tokens to a list of ids.""" + sentence = s + tokens = sentence.strip().split() + if self._replace_oov is not None: + tokens = [t if t in self._token_to_id else self._replace_oov + for t in tokens] + ret = [self._token_to_id[tok] for tok in tokens] + return ret[::-1] if self._reverse else ret + + def decode(self, ids, strip_eos=False, strip_padding=False): + if strip_padding and self.pad() in list(ids): + pad_pos = list(ids).index(self.pad()) + ids = ids[:pad_pos] + if strip_eos and self.eos() in list(ids): + eos_pos = list(ids).index(self.eos()) + ids = ids[:eos_pos] + return " ".join(self.decode_list(ids)) + + def decode_list(self, ids): + seq = reversed(ids) if self._reverse else ids + return [self._safe_id_to_token(i) for i in seq] + + @property + def vocab_size(self): + return len(self._id_to_token) + + def __len__(self): + return self.vocab_size + + def _safe_id_to_token(self, idx): + return self._id_to_token.get(idx, "ID_%d" % idx) + + def _init_vocab_from_file(self, filename): + """Load vocab from a file. + + Args: + filename: The file to load vocabulary from. + """ + with open(filename) as f: + tokens = [token.strip() for token in f.readlines()] + + def token_gen(): + for token in tokens: + yield token + + self._init_vocab(token_gen(), add_reserved_tokens=False) + + def _init_vocab_from_list(self, vocab_list): + """Initialize tokens from a list of tokens. + + It is ok if reserved tokens appear in the vocab list. They will be + removed. The set of tokens in vocab_list should be unique. + + Args: + vocab_list: A list of tokens. + """ + def token_gen(): + for token in vocab_list: + if token not in RESERVED_TOKENS: + yield token + + self._init_vocab(token_gen()) + + def _init_vocab(self, token_generator, add_reserved_tokens=True): + """Initialize vocabulary with tokens from token_generator.""" + + self._id_to_token = {} + non_reserved_start_index = 0 + + if add_reserved_tokens: + self._id_to_token.update(enumerate(RESERVED_TOKENS)) + non_reserved_start_index = len(RESERVED_TOKENS) + + self._id_to_token.update( + enumerate(token_generator, start=non_reserved_start_index)) + + # _token_to_id is the reverse of _id_to_token + self._token_to_id = dict((v, k) + for k, v in six.iteritems(self._id_to_token)) + + def pad(self): + return self.pad_index + + def eos(self): + return self.eos_index + + def unk(self): + return self.unk_index + + def seg(self): + return self.seg_index + + def store_to_file(self, filename): + """Write vocab file to disk. + + Vocab files have one token per line. The file ends in a newline. Reserved + tokens are written to the vocab file as well. + + Args: + filename: Full path of the file to store the vocab to. 
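A small usage sketch of TokenTextEncoder above, with a hypothetical phoneme-style vocabulary (reserved tokens are prepended automatically when initializing from a list):

enc = TokenTextEncoder(None, vocab_list=['a', 'b', '|', 'SP'])
ids = enc.encode('a b SP')        # e.g. [3, 4, 6]; the reserved ids occupy 0..2
text = enc.decode(ids)            # 'a b SP'
seg_id = enc.seg()                # id of the '|' segment token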
+ """ + with open(filename, "w") as f: + for i in range(len(self._id_to_token)): + f.write(self._id_to_token[i] + "\n") + + def sil_phonemes(self): + return [p for p in self._id_to_token.values() if not p[0].isalpha()] diff --git a/utils/text_norm.py b/utils/text_norm.py new file mode 100644 index 0000000000000000000000000000000000000000..d0973cebc91e0525aeb6657e70012a1d37b5e6ff --- /dev/null +++ b/utils/text_norm.py @@ -0,0 +1,790 @@ +# coding=utf-8 +# Authors: +# 2019.5 Zhiyang Zhou (https://github.com/Joee1995/chn_text_norm.git) +# 2019.9 Jiayu DU +# +# requirements: +# - python 3.X +# notes: python 2.X WILL fail or produce misleading results + +import sys, os, argparse, codecs, string, re + +# ================================================================================ # +# basic constant +# ================================================================================ # +CHINESE_DIGIS = u'零一二三四五六七八九' +BIG_CHINESE_DIGIS_SIMPLIFIED = u'零壹贰叁肆伍陆柒捌玖' +BIG_CHINESE_DIGIS_TRADITIONAL = u'零壹貳參肆伍陸柒捌玖' +SMALLER_BIG_CHINESE_UNITS_SIMPLIFIED = u'十百千万' +SMALLER_BIG_CHINESE_UNITS_TRADITIONAL = u'拾佰仟萬' +LARGER_CHINESE_NUMERING_UNITS_SIMPLIFIED = u'亿兆京垓秭穰沟涧正载' +LARGER_CHINESE_NUMERING_UNITS_TRADITIONAL = u'億兆京垓秭穰溝澗正載' +SMALLER_CHINESE_NUMERING_UNITS_SIMPLIFIED = u'十百千万' +SMALLER_CHINESE_NUMERING_UNITS_TRADITIONAL = u'拾佰仟萬' + +ZERO_ALT = u'〇' +ONE_ALT = u'幺' +TWO_ALTS = [u'两', u'兩'] + +POSITIVE = [u'正', u'正'] +NEGATIVE = [u'负', u'負'] +POINT = [u'点', u'點'] +# PLUS = [u'加', u'加'] +# SIL = [u'杠', u'槓'] + +# 中文数字系统类型 +NUMBERING_TYPES = ['low', 'mid', 'high'] + +CURRENCY_NAMES = '(人民币|美元|日元|英镑|欧元|马克|法郎|加拿大元|澳元|港币|先令|芬兰马克|爱尔兰镑|' \ + '里拉|荷兰盾|埃斯库多|比塞塔|印尼盾|林吉特|新西兰元|比索|卢布|新加坡元|韩元|泰铢)' +CURRENCY_UNITS = '((亿|千万|百万|万|千|百)|(亿|千万|百万|万|千|百|)元|(亿|千万|百万|万|千|百|)块|角|毛|分)' +COM_QUANTIFIERS = '(匹|张|座|回|场|尾|条|个|首|阙|阵|网|炮|顶|丘|棵|只|支|袭|辆|挑|担|颗|壳|窠|曲|墙|群|腔|' \ + '砣|座|客|贯|扎|捆|刀|令|打|手|罗|坡|山|岭|江|溪|钟|队|单|双|对|出|口|头|脚|板|跳|枝|件|贴|' \ + '针|线|管|名|位|身|堂|课|本|页|家|户|层|丝|毫|厘|分|钱|两|斤|担|铢|石|钧|锱|忽|(千|毫|微)克|' \ + '毫|厘|分|寸|尺|丈|里|寻|常|铺|程|(千|分|厘|毫|微)米|撮|勺|合|升|斗|石|盘|碗|碟|叠|桶|笼|盆|' \ + '盒|杯|钟|斛|锅|簋|篮|盘|桶|罐|瓶|壶|卮|盏|箩|箱|煲|啖|袋|钵|年|月|日|季|刻|时|周|天|秒|分|旬|' \ + '纪|岁|世|更|夜|春|夏|秋|冬|代|伏|辈|丸|泡|粒|颗|幢|堆|条|根|支|道|面|片|张|颗|块)' + +# punctuation information are based on Zhon project (https://github.com/tsroten/zhon.git) +CHINESE_PUNC_STOP = '!?。。' +CHINESE_PUNC_NON_STOP = '"#$%&'()*+,-/:;<=>@[\]^_`{|}~⦅⦆「」、、〃《》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏' +CHINESE_PUNC_LIST = CHINESE_PUNC_STOP + CHINESE_PUNC_NON_STOP + + +# ================================================================================ # +# basic class +# ================================================================================ # +class ChineseChar(object): + """ + 中文字符 + 每个字符对应简体和繁体, + e.g. 简体 = '负', 繁体 = '負' + 转换时可转换为简体或繁体 + """ + + def __init__(self, simplified, traditional): + self.simplified = simplified + self.traditional = traditional + # self.__repr__ = self.__str__ + + def __str__(self): + return self.simplified or self.traditional or None + + def __repr__(self): + return self.__str__() + + +class ChineseNumberUnit(ChineseChar): + """ + 中文数字/数位字符 + 每个字符除繁简体外还有一个额外的大写字符 + e.g. 
'陆' 和 '陸' + """ + + def __init__(self, power, simplified, traditional, big_s, big_t): + super(ChineseNumberUnit, self).__init__(simplified, traditional) + self.power = power + self.big_s = big_s + self.big_t = big_t + + def __str__(self): + return '10^{}'.format(self.power) + + @classmethod + def create(cls, index, value, numbering_type=NUMBERING_TYPES[1], small_unit=False): + + if small_unit: + return ChineseNumberUnit(power=index + 1, + simplified=value[0], traditional=value[1], big_s=value[1], big_t=value[1]) + elif numbering_type == NUMBERING_TYPES[0]: + return ChineseNumberUnit(power=index + 8, + simplified=value[0], traditional=value[1], big_s=value[0], big_t=value[1]) + elif numbering_type == NUMBERING_TYPES[1]: + return ChineseNumberUnit(power=(index + 2) * 4, + simplified=value[0], traditional=value[1], big_s=value[0], big_t=value[1]) + elif numbering_type == NUMBERING_TYPES[2]: + return ChineseNumberUnit(power=pow(2, index + 3), + simplified=value[0], traditional=value[1], big_s=value[0], big_t=value[1]) + else: + raise ValueError( + 'Counting type should be in {0} ({1} provided).'.format(NUMBERING_TYPES, numbering_type)) + + +class ChineseNumberDigit(ChineseChar): + """ + 中文数字字符 + """ + + def __init__(self, value, simplified, traditional, big_s, big_t, alt_s=None, alt_t=None): + super(ChineseNumberDigit, self).__init__(simplified, traditional) + self.value = value + self.big_s = big_s + self.big_t = big_t + self.alt_s = alt_s + self.alt_t = alt_t + + def __str__(self): + return str(self.value) + + @classmethod + def create(cls, i, v): + return ChineseNumberDigit(i, v[0], v[1], v[2], v[3]) + + +class ChineseMath(ChineseChar): + """ + 中文数位字符 + """ + + def __init__(self, simplified, traditional, symbol, expression=None): + super(ChineseMath, self).__init__(simplified, traditional) + self.symbol = symbol + self.expression = expression + self.big_s = simplified + self.big_t = traditional + + +CC, CNU, CND, CM = ChineseChar, ChineseNumberUnit, ChineseNumberDigit, ChineseMath + + +class NumberSystem(object): + """ + 中文数字系统 + """ + pass + + +class MathSymbol(object): + """ + 用于中文数字系统的数学符号 (繁/简体), e.g. + positive = ['正', '正'] + negative = ['负', '負'] + point = ['点', '點'] + """ + + def __init__(self, positive, negative, point): + self.positive = positive + self.negative = negative + self.point = point + + def __iter__(self): + for v in self.__dict__.values(): + yield v + + +# class OtherSymbol(object): +# """ +# 其他符号 +# """ +# +# def __init__(self, sil): +# self.sil = sil +# +# def __iter__(self): +# for v in self.__dict__.values(): +# yield v + + +# ================================================================================ # +# basic utils +# ================================================================================ # +def create_system(numbering_type=NUMBERING_TYPES[1]): + """ + 根据数字系统类型返回创建相应的数字系统,默认为 mid + NUMBERING_TYPES = ['low', 'mid', 'high']: 中文数字系统类型 + low: '兆' = '亿' * '十' = $10^{9}$, '京' = '兆' * '十', etc. + mid: '兆' = '亿' * '万' = $10^{12}$, '京' = '兆' * '万', etc. + high: '兆' = '亿' * '亿' = $10^{16}$, '京' = '兆' * '兆', etc. 
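For reference, a quick check of the powers assigned by ChineseNumberUnit.create above under the default 'mid' system (the character pairs are taken from the unit constants at the top of this file):

shi = ChineseNumberUnit.create(0, ('十', '拾'), small_unit=True)        # 10^1
wan = ChineseNumberUnit.create(3, ('万', '萬'), small_unit=True)        # 10^4
yi = ChineseNumberUnit.create(0, ('亿', '億'), numbering_type='mid')    # 10^8
zhao = ChineseNumberUnit.create(1, ('兆', '兆'), numbering_type='mid')  # 10^12
print(shi, wan, yi, zhao)   # prints the powers via __str__: 10^1 10^4 10^8 10^12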
+ 返回对应的数字系统 + """ + + # chinese number units of '亿' and larger + all_larger_units = zip( + LARGER_CHINESE_NUMERING_UNITS_SIMPLIFIED, LARGER_CHINESE_NUMERING_UNITS_TRADITIONAL) + larger_units = [CNU.create(i, v, numbering_type, False) + for i, v in enumerate(all_larger_units)] + # chinese number units of '十, 百, 千, 万' + all_smaller_units = zip( + SMALLER_CHINESE_NUMERING_UNITS_SIMPLIFIED, SMALLER_CHINESE_NUMERING_UNITS_TRADITIONAL) + smaller_units = [CNU.create(i, v, small_unit=True) + for i, v in enumerate(all_smaller_units)] + # digis + chinese_digis = zip(CHINESE_DIGIS, CHINESE_DIGIS, + BIG_CHINESE_DIGIS_SIMPLIFIED, BIG_CHINESE_DIGIS_TRADITIONAL) + digits = [CND.create(i, v) for i, v in enumerate(chinese_digis)] + digits[0].alt_s, digits[0].alt_t = ZERO_ALT, ZERO_ALT + digits[1].alt_s, digits[1].alt_t = ONE_ALT, ONE_ALT + digits[2].alt_s, digits[2].alt_t = TWO_ALTS[0], TWO_ALTS[1] + + # symbols + positive_cn = CM(POSITIVE[0], POSITIVE[1], '+', lambda x: x) + negative_cn = CM(NEGATIVE[0], NEGATIVE[1], '-', lambda x: -x) + point_cn = CM(POINT[0], POINT[1], '.', lambda x, + y: float(str(x) + '.' + str(y))) + # sil_cn = CM(SIL[0], SIL[1], '-', lambda x, y: float(str(x) + '-' + str(y))) + system = NumberSystem() + system.units = smaller_units + larger_units + system.digits = digits + system.math = MathSymbol(positive_cn, negative_cn, point_cn) + # system.symbols = OtherSymbol(sil_cn) + return system + + +def chn2num(chinese_string, numbering_type=NUMBERING_TYPES[1]): + def get_symbol(char, system): + for u in system.units: + if char in [u.traditional, u.simplified, u.big_s, u.big_t]: + return u + for d in system.digits: + if char in [d.traditional, d.simplified, d.big_s, d.big_t, d.alt_s, d.alt_t]: + return d + for m in system.math: + if char in [m.traditional, m.simplified]: + return m + + def string2symbols(chinese_string, system): + int_string, dec_string = chinese_string, '' + for p in [system.math.point.simplified, system.math.point.traditional]: + if p in chinese_string: + int_string, dec_string = chinese_string.split(p) + break + return [get_symbol(c, system) for c in int_string], \ + [get_symbol(c, system) for c in dec_string] + + def correct_symbols(integer_symbols, system): + """ + 一百八 to 一百八十 + 一亿一千三百万 to 一亿 一千万 三百万 + """ + + if integer_symbols and isinstance(integer_symbols[0], CNU): + if integer_symbols[0].power == 1: + integer_symbols = [system.digits[1]] + integer_symbols + + if len(integer_symbols) > 1: + if isinstance(integer_symbols[-1], CND) and isinstance(integer_symbols[-2], CNU): + integer_symbols.append( + CNU(integer_symbols[-2].power - 1, None, None, None, None)) + + result = [] + unit_count = 0 + for s in integer_symbols: + if isinstance(s, CND): + result.append(s) + unit_count = 0 + elif isinstance(s, CNU): + current_unit = CNU(s.power, None, None, None, None) + unit_count += 1 + + if unit_count == 1: + result.append(current_unit) + elif unit_count > 1: + for i in range(len(result)): + if isinstance(result[-i - 1], CNU) and result[-i - 1].power < current_unit.power: + result[-i - 1] = CNU(result[-i - 1].power + + current_unit.power, None, None, None, None) + return result + + def compute_value(integer_symbols): + """ + Compute the value. + When current unit is larger than previous unit, current unit * all previous units will be used as all previous units. + e.g. 
'两千万' = 2000 * 10000 not 2000 + 10000 + """ + value = [0] + last_power = 0 + for s in integer_symbols: + if isinstance(s, CND): + value[-1] = s.value + elif isinstance(s, CNU): + value[-1] *= pow(10, s.power) + if s.power > last_power: + value[:-1] = list(map(lambda v: v * + pow(10, s.power), value[:-1])) + last_power = s.power + value.append(0) + return sum(value) + + system = create_system(numbering_type) + int_part, dec_part = string2symbols(chinese_string, system) + int_part = correct_symbols(int_part, system) + int_str = str(compute_value(int_part)) + dec_str = ''.join([str(d.value) for d in dec_part]) + if dec_part: + return '{0}.{1}'.format(int_str, dec_str) + else: + return int_str + + +def num2chn(number_string, numbering_type=NUMBERING_TYPES[1], big=False, + traditional=False, alt_zero=False, alt_one=False, alt_two=True, + use_zeros=True, use_units=True): + def get_value(value_string, use_zeros=True): + + striped_string = value_string.lstrip('0') + + # record nothing if all zeros + if not striped_string: + return [] + + # record one digits + elif len(striped_string) == 1: + if use_zeros and len(value_string) != len(striped_string): + return [system.digits[0], system.digits[int(striped_string)]] + else: + return [system.digits[int(striped_string)]] + + # recursively record multiple digits + else: + result_unit = next(u for u in reversed( + system.units) if u.power < len(striped_string)) + result_string = value_string[:-result_unit.power] + return get_value(result_string) + [result_unit] + get_value(striped_string[-result_unit.power:]) + + system = create_system(numbering_type) + + int_dec = number_string.split('.') + if len(int_dec) == 1: + int_string = int_dec[0] + dec_string = "" + elif len(int_dec) == 2: + int_string = int_dec[0] + dec_string = int_dec[1] + else: + raise ValueError( + "invalid input num string with more than one dot: {}".format(number_string)) + + if use_units and len(int_string) > 1: + result_symbols = get_value(int_string) + else: + result_symbols = [system.digits[int(c)] for c in int_string] + dec_symbols = [system.digits[int(c)] for c in dec_string] + if dec_string: + result_symbols += [system.math.point] + dec_symbols + + if alt_two: + liang = CND(2, system.digits[2].alt_s, system.digits[2].alt_t, + system.digits[2].big_s, system.digits[2].big_t) + for i, v in enumerate(result_symbols): + if isinstance(v, CND) and v.value == 2: + next_symbol = result_symbols[i + + 1] if i < len(result_symbols) - 1 else None + previous_symbol = result_symbols[i - 1] if i > 0 else None + if isinstance(next_symbol, CNU) and isinstance(previous_symbol, (CNU, type(None))): + if next_symbol.power != 1 and ((previous_symbol is None) or (previous_symbol.power != 1)): + result_symbols[i] = liang + + # if big is True, '两' will not be used and `alt_two` has no impact on output + if big: + attr_name = 'big_' + if traditional: + attr_name += 't' + else: + attr_name += 's' + else: + if traditional: + attr_name = 'traditional' + else: + attr_name = 'simplified' + + result = ''.join([getattr(s, attr_name) for s in result_symbols]) + + # if not use_zeros: + # result = result.strip(getattr(system.digits[0], attr_name)) + + if alt_zero: + result = result.replace( + getattr(system.digits[0], attr_name), system.digits[0].alt_s) + + if alt_one: + result = result.replace( + getattr(system.digits[1], attr_name), system.digits[1].alt_s) + + for i, p in enumerate(POINT): + if result.startswith(p): + return CHINESE_DIGIS[0] + result + + # ^10, 11, .., 19 + if len(result) >= 2 and result[1] in 
[SMALLER_CHINESE_NUMERING_UNITS_SIMPLIFIED[0], + SMALLER_CHINESE_NUMERING_UNITS_TRADITIONAL[0]] and \ + result[0] in [CHINESE_DIGIS[1], BIG_CHINESE_DIGIS_SIMPLIFIED[1], BIG_CHINESE_DIGIS_TRADITIONAL[1]]: + result = result[1:] + + return result + + +# ================================================================================ # +# different types of rewriters +# ================================================================================ # +class Cardinal: + """ + CARDINAL类 + """ + + def __init__(self, cardinal=None, chntext=None): + self.cardinal = cardinal + self.chntext = chntext + + def chntext2cardinal(self): + return chn2num(self.chntext) + + def cardinal2chntext(self): + return num2chn(self.cardinal) + + +class Digit: + """ + DIGIT类 + """ + + def __init__(self, digit=None, chntext=None): + self.digit = digit + self.chntext = chntext + + # def chntext2digit(self): + # return chn2num(self.chntext) + + def digit2chntext(self): + return num2chn(self.digit, alt_two=False, use_units=False) + + +class TelePhone: + """ + TELEPHONE类 + """ + + def __init__(self, telephone=None, raw_chntext=None, chntext=None): + self.telephone = telephone + self.raw_chntext = raw_chntext + self.chntext = chntext + + # def chntext2telephone(self): + # sil_parts = self.raw_chntext.split('') + # self.telephone = '-'.join([ + # str(chn2num(p)) for p in sil_parts + # ]) + # return self.telephone + + def telephone2chntext(self, fixed=False): + + if fixed: + sil_parts = self.telephone.split('-') + self.raw_chntext = ''.join([ + num2chn(part, alt_two=False, use_units=False) for part in sil_parts + ]) + self.chntext = self.raw_chntext.replace('', '') + else: + sp_parts = self.telephone.strip('+').split() + self.raw_chntext = ''.join([ + num2chn(part, alt_two=False, use_units=False) for part in sp_parts + ]) + self.chntext = self.raw_chntext.replace('', '') + return self.chntext + + +class Fraction: + """ + FRACTION类 + """ + + def __init__(self, fraction=None, chntext=None): + self.fraction = fraction + self.chntext = chntext + + def chntext2fraction(self): + denominator, numerator = self.chntext.split('分之') + return chn2num(numerator) + '/' + chn2num(denominator) + + def fraction2chntext(self): + numerator, denominator = self.fraction.split('/') + return num2chn(denominator) + '分之' + num2chn(numerator) + + +class Date: + """ + DATE类 + """ + + def __init__(self, date=None, chntext=None): + self.date = date + self.chntext = chntext + + # def chntext2date(self): + # chntext = self.chntext + # try: + # year, other = chntext.strip().split('年', maxsplit=1) + # year = Digit(chntext=year).digit2chntext() + '年' + # except ValueError: + # other = chntext + # year = '' + # if other: + # try: + # month, day = other.strip().split('月', maxsplit=1) + # month = Cardinal(chntext=month).chntext2cardinal() + '月' + # except ValueError: + # day = chntext + # month = '' + # if day: + # day = Cardinal(chntext=day[:-1]).chntext2cardinal() + day[-1] + # else: + # month = '' + # day = '' + # date = year + month + day + # self.date = date + # return self.date + + def date2chntext(self): + date = self.date + try: + year, other = date.strip().split('年', 1) + year = Digit(digit=year).digit2chntext() + '年' + except ValueError: + other = date + year = '' + if other: + try: + month, day = other.strip().split('月', 1) + month = Cardinal(cardinal=month).cardinal2chntext() + '月' + except ValueError: + day = date + month = '' + if day: + day = Cardinal(cardinal=day[:-1]).cardinal2chntext() + day[-1] + else: + month = '' + day = '' + chntext = year 
+ month + day + self.chntext = chntext + return self.chntext + + +class Money: + """ + MONEY类 + """ + + def __init__(self, money=None, chntext=None): + self.money = money + self.chntext = chntext + + # def chntext2money(self): + # return self.money + + def money2chntext(self): + money = self.money + pattern = re.compile(r'(\d+(\.\d+)?)') + matchers = pattern.findall(money) + if matchers: + for matcher in matchers: + money = money.replace(matcher[0], Cardinal(cardinal=matcher[0]).cardinal2chntext()) + self.chntext = money + return self.chntext + + +class Percentage: + """ + PERCENTAGE类 + """ + + def __init__(self, percentage=None, chntext=None): + self.percentage = percentage + self.chntext = chntext + + def chntext2percentage(self): + return chn2num(self.chntext.strip().strip('百分之')) + '%' + + def percentage2chntext(self): + return '百分之' + num2chn(self.percentage.strip().strip('%')) + + +# ================================================================================ # +# NSW Normalizer +# ================================================================================ # +class NSWNormalizer: + def __init__(self, raw_text): + self.raw_text = '^' + raw_text + '$' + self.norm_text = '' + + def _particular(self): + text = self.norm_text + pattern = re.compile(r"(([a-zA-Z]+)二([a-zA-Z]+))") + matchers = pattern.findall(text) + if matchers: + # print('particular') + for matcher in matchers: + text = text.replace(matcher[0], matcher[1] + '2' + matcher[2], 1) + self.norm_text = text + return self.norm_text + + def normalize(self, remove_punc=True): + text = self.raw_text + + # 规范化日期 + pattern = re.compile(r"\D+((([089]\d|(19|20)\d{2})年)?(\d{1,2}月(\d{1,2}[日号])?)?)") + matchers = pattern.findall(text) + if matchers: + # print('date') + for matcher in matchers: + text = text.replace(matcher[0], Date(date=matcher[0]).date2chntext(), 1) + + # 规范化金钱 + pattern = re.compile(r"\D+((\d+(\.\d+)?)[多余几]?" + CURRENCY_UNITS + r"(\d" + CURRENCY_UNITS + r"?)?)") + matchers = pattern.findall(text) + if matchers: + # print('money') + for matcher in matchers: + text = text.replace(matcher[0], Money(money=matcher[0]).money2chntext(), 1) + + # 规范化固话/手机号码 + # 手机 + # http://www.jihaoba.com/news/show/13680 + # 移动:139、138、137、136、135、134、159、158、157、150、151、152、188、187、182、183、184、178、198 + # 联通:130、131、132、156、155、186、185、176 + # 电信:133、153、189、180、181、177 + pattern = re.compile(r"\D((\+?86 ?)?1([38]\d|5[0-35-9]|7[678]|9[89])\d{8})\D") + matchers = pattern.findall(text) + if matchers: + # print('telephone') + for matcher in matchers: + text = text.replace(matcher[0], TelePhone(telephone=matcher[0]).telephone2chntext(), 1) + # 固话 + pattern = re.compile(r"\D((0(10|2[1-3]|[3-9]\d{2})-?)?[1-9]\d{6,7})\D") + matchers = pattern.findall(text) + if matchers: + # print('fixed telephone') + for matcher in matchers: + text = text.replace(matcher[0], TelePhone(telephone=matcher[0]).telephone2chntext(fixed=True), 1) + + # 规范化分数 + pattern = re.compile(r"(\d+/\d+)") + matchers = pattern.findall(text) + if matchers: + # print('fraction') + for matcher in matchers: + text = text.replace(matcher, Fraction(fraction=matcher).fraction2chntext(), 1) + + # 规范化百分数 + text = text.replace('%', '%') + pattern = re.compile(r"(\d+(\.\d+)?%)") + matchers = pattern.findall(text) + if matchers: + # print('percentage') + for matcher in matchers: + text = text.replace(matcher[0], Percentage(percentage=matcher[0]).percentage2chntext(), 1) + + # 规范化纯数+量词 + pattern = re.compile(r"(\d+(\.\d+)?)[多余几]?" 
+ COM_QUANTIFIERS) + matchers = pattern.findall(text) + if matchers: + # print('cardinal+quantifier') + for matcher in matchers: + text = text.replace(matcher[0], Cardinal(cardinal=matcher[0]).cardinal2chntext(), 1) + + # 规范化数字编号 + pattern = re.compile(r"(\d{4,32})") + matchers = pattern.findall(text) + if matchers: + # print('digit') + for matcher in matchers: + text = text.replace(matcher, Digit(digit=matcher).digit2chntext(), 1) + + # 规范化纯数 + pattern = re.compile(r"(\d+(\.\d+)?)") + matchers = pattern.findall(text) + if matchers: + # print('cardinal') + for matcher in matchers: + text = text.replace(matcher[0], Cardinal(cardinal=matcher[0]).cardinal2chntext(), 1) + + self.norm_text = text + self._particular() + + text = self.norm_text.lstrip('^').rstrip('$') + if remove_punc: + # Punctuations removal + old_chars = CHINESE_PUNC_LIST + string.punctuation # includes all CN and EN punctuations + new_chars = ' ' * len(old_chars) + del_chars = '' + text = text.translate(str.maketrans(old_chars, new_chars, del_chars)) + return text + + +def nsw_test_case(raw_text): + print('I:' + raw_text) + print('O:' + NSWNormalizer(raw_text).normalize()) + print('') + + +def nsw_test(): + nsw_test_case('固话:0595-23865596或23880880。') + nsw_test_case('固话:0595-23865596或23880880。') + nsw_test_case('手机:+86 19859213959或15659451527。') + nsw_test_case('分数:32477/76391。') + nsw_test_case('百分数:80.03%。') + nsw_test_case('编号:31520181154418。') + nsw_test_case('纯数:2983.07克或12345.60米。') + nsw_test_case('日期:1999年2月20日或09年3月15号。') + nsw_test_case('金钱:12块5,34.5元,20.1万') + nsw_test_case('特殊:O2O或B2C。') + nsw_test_case('3456万吨') + nsw_test_case('2938个') + nsw_test_case('938') + nsw_test_case('今天吃了115个小笼包231个馒头') + nsw_test_case('有62%的概率') + + +if __name__ == '__main__': + # nsw_test() + + p = argparse.ArgumentParser() + p.add_argument('ifile', help='input filename, assume utf-8 encoding') + p.add_argument('ofile', help='output filename') + p.add_argument('--to_upper', action='store_true', help='convert to upper case') + p.add_argument('--to_lower', action='store_true', help='convert to lower case') + p.add_argument('--has_key', action='store_true', help="input text has Kaldi's key as first field.") + p.add_argument('--log_interval', type=int, default=10000, help='log interval in number of processed lines') + args = p.parse_args() + + ifile = codecs.open(args.ifile, 'r', 'utf8') + ofile = codecs.open(args.ofile, 'w+', 'utf8') + + n = 0 + for l in ifile: + key = '' + text = '' + if args.has_key: + cols = l.split(maxsplit=1) + key = cols[0] + if len(cols) == 2: + text = cols[1] + else: + text = '' + else: + text = l + + # cases + if args.to_upper and args.to_lower: + sys.stderr.write('text norm: to_upper OR to_lower?') + exit(1) + if args.to_upper: + text = text.upper() + if args.to_lower: + text = text.lower() + + # NSW(Non-Standard-Word) normalization + text = NSWNormalizer(text).normalize() + + # + if args.has_key: + ofile.write(key + '\t' + text) + else: + ofile.write(text) + + n += 1 + if n % args.log_interval == 0: + sys.stderr.write("text norm: {} lines done.\n".format(n)) + + sys.stderr.write("text norm: {} lines done in total.\n".format(n)) + + ifile.close() + ofile.close() diff --git a/utils/training_utils.py b/utils/training_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..409b15388790b1aadb24632313bdd1f41b4b06ac --- /dev/null +++ b/utils/training_utils.py @@ -0,0 +1,27 @@ +from utils.hparams import hparams + + +class RSQRTSchedule(object): + def __init__(self, optimizer): + super().__init__() 
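+ # Store the optimizer and read the schedule settings (base lr, warmup_updates, hidden_size) from the global hparams dict imported above; the initial lr is applied to every param group via step(0).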
+ self.optimizer = optimizer + self.constant_lr = hparams['lr'] + self.warmup_updates = hparams['warmup_updates'] + self.hidden_size = hparams['hidden_size'] + self.lr = hparams['lr'] + for param_group in optimizer.param_groups: + param_group['lr'] = self.lr + self.step(0) + + def step(self, num_updates): + constant_lr = self.constant_lr + warmup = min(num_updates / self.warmup_updates, 1.0) + rsqrt_decay = max(self.warmup_updates, num_updates) ** -0.5 + rsqrt_hidden = self.hidden_size ** -0.5 + self.lr = max(constant_lr * warmup * rsqrt_decay * rsqrt_hidden, 1e-7) + for param_group in self.optimizer.param_groups: + param_group['lr'] = self.lr + return self.lr + + def get_lr(self): + return self.optimizer.param_groups[0]['lr'] diff --git "a/\351\242\204\345\244\204\347\220\206.bat" "b/\351\242\204\345\244\204\347\220\206.bat" new file mode 100644 index 0000000000000000000000000000000000000000..5ba8f329852f75bab00abb07fa41d9d9662b75a1 --- /dev/null +++ "b/\351\242\204\345\244\204\347\220\206.bat" @@ -0,0 +1,4 @@ +set PYTHONPATH=. +set CUDA_VISIBLE_DEVICES=0 +python preprocessing/binarize.py --config training/config.yaml +pause \ No newline at end of file
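Usage sketch (illustrative only): the NSWNormalizer added above can be exercised on its own to spot-check how dates, money, phone numbers, fractions, percentages and bare numerals are expanded before it is wired into preprocessing. The import path below is an assumption (the normalizer's hunk header is not visible in this section); the sample strings are taken from the nsw_test cases in the diff.

# Hypothetical sanity check for the NSW normalizer (assumed path: utils/text_norm.py).
from utils.text_norm import NSWNormalizer

samples = [
    '固话:0595-23865596或23880880。',    # landline numbers
    '百分数:80.03%。',                   # percentage
    '日期:1999年2月20日或09年3月15号。',  # dates
    '金钱:12块5,34.5元,20.1万',          # money amounts
]
for s in samples:
    # normalize() rewrites non-standard words into Chinese characters and,
    # with the default remove_punc=True, strips CN/EN punctuation.
    print(NSWNormalizer(s).normalize())

As a side note on the other new file, RSQRTSchedule implements the usual inverse-square-root warmup, lr = max(base_lr * min(step / warmup_updates, 1) * max(warmup_updates, step) ** -0.5 * hidden_size ** -0.5, 1e-7), so the configured 'lr' acts as a scale factor rather than the literal peak learning rate.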