Safetensors
jamesr66a committed on
Commit
d391c5d
1 Parent(s): 56c313c
.fluminaignore ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .git/
2
+ .gitmodules
3
+
4
+ # Byte-compiled / optimized / DLL files
5
+ __pycache__/
6
+ *.py[cod]
7
+ *$py.class
8
+
9
+ # C extensions
10
+ *.so
11
+
12
+ # Distribution / packaging
13
+ .Python
14
+ build/
15
+ develop-eggs/
16
+ dist/
17
+ downloads/
18
+ eggs/
19
+ .eggs/
20
+ lib/
21
+ lib64/
22
+ parts/
23
+ sdist/
24
+ var/
25
+ wheels/
26
+ share/python-wheels/
27
+ *.egg-info/
28
+ .installed.cfg
29
+ *.egg
30
+ MANIFEST
31
+
32
+ # PyInstaller
33
+ # Usually these files are written by a python script from a template
34
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
35
+ *.manifest
36
+ *.spec
37
+
38
+ # Installer logs
39
+ pip-log.txt
40
+ pip-delete-this-directory.txt
41
+
42
+ # Unit test / coverage reports
43
+ htmlcov/
44
+ .tox/
45
+ .nox/
46
+ .coverage
47
+ .coverage.*
48
+ .cache
49
+ nosetests.xml
50
+ coverage.xml
51
+ *.cover
52
+ *.py,cover
53
+ .hypothesis/
54
+ .pytest_cache/
55
+ cover/
56
+
57
+ # Translations
58
+ *.mo
59
+ *.pot
60
+
61
+ # Django stuff:
62
+ *.log
63
+ local_settings.py
64
+ db.sqlite3
65
+ db.sqlite3-journal
66
+
67
+ # Flask stuff:
68
+ instance/
69
+ .webassets-cache
70
+
71
+ # Scrapy stuff:
72
+ .scrapy
73
+
74
+ # Sphinx documentation
75
+ docs/_build/
76
+
77
+ # PyBuilder
78
+ .pybuilder/
79
+ target/
80
+
81
+ # Jupyter Notebook
82
+ .ipynb_checkpoints
83
+
84
+ # IPython
85
+ profile_default/
86
+ ipython_config.py
87
+
88
+ # pyenv
89
+ # For a library or package, you might want to ignore these files since the code is
90
+ # intended to run in multiple environments; otherwise, check them in:
91
+ # .python-version
92
+
93
+ # pipenv
94
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
95
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
96
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
97
+ # install all needed dependencies.
98
+ #Pipfile.lock
99
+
100
+ # poetry
101
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
102
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
103
+ # commonly ignored for libraries.
104
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
105
+ #poetry.lock
106
+
107
+ # pdm
108
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
109
+ #pdm.lock
110
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
111
+ # in version control.
112
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
113
+ .pdm.toml
114
+ .pdm-python
115
+ .pdm-build/
116
+
117
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
118
+ __pypackages__/
119
+
120
+ # Celery stuff
121
+ celerybeat-schedule
122
+ celerybeat.pid
123
+
124
+ # SageMath parsed files
125
+ *.sage.py
126
+
127
+ # Environments
128
+ .env
129
+ .venv
130
+ env/
131
+ venv/
132
+ ENV/
133
+ env.bak/
134
+ venv.bak/
135
+
136
+ # Spyder project settings
137
+ .spyderproject
138
+ .spyproject
139
+
140
+ # Rope project settings
141
+ .ropeproject
142
+
143
+ # mkdocs documentation
144
+ /site
145
+
146
+ # mypy
147
+ .mypy_cache/
148
+ .dmypy.json
149
+ dmypy.json
150
+
151
+ # Pyre type checker
152
+ .pyre/
153
+
154
+ # pytype static type analyzer
155
+ .pytype/
156
+
157
+ # Cython debug symbols
158
+ cython_debug/
159
+
160
+ # PyCharm
161
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
162
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
163
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
164
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
165
+ #.idea/
166
+ .DS_Store
.gitattributes ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ flux1-dev.safetensors filter=lfs diff=lfs merge=lfs -text
2
+ ae.safetensors filter=lfs diff=lfs merge=lfs -text
3
+ t5-v1_1-xxl-encoder-bf16/*.safetensors filter=lfs diff=lfs merge=lfs -text
4
+ t5-v1_1-xxl-encoder-bf16/spiece.model filter=lfs diff=lfs merge=lfs -text
5
+ t5-v1_1-xxl-encoder-bf16/model.safetensors filter=lfs diff=lfs merge=lfs -text
6
+ stitched_output.png filter=lfs diff=lfs merge=lfs -text
7
+ out_image.png filter=lfs diff=lfs merge=lfs -text
8
+ example.png filter=lfs diff=lfs merge=lfs -text
LICENSE CHANGED
@@ -1,201 +1,82 @@
1
- Apache License
2
- Version 2.0, January 2004
3
- http://www.apache.org/licenses/
4
-
5
- TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
-
7
- 1. Definitions.
8
-
9
- "License" shall mean the terms and conditions for use, reproduction,
10
- and distribution as defined by Sections 1 through 9 of this document.
11
-
12
- "Licensor" shall mean the copyright owner or entity authorized by
13
- the copyright owner that is granting the License.
14
-
15
- "Legal Entity" shall mean the union of the acting entity and all
16
- other entities that control, are controlled by, or are under common
17
- control with that entity. For the purposes of this definition,
18
- "control" means (i) the power, direct or indirect, to cause the
19
- direction or management of such entity, whether by contract or
20
- otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
- outstanding shares, or (iii) beneficial ownership of such entity.
22
-
23
- "You" (or "Your") shall mean an individual or Legal Entity
24
- exercising permissions granted by this License.
25
-
26
- "Source" form shall mean the preferred form for making modifications,
27
- including but not limited to software source code, documentation
28
- source, and configuration files.
29
-
30
- "Object" form shall mean any form resulting from mechanical
31
- transformation or translation of a Source form, including but
32
- not limited to compiled object code, generated documentation,
33
- and conversions to other media types.
34
-
35
- "Work" shall mean the work of authorship, whether in Source or
36
- Object form, made available under the License, as indicated by a
37
- copyright notice that is included in or attached to the work
38
- (an example is provided in the Appendix below).
39
-
40
- "Derivative Works" shall mean any work, whether in Source or Object
41
- form, that is based on (or derived from) the Work and for which the
42
- editorial revisions, annotations, elaborations, or other modifications
43
- represent, as a whole, an original work of authorship. For the purposes
44
- of this License, Derivative Works shall not include works that remain
45
- separable from, or merely link (or bind by name) to the interfaces of,
46
- the Work and Derivative Works thereof.
47
-
48
- "Contribution" shall mean any work of authorship, including
49
- the original version of the Work and any modifications or additions
50
- to that Work or Derivative Works thereof, that is intentionally
51
- submitted to Licensor for inclusion in the Work by the copyright owner
52
- or by an individual or Legal Entity authorized to submit on behalf of
53
- the copyright owner. For the purposes of this definition, "submitted"
54
- means any form of electronic, verbal, or written communication sent
55
- to the Licensor or its representatives, including but not limited to
56
- communication on electronic mailing lists, source code control systems,
57
- and issue tracking systems that are managed by, or on behalf of, the
58
- Licensor for the purpose of discussing and improving the Work, but
59
- excluding communication that is conspicuously marked or otherwise
60
- designated in writing by the copyright owner as "Not a Contribution."
61
-
62
- "Contributor" shall mean Licensor and any individual or Legal Entity
63
- on behalf of whom a Contribution has been received by Licensor and
64
- subsequently incorporated within the Work.
65
-
66
- 2. Grant of Copyright License. Subject to the terms and conditions of
67
- this License, each Contributor hereby grants to You a perpetual,
68
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
- copyright license to reproduce, prepare Derivative Works of,
70
- publicly display, publicly perform, sublicense, and distribute the
71
- Work and such Derivative Works in Source or Object form.
72
-
73
- 3. Grant of Patent License. Subject to the terms and conditions of
74
- this License, each Contributor hereby grants to You a perpetual,
75
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
- (except as stated in this section) patent license to make, have made,
77
- use, offer to sell, sell, import, and otherwise transfer the Work,
78
- where such license applies only to those patent claims licensable
79
- by such Contributor that are necessarily infringed by their
80
- Contribution(s) alone or by combination of their Contribution(s)
81
- with the Work to which such Contribution(s) was submitted. If You
82
- institute patent litigation against any entity (including a
83
- cross-claim or counterclaim in a lawsuit) alleging that the Work
84
- or a Contribution incorporated within the Work constitutes direct
85
- or contributory patent infringement, then any patent licenses
86
- granted to You under this License for that Work shall terminate
87
- as of the date such litigation is filed.
88
-
89
- 4. Redistribution. You may reproduce and distribute copies of the
90
- Work or Derivative Works thereof in any medium, with or without
91
- modifications, and in Source or Object form, provided that You
92
- meet the following conditions:
93
-
94
- (a) You must give any other recipients of the Work or
95
- Derivative Works a copy of this License; and
96
-
97
- (b) You must cause any modified files to carry prominent notices
98
- stating that You changed the files; and
99
-
100
- (c) You must retain, in the Source form of any Derivative Works
101
- that You distribute, all copyright, patent, trademark, and
102
- attribution notices from the Source form of the Work,
103
- excluding those notices that do not pertain to any part of
104
- the Derivative Works; and
105
-
106
- (d) If the Work includes a "NOTICE" text file as part of its
107
- distribution, then any Derivative Works that You distribute must
108
- include a readable copy of the attribution notices contained
109
- within such NOTICE file, excluding those notices that do not
110
- pertain to any part of the Derivative Works, in at least one
111
- of the following places: within a NOTICE text file distributed
112
- as part of the Derivative Works; within the Source form or
113
- documentation, if provided along with the Derivative Works; or,
114
- within a display generated by the Derivative Works, if and
115
- wherever such third-party notices normally appear. The contents
116
- of the NOTICE file are for informational purposes only and
117
- do not modify the License. You may add Your own attribution
118
- notices within Derivative Works that You distribute, alongside
119
- or as an addendum to the NOTICE text from the Work, provided
120
- that such additional attribution notices cannot be construed
121
- as modifying the License.
122
-
123
- You may add Your own copyright statement to Your modifications and
124
- may provide additional or different license terms and conditions
125
- for use, reproduction, or distribution of Your modifications, or
126
- for any such Derivative Works as a whole, provided Your use,
127
- reproduction, and distribution of the Work otherwise complies with
128
- the conditions stated in this License.
129
-
130
- 5. Submission of Contributions. Unless You explicitly state otherwise,
131
- any Contribution intentionally submitted for inclusion in the Work
132
- by You to the Licensor shall be under the terms and conditions of
133
- this License, without any additional terms or conditions.
134
- Notwithstanding the above, nothing herein shall supersede or modify
135
- the terms of any separate license agreement you may have executed
136
- with Licensor regarding such Contributions.
137
-
138
- 6. Trademarks. This License does not grant permission to use the trade
139
- names, trademarks, service marks, or product names of the Licensor,
140
- except as required for reasonable and customary use in describing the
141
- origin of the Work and reproducing the content of the NOTICE file.
142
-
143
- 7. Disclaimer of Warranty. Unless required by applicable law or
144
- agreed to in writing, Licensor provides the Work (and each
145
- Contributor provides its Contributions) on an "AS IS" BASIS,
146
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
- implied, including, without limitation, any warranties or conditions
148
- of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
- PARTICULAR PURPOSE. You are solely responsible for determining the
150
- appropriateness of using or redistributing the Work and assume any
151
- risks associated with Your exercise of permissions under this License.
152
-
153
- 8. Limitation of Liability. In no event and under no legal theory,
154
- whether in tort (including negligence), contract, or otherwise,
155
- unless required by applicable law (such as deliberate and grossly
156
- negligent acts) or agreed to in writing, shall any Contributor be
157
- liable to You for damages, including any direct, indirect, special,
158
- incidental, or consequential damages of any character arising as a
159
- result of this License or out of the use or inability to use the
160
- Work (including but not limited to damages for loss of goodwill,
161
- work stoppage, computer failure or malfunction, or any and all
162
- other commercial damages or losses), even if such Contributor
163
- has been advised of the possibility of such damages.
164
-
165
- 9. Accepting Warranty or Additional Liability. While redistributing
166
- the Work or Derivative Works thereof, You may choose to offer,
167
- and charge a fee for, acceptance of support, warranty, indemnity,
168
- or other liability obligations and/or rights consistent with this
169
- License. However, in accepting such obligations, You may act only
170
- on Your own behalf and on Your sole responsibility, not on behalf
171
- of any other Contributor, and only if You agree to indemnify,
172
- defend, and hold each Contributor harmless for any liability
173
- incurred by, or claims asserted against, such Contributor by reason
174
- of your accepting any such warranty or additional liability.
175
-
176
- END OF TERMS AND CONDITIONS
177
-
178
- APPENDIX: How to apply the Apache License to your work.
179
-
180
- To apply the Apache License to your work, attach the following
181
- boilerplate notice, with the fields enclosed by brackets "[]"
182
- replaced with your own identifying information. (Don't include
183
- the brackets!) The text should be enclosed in the appropriate
184
- comment syntax for the file format. We also recommend that a
185
- file or class name and description of purpose be included on the
186
- same "printed page" as the copyright notice for easier
187
- identification within third-party archives.
188
-
189
- Copyright 2024 Alex Redden
190
-
191
- Licensed under the Apache License, Version 2.0 (the "License");
192
- you may not use this file except in compliance with the License.
193
- You may obtain a copy of the License at
194
-
195
- http://www.apache.org/licenses/LICENSE-2.0
196
-
197
- Unless required by applicable law or agreed to in writing, software
198
- distributed under the License is distributed on an "AS IS" BASIS,
199
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
- See the License for the specific language governing permissions and
201
- limitations under the License.
 
1
+ **FLUX.1 \[dev\] Non-Commercial License**
2
+
3
+ Black Forest Labs, Inc. (“**we**” or “**our**” or “**Company**”) is pleased to make available the weights, parameters and inference code for the FLUX.1 \[dev\] Model (as defined below) freely available for your non-commercial and non-production use as set forth in this FLUX.1 \[dev\] Non-Commercial License (“**License**”). The “**FLUX.1 \[dev\] Model**” means the FLUX.1 \[dev\] text-to-image AI model and its elements which includes algorithms, software, checkpoints, parameters, source code (inference code, evaluation code, and if applicable, fine-tuning code) and any other materials associated with the FLUX.1 \[dev\] AI model made available by Company under this License, including if any, the technical documentation, manuals and instructions for the use and operation thereof (collectively, “**FLUX.1 \[dev\] Model**”).
4
+
5
+ By downloading, accessing, using, Distributing (as defined below), or creating a Derivative (as defined below) of the FLUX.1 \[dev\] Model, you agree to the terms of this License. If you do not agree to this License, then you do not have any rights to access, use, Distribute or create a Derivative of the FLUX.1 \[dev\] Model and you must immediately cease using the FLUX.1 \[dev\] Model. If you are agreeing to be bound by the terms of this License on behalf of your employer or other entity, you represent and warrant to us that you have full legal authority to bind your employer or such entity to this License. If you do not have the requisite authority, you may not accept the License or access the FLUX.1 \[dev\] Model on behalf of your employer or other entity.
6
+
7
+ 1. **Definitions**. Capitalized terms used in this License but not defined herein have the following meanings:
8
+
9
+ 1. “**Derivative**” means any (i) modified version of the FLUX.1 \[dev\] Model (including but not limited to any customized or fine-tuned version thereof), (ii) work based on the FLUX.1 \[dev\] Model, or (iii) any other derivative work thereof. For the avoidance of doubt, Outputs are not considered Derivatives under this License.
10
+
11
+ 1. “**Distribution**” or “**Distribute**” or “**Distributing**” means providing or making available, by any means, a copy of the FLUX.1 \[dev\] Models and/or the Derivatives as the case may be.
12
+
13
+ 1. “**Non-Commercial Purpose**” means any of the following uses, but only so far as you do not receive any direct or indirect payment arising from the use of the model or its output: (i) personal use for research, experiment, and testing for the benefit of public knowledge, personal study, private entertainment, hobby projects, or otherwise not directly or indirectly connected to any commercial activities, business operations, or employment responsibilities; (ii) use by commercial or for-profit entities for testing, evaluation, or non-commercial research and development in a non-production environment, (iii) use by any charitable organization for charitable purposes, or for testing or evaluation. For clarity, use for revenue-generating activity or direct interactions with or impacts on end users, or use to train, fine tune or distill other models for commercial use is not a Non-Commercial purpose.
14
+
15
+ 1. “**Outputs**” means any content generated by the operation of the FLUX.1 \[dev\] Models or the Derivatives from a prompt (i.e., text instructions) provided by users. For the avoidance of doubt, Outputs do not include any components of a FLUX.1 \[dev\] Models, such as any fine-tuned versions of the FLUX.1 \[dev\] Models, the weights, or parameters.
16
+
17
+ 1. “**you**” or “**your**” means the individual or entity entering into this License with Company.
18
+
19
+ 1. **License Grant**.
20
+
21
+ 1. License. Subject to your compliance with this License, Company grants you a non-exclusive, worldwide, non-transferable, non-sublicensable, revocable, royalty free and limited license to access, use, create Derivatives of, and Distribute the FLUX.1 \[dev\] Models solely for your Non-Commercial Purposes. The foregoing license is personal to you, and you may not assign or sublicense this License or any other rights or obligations under this License without Company’s prior written consent; any such assignment or sublicense will be void and will automatically and immediately terminate this License. Any restrictions set forth herein regarding the FLUX.1 \[dev\] Model also apply to any Derivative you create or that is created on your behalf.
22
+
23
+ 1. Non-Commercial Use Only. You may only access, use, Distribute, or create Derivatives of the FLUX.1 \[dev\] Model or Derivatives for Non-Commercial Purposes. If You want to use a FLUX.1 \[dev\] Model or a Derivative for any purpose that is not expressly authorized under this License, such as for a commercial activity, you must request a license from Company, which Company may grant to you in Company’s sole discretion and which additional use may be subject to a fee, royalty or other revenue share. Please contact Company at the following e-mail address if you want to discuss such a license: info@blackforestlabs.ai.
24
+
25
+ 1. Reserved Rights. The grant of rights expressly set forth in this License are the complete grant of rights to you in the FLUX.1 \[dev\] Model, and no other licenses are granted, whether by waiver, estoppel, implication, equity or otherwise. Company and its licensors reserve all rights not expressly granted by this License.
26
+
27
+ 1. Outputs. We claim no ownership rights in and to the Outputs. You are solely responsible for the Outputs you generate and their subsequent uses in accordance with this License. You may use Output for any purpose (including for commercial purposes), except as expressly prohibited herein. You may not use the Output to train, fine-tune or distill a model that is competitive with the FLUX.1 \[dev\] Model.
28
+
29
+ 1. **Distribution**. Subject to this License, you may Distribute copies of the FLUX.1 \[dev\] Model and/or Derivatives made by you, under the following conditions:
30
+
31
+ 1. you must make available a copy of this License to third-party recipients of the FLUX.1 \[dev\] Models and/or Derivatives you Distribute, and specify that any rights to use the FLUX.1 \[dev\] Models and/or Derivatives shall be directly granted by Company to said third-party recipients pursuant to this License;
32
+
33
+ 1. you must prominently display the following notice alongside the Distribution of the FLUX.1 \[dev\] Model or Derivative (such as via a “Notice” text file distributed as part of such FLUX.1 \[dev\] Model or Derivative) (the “**Attribution Notice**”):
34
+
35
+ “The FLUX.1 \[dev\] Model is licensed by Black Forest Labs. Inc. under the FLUX.1 \[dev\] Non-Commercial License. Copyright Black Forest Labs. Inc.
36
+
37
+ IN NO EVENT SHALL BLACK FOREST LABS, INC. BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH USE OF THIS MODEL.”
38
+
39
+ 1. in the case of Distribution of Derivatives made by you, you must also include in the Attribution Notice a statement that you have modified the applicable FLUX.1 \[dev\] Model; and
40
+
41
+ 1. in the case of Distribution of Derivatives made by you, any terms and conditions you impose on any third-party recipients relating to Derivatives made by or for you shall neither limit such third-party recipients’ use of the FLUX.1 \[dev\] Model or any Derivatives made by or for Company in accordance with this License nor conflict with any of its terms and conditions.
42
+
43
+ 1. In the case of Distribution of Derivatives made by you, you must not misrepresent or imply, through any means, that the Derivatives made by or for you and/or any modified version of the FLUX.1 \[dev\] Model you Distribute under your name and responsibility is an official product of the Company or has been endorsed, approved or validated by the Company, unless you are authorized by Company to do so in writing.
44
+
45
+ 1. **Restrictions**. You will not, and will not permit, assist or cause any third party to
46
+
47
+ 1. use, modify, copy, reproduce, create Derivatives of, or Distribute the FLUX.1 \[dev\] Model (or any Derivative thereof, or any data produced by the FLUX.1 \[dev\] Model), in whole or in part, for (i) any commercial or production purposes, (ii) military purposes, (iii) purposes of surveillance, including any research or development relating to surveillance, (iv) biometric processing, (v) in any manner that infringes, misappropriates, or otherwise violates any third-party rights, or (vi) in any manner that violates any applicable law and violating any privacy or security laws, rules, regulations, directives, or governmental requirements (including the General Data Privacy Regulation (Regulation (EU) 2016/679), the California Consumer Privacy Act, and any and all laws governing the processing of biometric information), as well as all amendments and successor laws to any of the foregoing;
48
+
49
+ 1. alter or remove copyright and other proprietary notices which appear on or in any portion of the FLUX.1 \[dev\] Model;
50
+
51
+ 1. utilize any equipment, device, software, or other means to circumvent or remove any security or protection used by Company in connection with the FLUX.1 \[dev\] Model, or to circumvent or remove any usage restrictions, or to enable functionality disabled by FLUX.1 \[dev\] Model; or
52
+
53
+ 1. offer or impose any terms on the FLUX.1 \[dev\] Model that alter, restrict, or are inconsistent with the terms of this License.
54
+
55
+ 1. violate any applicable U.S. and non-U.S. export control and trade sanctions laws (“**Export Laws**”) in connection with your use or Distribution of any FLUX.1 \[dev\] Model;
56
+
57
+ 1. directly or indirectly Distribute, export, or otherwise transfer FLUX.1 \[dev\] Model (a) to any individual, entity, or country prohibited by Export Laws; (b) to anyone on U.S. or non-U.S. government restricted parties lists; or (c) for any purpose prohibited by Export Laws, including nuclear, chemical or biological weapons, or missile technology applications; (3) use or download FLUX.1 \[dev\] Model if you or they are (a) located in a comprehensively sanctioned jurisdiction, (b) currently listed on any U.S. or non-U.S. restricted parties list, or (c) for any purpose prohibited by Export Laws; and (4) will not disguise your location through IP proxying or other methods.
58
+
59
+ 1. **DISCLAIMERS**. THE FLUX.1 \[dev\] MODEL IS PROVIDED “AS IS” AND “WITH ALL FAULTS” WITH NO WARRANTY OF ANY KIND, EXPRESS OR IMPLIED. COMPANY EXPRESSLY DISCLAIMS ALL REPRESENTATIONS AND WARRANTIES, EXPRESS OR IMPLIED, WHETHER BY STATUTE, CUSTOM, USAGE OR OTHERWISE AS TO ANY MATTERS RELATED TO THE FLUX.1 \[dev\] MODEL, INCLUDING BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE, SATISFACTORY QUALITY, OR NON-INFRINGEMENT. COMPANY MAKES NO WARRANTIES OR REPRESENTATIONS THAT THE FLUX.1 \[dev\] MODEL WILL BE ERROR FREE OR FREE OF VIRUSES OR OTHER HARMFUL COMPONENTS, OR PRODUCE ANY PARTICULAR RESULTS.
60
+
61
+ 1. **LIMITATION OF LIABILITY**. TO THE FULLEST EXTENT PERMITTED BY LAW, IN NO EVENT WILL COMPANY BE LIABLE TO YOU OR YOUR EMPLOYEES, AFFILIATES, USERS, OFFICERS OR DIRECTORS (A) UNDER ANY THEORY OF LIABILITY, WHETHER BASED IN CONTRACT, TORT, NEGLIGENCE, STRICT LIABILITY, WARRANTY, OR OTHERWISE UNDER THIS LICENSE, OR (B) FOR ANY INDIRECT, CONSEQUENTIAL, EXEMPLARY, INCIDENTAL, PUNITIVE OR SPECIAL DAMAGES OR LOST PROFITS, EVEN IF COMPANY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. THE FLUX.1 \[dev\] MODEL, ITS CONSTITUENT COMPONENTS, AND ANY OUTPUT (COLLECTIVELY, “MODEL MATERIALS”) ARE NOT DESIGNED OR INTENDED FOR USE IN ANY APPLICATION OR SITUATION WHERE FAILURE OR FAULT OF THE MODEL MATERIALS COULD REASONABLY BE ANTICIPATED TO LEAD TO SERIOUS INJURY OF ANY PERSON, INCLUDING POTENTIAL DISCRIMINATION OR VIOLATION OF AN INDIVIDUAL’S PRIVACY RIGHTS, OR TO SEVERE PHYSICAL, PROPERTY, OR ENVIRONMENTAL DAMAGE (EACH, A “**HIGH-RISK USE**”). IF YOU ELECT TO USE ANY OF THE MODEL MATERIALS FOR A HIGH-RISK USE, YOU DO SO AT YOUR OWN RISK. YOU AGREE TO DESIGN AND IMPLEMENT APPROPRIATE DECISION-MAKING AND RISK-MITIGATION PROCEDURES AND POLICIES IN CONNECTION WITH A HIGH-RISK USE SUCH THAT EVEN IF THERE IS A FAILURE OR FAULT IN ANY OF THE MODEL MATERIALS, THE SAFETY OF PERSONS OR PROPERTY AFFECTED BY THE ACTIVITY STAYS AT A LEVEL THAT IS REASONABLE, APPROPRIATE, AND LAWFUL FOR THE FIELD OF THE HIGH-RISK USE.
62
+
63
+ 1. **INDEMNIFICATION**
64
+
65
+ You will indemnify, defend and hold harmless Company and our subsidiaries and affiliates, and each of our respective shareholders, directors, officers, employees, agents, successors, and assigns (collectively, the “**Company Parties**”) from and against any losses, liabilities, damages, fines, penalties, and expenses (including reasonable attorneys’ fees) incurred by any Company Party in connection with any claim, demand, allegation, lawsuit, proceeding, or investigation (collectively, “**Claims**”) arising out of or related to (a) your access to or use of the FLUX.1 \[dev\] Model (as well as any Output, results or data generated from such access or use), including any High-Risk Use (defined above); (b) your violation of this License; or (c) your violation, misappropriation or infringement of any rights of another (including intellectual property or other proprietary rights and privacy rights). You will promptly notify the Company Parties of any such Claims, and cooperate with Company Parties in defending such Claims. You will also grant the Company Parties sole control of the defense or settlement, at Company’s sole option, of any Claims. This indemnity is in addition to, and not in lieu of, any other indemnities or remedies set forth in a written agreement between you and Company or the other Company Parties.
66
+
67
+ 1. **Termination; Survival**.
68
+
69
+ 1. This License will automatically terminate upon any breach by you of the terms of this License.
70
+
71
+ 1. We may terminate this License, in whole or in part, at any time upon notice (including electronic) to you.
72
+
73
+ 1. If You initiate any legal action or proceedings against Company or any other entity (including a cross-claim or counterclaim in a lawsuit), alleging that the FLUX.1 \[dev\] Model or any Derivative, or any part thereof, infringe upon intellectual property or other rights owned or licensable by you, then any licenses granted to you under this License will immediately terminate as of the date such legal action or claim is filed or initiated.
74
+
75
+ 1. Upon termination of this License, you must cease all use, access or Distribution of the FLUX.1 \[dev\] Model and any Derivatives. The following sections survive termination of this License 2(c), 2(d), 4-11.
76
+
77
+ 1. **Third Party Materials**. The FLUX.1 \[dev\] Model may contain third-party software or other components (including free and open source software) (all of the foregoing, “**Third Party Materials**”), which are subject to the license terms of the respective third-party licensors. Your dealings or correspondence with third parties and your use of or interaction with any Third Party Materials are solely between you and the third party. Company does not control or endorse, and makes no representations or warranties regarding, any Third Party Materials, and your access to and use of such Third Party Materials are at your own risk.
78
+
79
+ 1. **Trademarks**. You have not been granted any trademark license as part of this License and may not use any name or mark associated with Company without the prior written permission of Company, except to the extent necessary to make the reference required in the Attribution Notice as specified above or as is reasonably necessary in describing the FLUX.1 \[dev\] Model and its creators.
80
+
81
+ 1. **General**. This License will be governed and construed under the laws of the State of Delaware without regard to conflicts of law provisions. If any provision or part of a provision of this License is unlawful, void or unenforceable, that provision or part of the provision is deemed severed from this License, and will not affect the validity and enforceability of any remaining provisions. The failure of Company to exercise or enforce any right or provision of this License will not operate as a waiver of such right or provision. This License does not confer any third-party beneficiary rights upon any other person or entity. This License, together with the Documentation, contains the entire understanding between you and Company regarding the subject matter of this License, and supersedes all other written or oral agreements and understandings between you and Company regarding such subject matter. No change or addition to any provision of this License will be binding unless it is in writing and signed by an authorized representative of both you and Company.
82
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md CHANGED
@@ -1,420 +1,107 @@
1
- # Flux FP8 (true) Matmul Implementation with FastAPI
 
 
 
 
2
 
3
- This repository contains an implementation of the Flux model, along with an API that allows you to generate images based on text prompts. And also a simple single line of code to use the generator as a single object, similar to diffusers pipelines.
4
 
5
- ## Speed Comparison
6
 
7
- Note:
8
 
9
- - The "bfl codebase" refers to the original [BFL codebase](https://github.com/black-forest-labs/flux), not this repo.
10
- - The "fp8 wo quant" refers to the original BFL codebase using fp8 weight only quantization, not using fp8 matmul which is default in this repo.
11
- - The "compile blocks & extras" refers to the option within this repo setting the config values `"compile_blocks" true` & `"compile_extras": true`. ❌ means both were set to false, ✅ means both were set to true.
12
- - All generations which including a ❌ or ✅ are using this repo.
13
 
14
- | Resolution | Device | Test | Average it/s |
15
- | ---------- | ---------- | -------------------------- | ------------ |
16
- | 1024x1024 | RTX4090 | bfl codebase fp8 wo quant | 1.7 |
17
- | 1024x1024 | RTX4090 | ❌ compile blocks & extras | 2.55 |
18
- | 1024x1024 | RTX4090 | ✅ compile blocks & extras | 3.51 |
19
- | 1024x1024 | RTX4000ADA | ❌ compile blocks & extras | 0.79 |
20
- | 1024x1024 | RTX4000ADA | ✅ compile blocks & extras | 1.26 |
21
- | 1024x1024 | RTX6000ADA | bfl codebase | 1.74 |
22
- | 1024x1024 | RTX6000ADA | ❌ compile blocks & extras | 2.08 |
23
- | 1024x1024 | RTX6000ADA | ✅ compile blocks & extras | 2.8 |
24
- | 1024x1024 | H100 | ❌ compile blocks & extras | 6.1 |
25
- | 1024x1024 | H100 | ✅ compile blocks & extras | 11.5 |
26
- | 768x768 | RTX4090 | bfl codebase fp8 wo quant | 2.32 |
27
- | 768x768 | RTX4090 | ❌ compile blocks & extras | 4.47 |
28
- | 768x768 | RTX4090 | ✅ compile blocks & extras | 6.2 |
29
- | 768x768 | RTX4000 | ❌ compile blocks & extras | 1.41 |
30
- | 768x768 | RTX4000 | ✅ compile blocks & extras | 2.19 |
31
- | 768x768 | RTX6000ADA | bfl codebase | 3.01 |
32
- | 768x768 | RTX6000ADA | ❌ compile blocks & extras | 3.43 |
33
- | 768x768 | RTX6000ADA | ✅ compile blocks & extras | 4.46 |
34
- | 768x768 | H100 | ❌ compile blocks & extras | 10.3 |
35
- | 768x768 | H100 | ✅ compile blocks & extras | 20.8 |
36
- | 1024x720 | RTX4090 | bfl codebase fp8 wo quant | 3.01 |
37
- | 1024x720 | RTX4090 | ❌ compile blocks & extras | 3.6 |
38
- | 1024x720 | RTX4090 | ✅ compile blocks & extras | 4.96 |
39
- | 1024x720 | RTX4000 | ❌ compile blocks & extras | 1.14 |
40
- | 1024x720 | RTX4000 | ✅ compile blocks & extras | 1.78 |
41
- | 1024x720 | RTX6000ADA | bfl codebase | 2.37 |
42
- | 1024x720 | RTX6000ADA | ❌ compile blocks & extras | 2.87 |
43
- | 1024x720 | RTX6000ADA | ✅ compile blocks & extras | 3.78 |
44
- | 1024x720 | H100 | ❌ compile blocks & extras | 8.2 |
45
- | 1024x720 | H100 | ✅ compile blocks & extras | 15.7 |
46
 
47
- ## Table of Contents
48
-
49
- - [Installation](#installation)
50
- - [Usage](#usage)
51
- - [Configuration](#configuration)
52
- - [API Endpoints](#api-endpoints)
53
- - [Examples](#examples)
54
- - [License](https://github.com/aredden/flux-fp8-api/blob/main/LICENSE)
55
-
56
- ### Updates 08/24/24
57
-
58
- - Add config options for levels of quantization for the flow transformer:
59
- - `quantize_modulation`: Quantize the modulation layers in the flow model. If false, adds ~2GB vram usage for moderate precision improvements `(default: true)`
60
- - `quantize_flow_embedder_layers`: Quantize the flow embedder layers in the flow model. If false, adds ~512MB vram usage, but precision improves considerably. `(default: false)`
61
- - Override default config values when loading FluxPipeline, e.g. `FluxPipeline.load_pipeline_from_config_path(config_path, **config_overrides)`
62
-
63
- #### Fixes
64
-
65
- - Fix bug where loading text encoder from HF with bnb will error if device is not set to cuda:0
66
-
67
- **note:** prequantized flow models will only work with the specified quantization levels as when they were created. e.g. if you create a prequantized flow model with `quantize_modulation` set to false, it will only work with `quantize_modulation` set to false, same with `quantize_flow_embedder_layers`.
68
-
69
- ### Updates 08/25/24
70
-
71
- - Added LoRA loading functionality to FluxPipeline. Simple example:
72
-
73
- ```python
74
- from flux_pipeline import FluxPipeline
75
-
76
- config_path = "path/to/config/file.json"
77
- config_overrides = {
78
- #...
79
- }
80
-
81
- lora_path = "path/to/lora/file.safetensors"
82
-
83
- pipeline = FluxPipeline.load_pipeline_from_config_path(config_path, **config_overrides)
84
-
85
- pipeline.load_lora(lora_path, scale=1.0)
86
- ```
87
-
88
- ### Updates 09/07/24
89
-
90
- - Improve quality by ensuring that the RMSNorm layers use fp32
91
- - Raise the clamp range for single blocks & double blocks to +/-32000 to reduce deviation from expected outputs.
92
- - Make BF16 _not_ clamp, which improves quality and isn't needed because bf16 is the expected dtype for flux. **I would now recommend always using `"flow_dtype": "bfloat16"` in the config**, though it will slow things down on consumer gpus- but not by much at all since most of the compute still happens via fp8.
93
- - Allow for the T5 Model to be run without any quantization, by specifying `"text_enc_quantization_dtype": "bfloat16"` in the config - or also `"float16"`, though not recommended since t5 deviates a bit when running with float16. I noticed that even with qint8/qfloat8 there is a bit of deviation from bf16 text encoder outputs- so for those who want more accurate / expected text encoder outputs, you can use this option.
94
-
95
- ### Updates 10/3/24
96
-
97
- - #### Adding configurable clip model path
98
- Now you can specify the clip model's path in the config, using the `clip_path` parameter in a config file.
99
- - #### Improved lora loading
100
- I believe I have fixed the lora loading bug that was causing the lora to not apply properly, or when not all of the linear weights in the q/k/v/o had loras attached (it wouldn't be able to apply if only some of them did).
101
- - #### Lora loading via api endpoint
102
-
103
- You can now post to the `/lora` endpoint with a json file containing a `scale`, `path`, `name`, and `action` parameters.
104
-
105
- The `path` should be the path to the lora safetensors file either absolute or relative to the root of this repo.
106
-
107
- The `name` is an optional parameter, mainly just for checking purposes to see if the correct lora was being loaded, it's used as an identifier to check whether it's already been loaded or which lora to unload if `action` is `unload` (you can also use the exact same path which was loaded previously to unload the same lora).
108
-
109
- The `action` should be either `load` or `unload`, to load or unload the lora.
110
-
111
- The `scale` should be a float, which is the scale of the lora.
112
-
113
- e.g.
114
-
115
- ```json
116
- {
117
- <!-- If you have a lora directory like 'fluxloras' in the root of this repo -->
118
- "path": "./fluxloras/loras/aidmaImageUpgrader-FLUX-V0.2.safetensors",
119
- <!-- name is optional -->
120
- "name": "imgupgrade",
121
- <!-- action (load or unload) is required -->
122
- "action": "load",
123
- <!-- lora scale to use -->
124
- "scale": 0.6
125
- }
126
- ```
127
-
128
- ## Installation
129
-
130
- This repo _requires_ at least pytorch with cuda=12.4 and an ADA gpu with fp8 support, otherwise `torch._scaled_mm` will throw a CUDA error saying it's not supported. To install with conda/mamba:
131
 
132
  ```bash
133
- mamba create -n flux-fp8-matmul-api python=3.11 pytorch torchvision torchaudio pytorch-cuda=12.4 -c pytorch -c nvidia
134
- mamba activate flux-fp8-matmul-api
135
-
136
- # or with conda
137
- conda create -n flux-fp8-matmul-api python=3.11 pytorch torchvision torchaudio pytorch-cuda=12.4 -c pytorch -c nvidia
138
- conda activate flux-fp8-matmul-api
139
-
140
- # or with nightly... (which is what I am using) - also, just switch 'mamba' to 'conda' if you are using conda
141
- mamba create -n flux-fp8-matmul-api python=3.11 pytorch torchvision torchaudio pytorch-cuda=12.4 -c pytorch-nightly -c nvidia
142
- mamba activate flux-fp8-matmul-api
143
-
144
- # or with pip
145
- python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124
146
- # or pip nightly
147
- python -m pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu124
148
  ```
149
 
150
- To install the required dependencies, run:
151
 
152
  ```bash
153
- python -m pip install -r requirements.txt
 
 
 
 
 
 
 
 
 
 
 
154
  ```
155
 
156
- If you get errors installing `torch-cublas-hgemm`, feel free to comment it out in requirements.txt, since it's not necessary, but will speed up inference for non-fp8 linear layers.
157
-
158
- ## Usage
159
-
160
- For a single ADA GPU with less than 24GB vram, and more than 16GB vram, you should use the `configs/config-dev-offload-1-4080.json` config file as a base, and then tweak the parameters to fit your needs. It offloads all models to CPU when not in use, compiles the flow model with extra optimizations, and quantizes the text encoder to nf4 and the autoencoder to qfloat8.
161
-
162
- For a single ADA GPU with more than ~32GB vram, you should use the `configs/config-dev-1-RTX6000ADA.json` config file as a base, and then tweak the parameters to fit your needs. It does not offload any models to CPU, compiles the flow model with extra optimizations, and quantizes the text encoder to qfloat8 and the autoencoder to stays as bfloat16.
163
 
164
- For a single 4090 GPU, you should use the `configs/config-dev-offload-1-4090.json` config file as a base, and then tweak the parameters to fit your needs. It offloads the text encoder and the autoencoder to CPU, compiles the flow model with extra optimizations, and quantizes the text encoder to nf4 and the autoencoder to float8.
165
 
166
- **NOTE:** For all of these configs, you must change the `ckpt_path`, `ae_path`, and `text_enc_path` parameters to the path to your own checkpoint, autoencoder, and text encoder.
167
 
168
- You can run the API server using the following command:
169
 
 
170
  ```bash
171
- python main.py --config-path <path_to_config> --port <port_number> --host <host_address>
172
  ```
173
 
174
- ### API Command-Line Arguments
175
-
176
- - `--config-path`: Path to the configuration file. If not provided, the model will be loaded from the command line arguments.
177
- - `--port`: Port to run the server on (default: 8088).
178
- - `--host`: Host to run the server on (default: 0.0.0.0).
179
- - `--flow-model-path`: Path to the flow model.
180
- - `--text-enc-path`: Path to the text encoder.
181
- - `--autoencoder-path`: Path to the autoencoder.
182
- - `--model-version`: Choose model version (`flux-dev` or `flux-schnell`).
183
- - `--flux-device`: Device to run the flow model on (default: cuda:0).
184
- - `--text-enc-device`: Device to run the text encoder on (default: cuda:0).
185
- - `--autoencoder-device`: Device to run the autoencoder on (default: cuda:0).
186
- - `--compile`: Compile the flow model with extra optimizations (default: False).
187
- - `--quant-text-enc`: Quantize the T5 text encoder to the given dtype (`qint4`, `qfloat8`, `qint2`, `qint8`, `bf16`), if `bf16`, will not quantize (default: `qfloat8`).
188
- - `--quant-ae`: Quantize the autoencoder with float8 linear layers, otherwise will use bfloat16 (default: False).
189
- - `--offload-flow`: Offload the flow model to the CPU when not being used to save memory (default: False).
190
- - `--no-offload-ae`: Disable offloading the autoencoder to the CPU when not being used to increase e2e inference speed (default: True [implies it will offload, setting this flag sets it to False]).
191
- - `--no-offload-text-enc`: Disable offloading the text encoder to the CPU when not being used to increase e2e inference speed (default: True [implies it will offload, setting this flag sets it to False]).
192
- - `--prequantized-flow`: Load the flow model from a prequantized checkpoint, which reduces the size of the checkpoint by about 50% & reduces startup time (default: False).
193
- - `--no-quantize-flow-modulation`: Disable quantization of the modulation layers in the flow transformer, which improves precision _moderately_ but adds ~2GB vram usage.
194
- - `--quantize-flow-embedder-layers`: Quantize the flow embedder layers in the flow transformer, reduces precision _considerably_ but saves ~512MB vram usage.
195
-
196
- ## Configuration
197
-
198
- The configuration files are located in the `configs` directory. You can specify different configurations for different model versions and devices.
199
-
200
- Example configuration file for a single 4090 (`configs/config-dev-offload-1-4090.json`):
201
-
202
- ```js
203
- {
204
- "version": "flux-dev", // or flux-schnell
205
- "params": {
206
- "in_channels": 64,
207
- "vec_in_dim": 768,
208
- "context_in_dim": 4096,
209
- "hidden_size": 3072,
210
- "mlp_ratio": 4.0,
211
- "num_heads": 24,
212
- "depth": 19,
213
- "depth_single_blocks": 38,
214
- "axes_dim": [16, 56, 56],
215
- "theta": 10000,
216
- "qkv_bias": true,
217
- "guidance_embed": true // if you are using flux-schnell, set this to false
218
- },
219
- "ae_params": {
220
- "resolution": 256,
221
- "in_channels": 3,
222
- "ch": 128,
223
- "out_ch": 3,
224
- "ch_mult": [1, 2, 4, 4],
225
- "num_res_blocks": 2,
226
- "z_channels": 16,
227
- "scale_factor": 0.3611,
228
- "shift_factor": 0.1159
229
- },
230
- "ckpt_path": "/your/path/to/flux1-dev.sft", // local path to original bf16 BFL flux checkpoint
231
- "ae_path": "/your/path/to/ae.sft", // local path to original bf16 BFL autoencoder checkpoint
232
- "repo_id": "black-forest-labs/FLUX.1-dev", // can ignore
233
- "repo_flow": "flux1-dev.sft", // can ignore
234
- "repo_ae": "ae.sft", // can ignore
235
- "text_enc_max_length": 512, // use 256 if you are using flux-schnell
236
- "text_enc_path": "city96/t5-v1_1-xxl-encoder-bf16", // or custom HF full bf16 T5EncoderModel repo id
237
- "text_enc_device": "cuda:0",
238
- "ae_device": "cuda:0",
239
- "flux_device": "cuda:0",
240
- "flow_dtype": "float16",
241
- "ae_dtype": "bfloat16",
242
- "text_enc_dtype": "bfloat16",
243
- "flow_quantization_dtype": "qfloat8", // will always be qfloat8, so can ignore
244
- "text_enc_quantization_dtype": "qint4", // choose between qint4, qint8, qfloat8, qint2 or delete entry for no quantization
245
- "ae_quantization_dtype": "qfloat8", // can either be qfloat8 or delete entry for no quantization
246
- "compile_extras": true, // compile the layers not included in the single-blocks or double-blocks
247
- "compile_blocks": true, // compile the single-blocks and double-blocks
248
- "offload_text_encoder": true, // offload the text encoder to cpu when not in use
249
- "offload_vae": true, // offload the autoencoder to cpu when not in use
250
- "offload_flow": false, // offload the flow transformer to cpu when not in use
251
- "prequantized_flow": false, // load the flow transformer from a prequantized checkpoint, which reduces the size of the checkpoint by about 50% & reduces startup time (default: false)
252
- "quantize_modulation": true, // quantize the modulation layers in the flow transformer, which reduces precision moderately but saves ~2GB vram usage (default: true)
253
- "quantize_flow_embedder_layers": false, // quantize the flow embedder layers in the flow transformer, if false, improves precision considerably at the cost of adding ~512MB vram usage (default: false)
254
- }
255
- ```
256
-
257
- The only things you should need to change in general are the:
258
-
259
- ```json5
260
- "ckpt_path": "/path/to/your/flux1-dev.sft", // path to your original BFL flow transformer (not diffusers)
261
- "ae_path": "/path/to/your/ae.sft", // path to your original BFL autoencoder (not diffusers)
262
- "text_enc_path": "path/to/your/t5-v1_1-xxl-encoder-bf16", // HF T5EncoderModel - can use "city96/t5-v1_1-xxl-encoder-bf16" for a simple to download version
263
- ```
264
-
265
- Other things to change can be the
266
-
267
- - `"text_enc_max_length": 512`
268
- max length for the text encoder, 256 if you are using flux-schnell
269
-
270
- - `"ae_quantization_dtype": "qfloat8"`
271
- quantization dtype for the autoencoder, can be `qfloat8` or delete entry for no quantization, will use the float8 linear layer implementation included in this repo.
272
-
273
- - `"text_enc_quantization_dtype": "qfloat8"`
274
- quantization dtype for the text encoder, if `qfloat8` or `qint2` will use quanto, `qint4`, `qint8` will use bitsandbytes
275
-
276
- - `"compile_extras": true,`
277
- compiles all modules that are not the single-blocks or double-blocks (default: false)
278
-
279
- - `"compile_blocks": true,`
280
- compiles all single-blocks and double-blocks (default: false)
281
-
282
- - `"text_enc_offload": false,`
283
- offload text encoder to cpu (default: false) - set to true if you only have a single 4090 and no other GPUs, otherwise you can set this to false and reduce latency [NOTE: this will be slow, if you have multiple GPUs, change the text_enc_device to a different device so you can set offloading for text_enc to false]
284
-
285
- - `"ae_offload": false,`
286
- offload autoencoder to cpu (default: false) - set to true if you only have a single 4090 and no other GPUs, otherwise you can set this to false and reduce latency [NOTE: this will be slow, if you have multiple GPUs, change the ae_device to a different device so you can set offloading for ae to false]
287
-
288
- - `"flux_offload": false,`
289
- offload flow transformer to cpu (default: false) - set to true if you only have a single 4090 and no other GPUs, otherwise you can set this to false and reduce latency [NOTE: this will be slow, if you have multiple GPUs, change the flux_device to a different device so you can set offloading for flux to false]
290
-
291
- - `"flux_device": "cuda:0",`
292
- device for flow transformer (default: cuda:0) - this gpu must have fp8 support and at least 16GB of memory, does not need to be the same as text_enc_device or ae_device
293
-
294
- - `"text_enc_device": "cuda:0",`
295
- device for text encoder (default: cuda:0) - set this to a different device - e.g. `"cuda:1"` if you have multiple gpus so you can set offloading for text_enc to false, does not need to be the same as flux_device or ae_device
296
-
297
- - `"ae_device": "cuda:0",`
298
- device for autoencoder (default: cuda:0) - set this to a different device - e.g. `"cuda:1"` if you have multiple gpus so you can set offloading for ae to false, does not need to be the same as flux_device or text_enc_device
299
-
300
- - `"prequantized_flow": false,`
301
- load the flow transformer from a prequantized checkpoint, which reduces the size of the checkpoint by about 50% & reduces startup time (default: false)
302
-
303
- - Note: MUST be a prequantized checkpoint created with the same quantization settings as the current config, and must have been quantized using this repo.
304
-
305
- - `"quantize_modulation": true,`
306
- quantize the modulation layers in the flow transformer, which improves precision at the cost of adding ~2GB vram usage (default: true)
307
-
308
- - `"quantize_flow_embedder_layers": false,`
309
- quantize the flow embedder layers in the flow transformer, which improves precision considerably at the cost of adding ~512MB vram usage (default: false)
310
-
311
- ## API Endpoints
312
-
313
- ### Generate Image
314
-
315
- - **URL**: `/generate`
316
- - **Method**: `POST`
317
- - **Request Body**:
318
-
319
- - `prompt` (str): The text prompt for image generation.
320
- - `width` (int, optional): The width of the generated image (default: 720).
321
- - `height` (int, optional): The height of the generated image (default: 1024).
322
- - `num_steps` (int, optional): The number of steps for the generation process (default: 24).
323
- - `guidance` (float, optional): The guidance scale for the generation process (default: 3.5).
324
- - `seed` (int, optional): The seed for random number generation.
325
- - `init_image` (str, optional): The base64 encoded image to be used as a reference for the generation process.
326
- - `strength` (float, optional): The strength of the diffusion process when image is provided (default: 1.0).
327
-
328
- - **Response**: A JPEG image stream.
329
-
330
- ## Examples
331
-
332
- ### Running the Server
333
 
334
  ```bash
335
- python main.py --config-path configs/config-dev-1-4090.json --port 8088 --host 0.0.0.0
336
  ```
337
 
338
- Or if you need more granular control over the all of the settings, you can run the server with something like this:
 
 
339
 
340
  ```bash
341
- python main.py --port 8088 --host 0.0.0.0 \
342
- --flow-model-path /path/to/your/flux1-dev.sft \
343
- --text-enc-path /path/to/your/t5-v1_1-xxl-encoder-bf16 \
344
- --autoencoder-path /path/to/your/ae.sft \
345
- --model-version flux-dev \
346
- --flux-device cuda:0 \
347
- --text-enc-device cuda:0 \
348
- --autoencoder-device cuda:0 \
349
- --compile \
350
- --quant-text-enc qfloat8 \
351
- --quant-ae
352
  ```
353
 
354
- ### Generating an image on a client
355
 
356
- Send a POST request to `http://<host>:<port>/generate` with the following JSON body:
357
 
358
- ```json
359
- {
360
- "prompt": "a beautiful asian woman in traditional clothing with golden hairpin and blue eyes, wearing a red kimono with dragon patterns",
361
- "width": 1024,
362
- "height": 1024,
363
- "num_steps": 24,
364
- "guidance": 3.0,
365
- "seed": 13456
366
- }
 
 
367
  ```
368
 
369
- For an example of how to generate from a python client using the FastAPI server:
 
 
 
370
 
371
- ```py
372
- import requests
373
- import io
374
 
375
- prompt = "a beautiful asian woman in traditional clothing with golden hairpin and blue eyes, wearing a red kimono with dragon patterns"
376
- res = requests.post(
377
- "http://localhost:8088/generate",
378
- json={
379
- "width": 1024,
380
- "height": 720,
381
- "num_steps": 20,
382
- "guidance": 4,
383
- "prompt": prompt,
384
- },
385
- stream=True,
386
- )
387
 
388
- with open(f"output.jpg", "wb") as f:
389
- f.write(io.BytesIO(res.content).read())
390
-
391
- ```
392
 
393
- You can also generate an image by directly importing the FluxPipeline class and using it to generate an image. This is useful if you have a custom model configuration and want to generate an image without having to run the server.
394
 
395
- ```py
396
- import io
397
- from flux_pipeline import FluxPipeline
 
 
 
 
398
 
 
399
 
400
- pipe = FluxPipeline.load_pipeline_from_config_path(
401
- "configs/config-dev-offload-1-4090.json" # or whatever your config is
402
- )
403
-
404
- output_jpeg_bytes: io.BytesIO = pipe.generate(
405
- # Required args:
406
- prompt="A beautiful asian woman in traditional clothing with golden hairpin and blue eyes, wearing a red kimono with dragon patterns",
407
- # Optional args:
408
- width=1024,
409
- height=1024,
410
- num_steps=20,
411
- guidance=3.5,
412
- seed=13456,
413
- init_image="path/to/your/init_image.jpg",
414
- strength=0.8,
415
- )
416
-
417
- with open("output.jpg", "wb") as f:
418
- f.write(output_jpeg_bytes.getvalue())
419
-
420
- ```
 
1
+ ---
2
+ license: other
3
+ license_name: flux-1-dev-non-commercial-license
4
+ license_link: LICENSE
5
+ ---
6
 
7
+ # FLUX.1 [dev] -- Flumina Server App (FP8 Version)
8
 
9
+ This repository contains an implementation of the FLUX.1 [dev] [FP8 version](https://github.com/aredden/flux-fp8-api), which uses float8 numerics instead of bfloat16. This optimization leads to 2x faster performance in inference when compared to previous versions, making it ideal for high-speed, resource-efficient applications on Fireworks AI’s Flumina Server App toolkit.
10
 
11
+ ![Example output](example.png)
12
 
13
+ ## Getting Started -- Serverless deployment on Fireworks
 
 
 
14
 
15
+ This FP8 Server App is deployed to Fireworks as-is in a "serverless" deployment, enabling you to leverage its performance boost without needing to manage servers manually.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
+ Grab an [API Key](https://fireworks.ai/account/api-keys) from Fireworks and set it in your environment variables:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  ```bash
20
+ export API_KEY=YOUR_API_KEY_HERE
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  ```
22
 
23
+ ### Text-to-Image Example Call
24
 
25
  ```bash
26
+ curl -X POST 'https://api.fireworks.ai/inference/v1/workflows/accounts/fireworks/models/flux-1-dev-fp8/text_to_image' \
27
+ -H "Authorization: Bearer $API_KEY" \
28
+ -H "Content-Type: application/json" \
29
+ -H "Accept: image/jpeg" \
30
+ -d '{
31
+ "prompt": "Woman laying in the grass",
32
+ "aspect_ratio": "16:9",
33
+ "guidance_scale": 3.5,
34
+ "num_inference_steps": 30,
35
+ "seed": 0
36
+ }' \
37
+ --output output.jpg
38
  ```
39
 
40
+ ![Output of text-to-image](t2i_output.jpg)
 
 
 
 
 
 
41
 
42
+ ## Deploying FLUX.1 [dev] to Fireworks On-Demand
43
 
44
+ FLUX.1 [dev] (FP8) is available on Fireworks via [on-demand deployments](https://docs.fireworks.ai/guides/ondemand-deployments). It can be deployed in a few simple steps:
45
 
46
+ ### Prerequisite: Install the Flumina CLI
47
 
48
+ The Flumina CLI is included with the [fireworks-ai](https://pypi.org/project/fireworks-ai/) Python package. It can be installed with pip like so:
49
  ```bash
50
+ pip install 'fireworks-ai[flumina]>=0.15.7'
51
  ```
52
 
53
+ Also get an API key from the [Fireworks site](https://fireworks.ai/account/api-keys) and set it in the Flumina CLI:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
  ```bash
56
+ flumina set-api-key YOURAPIKEYHERE
57
  ```
58
 
59
+ ### Creating an On-Demand Deployment
60
+
61
+ `flumina deploy` can be used to create an on-demand deployment. When invoked with a model name that exists already, it will create a new deployment in your account which has that model:
62
 
63
  ```bash
64
+ flumina deploy accounts/fireworks/models/flux-1-dev-fp8
 
 
 
 
 
 
 
 
 
 
65
  ```
66
 
67
+ *Note: FP8 FLUX models require passing `--accelerator-type H100` to `flumina deploy` in order to deploy successfully.*
68
 
69
+ When successful, the CLI will print out example commands to call your new deployment, for example:
70
 
71
+ ```bash
72
+ curl -X POST 'https://api.fireworks.ai/inference/v1/workflows/accounts/fireworks/models/flux-1-dev-fp8/text_to_image?deployment=accounts/u-6jamesr6-63834f/deployments/a0dab4ba' \
73
+ -H 'Authorization: Bearer API_KEY' \
74
+ -H "Content-Type: application/json" \
75
+ -d '{
76
+ "prompt": "<value>",
77
+ "aspect_ratio": "16:9",
78
+ "guidance_scale": 3.5,
79
+ "num_inference_steps": 30,
80
+ "seed": 0
81
+ }'
82
  ```
83
 
84
+ Your deployment can also be administered using the Flumina CLI. Useful commands include:
85
+ * `flumina list deployments` to show all of your deployments
86
+ * `flumina get deployment` to get details about a specific deployment
87
+ * `flumina delete deployment` to delete a deployment
88
 
89
+ ## What is Flumina?
 
 
90
 
91
+ Flumina is Fireworks AI’s new system for hosting Server Apps that allows users to deploy deep learning inference to production in minutes, not weeks.
 
 
 
 
 
 
 
 
 
 
 
92
 
93
+ ## What does Flumina offer for FLUX models?
 
 
 
94
 
95
+ Flumina offers the following benefits:
96
 
97
+ * Clear, precise definition of the server-side workload by looking at the server app implementation (you are here)
98
+ * Extensibility interface, which allows for dynamic loading/dispatching of add-ons server-side. For FLUX:
99
+ * ControlNet (Union) adapters
100
+ * LoRA adapters
101
+ * Off-the-shelf support for standing up on-demand capacity for the Server App on Fireworks
102
+ * Further customization of the deployment logic by modifying the Server App and deploying the modified version.
103
+ * Now with support for FP8 numerics, delivering enhanced speed and efficiency for intensive workloads.
104
 
105
+ ## Deploying Custom FLUX.1 [dev] FP8 Apps to Fireworks On-demand
106
 
107
+ Coming soon!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
clone_weights.sh ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Clone the repository without checking out all the files
2
+ git clone --no-checkout git@hf.co:black-forest-labs/FLUX.1-dev temp-repo
3
+ cd temp-repo
4
+
5
+ # Enable sparse-checkout
6
+ git sparse-checkout init --cone
7
+
8
+ # Specify only the files you want
9
+ git sparse-checkout set ae.safetensors flux1-dev.safetensors
10
+
11
+ # Checkout the specified files
12
+ git checkout
13
+
14
+ # Move the files to the original directory (parent directory)
15
+ mv ae.safetensors ../
16
+ mv flux1-dev.safetensors ../
17
+
18
+ # Go back to the original directory
19
+ cd ..
20
+
21
+ # Remove the temporary directory
22
+ rm -rf temp-repo
config.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config_path": "configs/config-dev.json",
3
+ "flow_model_path": "flux1-dev.safetensors",
4
+ "model_version": "flux-dev",
5
+ "flux_device": "cuda",
6
+ "autoencoder_path": null,
7
+ "autoencoder_device": "cuda",
8
+ "text_enc_path": null,
9
+ "text_enc_device": "cuda",
10
+ "num_to_quant": 20,
11
+ "compile": false,
12
+ "quant_text_enc": "qfloat8",
13
+ "quant_ae": false,
14
+ "offload_flow": false,
15
+ "offload_ae": true,
16
+ "offload_text_enc": true,
17
+ "prequantized_flow": false,
18
+ "quantize_modulation": true,
19
+ "quantize_flow_embedder_layers": false
20
+ }
configs/config-dev-1-RTX6000ADA.json DELETED
@@ -1,57 +0,0 @@
1
- {
2
- "version": "flux-dev",
3
- "params": {
4
- "in_channels": 64,
5
- "vec_in_dim": 768,
6
- "context_in_dim": 4096,
7
- "hidden_size": 3072,
8
- "mlp_ratio": 4.0,
9
- "num_heads": 24,
10
- "depth": 19,
11
- "depth_single_blocks": 38,
12
- "axes_dim": [
13
- 16,
14
- 56,
15
- 56
16
- ],
17
- "theta": 10000,
18
- "qkv_bias": true,
19
- "guidance_embed": true
20
- },
21
- "ae_params": {
22
- "resolution": 256,
23
- "in_channels": 3,
24
- "ch": 128,
25
- "out_ch": 3,
26
- "ch_mult": [
27
- 1,
28
- 2,
29
- 4,
30
- 4
31
- ],
32
- "num_res_blocks": 2,
33
- "z_channels": 16,
34
- "scale_factor": 0.3611,
35
- "shift_factor": 0.1159
36
- },
37
- "ckpt_path": "/big/generator-ui/flux-testing/flux/model-dir/flux1-dev.sft",
38
- "ae_path": "/big/generator-ui/flux-testing/flux/model-dir/ae.sft",
39
- "repo_id": "black-forest-labs/FLUX.1-dev",
40
- "repo_flow": "flux1-dev.sft",
41
- "repo_ae": "ae.sft",
42
- "text_enc_max_length": 512,
43
- "text_enc_path": "city96/t5-v1_1-xxl-encoder-bf16",
44
- "text_enc_device": "cuda:0",
45
- "ae_device": "cuda:0",
46
- "flux_device": "cuda:0",
47
- "flow_dtype": "float16",
48
- "ae_dtype": "bfloat16",
49
- "text_enc_dtype": "bfloat16",
50
- "flow_quantization_dtype": "qfloat8",
51
- "text_enc_quantization_dtype": "qfloat8",
52
- "compile_extras": true,
53
- "compile_blocks": true,
54
- "offload_text_encoder": false,
55
- "offload_vae": false,
56
- "offload_flow": false
57
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
configs/config-dev-cuda0.json DELETED
@@ -1,56 +0,0 @@
1
- {
2
- "version": "flux-dev",
3
- "params": {
4
- "in_channels": 64,
5
- "vec_in_dim": 768,
6
- "context_in_dim": 4096,
7
- "hidden_size": 3072,
8
- "mlp_ratio": 4.0,
9
- "num_heads": 24,
10
- "depth": 19,
11
- "depth_single_blocks": 38,
12
- "axes_dim": [
13
- 16,
14
- 56,
15
- 56
16
- ],
17
- "theta": 10000,
18
- "qkv_bias": true,
19
- "guidance_embed": true
20
- },
21
- "ae_params": {
22
- "resolution": 256,
23
- "in_channels": 3,
24
- "ch": 128,
25
- "out_ch": 3,
26
- "ch_mult": [
27
- 1,
28
- 2,
29
- 4,
30
- 4
31
- ],
32
- "num_res_blocks": 2,
33
- "z_channels": 16,
34
- "scale_factor": 0.3611,
35
- "shift_factor": 0.1159
36
- },
37
- "ckpt_path": "/big/generator-ui/flux-testing/flux/model-dir/flux1-dev.sft",
38
- "ae_path": "/big/generator-ui/flux-testing/flux/model-dir/ae.sft",
39
- "repo_id": "black-forest-labs/FLUX.1-dev",
40
- "repo_flow": "flux1-dev.sft",
41
- "repo_ae": "ae.sft",
42
- "text_enc_max_length": 512,
43
- "text_enc_path": "city96/t5-v1_1-xxl-encoder-bf16",
44
- "text_enc_device": "cuda:0",
45
- "ae_device": "cuda:0",
46
- "flux_device": "cuda:0",
47
- "flow_dtype": "float16",
48
- "ae_dtype": "bfloat16",
49
- "text_enc_dtype": "bfloat16",
50
- "text_enc_quantization_dtype": "qfloat8",
51
- "compile_extras": false,
52
- "compile_blocks": false,
53
- "offload_ae": false,
54
- "offload_text_enc": false,
55
- "offload_flow": false
56
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
configs/config-dev-eval.json DELETED
@@ -1,57 +0,0 @@
1
- {
2
- "version": "flux-dev",
3
- "params": {
4
- "in_channels": 64,
5
- "vec_in_dim": 768,
6
- "context_in_dim": 4096,
7
- "hidden_size": 3072,
8
- "mlp_ratio": 4.0,
9
- "num_heads": 24,
10
- "depth": 19,
11
- "depth_single_blocks": 38,
12
- "axes_dim": [
13
- 16,
14
- 56,
15
- 56
16
- ],
17
- "theta": 10000,
18
- "qkv_bias": true,
19
- "guidance_embed": true
20
- },
21
- "ae_params": {
22
- "resolution": 256,
23
- "in_channels": 3,
24
- "ch": 128,
25
- "out_ch": 3,
26
- "ch_mult": [
27
- 1,
28
- 2,
29
- 4,
30
- 4
31
- ],
32
- "num_res_blocks": 2,
33
- "z_channels": 16,
34
- "scale_factor": 0.3611,
35
- "shift_factor": 0.1159
36
- },
37
- "ckpt_path": "/big/generator-ui/flux-testing/flux/model-dir/flux1-dev.sft",
38
- "ae_path": "/big/generator-ui/flux-testing/flux/model-dir/ae.sft",
39
- "repo_id": "black-forest-labs/FLUX.1-dev",
40
- "repo_flow": "flux1-dev.sft",
41
- "repo_ae": "ae.sft",
42
- "text_enc_max_length": 512,
43
- "text_enc_path": "city96/t5-v1_1-xxl-encoder-bf16",
44
- "text_enc_device": "cuda:1",
45
- "ae_device": "cuda:1",
46
- "flux_device": "cuda:0",
47
- "flow_dtype": "float16",
48
- "ae_dtype": "bfloat16",
49
- "text_enc_dtype": "bfloat16",
50
- "flow_quantization_dtype": "qfloat8",
51
- "text_enc_quantization_dtype": "qfloat8",
52
- "compile_extras": false,
53
- "compile_blocks": false,
54
- "offload_ae": false,
55
- "offload_text_enc": false,
56
- "offload_flow": false
57
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
configs/config-dev-gigaquant.json DELETED
@@ -1,58 +0,0 @@
1
- {
2
- "version": "flux-dev",
3
- "params": {
4
- "in_channels": 64,
5
- "vec_in_dim": 768,
6
- "context_in_dim": 4096,
7
- "hidden_size": 3072,
8
- "mlp_ratio": 4.0,
9
- "num_heads": 24,
10
- "depth": 19,
11
- "depth_single_blocks": 38,
12
- "axes_dim": [
13
- 16,
14
- 56,
15
- 56
16
- ],
17
- "theta": 10000,
18
- "qkv_bias": true,
19
- "guidance_embed": true
20
- },
21
- "ae_params": {
22
- "resolution": 256,
23
- "in_channels": 3,
24
- "ch": 128,
25
- "out_ch": 3,
26
- "ch_mult": [
27
- 1,
28
- 2,
29
- 4,
30
- 4
31
- ],
32
- "num_res_blocks": 2,
33
- "z_channels": 16,
34
- "scale_factor": 0.3611,
35
- "shift_factor": 0.1159
36
- },
37
- "ckpt_path": "/big/generator-ui/flux-testing/flux/model-dir/flux1-dev.sft",
38
- "ae_path": "/big/generator-ui/flux-testing/flux/model-dir/ae.sft",
39
- "repo_id": "black-forest-labs/FLUX.1-dev",
40
- "repo_flow": "flux1-dev.sft",
41
- "repo_ae": "ae.sft",
42
- "text_enc_max_length": 512,
43
- "text_enc_path": "city96/t5-v1_1-xxl-encoder-bf16",
44
- "text_enc_device": "cuda:0",
45
- "ae_device": "cuda:0",
46
- "flux_device": "cuda:0",
47
- "flow_dtype": "float16",
48
- "ae_dtype": "bfloat16",
49
- "text_enc_dtype": "bfloat16",
50
- "num_to_quant": 220,
51
- "flow_quantization_dtype": "qint4",
52
- "text_enc_quantization_dtype": "qint4",
53
- "ae_quantization_dtype": "qint4",
54
- "clip_quantization_dtype": "qint4",
55
- "compile_extras": false,
56
- "compile_blocks": false,
57
- "quantize_extras": true
58
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
configs/config-dev-offload-1-4080.json DELETED
@@ -1,58 +0,0 @@
1
- {
2
- "version": "flux-dev",
3
- "params": {
4
- "in_channels": 64,
5
- "vec_in_dim": 768,
6
- "context_in_dim": 4096,
7
- "hidden_size": 3072,
8
- "mlp_ratio": 4.0,
9
- "num_heads": 24,
10
- "depth": 19,
11
- "depth_single_blocks": 38,
12
- "axes_dim": [
13
- 16,
14
- 56,
15
- 56
16
- ],
17
- "theta": 10000,
18
- "qkv_bias": true,
19
- "guidance_embed": true
20
- },
21
- "ae_params": {
22
- "resolution": 256,
23
- "in_channels": 3,
24
- "ch": 128,
25
- "out_ch": 3,
26
- "ch_mult": [
27
- 1,
28
- 2,
29
- 4,
30
- 4
31
- ],
32
- "num_res_blocks": 2,
33
- "z_channels": 16,
34
- "scale_factor": 0.3611,
35
- "shift_factor": 0.1159
36
- },
37
- "ckpt_path": "/big/generator-ui/flux-testing/flux/model-dir/flux1-dev.sft",
38
- "ae_path": "/big/generator-ui/flux-testing/flux/model-dir/ae.sft",
39
- "repo_id": "black-forest-labs/FLUX.1-dev",
40
- "repo_flow": "flux1-dev.sft",
41
- "repo_ae": "ae.sft",
42
- "text_enc_max_length": 512,
43
- "text_enc_path": "city96/t5-v1_1-xxl-encoder-bf16",
44
- "text_enc_device": "cuda:0",
45
- "ae_device": "cuda:0",
46
- "flux_device": "cuda:0",
47
- "flow_dtype": "float16",
48
- "ae_dtype": "bfloat16",
49
- "text_enc_dtype": "bfloat16",
50
- "flow_quantization_dtype": "qfloat8",
51
- "text_enc_quantization_dtype": "qint4",
52
- "ae_quantization_dtype": "qfloat8",
53
- "compile_extras": true,
54
- "compile_blocks": true,
55
- "offload_text_encoder": true,
56
- "offload_vae": true,
57
- "offload_flow": true
58
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
configs/config-dev-offload-1-4090.json DELETED
@@ -1,58 +0,0 @@
1
- {
2
- "version": "flux-dev",
3
- "params": {
4
- "in_channels": 64,
5
- "vec_in_dim": 768,
6
- "context_in_dim": 4096,
7
- "hidden_size": 3072,
8
- "mlp_ratio": 4.0,
9
- "num_heads": 24,
10
- "depth": 19,
11
- "depth_single_blocks": 38,
12
- "axes_dim": [
13
- 16,
14
- 56,
15
- 56
16
- ],
17
- "theta": 10000,
18
- "qkv_bias": true,
19
- "guidance_embed": true
20
- },
21
- "ae_params": {
22
- "resolution": 256,
23
- "in_channels": 3,
24
- "ch": 128,
25
- "out_ch": 3,
26
- "ch_mult": [
27
- 1,
28
- 2,
29
- 4,
30
- 4
31
- ],
32
- "num_res_blocks": 2,
33
- "z_channels": 16,
34
- "scale_factor": 0.3611,
35
- "shift_factor": 0.1159
36
- },
37
- "ckpt_path": "/big/generator-ui/flux-testing/flux/model-dir/flux1-dev.sft",
38
- "ae_path": "/big/generator-ui/flux-testing/flux/model-dir/ae.sft",
39
- "repo_id": "black-forest-labs/FLUX.1-dev",
40
- "repo_flow": "flux1-dev.sft",
41
- "repo_ae": "ae.sft",
42
- "text_enc_max_length": 512,
43
- "text_enc_path": "city96/t5-v1_1-xxl-encoder-bf16",
44
- "text_enc_device": "cuda:0",
45
- "ae_device": "cuda:0",
46
- "flux_device": "cuda:0",
47
- "flow_dtype": "float16",
48
- "ae_dtype": "bfloat16",
49
- "text_enc_dtype": "bfloat16",
50
- "flow_quantization_dtype": "qfloat8",
51
- "text_enc_quantization_dtype": "qint4",
52
- "ae_quantization_dtype": "qfloat8",
53
- "compile_extras": true,
54
- "compile_blocks": true,
55
- "offload_text_encoder": true,
56
- "offload_vae": true,
57
- "offload_flow": false
58
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
configs/config-dev-offload.json DELETED
@@ -1,58 +0,0 @@
1
- {
2
- "version": "flux-dev",
3
- "params": {
4
- "in_channels": 64,
5
- "vec_in_dim": 768,
6
- "context_in_dim": 4096,
7
- "hidden_size": 3072,
8
- "mlp_ratio": 4.0,
9
- "num_heads": 24,
10
- "depth": 19,
11
- "depth_single_blocks": 38,
12
- "axes_dim": [
13
- 16,
14
- 56,
15
- 56
16
- ],
17
- "theta": 10000,
18
- "qkv_bias": true,
19
- "guidance_embed": true
20
- },
21
- "ae_params": {
22
- "resolution": 256,
23
- "in_channels": 3,
24
- "ch": 128,
25
- "out_ch": 3,
26
- "ch_mult": [
27
- 1,
28
- 2,
29
- 4,
30
- 4
31
- ],
32
- "num_res_blocks": 2,
33
- "z_channels": 16,
34
- "scale_factor": 0.3611,
35
- "shift_factor": 0.1159
36
- },
37
- "ckpt_path": "/big/generator-ui/flux-testing/flux/model-dir/flux1-dev.sft",
38
- "ae_path": "/big/generator-ui/flux-testing/flux/model-dir/ae.sft",
39
- "repo_id": "black-forest-labs/FLUX.1-dev",
40
- "repo_flow": "flux1-dev.sft",
41
- "repo_ae": "ae.sft",
42
- "text_enc_max_length": 512,
43
- "text_enc_path": "city96/t5-v1_1-xxl-encoder-bf16",
44
- "text_enc_device": "cuda:0",
45
- "ae_device": "cuda:0",
46
- "flux_device": "cuda:0",
47
- "flow_dtype": "float16",
48
- "ae_dtype": "bfloat16",
49
- "text_enc_dtype": "bfloat16",
50
- "flow_quantization_dtype": "qfloat8",
51
- "text_enc_quantization_dtype": "qint4",
52
- "ae_quantization_dtype": "qfloat8",
53
- "compile_extras": false,
54
- "compile_blocks": false,
55
- "offload_text_encoder": true,
56
- "offload_vae": true,
57
- "offload_flow": true
58
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
configs/config-dev-prequant.json DELETED
@@ -1,57 +0,0 @@
1
- {
2
- "version": "flux-dev",
3
- "params": {
4
- "in_channels": 64,
5
- "vec_in_dim": 768,
6
- "context_in_dim": 4096,
7
- "hidden_size": 3072,
8
- "mlp_ratio": 4.0,
9
- "num_heads": 24,
10
- "depth": 19,
11
- "depth_single_blocks": 38,
12
- "axes_dim": [
13
- 16,
14
- 56,
15
- 56
16
- ],
17
- "theta": 10000,
18
- "qkv_bias": true,
19
- "guidance_embed": true
20
- },
21
- "ae_params": {
22
- "resolution": 256,
23
- "in_channels": 3,
24
- "ch": 128,
25
- "out_ch": 3,
26
- "ch_mult": [
27
- 1,
28
- 2,
29
- 4,
30
- 4
31
- ],
32
- "num_res_blocks": 2,
33
- "z_channels": 16,
34
- "scale_factor": 0.3611,
35
- "shift_factor": 0.1159
36
- },
37
- "ckpt_path": "/big/generator-ui/flux-testing/flux/flux-fp16-acc/flux_fp8.safetensors",
38
- "ae_path": "/big/generator-ui/flux-testing/flux/model-dir/ae.sft",
39
- "repo_id": "black-forest-labs/FLUX.1-dev",
40
- "repo_flow": "flux1-dev.sft",
41
- "repo_ae": "ae.sft",
42
- "text_enc_max_length": 512,
43
- "text_enc_path": "city96/t5-v1_1-xxl-encoder-bf16",
44
- "text_enc_device": "cuda:1",
45
- "ae_device": "cuda:1",
46
- "flux_device": "cuda:0",
47
- "flow_dtype": "float16",
48
- "ae_dtype": "bfloat16",
49
- "text_enc_dtype": "bfloat16",
50
- "text_enc_quantization_dtype": "qfloat8",
51
- "compile_extras": false,
52
- "compile_blocks": false,
53
- "prequantized_flow": true,
54
- "offload_ae": false,
55
- "offload_text_enc": false,
56
- "offload_flow": false
57
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
configs/config-dev.json CHANGED
@@ -34,16 +34,16 @@
34
  "scale_factor": 0.3611,
35
  "shift_factor": 0.1159
36
  },
37
- "ckpt_path": "/big/generator-ui/flux-testing/flux/model-dir/flux1-dev.sft",
38
- "ae_path": "/big/generator-ui/flux-testing/flux/model-dir/ae.sft",
39
  "repo_id": "black-forest-labs/FLUX.1-dev",
40
- "repo_flow": "flux1-dev.sft",
41
- "repo_ae": "ae.sft",
42
  "text_enc_max_length": 512,
43
- "text_enc_path": "city96/t5-v1_1-xxl-encoder-bf16",
44
- "text_enc_device": "cuda:1",
45
- "ae_device": "cuda:1",
46
- "flux_device": "cuda:0",
47
  "flow_dtype": "float16",
48
  "ae_dtype": "bfloat16",
49
  "text_enc_dtype": "bfloat16",
@@ -54,4 +54,4 @@
54
  "offload_ae": false,
55
  "offload_text_enc": false,
56
  "offload_flow": false
57
- }
 
34
  "scale_factor": 0.3611,
35
  "shift_factor": 0.1159
36
  },
37
+ "ckpt_path": "flux1-dev.safetensors",
38
+ "ae_path": "ae.safetensors",
39
  "repo_id": "black-forest-labs/FLUX.1-dev",
40
+ "repo_flow": "flux1-dev.safetensors",
41
+ "repo_ae": "ae.safetensors",
42
  "text_enc_max_length": 512,
43
+ "text_enc_path": "t5-v1_1-xxl-encoder-bf16",
44
+ "text_enc_device": "cuda",
45
+ "ae_device": "cuda",
46
+ "flux_device": "cuda",
47
  "flow_dtype": "float16",
48
  "ae_dtype": "bfloat16",
49
  "text_enc_dtype": "bfloat16",
 
54
  "offload_ae": false,
55
  "offload_text_enc": false,
56
  "offload_flow": false
57
+ }
configs/config-schnell-cuda0.json DELETED
@@ -1,57 +0,0 @@
1
- {
2
- "version": "flux-schnell",
3
- "params": {
4
- "in_channels": 64,
5
- "vec_in_dim": 768,
6
- "context_in_dim": 4096,
7
- "hidden_size": 3072,
8
- "mlp_ratio": 4.0,
9
- "num_heads": 24,
10
- "depth": 19,
11
- "depth_single_blocks": 38,
12
- "axes_dim": [
13
- 16,
14
- 56,
15
- 56
16
- ],
17
- "theta": 10000,
18
- "qkv_bias": true,
19
- "guidance_embed": false
20
- },
21
- "ae_params": {
22
- "resolution": 256,
23
- "in_channels": 3,
24
- "ch": 128,
25
- "out_ch": 3,
26
- "ch_mult": [
27
- 1,
28
- 2,
29
- 4,
30
- 4
31
- ],
32
- "num_res_blocks": 2,
33
- "z_channels": 16,
34
- "scale_factor": 0.3611,
35
- "shift_factor": 0.1159
36
- },
37
- "ckpt_path": "/big/generator-ui/flux-testing/flux/model-dir-schnell/flux1-schnell.sft",
38
- "ae_path": "/big/generator-ui/flux-testing/flux/model-dir-schnell/ae.sft",
39
- "repo_id": "black-forest-labs/FLUX.1-schnell",
40
- "repo_flow": "flux1-schnell.sft",
41
- "repo_ae": "ae.sft",
42
- "text_enc_max_length": 256,
43
- "text_enc_path": "city96/t5-v1_1-xxl-encoder-bf16",
44
- "text_enc_device": "cuda:0",
45
- "ae_device": "cuda:0",
46
- "flux_device": "cuda:0",
47
- "flow_dtype": "float16",
48
- "ae_dtype": "bfloat16",
49
- "text_enc_dtype": "bfloat16",
50
- "text_enc_quantization_dtype": "qfloat8",
51
- "ae_quantization_dtype": "qfloat8",
52
- "compile_extras": false,
53
- "compile_blocks": false,
54
- "offload_ae": false,
55
- "offload_text_enc": false,
56
- "offload_flow": false
57
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
example.png ADDED

Git LFS Details

  • SHA256: 004d59b77fb4d8ac38e282f4c55aeeabd56f8526300c0a1c744bcbbbe737515a
  • Pointer size: 132 Bytes
  • Size of remote file: 3.2 MB
fireworks.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_is_flumina_model": true}
flumina.py ADDED
@@ -0,0 +1,355 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # flumina.py
2
+ import torch
3
+ import io
4
+ import json
5
+ from fireworks.flumina import FluminaModule, main as flumina_main
6
+ from fireworks.flumina.route import post
7
+ import pydantic
8
+ from pydantic import BaseModel
9
+ from fastapi import Header
10
+ from fastapi.responses import Response
11
+ import math
12
+ import logging
13
+ import os
14
+ import re
15
+ import PIL.Image as Image
16
+ from typing import Optional, Set, Tuple
17
+
18
+ from flux_pipeline import FluxPipeline
19
+ from util import load_config, ModelVersion
20
+
21
+ _ASPECT_RATIOS = [
22
+ (1, 1),
23
+ (21, 9),
24
+ (16, 9),
25
+ (3, 2),
26
+ (5, 4),
27
+ (4, 5),
28
+ (2, 3),
29
+ (9, 16),
30
+ (9, 21),
31
+ ]
32
+
33
+ # Util
34
+ def _aspect_ratio_to_width_height(aspect_ratio: str) -> Tuple[int, int]:
35
+ """
36
+ Convert specified aspect ratio to a height/width pair.
37
+ """
38
+ if ":" not in aspect_ratio:
39
+ raise ValueError(
40
+ f"Invalid aspect ratio: {aspect_ratio}. Aspect ratio must be in w:h format, e.g. 16:9"
41
+ )
42
+
43
+ w, h = aspect_ratio.split(":")
44
+ try:
45
+ w, h = int(w), int(h)
46
+ except ValueError:
47
+ raise ValueError(
48
+ f"Invalid aspect ratio: {aspect_ratio}. Aspect ratio must be in w:h format, e.g. 16:9"
49
+ )
50
+
51
+ if (w, h) not in _ASPECT_RATIOS:
52
+ raise ValueError(
53
+ f"Invalid aspect ratio: {aspect_ratio}. Aspect ratio must be one of {_ASPECT_RATIOS}"
54
+ )
55
+
56
+ # We consider megapixel not 10^6 pixels but 2^20 (1024x1024) pixels
57
+ TARGET_SIZE_MP = 1
58
+ target_size = TARGET_SIZE_MP * 2**20
59
+
60
+ width = math.sqrt(target_size / (w * h)) * w
61
+ height = math.sqrt(target_size / (w * h)) * h
62
+
63
+ PAD_MULTIPLE = 64
64
+
65
+ if PAD_MULTIPLE:
66
+ width = width // PAD_MULTIPLE * PAD_MULTIPLE
67
+ height = height // PAD_MULTIPLE * PAD_MULTIPLE
68
+
69
+ return int(width), int(height)
70
+
71
+
72
def encode_image(
    image: Image.Image, mime_type: str, jpeg_quality: int = 95
) -> bytes:
    """
    Serialize a PIL image to bytes in the requested format.

    Args:
        image: The image to encode.
        mime_type: Either ``"image/jpeg"`` or ``"image/png"``.
        jpeg_quality: JPEG quality in the range 0-100; only used for JPEG.

    Returns:
        The encoded image bytes.

    Raises:
        ValueError: If ``mime_type`` is unsupported, or ``jpeg_quality`` is
            outside 0-100 when encoding JPEG.
    """
    buffered = io.BytesIO()
    if mime_type == "image/jpeg":
        if jpeg_quality < 0 or jpeg_quality > 100:
            raise ValueError(
                f"jpeg_quality must be between 0 and 100, not {jpeg_quality}"
            )
        image.save(buffered, format="JPEG", quality=jpeg_quality)
    elif mime_type == "image/png":
        image.save(buffered, format="PNG")
    else:
        raise ValueError(f"invalid mime_type {mime_type}")
    return buffered.getvalue()
87
+
88
+
89
def parse_accept_header(accept: str) -> str:
    """
    Pick the response MIME type from an HTTP ``Accept`` header value.

    The header is split into comma-separated media ranges, each with an
    optional ``q`` weight (default 1.0). Ranges are considered in descending
    weight order (ties keep header order) and the first one that is a
    supported type, ``*/*`` or ``image/*`` decides the result.

    Args:
        accept: Raw ``Accept`` header value, e.g. ``"image/png;q=0.8, */*;q=0.1"``.

    Returns:
        ``"image/jpeg"`` or ``"image/png"``. Wildcards resolve to the first
        supported type (``"image/jpeg"``).

    Raises:
        ValueError: If a media range is malformed, or no acceptable range
            matches a supported type.
    """
    # Define a list of supported MIME types
    supported_types = ["image/jpeg", "image/png"]
    weighted_types = []

    for part in accept.split(","):
        part = part.strip()
        if not part:
            # Empty list elements (e.g. a trailing comma) are ignored.
            continue

        media_type, *params = (piece.strip() for piece in part.split(";"))
        if not media_type:
            raise ValueError(f"Malformed Accept header value: {part}")

        q_factor = 1.0
        for param in params:
            name, _, value = param.partition("=")
            # Whitespace around ";" and "=" is tolerated; parameters other
            # than q (e.g. charset) are skipped rather than hiding the weight.
            if name.strip().lower() != "q":
                continue
            value = value.strip()
            if not re.fullmatch(r"\d+(\.\d+)?", value):
                raise ValueError(f"Malformed Accept header value: {part}")
            q_factor = float(value)
            break

        # A weight of 0 means "not acceptable", so such ranges are dropped.
        if q_factor > 0:
            # Media types are case-insensitive; normalise before comparing.
            weighted_types.append((media_type.lower(), q_factor))

    # Sort the media types by q-factor, descending (stable for equal weights)
    sorted_types = sorted(weighted_types, key=lambda x: x[1], reverse=True)

    for media_type, _ in sorted_types:
        if media_type in supported_types:
            return media_type
        elif media_type == "*/*":
            return supported_types[0]  # Default to the first supported type
        elif media_type == "image/*":
            # If "image/*" is specified, return the first matching supported image type
            return supported_types[0]

    raise ValueError(f"Accept header did not include any supported MIME types: {supported_types}")
124
+
125
+
126
+ # Define request and response schemata
127
class Text2ImageRequest(BaseModel):
    """Request body for the ``/text_to_image`` endpoint."""

    # Text prompt passed to the pipeline.
    prompt: str
    # Aspect ratio in "w:h" form; converted to a pixel size by
    # _aspect_ratio_to_width_height.
    aspect_ratio: str = "16:9"
    # Passed to the pipeline as ``guidance``.
    guidance_scale: float = 3.5
    # Passed to the pipeline as ``num_steps`` and reported for billing.
    num_inference_steps: int = 30
    # Passed to the pipeline as ``seed``.
    seed: int = 0


class Error(BaseModel):
    """Error payload embedded in ``ErrorResponse``."""

    object: str = "error"
    type: str = "invalid_request_error"
    # Human-readable description; required (no default).
    message: str


class ErrorResponse(BaseModel):
    """JSON envelope returned for failed requests."""

    # NOTE(review): ``Error`` has a required ``message`` field, so this
    # default_factory looks like it cannot succeed when ``error`` is omitted;
    # the visible caller always passes ``error=`` explicitly. Confirm intent.
    error: Error = pydantic.Field(default_factory=Error)


class BillingInfo(BaseModel):
    """Properties serialized into the ``Fireworks-Billing-Properties`` response header."""

    steps: int
    height: int
    width: int
    is_control_net: bool = False
150
+
151
+
152
class FluminaModule(FluminaModule):
    """
    Flumina serving module exposing a FLUX text-to-image endpoint and LoRA
    addon management hooks.

    Note that this class reuses, and therefore shadows, the name of the
    imported ``fireworks.flumina.FluminaModule`` base class.
    """

    def __init__(self):
        """Load the pipeline described by ``config.json`` and run warm-up."""
        super().__init__()

        # Testing hook: when True, the response helpers return raw payloads
        # instead of ``Response`` objects. Set before anything else so the
        # attribute exists for every method call.
        self._test_return_sync_response = False

        # Read configuration from config.json
        with open('config.json', 'r') as f:
            config_data = json.load(f)

        # Now, we need to construct the config and load the model
        if 'config_path' in config_data:
            self.pipeline = FluxPipeline.load_pipeline_from_config_path(
                config_data['config_path'],
                flow_model_path=config_data.get('flow_model_path', None)
            )
        else:
            # Any model_version other than "flux-dev" selects flux-schnell.
            model_version = (
                ModelVersion.flux_dev
                if config_data.get('model_version', 'flux-dev') == "flux-dev"
                else ModelVersion.flux_schnell
            )
            config = load_config(
                model_version,
                flux_path=config_data.get('flow_model_path', None),
                flux_device=config_data.get('flux_device', 'cuda:0'),
                ae_path=config_data.get('autoencoder_path', None),
                ae_device=config_data.get('autoencoder_device', 'cuda:0'),
                text_enc_path=config_data.get('text_enc_path', None),
                text_enc_device=config_data.get('text_enc_device', 'cuda:0'),
                flow_dtype="float16",
                text_enc_dtype="bfloat16",
                ae_dtype="bfloat16",
                num_to_quant=config_data.get('num_to_quant', 20),
                # A single 'compile' key drives both compile flags.
                compile_extras=config_data.get('compile', False),
                compile_blocks=config_data.get('compile', False),
                # "bf16" is mapped to None for quant_text_enc.
                quant_text_enc=(
                    None
                    if config_data.get('quant_text_enc', 'qfloat8') == "bf16"
                    else config_data.get('quant_text_enc', 'qfloat8')
                ),
                quant_ae=config_data.get('quant_ae', False),
                offload_flow=config_data.get('offload_flow', False),
                offload_ae=config_data.get('offload_ae', True),
                offload_text_enc=config_data.get('offload_text_enc', True),
                prequantized_flow=config_data.get('prequantized_flow', False),
                quantize_modulation=config_data.get('quantize_modulation', True),
                quantize_flow_embedder_layers=config_data.get(
                    'quantize_flow_embedder_layers', False
                ),
            )
            self.pipeline = FluxPipeline.load_pipeline_from_config(config)

        # Initialize LoRA adapters
        self.lora_adapters: Set[str] = set()
        self.active_lora_adapter: Optional[str] = None

        # Warm-up
        self._warm_up()

    def _warm_up(self):
        """Run one single-step generation for every supported aspect ratio."""
        for f, s in _ASPECT_RATIOS:
            print(f"Warm-up for aspect ratio {f}:{s}")
            width, height = _aspect_ratio_to_width_height(f"{f}:{s}")
            self.pipeline.generate(
                prompt="a quick brown fox",
                height=height,
                width=width,
                guidance=3.5,
                num_steps=1,
                seed=0,
            )

    def _error_response(self, code: int, message: str) -> Response:
        """
        Build a JSON error response with the given HTTP status code.

        In test mode the serialized JSON string is returned directly.
        """
        response_json = ErrorResponse(
            error=Error(message=message),
        ).json()
        if self._test_return_sync_response:
            return response_json
        else:
            return Response(
                response_json,
                status_code=code,
                media_type="application/json",
            )

    def _image_response(
        self, image_bytes: bytes, mime_type: str, billing_info: BillingInfo
    ):
        """
        Build the 200 image response, attaching billing info as a header.

        In test mode the raw image bytes are returned directly.
        """
        if self._test_return_sync_response:
            return image_bytes
        else:
            headers = {'Fireworks-Billing-Properties': billing_info.json()}
            return Response(
                image_bytes, status_code=200, media_type=mime_type, headers=headers
            )

    @post('/text_to_image')
    async def text_to_image(
        self,
        body: Text2ImageRequest,
        accept: str = Header("image/jpeg"),
    ):
        """
        Generate an image for ``body.prompt`` and return it as the response body.

        An unparsable or unsupported ``Accept`` header, or an invalid
        ``aspect_ratio``, produces a 400 error response instead of an
        unhandled ``ValueError``.
        """
        try:
            mime_type = parse_accept_header(accept)
            width, height = _aspect_ratio_to_width_height(body.aspect_ratio)
        except ValueError as e:
            return self._error_response(400, str(e))

        img_bio = self.pipeline.generate(
            prompt=body.prompt,
            height=height,
            width=width,
            guidance=body.guidance_scale,
            num_steps=body.num_inference_steps,
            seed=body.seed,
        )

        billing_info = BillingInfo(
            steps=body.num_inference_steps,
            height=height,
            width=width,
        )
        # NOTE(review): the bytes from pipeline.generate are sent unchanged
        # under the negotiated mime_type (encode_image is not used here);
        # confirm the pipeline's output format matches what was requested.
        return self._image_response(img_bio.getvalue(), mime_type, billing_info)

    @property
    def supported_addon_types(self):
        """Addon types this module can load."""
        return ['lora']

    # Addon interface methods adjusted to remove ControlNet support
    def load_addon(
        self,
        addon_account_id: str,
        addon_model_id: str,
        addon_type: str,
        addon_data_path: os.PathLike,
    ):
        """
        Load an addon's weights into the pipeline and register it.

        Raises:
            ValueError: If ``addon_type`` is not in ``supported_addon_types``.
        """
        if addon_type not in self.supported_addon_types:
            raise ValueError(
                f"Invalid addon type {addon_type}. Supported types: {self.supported_addon_types}"
            )

        qualname = f"accounts/{addon_account_id}/models/{addon_model_id}"

        if addon_type == 'lora':
            self.pipeline.load_lora_weights(addon_data_path, adapter_name=qualname)
            self.lora_adapters.add(qualname)
        else:
            raise NotImplementedError(
                f'Addon support for type {addon_type} not implemented'
            )

    def unload_addon(
        self, addon_account_id: str, addon_model_id: str, addon_type: str
    ):
        """
        Remove a previously loaded addon from the pipeline.

        Raises:
            AssertionError: If the addon is not currently loaded.
            NotImplementedError: If ``addon_type`` is not ``'lora'``.
        """
        qualname = f"accounts/{addon_account_id}/models/{addon_model_id}"

        if addon_type == 'lora':
            # Explicit raise instead of ``assert`` so the check still runs
            # when Python is started with -O.
            if qualname not in self.lora_adapters:
                raise AssertionError(f'Addon {qualname} not loaded!')
            self.pipeline.delete_adapters([qualname])
            self.lora_adapters.remove(qualname)
            # Drop a stale "active" marker; otherwise activate_addon would
            # reject every later adapter as "already active".
            if self.active_lora_adapter == qualname:
                self.active_lora_adapter = None
        else:
            raise NotImplementedError(
                f'Addon support for type {addon_type} not implemented'
            )

    def activate_addon(self, addon_account_id: str, addon_model_id: str):
        """
        Mark a loaded LoRA adapter as the active one.

        Raises:
            ValueError: If another adapter is already active, or the addon
                is unknown.
        """
        qualname = f"accounts/{addon_account_id}/models/{addon_model_id}"

        if qualname in self.lora_adapters:
            if self.active_lora_adapter is not None:
                raise ValueError(
                    f"LoRA adapter {self.active_lora_adapter} already active. Multi-LoRA not yet supported"
                )

            self.active_lora_adapter = qualname
            return

        raise ValueError(f"Unknown addon {qualname}")

    def deactivate_addon(self, addon_account_id: str, addon_model_id: str):
        """
        Clear the active LoRA adapter.

        Raises:
            AssertionError: If the given addon is not the active adapter.
        """
        qualname = f"accounts/{addon_account_id}/models/{addon_model_id}"

        if self.active_lora_adapter == qualname:
            self.active_lora_adapter = None
        else:
            raise AssertionError(f'Addon {qualname} not loaded!')
337
+
338
+
339
# Entry point used when the module is launched by the Flumina runtime.
if __name__ == "__flumina_main__":
    f = FluminaModule()
    flumina_main(f)

# Local smoke test: build the module, generate one image synchronously and
# write the returned payload to disk.
if __name__ == "__main__":
    f = FluminaModule()
    f._test_return_sync_response = True
    import asyncio
    out = asyncio.run(f.text_to_image(
        body=Text2ImageRequest(
            prompt="test"
        ),
        accept="*/*"
    ))
    # A separate name for the file handle keeps the module instance ``f``
    # from being shadowed.
    with open("out_image.png", "wb") as out_file:
        out_file.write(out)
355
+
requirements.txt CHANGED
@@ -1,4 +1,3 @@
1
- git+https://github.com/aredden/torch-cublas-hgemm.git@master
2
  einops
3
  PyTurboJPEG
4
  pydantic
@@ -13,4 +12,4 @@ accelerate
13
  quanto
14
  pydash
15
  pybase64
16
- uvicorn
 
 
1
  einops
2
  PyTurboJPEG
3
  pydantic
 
12
  quanto
13
  pydash
14
  pybase64
15
+ uvicorn
t2i_output.jpg ADDED
t5-v1_1-xxl-encoder-bf16/LICENSE ADDED
@@ -0,0 +1,237 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Apache License 2.0
3
+ spdx-id: Apache-2.0
4
+ redirect_from: /licenses/apache/
5
+ featured: true
6
+ hidden: false
7
+
8
+ description: A permissive license whose main conditions require preservation of copyright and license notices. Contributors provide an express grant of patent rights. Licensed works, modifications, and larger works may be distributed under different terms and without source code.
9
+
10
+ how: Create a text file (typically named LICENSE or LICENSE.txt) in the root of your source code and copy the text of the license into the file.
11
+
12
+ note: The Apache Software Foundation <a href="https://apache.org/foundation/license-faq.html#Apply-My-Software">recommends</a> taking the additional step of adding a boilerplate notice to the header of each source file. You can find the notice in the appendix at the very end of the license text.
13
+
14
+ using:
15
+ Kubernetes: https://github.com/kubernetes/kubernetes/blob/master/LICENSE
16
+ PDF.js: https://github.com/mozilla/pdf.js/blob/master/LICENSE
17
+ Swift: https://github.com/apple/swift/blob/main/LICENSE.txt
18
+
19
+ permissions:
20
+ - commercial-use
21
+ - modifications
22
+ - distribution
23
+ - patent-use
24
+ - private-use
25
+
26
+ conditions:
27
+ - include-copyright
28
+ - document-changes
29
+
30
+ limitations:
31
+ - trademark-use
32
+ - liability
33
+ - warranty
34
+
35
+ ---
36
+
37
+ Apache License
38
+ Version 2.0, January 2004
39
+ http://www.apache.org/licenses/
40
+
41
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
42
+
43
+ 1. Definitions.
44
+
45
+ "License" shall mean the terms and conditions for use, reproduction,
46
+ and distribution as defined by Sections 1 through 9 of this document.
47
+
48
+ "Licensor" shall mean the copyright owner or entity authorized by
49
+ the copyright owner that is granting the License.
50
+
51
+ "Legal Entity" shall mean the union of the acting entity and all
52
+ other entities that control, are controlled by, or are under common
53
+ control with that entity. For the purposes of this definition,
54
+ "control" means (i) the power, direct or indirect, to cause the
55
+ direction or management of such entity, whether by contract or
56
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
57
+ outstanding shares, or (iii) beneficial ownership of such entity.
58
+
59
+ "You" (or "Your") shall mean an individual or Legal Entity
60
+ exercising permissions granted by this License.
61
+
62
+ "Source" form shall mean the preferred form for making modifications,
63
+ including but not limited to software source code, documentation
64
+ source, and configuration files.
65
+
66
+ "Object" form shall mean any form resulting from mechanical
67
+ transformation or translation of a Source form, including but
68
+ not limited to compiled object code, generated documentation,
69
+ and conversions to other media types.
70
+
71
+ "Work" shall mean the work of authorship, whether in Source or
72
+ Object form, made available under the License, as indicated by a
73
+ copyright notice that is included in or attached to the work
74
+ (an example is provided in the Appendix below).
75
+
76
+ "Derivative Works" shall mean any work, whether in Source or Object
77
+ form, that is based on (or derived from) the Work and for which the
78
+ editorial revisions, annotations, elaborations, or other modifications
79
+ represent, as a whole, an original work of authorship. For the purposes
80
+ of this License, Derivative Works shall not include works that remain
81
+ separable from, or merely link (or bind by name) to the interfaces of,
82
+ the Work and Derivative Works thereof.
83
+
84
+ "Contribution" shall mean any work of authorship, including
85
+ the original version of the Work and any modifications or additions
86
+ to that Work or Derivative Works thereof, that is intentionally
87
+ submitted to Licensor for inclusion in the Work by the copyright owner
88
+ or by an individual or Legal Entity authorized to submit on behalf of
89
+ the copyright owner. For the purposes of this definition, "submitted"
90
+ means any form of electronic, verbal, or written communication sent
91
+ to the Licensor or its representatives, including but not limited to
92
+ communication on electronic mailing lists, source code control systems,
93
+ and issue tracking systems that are managed by, or on behalf of, the
94
+ Licensor for the purpose of discussing and improving the Work, but
95
+ excluding communication that is conspicuously marked or otherwise
96
+ designated in writing by the copyright owner as "Not a Contribution."
97
+
98
+ "Contributor" shall mean Licensor and any individual or Legal Entity
99
+ on behalf of whom a Contribution has been received by Licensor and
100
+ subsequently incorporated within the Work.
101
+
102
+ 2. Grant of Copyright License. Subject to the terms and conditions of
103
+ this License, each Contributor hereby grants to You a perpetual,
104
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
105
+ copyright license to reproduce, prepare Derivative Works of,
106
+ publicly display, publicly perform, sublicense, and distribute the
107
+ Work and such Derivative Works in Source or Object form.
108
+
109
+ 3. Grant of Patent License. Subject to the terms and conditions of
110
+ this License, each Contributor hereby grants to You a perpetual,
111
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
112
+ (except as stated in this section) patent license to make, have made,
113
+ use, offer to sell, sell, import, and otherwise transfer the Work,
114
+ where such license applies only to those patent claims licensable
115
+ by such Contributor that are necessarily infringed by their
116
+ Contribution(s) alone or by combination of their Contribution(s)
117
+ with the Work to which such Contribution(s) was submitted. If You
118
+ institute patent litigation against any entity (including a
119
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
120
+ or a Contribution incorporated within the Work constitutes direct
121
+ or contributory patent infringement, then any patent licenses
122
+ granted to You under this License for that Work shall terminate
123
+ as of the date such litigation is filed.
124
+
125
+ 4. Redistribution. You may reproduce and distribute copies of the
126
+ Work or Derivative Works thereof in any medium, with or without
127
+ modifications, and in Source or Object form, provided that You
128
+ meet the following conditions:
129
+
130
+ (a) You must give any other recipients of the Work or
131
+ Derivative Works a copy of this License; and
132
+
133
+ (b) You must cause any modified files to carry prominent notices
134
+ stating that You changed the files; and
135
+
136
+ (c) You must retain, in the Source form of any Derivative Works
137
+ that You distribute, all copyright, patent, trademark, and
138
+ attribution notices from the Source form of the Work,
139
+ excluding those notices that do not pertain to any part of
140
+ the Derivative Works; and
141
+
142
+ (d) If the Work includes a "NOTICE" text file as part of its
143
+ distribution, then any Derivative Works that You distribute must
144
+ include a readable copy of the attribution notices contained
145
+ within such NOTICE file, excluding those notices that do not
146
+ pertain to any part of the Derivative Works, in at least one
147
+ of the following places: within a NOTICE text file distributed
148
+ as part of the Derivative Works; within the Source form or
149
+ documentation, if provided along with the Derivative Works; or,
150
+ within a display generated by the Derivative Works, if and
151
+ wherever such third-party notices normally appear. The contents
152
+ of the NOTICE file are for informational purposes only and
153
+ do not modify the License. You may add Your own attribution
154
+ notices within Derivative Works that You distribute, alongside
155
+ or as an addendum to the NOTICE text from the Work, provided
156
+ that such additional attribution notices cannot be construed
157
+ as modifying the License.
158
+
159
+ You may add Your own copyright statement to Your modifications and
160
+ may provide additional or different license terms and conditions
161
+ for use, reproduction, or distribution of Your modifications, or
162
+ for any such Derivative Works as a whole, provided Your use,
163
+ reproduction, and distribution of the Work otherwise complies with
164
+ the conditions stated in this License.
165
+
166
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
167
+ any Contribution intentionally submitted for inclusion in the Work
168
+ by You to the Licensor shall be under the terms and conditions of
169
+ this License, without any additional terms or conditions.
170
+ Notwithstanding the above, nothing herein shall supersede or modify
171
+ the terms of any separate license agreement you may have executed
172
+ with Licensor regarding such Contributions.
173
+
174
+ 6. Trademarks. This License does not grant permission to use the trade
175
+ names, trademarks, service marks, or product names of the Licensor,
176
+ except as required for reasonable and customary use in describing the
177
+ origin of the Work and reproducing the content of the NOTICE file.
178
+
179
+ 7. Disclaimer of Warranty. Unless required by applicable law or
180
+ agreed to in writing, Licensor provides the Work (and each
181
+ Contributor provides its Contributions) on an "AS IS" BASIS,
182
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
183
+ implied, including, without limitation, any warranties or conditions
184
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
185
+ PARTICULAR PURPOSE. You are solely responsible for determining the
186
+ appropriateness of using or redistributing the Work and assume any
187
+ risks associated with Your exercise of permissions under this License.
188
+
189
+ 8. Limitation of Liability. In no event and under no legal theory,
190
+ whether in tort (including negligence), contract, or otherwise,
191
+ unless required by applicable law (such as deliberate and grossly
192
+ negligent acts) or agreed to in writing, shall any Contributor be
193
+ liable to You for damages, including any direct, indirect, special,
194
+ incidental, or consequential damages of any character arising as a
195
+ result of this License or out of the use or inability to use the
196
+ Work (including but not limited to damages for loss of goodwill,
197
+ work stoppage, computer failure or malfunction, or any and all
198
+ other commercial damages or losses), even if such Contributor
199
+ has been advised of the possibility of such damages.
200
+
201
+ 9. Accepting Warranty or Additional Liability. While redistributing
202
+ the Work or Derivative Works thereof, You may choose to offer,
203
+ and charge a fee for, acceptance of support, warranty, indemnity,
204
+ or other liability obligations and/or rights consistent with this
205
+ License. However, in accepting such obligations, You may act only
206
+ on Your own behalf and on Your sole responsibility, not on behalf
207
+ of any other Contributor, and only if You agree to indemnify,
208
+ defend, and hold each Contributor harmless for any liability
209
+ incurred by, or claims asserted against, such Contributor by reason
210
+ of your accepting any such warranty or additional liability.
211
+
212
+ END OF TERMS AND CONDITIONS
213
+
214
+ APPENDIX: How to apply the Apache License to your work.
215
+
216
+ To apply the Apache License to your work, attach the following
217
+ boilerplate notice, with the fields enclosed by brackets "[]"
218
+ replaced with your own identifying information. (Don't include
219
+ the brackets!) The text should be enclosed in the appropriate
220
+ comment syntax for the file format. We also recommend that a
221
+ file or class name and description of purpose be included on the
222
+ same "printed page" as the copyright notice for easier
223
+ identification within third-party archives.
224
+
225
+ Copyright [yyyy] [name of copyright owner]
226
+
227
+ Licensed under the Apache License, Version 2.0 (the "License");
228
+ you may not use this file except in compliance with the License.
229
+ You may obtain a copy of the License at
230
+
231
+ http://www.apache.org/licenses/LICENSE-2.0
232
+
233
+ Unless required by applicable law or agreed to in writing, software
234
+ distributed under the License is distributed on an "AS IS" BASIS,
235
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
236
+ See the License for the specific language governing permissions and
237
+ limitations under the License.
t5-v1_1-xxl-encoder-bf16/README.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ A single-file safetensors version of Google's T5 v1.1 XXL encoder model in bfloat16 precision.
2
+
3
+ Intended to be used with text-to-image models such as PixArt.
t5-v1_1-xxl-encoder-bf16/config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "city96/t5-v1_1-xxl-encoder-bf16",
3
+ "architectures": [
4
+ "T5EncoderModel"
5
+ ],
6
+ "classifier_dropout": 0.0,
7
+ "d_ff": 10240,
8
+ "d_kv": 64,
9
+ "d_model": 4096,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "gelu_new",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 1,
14
+ "feed_forward_proj": "gated-gelu",
15
+ "initializer_factor": 1.0,
16
+ "is_encoder_decoder": true,
17
+ "is_gated_act": true,
18
+ "layer_norm_epsilon": 1e-06,
19
+ "model_type": "t5",
20
+ "num_decoder_layers": 24,
21
+ "num_heads": 64,
22
+ "num_layers": 24,
23
+ "output_past": true,
24
+ "pad_token_id": 0,
25
+ "relative_attention_max_distance": 128,
26
+ "relative_attention_num_buckets": 32,
27
+ "tie_word_embeddings": false,
28
+ "torch_dtype": "bfloat16",
29
+ "transformers_version": "4.40.1",
30
+ "use_cache": true,
31
+ "vocab_size": 32128
32
+ }
t5-v1_1-xxl-encoder-bf16/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:565cb2487351282e8e4dbeb88e63f4ad28217ce0439f5a8e6525a924807d2d9b
3
+ size 9524648592
t5-v1_1-xxl-encoder-bf16/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "additional_special_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>", "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>", "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>", "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>", "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>", "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>", "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>", "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>", "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>", "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>"]}
t5-v1_1-xxl-encoder-bf16/spiece.model ADDED
Binary file (792 kB). View file
 
t5-v1_1-xxl-encoder-bf16/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "extra_ids": 100, "additional_special_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>", "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>", "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>", "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>", "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>", "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>", "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>", "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>", "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>", "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>"], "model_max_length": 512, "name_or_path": "t5-small"}