Spaces:
Running
Running
ffreemt
committed on
Commit
·
bd9d50e
1
Parent(s):
e4b455b
Update
Browse files- .ruff.toml +21 -0
- app.py +8 -2
.ruff.toml
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Assume Python 3.10.
|
| 2 |
+
target-version = "py310"
|
| 3 |
+
# Raise the maximum line length to 300 characters.
|
| 4 |
+
line-length = 300
|
| 5 |
+
|
| 6 |
+
# pyflakes, pycodestyle, isort
|
| 7 |
+
# flake8 YTT, pydocstyle D, pylint PLC
|
| 8 |
+
select = ["F", "E", "W", "I001", "YTT", "D", "PLC"]
|
| 9 |
+
# select = ["ALL"]
|
| 10 |
+
|
| 11 |
+
# E501 Line too long
|
| 12 |
+
# D102 Missing docstring in public method
|
| 13 |
+
# D100 Missing docstring in public module
|
| 14 |
+
# E501 Line too long
|
| 15 |
+
# D103 Missing docstring in public function
|
| 16 |
+
# D101 Missing docstring in public class
|
| 17 |
+
# `multi-line-summary-first-line` (D212)
|
| 18 |
+
# `one-blank-line-before-class` (D203)
|
| 19 |
+
extend-ignore = ["E501", "D100", "D101", "D102", "D103", "D212", "D203"]
|
| 20 |
+
|
| 21 |
+
exclude = [".venv"]
|
app.py
CHANGED
|
@@ -186,7 +186,7 @@ mpt-30b-chat.ggmlv0.q5_0.bin q5_0 5 20.60 GB 23.10 GB
|
|
| 186 |
mpt-30b-chat.ggmlv0.q5_1.bin q5_1 5 22.47 GB 24.97 GB
|
| 187 |
mpt-30b-chat.ggmlv0.q8_0.bin q8_0 8 31.83 GB 34.33 GB
|
| 188 |
"""
|
| 189 |
-
MODEL_FILENAME = "
|
| 190 |
MODEL_FILENAME = "WizardCoder-15B-1.0.ggmlv3.q4_0.bin" # 10.7G
|
| 191 |
MODEL_FILENAME = "WizardCoder-15B-1.0.ggmlv3.q4_1.bin" # 11.9G
|
| 192 |
DESTINATION_FOLDER = "models"
|
|
@@ -400,4 +400,10 @@ with gr.Blocks(
|
|
| 400 |
|
| 401 |
# concurrency_count=5, max_size=20
|
| 402 |
# max_size=36, concurrency_count=14
|
| 403 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
mpt-30b-chat.ggmlv0.q5_1.bin q5_1 5 22.47 GB 24.97 GB
|
| 187 |
mpt-30b-chat.ggmlv0.q8_0.bin q8_0 8 31.83 GB 34.33 GB
|
| 188 |
"""
|
| 189 |
+
MODEL_FILENAME = "mpt-30b-chat.ggmlv0.q4_1.bin"
|
| 190 |
MODEL_FILENAME = "WizardCoder-15B-1.0.ggmlv3.q4_0.bin" # 10.7G
|
| 191 |
MODEL_FILENAME = "WizardCoder-15B-1.0.ggmlv3.q4_1.bin" # 11.9G
|
| 192 |
DESTINATION_FOLDER = "models"
|
|
|
|
| 400 |
|
| 401 |
# concurrency_count=5, max_size=20
|
| 402 |
# max_size=36, concurrency_count=14
|
| 403 |
+
|
| 404 |
+
# concurrency_count > 1 requires more memory; max_size is the queue size
|
| 405 |
+
# T4 medium: 30GB, model size: ~6G, concurrency_count = 3
|
| 406 |
+
# leave one for api access
|
| 407 |
+
# reduce to 3 if OOM
|
| 408 |
+
|
| 409 |
+
block.queue(concurrency_count=1, max_size=5).launch(debug=True)
|