Spaces:
Runtime error
Runtime error
ffreemt
committed on
Commit
·
bd9d50e
1
Parent(s):
e4b455b
Update
Browse files- .ruff.toml +21 -0
- app.py +8 -2
.ruff.toml
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Assume Python 3.10.
|
2 |
+
target-version = "py310"
|
3 |
+
# Set the maximum line length to 300 characters.
|
4 |
+
line-length = 300
|
5 |
+
|
6 |
+
# pyflakes, pycodestyle, isort
|
7 |
+
# flake8 YTT, pydocstyle D, pylint PLC
|
8 |
+
select = ["F", "E", "W", "I001", "YTT", "D", "PLC"]
|
9 |
+
# select = ["ALL"]
|
10 |
+
|
11 |
+
# E501 Line too long
|
12 |
+
# D102 Missing docstring in public method
|
13 |
+
# D100 Missing docstring in public module
|
14 |
+
# E501 Line too long
|
15 |
+
# D103 Missing docstring in public function
|
16 |
+
# D101 Missing docstring in public class
|
17 |
+
# `multi-line-summary-first-line` (D212)
|
18 |
+
# `one-blank-line-before-class` (D203)
|
19 |
+
extend-ignore = ["E501", "D100", "D101", "D102", "D103", "D212", "D203"]
|
20 |
+
|
21 |
+
exclude = [".venv"]
|
app.py
CHANGED
@@ -186,7 +186,7 @@ mpt-30b-chat.ggmlv0.q5_0.bin q5_0 5 20.60 GB 23.10 GB
|
|
186 |
mpt-30b-chat.ggmlv0.q5_1.bin q5_1 5 22.47 GB 24.97 GB
|
187 |
mpt-30b-chat.ggmlv0.q8_0.bin q8_0 8 31.83 GB 34.33 GB
|
188 |
"""
|
189 |
-
MODEL_FILENAME = "
|
190 |
MODEL_FILENAME = "WizardCoder-15B-1.0.ggmlv3.q4_0.bin" # 10.7G
|
191 |
MODEL_FILENAME = "WizardCoder-15B-1.0.ggmlv3.q4_1.bin" # 11.9G
|
192 |
DESTINATION_FOLDER = "models"
|
@@ -400,4 +400,10 @@ with gr.Blocks(
|
|
400 |
|
401 |
# concurrency_count=5, max_size=20
|
402 |
# max_size=36, concurrency_count=14
|
403 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
186 |
mpt-30b-chat.ggmlv0.q5_1.bin q5_1 5 22.47 GB 24.97 GB
|
187 |
mpt-30b-chat.ggmlv0.q8_0.bin q8_0 8 31.83 GB 34.33 GB
|
188 |
"""
|
189 |
+
MODEL_FILENAME = "mpt-30b-chat.ggmlv0.q4_1.bin"
|
190 |
MODEL_FILENAME = "WizardCoder-15B-1.0.ggmlv3.q4_0.bin" # 10.7G
|
191 |
MODEL_FILENAME = "WizardCoder-15B-1.0.ggmlv3.q4_1.bin" # 11.9G
|
192 |
DESTINATION_FOLDER = "models"
|
|
|
400 |
|
401 |
# concurrency_count=5, max_size=20
|
402 |
# max_size=36, concurrency_count=14
|
403 |
+
|
404 |
+
# concurrency_count > 1 requires more memory; max_size is the queue size
|
405 |
+
# T4 medium: 30GB, model size: ~6G concurrency_count = 3
|
406 |
+
# leave one for api access
|
407 |
+
# reduce to 3 if OOM
|
408 |
+
|
409 |
+
block.queue(concurrency_count=1, max_size=5).launch(debug=True)
|