ffreemt committed on
Commit
bd9d50e
1 Parent(s): e4b455b
Files changed (2) hide show
  1. .ruff.toml +21 -0
  2. app.py +8 -2
.ruff.toml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Assume Python 3.10.
2
+ target-version = "py310"
3
+ # Raise the maximum line length to 300 characters (E501 is ignored below anyway).
4
+ line-length = 300
5
+
6
+ # pyflakes, pycodestyle, isort
7
+ # flake8 YTT, pydocstyle D, pylint PLC
8
+ select = ["F", "E", "W", "I001", "YTT", "D", "PLC"]
9
+ # select = ["ALL"]
10
+
11
+ # E501 Line too long
12
+ # D102 Missing docstring in public method
13
+ # D100 Missing docstring in public module
14
+ # E501 Line too long
15
+ # D103 Missing docstring in public function
16
+ # D101 Missing docstring in public class
17
+ # `multi-line-summary-first-line` (D212)
18
+ # `one-blank-line-before-class` (D203)
19
+ extend-ignore = ["E501", "D100", "D101", "D102", "D103", "D212", "D203"]
20
+
21
+ exclude = [".venv"]
app.py CHANGED
@@ -186,7 +186,7 @@ mpt-30b-chat.ggmlv0.q5_0.bin q5_0 5 20.60 GB 23.10 GB
186
  mpt-30b-chat.ggmlv0.q5_1.bin q5_1 5 22.47 GB 24.97 GB
187
  mpt-30b-chat.ggmlv0.q8_0.bin q8_0 8 31.83 GB 34.33 GB
188
  """
189
- MODEL_FILENAME = "mpt-30b-chat.ggmlv0.q4_1.bin"
190
  MODEL_FILENAME = "WizardCoder-15B-1.0.ggmlv3.q4_0.bin" # 10.7G
191
  MODEL_FILENAME = "WizardCoder-15B-1.0.ggmlv3.q4_1.bin" # 11.9G
192
  DESTINATION_FOLDER = "models"
@@ -400,4 +400,10 @@ with gr.Blocks(
400
 
401
  # concurrency_count=5, max_size=20
402
  # max_size=36, concurrency_count=14
403
- block.queue(concurrency_count=5, max_size=20).launch(debug=True)
 
 
 
 
 
 
 
186
  mpt-30b-chat.ggmlv0.q5_1.bin q5_1 5 22.47 GB 24.97 GB
187
  mpt-30b-chat.ggmlv0.q8_0.bin q8_0 8 31.83 GB 34.33 GB
188
  """
189
+ MODEL_FILENAME = "m pt-30b-chat.ggmlv0.q4_1.bin"
190
  MODEL_FILENAME = "WizardCoder-15B-1.0.ggmlv3.q4_0.bin" # 10.7G
191
  MODEL_FILENAME = "WizardCoder-15B-1.0.ggmlv3.q4_1.bin" # 11.9G
192
  DESTINATION_FOLDER = "models"
 
400
 
401
  # concurrency_count=5, max_size=20
402
  # max_size=36, concurrency_count=14
403
+
404
+ # concurrency_count > 1 requires more memory; max_size is the queue size
405
+ # T4 medium: 30GB, model size: ~6G, concurrency_count = 3
406
+ # leave one for api access
407
+ # reduce to 3 if OOM
408
+
409
+ block.queue(concurrency_count=1, max_size=5).launch(debug=True)