Spaces:

mikeee
/

WizardCoder-15B-1.0-GGML

Runtime error

ffreemt committed on Jul 28, 2023

Commit

bd9d50e

1 Parent(s): e4b455b

Update

Files changed (2) hide show

.ruff.toml ADDED Viewed

+# Assume Python 3.10.
+target-version = "py310"
+# Set the maximum line length to 300 characters.
+line-length = 300
+# pyflakes, pycodestyle, isort
+# flake8 YTT, pydocstyle D, pylint PLC
+select = ["F", "E", "W", "I001", "YTT", "D", "PLC"]
+# select = ["ALL"]
+# E501 Line too long
+# D102 Missing docstring in public method
+# D100 Missing docstring in public module
+# E501 Line too long
+# D103 Missing docstring in public function
+# D101 Missing docstring in public class
+# `multi-line-summary-first-line` (D212)
+# `one-blank-line-before-class` (D203)
+extend-ignore = ["E501", "D100", "D101", "D102", "D103", "D212", "D203"]
+exclude = [".venv"]

app.py CHANGED Viewed

@@ -186,7 +186,7 @@ mpt-30b-chat.ggmlv0.q5_0.bin 	q5_0 	5 	20.60 GB 	23.10 GB
 mpt-30b-chat.ggmlv0.q5_1.bin 	q5_1 	5 	22.47 GB 	24.97 GB
 mpt-30b-chat.ggmlv0.q8_0.bin 	q8_0 	8 	31.83 GB 	34.33 GB
 """
-MODEL_FILENAME = "mpt-30b-chat.ggmlv0.q4_1.bin"
 MODEL_FILENAME = "WizardCoder-15B-1.0.ggmlv3.q4_0.bin"  # 10.7G
 MODEL_FILENAME = "WizardCoder-15B-1.0.ggmlv3.q4_1.bin"  # 11.9G
 DESTINATION_FOLDER = "models"
@@ -400,4 +400,10 @@ with gr.Blocks(
 # concurrency_count=5, max_size=20
 # max_size=36, concurrency_count=14
-block.queue(concurrency_count=5, max_size=20).launch(debug=True)

 mpt-30b-chat.ggmlv0.q5_1.bin 	q5_1 	5 	22.47 GB 	24.97 GB
 mpt-30b-chat.ggmlv0.q8_0.bin 	q8_0 	8 	31.83 GB 	34.33 GB
 """
+MODEL_FILENAME = "m pt-30b-chat.ggmlv0.q4_1.bin"
 MODEL_FILENAME = "WizardCoder-15B-1.0.ggmlv3.q4_0.bin"  # 10.7G
 MODEL_FILENAME = "WizardCoder-15B-1.0.ggmlv3.q4_1.bin"  # 11.9G
 DESTINATION_FOLDER = "models"
 # concurrency_count=5, max_size=20
 # max_size=36, concurrency_count=14
+# concurrency_count > 1 requires more memory; max_size is the queue size
+# T4 medium: 30GB, model size: ~6G; concurrency_count = 3
+# leave one for api access
+# reduce to 3 if OOM
+block.queue(concurrency_count=1, max_size=5).launch(debug=True)