ffreemt committed
Commit 2327177
1 Parent(s): 7e27981

Update threads=psutil.cpu_count(logical=False)

Files changed (2):
  1. app.py +19 -36
  2. requirements.txt +2 -1
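
The substance of the commit: the `threads` argument passed to ctransformers now comes from `psutil.cpu_count(logical=False)` (physical cores) instead of `os.cpu_count() // 2` (half the logical CPUs). A minimal sketch of the difference — variable names here are illustrative, and note that psutil may return None when the physical count cannot be determined:

import os

import psutil

logical = os.cpu_count()                    # logical CPUs, hyper-threads included
physical = psutil.cpu_count(logical=False)  # physical cores; can be None

# on a typical 2-threads-per-core machine the two heuristics agree,
# but only psutil reports the real core count on other topologies
threads = physical or max((logical or 2) // 2, 1)
print(f"{logical=} {physical=} {threads=}")

On an 8-core/16-thread CPU both approaches yield 8; on a 4-core/4-thread CPU the old heuristic would have used only 2 of the 4 cores.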
app.py CHANGED

@@ -5,10 +5,11 @@ import os
 import time
 from dataclasses import asdict, dataclass
 from pathlib import Path
-from urllib.parse import urlparse
 from types import SimpleNamespace
+from urllib.parse import urlparse

 import gradio as gr
+import psutil
 from about_time import about_time

 # from ctransformers import AutoConfig, AutoModelForCausalLM
@@ -16,6 +17,12 @@ from ctransformers import AutoModelForCausalLM
 from huggingface_hub import hf_hub_download
 from loguru import logger

+URL = "https://huggingface.co/TheBloke/Wizard-Vicuna-7B-Uncensored-GGML/raw/main/Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_M.bin"
+MODEL_FILENAME = Path(URL).name
+REPO_ID = "/".join(urlparse(URL).path.strip('/').split('/')[:2])  # TheBloke/Wizard-Vicuna-7B-Uncensored-GGML
+
+DESTINATION_FOLDER = "models"
+
 os.environ["TZ"] = "Asia/Shanghai"
 try:
     time.tzset()  # type: ignore  # pylint: disable=no-member
@@ -230,36 +237,6 @@ def generate(
     **asdict(generation_config),
 )

-_ = '''
-_ = """full url: https://huggingface.co/TheBloke/mpt-30B-chat-GGML/blob/main/mpt-30b-chat.ggmlv0.q4_1.bin"""
-
-# https://huggingface.co/TheBloke/mpt-30B-chat-GGML
-_ = """
-mpt-30b-chat.ggmlv0.q4_0.bin  q4_0  4  16.85 GB  19.35 GB  4-bit.
-mpt-30b-chat.ggmlv0.q4_1.bin  q4_1  4  18.73 GB  21.23 GB  4-bit. Higher accuracy than q4_0 but not as high as q5_0. However has quicker inference than q5 models.
-mpt-30b-chat.ggmlv0.q5_0.bin  q5_0  5  20.60 GB  23.10 GB
-mpt-30b-chat.ggmlv0.q5_1.bin  q5_1  5  22.47 GB  24.97 GB
-mpt-30b-chat.ggmlv0.q8_0.bin  q8_0  8  31.83 GB  34.33 GB
-"""
-MODEL_FILENAME = "mpt-30b-chat.ggmlv0.q4_1.bin"
-MODEL_FILENAME = "WizardCoder-15B-1.0.ggmlv3.q4_0.bin"  # 10.7G
-MODEL_FILENAME = "WizardCoder-15B-1.0.ggmlv3.q4_1.bin"  # 11.9G
-MODEL_FILENAME = "WizardCoder-15B-1.0.ggmlv3.q4_1.bin"  # 11.9G
-
-# https://huggingface.co/TheBloke/WizardLM-13B-V1.0-Uncensored-GGML
-MODEL_FILENAME = "wizardlm-13b-v1.0-uncensored.ggmlv3.q4_1.bin"  # 8.4G
-# '''
-
-URL = "https://huggingface.co/TheBloke/Wizard-Vicuna-7B-Uncensored-GGML/raw/main/Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_M.bin"
-MODEL_FILENAME = Path(URL).name
-REPO_ID = "/".join(urlparse(url).path.strip('/').split('/')[:2])  # TheBloke/Wizard-Vicuna-7B-Uncensored-GGML
-
-DESTINATION_FOLDER = "models"
-
-logger.info(f"start dl, {REPO_ID=}, {MODEL_FILENAME=}, {DESTINATION_FOLDER=}")
-download_quant(DESTINATION_FOLDER, REPO_ID, MODEL_FILENAME)
-logger.info("done dl")
-
 # if "mpt" in model_filename:
 # config = AutoConfig.from_pretrained("mosaicml/mpt-30b-chat", context_length=8192)
 # llm = AutoModelForCausalLM.from_pretrained(
@@ -278,7 +255,16 @@ llm = AutoModelForCausalLM.from_pretrained(
 )
 # """

-logger.debug(f"{os.cpu_count()=}")
+logger.info(f"start dl, {REPO_ID=}, {MODEL_FILENAME=}, {DESTINATION_FOLDER=}")
+download_quant(DESTINATION_FOLDER, REPO_ID, MODEL_FILENAME)
+logger.info("done dl")
+
+logger.debug(f"{os.cpu_count()=} {psutil.cpu_count(logical=False)=}")
+cpu_count = os.cpu_count() // 2  # type: ignore
+cpu_count = psutil.cpu_count(logical=False)
+
+logger.debug(f"{cpu_count=}")
+
 logger.info("load llm")

 _ = Path("models", MODEL_FILENAME).absolute().as_posix()
@@ -288,14 +274,11 @@ LLM = AutoModelForCausalLM.from_pretrained(
     REPO_ID,  # DESTINATION_FOLDER,  # model_path_or_repo_id: str required
     model_file=_,
     model_type="llama",  # "starcoder", AutoConfig.from_pretrained("TheBloke/WizardLM-13B-V1.0-Uncensored-GGML")
-    threads=os.cpu_count() // 2,  # type: ignore
+    threads=cpu_count,
 )

 logger.info("done load llm")

-cpu_count = os.cpu_count() // 2  # type: ignore
-logger.debug(f"{cpu_count=}")
-
 GENERATION_CONFIG = GenerationConfig(
     temperature=0.2,
     top_k=0,
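
Moving the URL/REPO_ID block to module top level also fixes a case mismatch in the old code, which called `urlparse(url)` (lowercase) while the variable was named `URL`. A quick check of the repo-id derivation, runnable as-is:

from pathlib import Path
from urllib.parse import urlparse

URL = "https://huggingface.co/TheBloke/Wizard-Vicuna-7B-Uncensored-GGML/raw/main/Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_M.bin"

# the first two path segments of a Hub file URL form the repo id
repo_id = "/".join(urlparse(URL).path.strip("/").split("/")[:2])
model_filename = Path(URL).name

assert repo_id == "TheBloke/Wizard-Vicuna-7B-Uncensored-GGML"
assert model_filename == "Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_M.bin"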
requirements.txt CHANGED

@@ -3,4 +3,5 @@ transformers==4.30.2
 huggingface_hub
 gradio
 loguru
-about-time
+about-time
+psutil
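
psutil enters requirements to support the new physical-core probe. For context, `download_quant` is defined elsewhere in app.py and is not part of this diff; a plausible minimal shape, assuming it simply wraps `hf_hub_download` from huggingface_hub (a hypothetical sketch, not the author's actual implementation), might be:

from huggingface_hub import hf_hub_download

def download_quant(destination_folder: str, repo_id: str, model_filename: str) -> str:
    # hypothetical sketch: fetch one quantized GGML file from the Hub
    # into a local folder, returning the local file path
    return hf_hub_download(
        repo_id=repo_id,
        filename=model_filename,
        local_dir=destination_folder,
    )

The call site in app.py, `download_quant(DESTINATION_FOLDER, REPO_ID, MODEL_FILENAME)`, fixes the argument order this sketch assumes.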