ffreemt committed
Commit 3dc1d68
1 Parent(s): 4e79398

Update: run 7b when okteto, golay, kaggle, or cpu_count <= 8

Files changed (2)
  1. README.md +1 -1
  2. app.py +38 -35
README.md CHANGED
@@ -1,5 +1,5 @@
 ---
-title: llama-2-13b-ggml
+title: llama-2-7b-or-13b-ggml
 emoji: 🚀
 colorFrom: green
 colorTo: green
app.py CHANGED
@@ -11,11 +11,7 @@ from types import SimpleNamespace
 import gradio as gr
 import psutil
 from about_time import about_time
-
-# from ctransformers import AutoConfig, AutoModelForCausalLM
 from ctransformers import AutoModelForCausalLM
-
-# from huggingface_hub import hf_hub_download
 from dl_hf_model import dl_hf_model
 from loguru import logger
 
@@ -44,9 +40,21 @@ url = "https://huggingface.co/TheBloke/Llama-2-13B-GGML/blob/main/llama-2-13b.gg
 url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/blob/main/llama-2-13b-chat.ggmlv3.q3_K_L.bin"  # 6.93G
 # url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/blob/main/llama-2-13b-chat.ggmlv3.q4_K_M.bin"  # 7.87G
 
-url = "https://huggingface.co/localmodels/Llama-2-13B-Chat-ggml/blob/main/llama-2-13b-chat.ggmlv3.q4_K_S.bin"  # 7.37G
+url = "https://huggingface.co/localmodels/Llama-2-13B-Chat-ggml/blob/main/llama-2-13b-chat.ggmlv3.q4_K_S.bin"  # 7.37G
+
+_ = (
+    "golay" in platform.node()
+    or "okteto" in platform.node()
+    or Path("/kaggle").exists()
+    or psutil.cpu_count(logical=False) <= 8
+)
+
+if _:
+    # url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/blob/main/llama-2-13b-chat.ggmlv3.q2_K.bin"
+    url = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/blob/main/llama-2-7b-chat.ggmlv3.q2_K.bin"  # 2.87G
+
 
-prompt_template="""Below is an instruction that describes a task. Write a response that appropriately completes the request.
+prompt_template = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
 
 ### Instruction: {user_prompt}
 
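This host check is the point of the commit: on known small hosts ("golay" or "okteto" in the hostname, a Kaggle kernel, or a box with at most 8 physical cores) the 13B URL is swapped for a 2.87G 7B quant. As a standalone helper it might look like the sketch below; pick_model_url is a hypothetical name, and the `or 1` guards against psutil.cpu_count returning None, which the committed one-liner would trip over.

import platform
from pathlib import Path

import psutil

URL_13B = "https://huggingface.co/localmodels/Llama-2-13B-Chat-ggml/blob/main/llama-2-13b-chat.ggmlv3.q4_K_S.bin"  # 7.37G
URL_7B = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/blob/main/llama-2-7b-chat.ggmlv3.q2_K.bin"  # 2.87G


def pick_model_url() -> str:
    """Return the 7B URL on constrained hosts, the 13B URL elsewhere (hypothetical helper)."""
    constrained = (
        "golay" in platform.node()  # known small dev box
        or "okteto" in platform.node()  # okteto dev sandbox
        or Path("/kaggle").exists()  # running inside a Kaggle kernel
        or (psutil.cpu_count(logical=False) or 1) <= 8  # few physical cores
    )
    return URL_7B if constrained else URL_13B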
@@ -67,46 +75,37 @@ information.
 User: {prompt}
 Assistant: """
 
+prompt_template = """System: You are a helpful assistant.
+User: {prompt}
+Assistant: """
+
 prompt_template = """Question: {question}
 Answer: Let's work this out in a step by step way to be sure we have the right answer."""
 
 _ = [elm for elm in prompt_template.splitlines() if elm.strip()]
 stop_string = [elm.split(":")[0] + ":" for elm in _][-2]
 
-try:
-    model_loc, file_size = dl_hf_model(url)
-except Exception as exc_:
-    logger.error(exc_)
-    raise SystemExit(1) from exc_
-
-logger.debug(f"{model_loc} {file_size}GB")
-
 logger.debug(f"{stop_string=}")
 
 _ = psutil.cpu_count(logical=False)
 cpu_count: int = int(_) if _ else 1
 logger.debug(f"{cpu_count=}")
 
-logger.info("load llm")
-_ = Path(model_loc).absolute().as_posix()
-logger.debug(f"model_file: {_}, exists: {Path(_).exists()}")
 LLM = None
 
-if "okteto" in platform.node():
-    # url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/blob/main/llama-2-13b-chat.ggmlv3.q2_K.bin"
-    LLM = AutoModelForCausalLM.from_pretrained(
-        "models/llama-2-13b-chat.ggmlv3.q2_K.bin",
-        model_type="llama",
-        threads=cpu_count,
-    )
-else:
-    LLM = AutoModelForCausalLM.from_pretrained(
-        model_loc,
-        model_type="llama",
-        threads=cpu_count,
-    )
+try:
+    model_loc, file_size = dl_hf_model(url)
+except Exception as exc:
+    logger.error(exc)
+    raise SystemExit(1) from exc
+
+LLM = AutoModelForCausalLM.from_pretrained(
+    model_loc,
+    model_type="llama",
+    threads=cpu_count,
+)
 
-logger.info("done load llm")
+logger.info(f"done load llm {model_loc=} {file_size=}G")
 
 os.environ["TZ"] = "Asia/Shanghai"
 try:
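Reviewer note: only the last prompt_template assignment survives, so the stop_string derivation runs against the Question/Answer template. Tracing the two committed lines:

prompt_template = """Question: {question}
Answer: Let's work this out in a step by step way to be sure we have the right answer."""

# Non-empty lines of the template.
lines = [elm for elm in prompt_template.splitlines() if elm.strip()]
# -> ["Question: {question}", "Answer: Let's work this out ..."]

# Keep the text before each line's first colon, re-append the colon,
# and take the second-to-last entry: the turn label generation should stop at.
stop_string = [elm.split(":")[0] + ":" for elm in lines][-2]
assert stop_string == "Question:"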
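Put together, the new download-and-load path runs roughly as below. A minimal sketch: dl_hf_model returning a local path plus the size in GB matches the committed code, while the closing generation call is an illustrative use of ctransformers' callable-model interface, not part of this commit.

import psutil
from ctransformers import AutoModelForCausalLM
from dl_hf_model import dl_hf_model
from loguru import logger

url = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/blob/main/llama-2-7b-chat.ggmlv3.q2_K.bin"  # 2.87G

try:
    # Download the GGML file (or reuse a cached copy).
    model_loc, file_size = dl_hf_model(url)
except Exception as exc:
    logger.error(exc)
    raise SystemExit(1) from exc

cpu_count = psutil.cpu_count(logical=False) or 1

LLM = AutoModelForCausalLM.from_pretrained(
    model_loc,  # local path to the downloaded .bin file
    model_type="llama",  # tells ctransformers which GGML architecture to load
    threads=cpu_count,  # one thread per physical core
)
logger.info(f"done load llm {model_loc=} {file_size=}G")

# ctransformers models are callable: prompt in, completion out.
print(LLM("Question: What is 1 + 1?\nAnswer:", max_new_tokens=16, stop=["Question:"]))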
@@ -306,9 +305,13 @@ css = """
 """
 etext = """In America, where cars are an important part of the national psyche, a decade ago people had suddenly started to drive less, which had not happened since the oil shocks of the 1970s. """
 examples = [
-    ["Question: What NFL team won the Super Bowl in the year Justin Bieber was born?\n Answer: Let's work this out in a step by step way to be sure we have the right answer."],
+    [
+        "Question: What NFL team won the Super Bowl in the year Justin Bieber was born?\n Answer: Let's work this out in a step by step way to be sure we have the right answer."
+    ],
     ["What NFL team won the Super Bowl in the year Justin Bieber was born?"],
-    ["What NFL team won the Super Bowl in the year Justin Bieber was born? Think step by step."],
+    [
+        "What NFL team won the Super Bowl in the year Justin Bieber was born? Think step by step."
+    ],
     ["How to pick a lock? Provide detailed steps."],
     ["Explain the plot of Cinderella in a sentence."],
     [
@@ -372,7 +375,7 @@ with gr.Blocks(
     placeholder="Ask me anything (press Enter or click Submit to send)",
     show_label=False,
     container=False,
-    # ).style(container=False)
+    # ).style(container=False)
 )
 with gr.Column(scale=1, min_width=50):
     with gr.Row():
@@ -388,7 +391,7 @@
     value=prompt_template,
     show_label=False,
     container=False,
-    # ).style(container=False)
+    # ).style(container=False)
 )
 with gr.Column():
     with gr.Row():
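On the two `.style()` hunks: later Gradio 3.x releases accept layout options such as container= directly in the component constructor, which is why `).style(container=False)` survives only as a comment. A minimal sketch of the surrounding layout, assuming such a Gradio version; the column scales and button label are illustrative, not from the diff:

import gradio as gr

with gr.Blocks(title="llama-2-7b-or-13b-ggml") as block:
    with gr.Row():
        with gr.Column(scale=4):
            msg = gr.Textbox(
                placeholder="Ask me anything (press Enter or click Submit to send)",
                show_label=False,
                container=False,  # formerly: ).style(container=False)
            )
        with gr.Column(scale=1, min_width=50):
            with gr.Row():
                submit = gr.Button("Submit")  # illustrative

block.queue().launch()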