lalanikarim committed
Commit e67a4d6
Parent: 1e62eb5

added documentation for GPU and Mac support

Files changed (1): main.py (+7 −1)
main.py CHANGED
@@ -38,19 +38,25 @@ def create_chain(system_prompt):
     # callback_manager = CallbackManager([stream_handler])
 
     (repo_id, model_file_name) = ("TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
-                                  "mistral-7b-instruct-v0.1.Q5_0.gguf")
+                                  "mistral-7b-instruct-v0.1.Q4_0.gguf")
 
     model_path = hf_hub_download(repo_id=repo_id,
                                  filename=model_file_name,
                                  repo_type="model")
 
     # initialize LlamaCpp llm model
+    # n_gpu_layers, n_batch, and n_ctx enable GPU support.
+    # When they are not set, the CPU is used.
+    # Set n_gpu_layers=1 for a Mac M2; use higher values based on your GPU.
     llm = LlamaCpp(
         model_path=model_path,
         temperature=0,
         max_tokens=512,
         top_p=1,
         # callback_manager=callback_manager,
+        # n_gpu_layers=1,
+        # n_batch=512,
+        # n_ctx=4096,
         verbose=False,
         streaming=True,
         stop=["Human:"]