imenayadi committed
Commit d1cacb1
1 Parent(s): c23a4d2

update llama model

Files changed (2):
  1. app.py +2 -2
  2. llama2_response_mail_generator.py +10 -0
app.py CHANGED
@@ -72,14 +72,14 @@ model_name_or_path = "TheBloke/Llama-2-13B-chat-GGML"
 model_basename = "llama-2-13b-chat.ggmlv3.q5_1.bin"  # The model is in bin format
 
 # Download the model file
+print('downloading llama model...')
 model_path_llama = hf_hub_download(repo_id=model_name_or_path, filename=model_basename, force_download=True)
-
+print('finished download...')
 # Initialize the Llama model with appropriate settings for GPU
 lcpp_llm = Llama(
     model_path=model_path_llama,
     n_threads=2,  # CPU cores to use
     n_batch=512,  # Batch size for processing; adjust as per your VRAM capacity
-    n_gpu_layers=32  # Number of layers to run on GPU, dependent on your GPU's VRAM
 )
 
 def generate_email_response(email_prompt):
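Note on the app.py change: removing `n_gpu_layers` from the `Llama(...)` call leaves it at llama-cpp-python's default of 0, so all layers run on the CPU. A minimal sketch of keeping the offload configurable instead, using a hypothetical `LLAMA_GPU_LAYERS` environment variable (not part of this repo):

import os
from llama_cpp import Llama

# LLAMA_GPU_LAYERS is a hypothetical env var for this sketch; the default of 0
# is CPU-only, which matches the behavior of the committed change.
n_gpu_layers = int(os.environ.get("LLAMA_GPU_LAYERS", "0"))

lcpp_llm = Llama(
    model_path=model_path_llama,
    n_threads=2,                # CPU cores to use
    n_batch=512,                # Batch size; adjust per your VRAM capacity
    n_gpu_layers=n_gpu_layers,  # 0 keeps everything on CPU, as in this commit
)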
llama2_response_mail_generator.py CHANGED
@@ -2,6 +2,16 @@ from huggingface_hub import hf_hub_download
 
 from llama_cpp import Llama
 
+
+
+# Initialize the Llama model with appropriate settings for GPU
+lcpp_llm = Llama(
+    model_path=model_path,
+    n_threads=2,  # CPU cores to use
+    n_batch=512,  # Batch size for processing; adjust as per your VRAM capacity
+    n_gpu_layers=32  # Number of layers to run on GPU, dependent on your GPU's VRAM
+)
+
 def generate_email_response(email_prompt):
     # Check input received by the function
     print("Received prompt:", email_prompt)