Tijmen2 committed
Commit 034153d · verified · Parent(s): 7efaceb

Update app.py

Files changed (1): app.py (+4 -4)
app.py CHANGED

@@ -24,7 +24,7 @@ model = AutoModelForCausalLM.from_pretrained(
     device_map="auto",
     use_safetensors=True,
     trust_remote_code=True,
-    load_in_8bit=True,
+    load_in_4bit=True,
     torch_dtype=torch.bfloat16
 )
 
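This hunk swaps 8-bit for 4-bit bitsandbytes quantization, roughly halving weight memory again. For context, a minimal sketch of an equivalent load, assuming a bitsandbytes-capable GPU; recent transformers releases prefer an explicit BitsAndBytesConfig over the bare load_in_4bit flag, and the model id shown is a placeholder since the diff does not include it:

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# 4-bit quantization config; bnb_4bit_compute_dtype keeps matmuls in
# bfloat16, matching the torch_dtype used elsewhere in app.py.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    "AstroMLab/AstroSage-8B",  # placeholder id; the diff does not show it
    device_map="auto",
    use_safetensors=True,
    trust_remote_code=True,
    quantization_config=quant_config,
)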
@@ -36,11 +36,13 @@ GREETING_MESSAGES = [
     "The universe awaits! I'm AstroSage. What astronomical wonders shall we discuss?",
 ]
 
+
 def format_message(role: str, content: str) -> str:
     """Format a single message according to Llama-3 chat template."""
     return f"<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>"
 
-def generate_text(prompt: str, history: list, max_new_tokens=512, temperature=0.7, top_p=0.95, top_k=50):
+
+def generate_text(prompt: str, history: list, max_new_tokens=512, temperature=0.7, top_p=0.95):
     """
     Generate a response using the transformer model with proper Llama-3 chat formatting.
     """
@@ -81,9 +83,7 @@ def generate_text(prompt: str, history: list, max_new_tokens=512, temperature=0.
         max_new_tokens=max_new_tokens,
         do_sample=True,
         top_p=top_p,
-        top_k=top_k,
         temperature=temperature,
-        num_beams=1,
     )
 
     # Generate the response in a separate thread for streaming
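This hunk drops top_k and the redundant num_beams=1 (beam search is already off when do_sample=True), leaving nucleus sampling controlled by top_p and temperature alone. For context, a minimal sketch of how the streamed generation mentioned in the trailing comment is commonly wired up with transformers' TextIteratorStreamer; the stream_reply name and the yield-based plumbing are assumptions, since the diff does not show that part of app.py:

from threading import Thread
from transformers import TextIteratorStreamer

def stream_reply(model, tokenizer, inputs, max_new_tokens=512, temperature=0.7, top_p=0.95):
    """Yield the model's reply incrementally (hypothetical sketch of generate_text's tail)."""
    # skip_prompt=True omits the echoed input from the streamed output.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,                      # tokenized prompt tensors
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        top_p=top_p,
        temperature=temperature,
    )
    # model.generate blocks, so run it in a worker thread and consume
    # the streamer on the caller's side as tokens arrive.
    Thread(target=model.generate, kwargs=generation_kwargs).start()
    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial  # hand the growing reply to the UI chunk by chunk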