Tijmen2 committed on
Commit
eaf8443
·
verified ·
1 Parent(s): 3a2a3f4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -12
app.py CHANGED
@@ -4,19 +4,23 @@ from llama_cpp import Llama
4
  from huggingface_hub import hf_hub_download
5
  import random
6
 
7
- # Initialize model
8
- model_path = hf_hub_download(
9
- repo_id="AstroMLab/AstroSage-8B-GGUF",
10
- filename="AstroSage-8B-Q8_0.gguf"
11
- )
 
 
 
 
 
 
 
 
 
 
12
 
13
- llm = Llama(
14
- model_path=model_path,
15
- n_ctx=2048,
16
- chat_format="llama-3",
17
- n_gpu_layers=-1, # ensure all layers are on GPU
18
- flash_attn=True,
19
- )
20
 
21
  # Placeholder responses for when context is empty
22
  GREETING_MESSAGES = [
 
4
  from huggingface_hub import hf_hub_download
5
  import random
6
 
7
+ @spaces.GPU
8
+ def initialize_model():
9
+ model_path = hf_hub_download(
10
+ repo_id="AstroMLab/AstroSage-8B-GGUF",
11
+ filename="AstroSage-8B-Q8_0.gguf"
12
+ )
13
+
14
+ llm = Llama(
15
+ model_path=model_path,
16
+ n_ctx=2048,
17
+ chat_format="llama-3",
18
+ n_gpu_layers=-1, # ensure all layers are on GPU
19
+ flash_attn=True,
20
+ )
21
+ return llm
22
 
23
+ llm = initialize_model()
 
 
 
 
 
 
24
 
25
  # Placeholder responses for when context is empty
26
  GREETING_MESSAGES = [