GGUF
Inference Endpoints
vdpappu committed on
Commit
e217c4e
·
verified ·
1 Parent(s): 838cd74

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +43 -3
README.md CHANGED
@@ -1,3 +1,43 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ datasets:
4
+ - iamtarun/python_code_instructions_18k_alpaca
5
+ ---
6
+ **Usage**
7
+ ```python
8
+ from llama_cpp import Llama
9
+ from typing import Optional
10
+ import time
11
+ from huggingface_hub import hf_hub_download
12
+
13
+ def generate_prompt(input_text: str, instruction: Optional[str] = None) -> str:
14
+     text = f"### Question: {input_text}\n\n### Answer: "
15
+     if instruction:
16
+         text = f"### Instruction: {instruction}\n\n{text}"
17
+     return text
18
+
19
+ # Set up the parameters
20
+ repo_id = "vdpappu/gemma2_coding_assistant_gguf"
21
+ filename = "gemma2_coding.gguf"
22
+ local_dir = "."
23
+
24
+ downloaded_file_path = hf_hub_download(repo_id=repo_id, filename=filename, local_dir=local_dir)
25
+ print(f"File downloaded to: {downloaded_file_path}")
26
+
27
+ # Load the model
28
+ llm = Llama(model_path=downloaded_file_path)
29
+ question = "Develop a Python program to clearly understand the concept of recursion."
30
+ prompt = generate_prompt(input_text=question)
31
+
32
+ start = time.time()
33
+ output = llm(prompt,
34
+ temperature=0.7,
35
+ top_p=0.9,
36
+ top_k=50,
37
+ repeat_penalty=1.5,
38
+ max_tokens=200,
39
+ stop=["Question:","<eos>"])
40
+ end = time.time()
41
+ print(f"Inference time: {end-start:.2f} seconds \n")
42
+ print(output['choices'][0]['text'])
43
+ ```