SMeyersMrOvkill committed
Commit 9cf6b89
1 Parent(s): f2fbba5
Files changed (2):
  1. app.py +5 -3
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,11 +1,11 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
+from llama_cpp import Llama
 
 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
+llm = Llama.from_pretrained("bartowski/starcoder2-15b-instruct-v0.1-GGUF", filename="starcoder2-15b-instruct-v0.1-Q6_K.gguf", n_gpu_layers=99, n_ctx=16384)
 
 def respond(
     message,
@@ -14,6 +14,7 @@ def respond(
     max_tokens,
     temperature,
     top_p,
+    top_k
 ):
     messages = [{"role": "system", "content": system_message}]
 
@@ -27,12 +28,13 @@ def respond(
 
     response = ""
 
-    for message in client.chat_completion(
+    for message in llm.create_chat_completion(
         messages,
         max_tokens=max_tokens,
         stream=True,
        temperature=temperature,
         top_p=top_p,
+        top_k=42,
     ):
         token = message.choices[0].delta.content
 
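For context, here is a minimal, self-contained sketch of the streaming path this commit switches to, assuming the llama-cpp-python chat-completion API and the model repo/filename from the diff (the messages below are placeholders). One difference worth noting: with stream=True, llama-cpp-python yields plain dicts, so the delta is read with dict keys rather than the attribute access (message.choices[0].delta.content) carried over from the old InferenceClient loop.

# Sketch only: how the new llama.cpp-backed streaming loop behaves.
# Repo, filename, and load params come from this commit; the prompt is a placeholder.
from llama_cpp import Llama

llm = Llama.from_pretrained(
    "bartowski/starcoder2-15b-instruct-v0.1-GGUF",
    filename="starcoder2-15b-instruct-v0.1-Q6_K.gguf",
    n_gpu_layers=99,  # offload all layers to the GPU when a GPU build is available
    n_ctx=16384,      # context window size
)

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Say hello."},
]

response = ""
for chunk in llm.create_chat_completion(
    messages,
    max_tokens=128,
    stream=True,
    temperature=0.7,
    top_p=0.95,
    top_k=42,
):
    # Streamed chunks are dicts; the first and last deltas may carry no "content".
    token = chunk["choices"][0]["delta"].get("content")
    if token:
        response += token
print(response)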
requirements.txt CHANGED
@@ -1 +1,2 @@
-huggingface_hub==0.22.2
+huggingface_hub==0.22.2
+llama-cpp-python
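One caveat on this dependency change: llama-cpp-python is added unpinned, so rebuilds of the Space will pick up whatever version pip resolves at the time; pinning a known-good version would be safer. The package also compiles llama.cpp at install time, and the n_gpu_layers=99 offload in app.py only takes effect if the wheel was built with GPU (e.g. CUDA) support.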