Orion-zhen committed on
Commit
6a06ad0
·
verified ·
1 Parent(s): 5443589

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -0
app.py CHANGED
@@ -1,11 +1,35 @@
 
 
 
1
  import gradio as gr
 
 
 
 
 
2
  from huggingface_hub import InferenceClient
 
3
 
4
  """
5
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
  """
 
 
7
  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
 
 
 
 
 
 
 
 
 
 
 
8
 
 
9
 
10
  def respond(
11
  message,
 
1
+ import json
2
+ import spaces
3
+ import subprocess
4
  import gradio as gr
5
+ from llama_cpp import Llama
6
+ from llama_cpp_agent.chat_history.messages import Roles
7
+ from llama_cpp_agent.chat_history import BasicChatHistory
8
+ from llama_cpp_agent.providers import LlamaCppPythonProvider
9
+ from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
10
  from huggingface_hub import InferenceClient
11
+ from huggingface_hub import hf_hub_download
12
 
13
  """
14
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
15
  """
16
# System prompt steering the local llama.cpp model; answers are forced to Chinese.
SYSTEM = "You are a helpful math assistant. You should always provide your answer in Chinese."

# Hosted-inference client (pre-existing remote fallback; kept for callers that use it).
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

# Fetch the quantized GGUF weights once at startup. hf_hub_download returns the
# resolved local file path — bind and reuse it instead of re-deriving the path
# by hand, so the download location and the load location can never diverge.
model_path = hf_hub_download(
    repo_id="bartowski/Qwen2.5-Math-7B-Instruct-GGUF",
    filename="Qwen2.5-Math-7B-Instruct-Q8_0.gguf",
    local_dir="./models",
)

# Load the local model with an 8K-token context window and batched prompt
# processing; flash_attn enables flash attention on supported backends.
llm = Llama(
    model_path=model_path,
    flash_attn=True,
    n_ctx=8192,
    n_batch=1024,
)

# Provider adapter that lets llama_cpp_agent drive the raw Llama instance.
provider = LlamaCppPythonProvider(llm)
33
 
34
  def respond(
35
  message,