zhangtao commited on
Commit
e7850f4
·
1 Parent(s): dc7222b

完成代码编写,尝试部署

Browse files
Files changed (3) hide show
  1. Dockerfile +16 -0
  2. app.py +26 -0
  3. requirements.txt +2 -0
# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
# you will also find guides on how best to write your Dockerfile

FROM python:3.10

WORKDIR /code

# Copy and install requirements first so this layer is cached when only
# the application code changes.
COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

COPY . .

# Download the GGUF model at build time. The URL must be quoted: the
# unquoted '?' in '?download=true' is a shell glob character under the
# 'RUN' shell form, and quoting keeps the query string out of globbing.
RUN wget "https://huggingface.co/TheBloke/NeuralHermes-2.5-Mistral-7B-GGUF/resolve/main/neuralhermes-2.5-mistral-7b.Q5_K_M.gguf?download=true" -O neuralhermes-2.5-mistral-7b.Q5_K_M.gguf

# Gradio serves on 7860 by default; Hugging Face Spaces expects this port.
EXPOSE 7860

CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import gradio as gr
from llama_cpp import Llama

# Load the GGUF model downloaded at image build time (see the Dockerfile's
# wget step, which saves it next to this script).
# chat_format="chatml" selects the ChatML prompt template, which is the
# format the NeuralHermes-2.5 model was trained with.
llm = Llama(model_path="./neuralhermes-2.5-mistral-7b.Q5_K_M.gguf",
            chat_format="chatml")
def chat_completion(messages, history, system_prompt):
    """Stream a chat completion from the local llama.cpp model.

    Gradio's ChatInterface calls this positionally as
    fn(new_user_message, history, *additional_inputs).

    Parameters
    ----------
    messages : str
        The new user message (kept under its original — misleading —
        parameter name for caller compatibility).
    history : list
        Prior turns. May be [user, assistant] pairs (classic gradio) or
        role/content dicts — TODO confirm against the installed gradio
        version; both shapes are handled below.
    system_prompt : str
        Text from the "System Prompt" Textbox additional input.

    Yields
    ------
    str
        The assistant reply so far, growing as stream chunks arrive.
    """
    # BUG FIX: the original rebound `messages` to the system-prompt list,
    # losing the user's input, and then appended the list itself as the
    # user-turn content. Capture the user text before building the list.
    user_message = messages
    chat = [{"role": "system", "content": system_prompt}]
    for turn in history:
        if isinstance(turn, dict):
            # Already an OpenAI-style {"role": ..., "content": ...} dict.
            chat.append(turn)
        else:
            # Classic gradio pair: (user_text, assistant_text).
            user_turn, assistant_turn = turn
            chat.append({"role": "user", "content": user_turn})
            if assistant_turn is not None:
                chat.append({"role": "assistant", "content": assistant_turn})
    chat.append({"role": "user", "content": user_message})

    response = llm.create_chat_completion(
        messages=chat,
        stream=True
    )
    partial_message = ""
    for chunk in response:
        delta = chunk['choices'][0]['delta']
        if len(delta) != 0:
            # BUG FIX: the first streamed delta is {'role': 'assistant'}
            # with no 'content' key; indexing it directly raised KeyError.
            partial_message = partial_message + delta.get('content', '')
            yield partial_message
# Build the chat UI: the system prompt is exposed as an extra Textbox
# whose value is passed to chat_completion as its third argument.
system_prompt_box = gr.Textbox("You are helpful AI.", label="System Prompt")
demo = gr.ChatInterface(
    chat_completion,
    additional_inputs=[system_prompt_box],
)
# Bind to all interfaces so the server is reachable from outside the
# container.
demo.launch(server_name="0.0.0.0")
# Runtime dependencies for the llama.cpp chat demo (app.py).
llama-cpp-python
gradio