O S I H committed on
Commit
5f9470d
1 Parent(s): b830a37
Files changed (4) hide show
  1. Dockerfile +24 -0
  2. README.md +1 -1
  3. app.py +58 -0
  4. requirements.txt +2 -0
Dockerfile ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use an alias for the base image for easier updates
FROM python:3.10 AS base

# Model selection:
#   MODEL         - Hugging Face repository id the GGUF file is downloaded from
#   QUANT         - quantization tag that is part of the GGUF file name
#   CHAT_TEMPLATE - chat format name read by app.py
ENV MODEL=seyf1elislam/WestKunai-Hermes-7b
ENV QUANT=Q4_K_M
ENV CHAT_TEMPLATE=chatml

# Set the working directory
WORKDIR /app

# Install Python requirements before copying the application so this layer
# is only rebuilt when requirements.txt changes
COPY ./requirements.txt /app/
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Download model
# The file name is derived from the repository name: drop the "owner/"
# prefix, lowercase it, strip a trailing "-gguf", then append the QUANT tag
# and the ".gguf" extension. The result is saved as model.gguf, the path
# app.py loads. --no-verbose keeps wget's progress output out of the build log.
RUN MODEL_NAME_FILE=$(echo "${MODEL#*/}" | tr '[:upper:]' '[:lower:]' | sed 's/-gguf$//') && \
    wget --no-verbose "https://huggingface.co/${MODEL}/resolve/main/${MODEL_NAME_FILE}.${QUANT}.gguf" -O model.gguf

# Copy the rest of your application
COPY . .

# Command to run the application
CMD ["python", "app.py"]
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  title: WestKunai Hermes 7b
3
- emoji: 🚀
4
  colorFrom: pink
5
  colorTo: yellow
6
  sdk: docker
 
1
  ---
2
  title: WestKunai Hermes 7b
3
+ emoji: ⚔︎
4
  colorFrom: pink
5
  colorTo: yellow
6
  sdk: docker
app.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import gradio as gr
4
+ from llama_cpp import Llama
5
+
6
# Get environment variables
model_id = os.getenv('MODEL')
quant = os.getenv('QUANT')
chat_template = os.getenv('CHAT_TEMPLATE')

# Fail fast with an explicit message: os.getenv returns None when MODEL is
# unset, and the interface strings below cannot be built without it.
if not model_id:
    raise RuntimeError(
        "The MODEL environment variable must be set to a Hugging Face "
        "repository id such as 'owner/repo-name'."
    )

# Interface variables
# Keep the repository name (the part after the last '/') and cut it at the
# first '-GGUF' so the suffix does not show up in the page title.
model_name = model_id.split('/')[-1].split('-GGUF')[0]
title = model_name
description = f"Chat with <a href=\"https://huggingface.co/{model_id}\">{model_name}</a> in GGUF format ({quant})!"

# Initialize the LLM from the local GGUF file.
# NOTE(review): n_vocab does not appear among the documented Llama
# constructor parameters -- confirm whether it has any effect.
llm = Llama(
    model_path="model.gguf",
    n_ctx=32768,
    n_threads=2,
    n_vocab=32002,
    n_gpu_layers=-1,
    chat_format=chat_template,
)
23
+
24
+ # Function for streaming chat completions
25
def chat_stream_completion(message, history, system_prompt):
    """Stream the assistant's reply to ``message`` as a growing string.

    Args:
        message: The latest user message.
        history: Earlier turns of the conversation. Each entry is either a
            ``(user, assistant)`` pair or a dict with ``"role"`` and
            ``"content"`` keys; both layouts are accepted.
        system_prompt: Text sent as the system message.

    Yields:
        The reply accumulated so far, once for every streamed chunk that
        carries content.
    """
    messages_prompts = [{"role": "system", "content": system_prompt}]
    for turn in history:
        if isinstance(turn, dict):
            # Already a role/content message; copy only the fields we send on.
            messages_prompts.append(
                {"role": turn["role"], "content": turn["content"]}
            )
            continue
        human, assistant = turn
        # Either side of a pair may be missing; do not forward None as content.
        if human is not None:
            messages_prompts.append({"role": "user", "content": human})
        if assistant is not None:
            messages_prompts.append({"role": "assistant", "content": assistant})
    messages_prompts.append({"role": "user", "content": message})

    response = llm.create_chat_completion(
        messages=messages_prompts,
        stream=True,
    )
    message_repl = ""
    for chunk in response:
        # Some chunks carry an empty delta or one without "content"; skip them.
        piece = chunk['choices'][0]["delta"].get("content")
        if piece is None:
            continue
        message_repl += piece
        yield message_repl
41
+
42
+ # Gradio chat interface
43
# Example prompts offered to the user in the chat UI.
example_prompts = [
    ['Can you solve the equation 2x + 3 = 11 for x?'],
    ['Write an epic poem about Ancient Rome.'],
    ['Who was the first person to walk on the Moon?'],
    ['Use a list comprehension to create a list of squares for numbers from 1 to 10.'],
    ['Recommend some popular science fiction books.'],
    ['Can you write a short story about a time-traveling detective?'],
]

# The single additional input is the system prompt, passed to the chat
# function as its third argument.
system_prompt_box = gr.Textbox("You are helpful assistant.")

demo = gr.ChatInterface(
    fn=chat_stream_completion,
    title=title,
    description=description,
    additional_inputs=[system_prompt_box],
    additional_inputs_accordion="📝 System prompt",
    examples=example_prompts,
    theme=gr.themes.Base(),
)

# Enable queuing, then serve on all network interfaces.
demo.queue()
demo.launch(server_name="0.0.0.0")
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ llama-cpp-python
2
+ gradio