macadeliccc committed on
Commit
141c0b0
•
1 Parent(s): e4c7a8f
Files changed (3) hide show
  1. Dockerfile +24 -0
  2. app.py +52 -0
  3. requirements.txt +2 -0
Dockerfile ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Image that serves a GGUF model through the Gradio chat app in app.py.
# Keyword casing is kept consistent (FROM ... AS) to avoid BuildKit's
# FromAsCasing warning.
FROM python:3.10 AS base

# Set model
# These variables are also read at runtime by app.py via os.getenv().
ENV MODEL=macadeliccc/piccolo-2x7b-GGUF
ENV QUANT=q4_k_m
ENV CHAT_TEMPLATE=chatml

# Set the working directory
WORKDIR /app

# Install Python requirements
# Copied on its own, before the rest of the sources, so this layer is only
# rebuilt when requirements.txt changes.
COPY ./requirements.txt /app/
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Download model
# The file name is derived from MODEL: drop the "owner/" prefix, lowercase it,
# and strip the trailing "-gguf" (e.g. "piccolo-2x7b-GGUF" -> "piccolo-2x7b").
# The result is saved as model.gguf, the path app.py loads.
RUN MODEL_NAME_FILE=$(echo ${MODEL#*/} | tr '[:upper:]' '[:lower:]' | sed 's/-gguf$//') && \
    wget "https://huggingface.co/${MODEL}/resolve/main/${MODEL_NAME_FILE}.${QUANT}.gguf" -O model.gguf

# Copy the rest of your application
COPY . .

# Command to run the application
CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import gradio as gr
4
+ from llama_cpp import Llama
5
+
# Get environment variables
model_id = os.getenv('MODEL')
quant = os.getenv('QUANT')
chat_template = os.getenv('CHAT_TEMPLATE')

# Fail early with an actionable message: without this check a missing MODEL
# surfaces as an opaque AttributeError on `None.split` below.
if not model_id:
    raise RuntimeError(
        "The MODEL environment variable must be set, e.g. 'owner/name-GGUF'."
    )

# Interface variables
# Take the last path component (so an id without an "owner/" prefix still
# works) and keep everything before the first "-GGUF" as the display name.
model_name = model_id.split('/')[-1].split('-GGUF')[0]
title = f"🔮 {model_name}"
description = f"Chat with <a href=\"https://huggingface.co/{model_id}\">{model_name}</a> in GGUF format ({quant})!"

# Initialize the LLM
# "model.gguf" is a relative path, so it is resolved against the current
# working directory at startup.
llm = Llama(
    model_path="model.gguf",
    n_ctx=4096,
    n_threads=2,
    chat_format=chat_template,
)
# Function for streaming chat completions
def chat_stream_completion(message, history, system_prompt):
    """Stream the model's reply to ``message`` as a progressively longer string.

    Args:
        message: The new user message.
        history: Previous turns. Two layouts are accepted: the legacy list of
            ``(user, assistant)`` pairs, and a list of ``{"role", "content"}``
            dicts ("messages" format). ``None`` or an empty list means no
            prior turns.
        system_prompt: Text sent as the leading system message.

    Yields:
        The reply accumulated so far, once per streamed chunk that carries
        content.
    """
    messages_prompts = [{"role": "system", "content": system_prompt}]
    for turn in history or []:
        if isinstance(turn, dict):
            # "messages"-style history: forward only role/content and drop any
            # extra keys. NOTE(review): non-text content is passed through
            # unchanged - confirm the chat format can handle it.
            messages_prompts.append(
                {"role": turn["role"], "content": turn["content"]}
            )
            continue
        human, assistant = turn
        # A side of a pair may be None (e.g. a turn with no reply yet);
        # skip it rather than sending a None content to the model.
        if human is not None:
            messages_prompts.append({"role": "user", "content": human})
        if assistant is not None:
            messages_prompts.append({"role": "assistant", "content": assistant})
    messages_prompts.append({"role": "user", "content": message})

    response = llm.create_chat_completion(
        messages=messages_prompts,
        stream=True,
    )

    message_repl = ""
    for chunk in response:
        # Some chunks have an empty delta or one without "content"; only
        # chunks that carry text extend the reply.
        content = chunk["choices"][0]["delta"].get("content")
        if content is not None:
            message_repl += content
            yield message_repl
39
+
# Gradio chat interface
# The single additional input (the system prompt textbox) is passed to
# chat_stream_completion as its third argument, `system_prompt`.
gr.ChatInterface(
    fn=chat_stream_completion,
    title=title,
    description=description,
    additional_inputs=[gr.Textbox("You are helpful assistant.")],
    additional_inputs_accordion="📝 System prompt",
    examples=[
        ["What is a Large Language Model?"],
        ["What's 9+2-1?"],
        ["Write Python code to print the Fibonacci sequence"],
    ],
    # Listen on all interfaces rather than only localhost.
).queue().launch(server_name="0.0.0.0")
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ llama-cpp-python
2
+ gradio