Spaces: Sleeping

initial

Ved Gupta committed · Commit 8afc3f4
Parent(s):
Browse files

- .gitignore +3 -0
- Dockerfile +5 -0
- README.md +27 -0
- file.txt +14 -0
- models/gpt-3.5-turbo.yaml +30 -0
- models/luna-ai-llama2.tmpl +2 -0
- setup.sh +6 -0
.gitignore
ADDED
@@ -0,0 +1,3 @@
+*.gguf
+Modelfile
+models/*.gguf
Dockerfile
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM quay.io/go-skynet/local-ai:v2.7.0-ffmpeg-core
|
2 |
+
|
3 |
+
EXPOSE 8080
|
4 |
+
|
5 |
+
CMD ["phi-2"]
|
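This image appears to rely on LocalAI's preconfigured "phi-2" model profile rather than bundling weights itself. As a sketch of how it could be exercised locally (the image tag and port mapping are illustrative assumptions, not part of the commit):

```bash
# Build the image from this Dockerfile and run it on the Space's port.
# The tag "llm-api" is an illustrative assumption.
docker build -t llm-api .
docker run --rm -p 8080:8080 llm-api
```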
README.md
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: llm-api
|
3 |
+
emoji: ⚡
|
4 |
+
colorFrom: indigo
|
5 |
+
colorTo: yellow
|
6 |
+
sdk: docker
|
7 |
+
pinned: false
|
8 |
+
app_port: 8080
|
9 |
+
---
|
10 |
+
|
11 |
+
|
12 |
+
```bash
|
13 |
+
curl https://innovatorved-llm-api.hf.space/v1/models
|
14 |
+
|
15 |
+
curl https://innovatorved-llm-api.hf.space/v1/chat/completions -H "Content-Type: application/json" -d '{
|
16 |
+
"model": "gpt-3.5-turbo",
|
17 |
+
"messages": [{"role": "user", "content": "How are you?"}],
|
18 |
+
"temperature": 0.9
|
19 |
+
}'
|
20 |
+
|
21 |
+
curl https://innovatorved-llm-api.hf.space/v1/chat/completions -H "Content-Type: application/json" -d '{
|
22 |
+
"model": "luna-ai-llama2",
|
23 |
+
"messages": [{"role": "user", "content": "How are you?"}],
|
24 |
+
"temperature": 0.9
|
25 |
+
}'
|
26 |
+
|
27 |
+
```
|
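Since LocalAI serves an OpenAI-compatible API, the assistant's reply can be extracted from the standard response schema on the command line. The jq invocation below is a sketch under that assumption and is not part of the commit:

```bash
# Pull out just the assistant's message from the chat completion response.
# Assumes the standard OpenAI response schema and that jq is installed.
curl -s https://innovatorved-llm-api.hf.space/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "How are you?"}]}' \
  | jq -r '.choices[0].message.content'
```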
file.txt
ADDED
@@ -0,0 +1,14 @@
+# Dockerfile
+FROM quay.io/go-skynet/local-ai:latest
+
+WORKDIR /app
+
+RUN mkdir models
+
+RUN wget -q "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_0.gguf" -O models/mistral-7b-instruct-v0.2.Q4_0.gguf
+RUN wget -q "https://huggingface.co/TheBloke/Luna-AI-Llama2-Uncensored-GGUF/resolve/main/luna-ai-llama2-uncensored.Q4_0.gguf" -O models/luna-ai-llama2
+COPY models/* models/
+
+EXPOSE 8080
+
+CMD ["--models-path", "/app/models", "--context-size", "700", "--threads", "4"]
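file.txt looks like a draft of an alternative Dockerfile that downloads the model weights into the image instead of relying on a preconfigured profile; note the second wget saves the Luna weights as models/luna-ai-llama2 with no .gguf extension, matching the model name used in the README's second curl example. A minimal sketch for trying the draft out, assuming it is meant to be built as a Dockerfile (the file and tag names are illustrative):

```bash
# Treat file.txt as an alternate Dockerfile and build it under a separate tag.
# "Dockerfile.draft" and "llm-api-draft" are illustrative assumptions.
cp file.txt Dockerfile.draft
docker build -f Dockerfile.draft -t llm-api-draft .
```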
models/gpt-3.5-turbo.yaml
ADDED
@@ -0,0 +1,30 @@
+name: gpt-3.5-turbo
+# Default model parameters
+parameters:
+  # Relative to the models path
+  model: mistral-7b-instruct-v0.2.Q4_0.gguf
+  # temperature
+  temperature: 0.3
+  # all the OpenAI request options here..
+
+# Default context size
+context_size: 512
+threads: 10
+
+# Enable prompt caching
+prompt_cache_path: "alpaca-cache"
+prompt_cache_all: true
+
+# stopwords (if supported by the backend)
+stopwords:
+- "HUMAN:"
+- "### Response:"
+# define chat roles
+roles:
+  assistant: '### Response:'
+  system: '### System Instruction:'
+  user: '### Instruction:'
+template:
+  # template file ".tmpl" with the prompt template to use by default on the endpoint call. Note there is no extension in the files
+  completion: completion
+  chat: chat
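The template section points at "completion" and "chat" .tmpl files, but no completion.tmpl or chat.tmpl is added in this commit. A minimal sketch of what they might contain, modeled on models/luna-ai-llama2.tmpl (their contents here are assumptions):

```bash
# Create placeholder templates in LocalAI's Go-template style.
# The contents are assumptions modeled on luna-ai-llama2.tmpl.
cat > models/completion.tmpl <<'EOF'
{{.Input}}
EOF

cat > models/chat.tmpl <<'EOF'
{{.Input}}
### Response:
EOF
```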
models/luna-ai-llama2.tmpl
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
{{.Input}}
|
2 |
+
### Response:
|
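At request time, LocalAI renders the chat history into {{.Input}} and the trailing "### Response:" cues the model to continue as the assistant. Roughly, for a single user turn it would receive something like the following (the "### Instruction:" label is borrowed from gpt-3.5-turbo.yaml's roles map and is an assumption for this model):

```bash
# Approximate prompt the model receives after template rendering (assumed).
cat <<'EOF'
### Instruction:
How are you?
### Response:
EOF
```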
setup.sh
ADDED
@@ -0,0 +1,6 @@
+mkdir models
+
+wget "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_0.gguf" -O models/mistral-7b-instruct-v0.2.Q4_0.gguf
+
+docker run -p 8080:8080 -v $PWD/models:/models -ti --rm quay.io/go-skynet/local-ai:latest --models-path /models --context-size 700 --threads 4
+
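With the container from setup.sh running, the endpoint can be smoke-tested from the same machine; localhost is an assumption about where the container is reachable:

```bash
# Confirm LocalAI is up and lists the downloaded model.
curl http://localhost:8080/v1/models
```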