Ved Gupta committed
Commit 8afc3f4
0 Parent(s)
Files changed (7)
  1. .gitignore +3 -0
  2. Dockerfile +5 -0
  3. README.md +27 -0
  4. file.txt +14 -0
  5. models/gpt-3.5-turbo.yaml +30 -0
  6. models/luna-ai-llama2.tmpl +2 -0
  7. setup.sh +6 -0
.gitignore ADDED
@@ -0,0 +1,3 @@
+ *.gguf
+ Modelfile
+ models/*.gguf
Dockerfile ADDED
@@ -0,0 +1,5 @@
+ FROM quay.io/go-skynet/local-ai:v2.7.0-ffmpeg-core
+
+ EXPOSE 8080
+
+ CMD ["phi-2"]
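LocalAI images pass container arguments through to the local-ai binary, so `CMD ["phi-2"]` should start the server with the phi-2 configuration preloaded. A minimal local smoke test, assuming the image is built under the illustrative tag `llm-api`:

```bash
# Build and run the image; the "llm-api" tag is illustrative, not from this repo.
docker build -t llm-api .
docker run -p 8080:8080 --rm llm-api

# Once the server is up, confirm phi-2 appears in the model list.
curl http://localhost:8080/v1/models
```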
README.md ADDED
@@ -0,0 +1,27 @@
+ ---
+ title: llm-api
+ emoji: ⚡
+ colorFrom: indigo
+ colorTo: yellow
+ sdk: docker
+ pinned: false
+ app_port: 8080
+ ---
+
+
+ ```bash
+ curl https://innovatorved-llm-api.hf.space/v1/models
+
+ curl https://innovatorved-llm-api.hf.space/v1/chat/completions -H "Content-Type: application/json" -d '{
+ "model": "gpt-3.5-turbo",
+ "messages": [{"role": "user", "content": "How are you?"}],
+ "temperature": 0.9
+ }'
+
+ curl https://innovatorved-llm-api.hf.space/v1/chat/completions -H "Content-Type: application/json" -d '{
+ "model": "luna-ai-llama2",
+ "messages": [{"role": "user", "content": "How are you?"}],
+ "temperature": 0.9
+ }'
+
+ ```
file.txt ADDED
@@ -0,0 +1,14 @@
+ # Dockerfile
+ FROM quay.io/go-skynet/local-ai:latest
+
+ WORKDIR /app
+
+ RUN mkdir models
+
+ RUN wget -q "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_0.gguf" -O models/mistral-7b-instruct-v0.2.Q4_0.gguf
+ RUN wget -q "https://huggingface.co/TheBloke/Luna-AI-Llama2-Uncensored-GGUF/resolve/main/luna-ai-llama2-uncensored.Q4_0.gguf" -O models/luna-ai-llama2
+ COPY models/* models/
+
+ EXPOSE 8080
+
+ CMD ["--models-path", "/app/models", "--context-size", "700", "--threads", "4"]
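file.txt holds an alternative Dockerfile that bakes the weights into the image at build time instead of preloading a gallery model. Since `docker build` accepts any file name via `-f`, it can be exercised directly; the `llm-api-baked` tag is an assumption:

```bash
# file.txt is a Dockerfile in disguise; point docker at it explicitly.
docker build -f file.txt -t llm-api-baked .
docker run -p 8080:8080 --rm llm-api-baked
```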
models/gpt-3.5-turbo.yaml ADDED
@@ -0,0 +1,30 @@
+ name: gpt-3.5-turbo
+ # Default model parameters
+ parameters:
+   # Relative to the models path
+   model: mistral-7b-instruct-v0.2.Q4_0.gguf
+   # temperature
+   temperature: 0.3
+   # all the OpenAI request options here..
+
+ # Default context size
+ context_size: 512
+ threads: 10
+
+ # Enable prompt caching
+ prompt_cache_path: "alpaca-cache"
+ prompt_cache_all: true
+
+ # stopwords (if supported by the backend)
+ stopwords:
+ - "HUMAN:"
+ - "### Response:"
+ # define chat roles
+ roles:
+   assistant: '### Response:'
+   system: '### System Instruction:'
+   user: '### Instruction:'
+ template:
+   # template files (".tmpl") with the prompt templates used by default on the endpoint call; note the names are given without the extension
+   completion: completion
+   chat: chat
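With this config, the Mistral weights answer to the name gpt-3.5-turbo, which is what the README's first chat request exercises; note that the referenced chat and completion template files are not among the seven files in this commit. The completion endpoint wired above can be checked the same way as the chat examples; a sketch against the Space URL from the README, with an illustrative prompt:

```bash
# Illustrative request to the OpenAI-compatible completion endpoint.
curl https://innovatorved-llm-api.hf.space/v1/completions -H "Content-Type: application/json" -d '{
  "model": "gpt-3.5-turbo",
  "prompt": "Say hello in one sentence.",
  "temperature": 0.3
}'
```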
models/luna-ai-llama2.tmpl ADDED
@@ -0,0 +1,2 @@
+ {{.Input}}
+ ### Response:
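This follows LocalAI's getting-started convention: the bare weights file downloaded as models/luna-ai-llama2 in file.txt (no extension) is served under that name, and this identically named .tmpl supplies its prompt format. An equivalent explicit config is sketched below; models/luna-ai-llama2.yaml is not part of this commit and is an assumption:

```yaml
# Hypothetical models/luna-ai-llama2.yaml; not part of this commit.
name: luna-ai-llama2
parameters:
  # The weights file downloaded in file.txt, saved without a .gguf extension.
  model: luna-ai-llama2
template:
  # Points at luna-ai-llama2.tmpl above; the extension is omitted by convention.
  chat: luna-ai-llama2
```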
setup.sh ADDED
@@ -0,0 +1,6 @@
+ mkdir -p models
+
+ wget "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_0.gguf" -O models/mistral-7b-instruct-v0.2.Q4_0.gguf
+
+ docker run -p 8080:8080 -v "$PWD/models:/models" -ti --rm quay.io/go-skynet/local-ai:latest --models-path /models --context-size 700 --threads 4
+
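setup.sh is the local-development counterpart of the image in file.txt: it mounts the repo's models/ directory (the gpt-3.5-turbo.yaml config plus the downloaded weights) into a stock local-ai container. Once the container is up, the README's endpoints can be hit on localhost; a sketch:

```bash
# List the models the container picked up from the mounted directory.
curl http://localhost:8080/v1/models

# Same chat request as the README, pointed at the local container.
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
  "model": "gpt-3.5-turbo",
  "messages": [{"role": "user", "content": "How are you?"}],
  "temperature": 0.9
}'
```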