Spaces:
Sleeping
Sleeping
robinroy03
committed on
Commit
•
17aa59f
1
Parent(s):
03d9166
migrated to groq -- suuuuuper fast!
Browse files
- .gitignore +2 -1
- Dockerfile +23 -6
- main.py +25 -10
- requirements.txt +4 -0
.gitignore
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
__pycache__
|
2 |
venv
|
3 |
.vscode
|
|
|
4 |
# script for some housekeeping
|
5 |
-
f.py
|
|
|
1 |
__pycache__
|
2 |
venv
|
3 |
.vscode
|
4 |
+
.env
|
5 |
# script for some housekeeping
|
6 |
+
f.py
|
Dockerfile
CHANGED
@@ -1,9 +1,26 @@
|
|
1 |
-
FROM ollama/ollama
|
2 |
|
3 |
-
RUN mkdir -p /.ollama && chmod 777 /.ollama
|
4 |
|
5 |
-
ENV OLLAMA_MAX_LOADED_MODELS=20 --OLLAMA_NUM_PARALLEL=20
|
6 |
-
ENV OLLAMA_HOST "0.0.0.0:7860"
|
7 |
|
8 |
-
CMD ["serve"]
|
9 |
-
EXPOSE 7860
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# FROM ollama/ollama
|
2 |
|
3 |
+
# RUN mkdir -p /.ollama && chmod 777 /.ollama
|
4 |
|
5 |
+
# ENV OLLAMA_MAX_LOADED_MODELS=20 --OLLAMA_NUM_PARALLEL=20
|
6 |
+
# ENV OLLAMA_HOST "0.0.0.0:7860"
|
7 |
|
8 |
+
# CMD ["serve"]
|
9 |
+
# EXPOSE 7860
|
10 |
+
|
11 |
+
FROM python:3
|
12 |
+
|
13 |
+
RUN useradd -m -u 1000 user
|
14 |
+
USER user
|
15 |
+
ENV HOME=/home/user \
|
16 |
+
PATH=/home/user/.local/bin:$PATH
|
17 |
+
|
18 |
+
COPY --chown=user . $HOME/LLM_API
|
19 |
+
|
20 |
+
WORKDIR $HOME/LLM_API
|
21 |
+
|
22 |
+
RUN mkdir $HOME/.cache
|
23 |
+
|
24 |
+
RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
25 |
+
|
26 |
+
CMD ["gunicorn", "-w", "5", "-b", "0.0.0.0:7860","main:app"]
|
main.py
CHANGED
@@ -1,24 +1,39 @@
|
|
1 |
from flask import Flask
|
2 |
from flask import request
|
3 |
-
from
|
|
|
|
|
4 |
|
5 |
app = Flask(__name__)
|
|
|
|
|
|
|
6 |
|
7 |
-
llm = Ollama(model="phi3")
|
8 |
|
9 |
-
@app.route(
|
10 |
def completion():
|
11 |
"""
|
12 |
{
|
13 |
-
|
14 |
-
|
15 |
}
|
16 |
"""
|
17 |
-
|
18 |
message = request.get_json()
|
19 |
-
llm_output = llm.invoke(message['text'])
|
20 |
|
21 |
-
|
22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
-
# curl -v -X POST 'http://127.0.0.1:8000/
|
|
|
1 |
from flask import Flask
|
2 |
from flask import request
|
3 |
+
from groq import Groq
|
4 |
+
|
5 |
+
import os
|
6 |
|
7 |
app = Flask(__name__)
|
8 |
+
client = Groq(
|
9 |
+
api_key=os.environ.get("GROQ_API_KEY")
|
10 |
+
)
|
11 |
|
|
|
12 |
|
13 |
+
@app.route("/api/generate", methods=['POST'])
|
14 |
def completion():
|
15 |
"""
|
16 |
{
|
17 |
+
"model": "llama3-70b-8192",
|
18 |
+
"prompt": "why is the sky blue?"
|
19 |
}
|
20 |
"""
|
21 |
+
|
22 |
message = request.get_json()
|
|
|
23 |
|
24 |
+
model = message['model']
|
25 |
+
prompt = message['prompt']
|
26 |
+
|
27 |
+
chat_completion = client.chat.completions.create(
|
28 |
+
messages=[
|
29 |
+
{
|
30 |
+
"role": "user",
|
31 |
+
"content": prompt,
|
32 |
+
}
|
33 |
+
],
|
34 |
+
model=model,
|
35 |
+
)
|
36 |
+
|
37 |
+
return chat_completion.choices[0].message.content
|
38 |
|
39 |
+
# curl -v -X POST 'http://127.0.0.1:8000/api/generate' --header 'Content-Type: application/json' --data '{"model": "llama3-70b-8192", "prompt": "why is sky blue?"}'
|
requirements.txt
CHANGED
@@ -9,6 +9,7 @@ charset-normalizer==3.3.2
|
|
9 |
click==8.1.7
|
10 |
dataclasses-json==0.6.6
|
11 |
diskcache==5.6.3
|
|
|
12 |
dnspython==2.6.1
|
13 |
email_validator==2.1.1
|
14 |
fastapi==0.111.0
|
@@ -18,6 +19,7 @@ Flask==3.0.3
|
|
18 |
frozenlist==1.4.1
|
19 |
fsspec==2024.5.0
|
20 |
greenlet==3.0.3
|
|
|
21 |
gunicorn==22.0.0
|
22 |
h11==0.14.0
|
23 |
httpcore==1.0.5
|
@@ -58,6 +60,7 @@ referencing==0.35.1
|
|
58 |
requests==2.31.0
|
59 |
rich==13.7.1
|
60 |
rpds-py==0.18.1
|
|
|
61 |
shellingham==1.5.4
|
62 |
sniffio==1.3.1
|
63 |
SQLAlchemy==2.0.30
|
@@ -76,4 +79,5 @@ uvloop==0.19.0
|
|
76 |
watchfiles==0.21.0
|
77 |
websockets==12.0
|
78 |
Werkzeug==3.0.3
|
|
|
79 |
yarl==1.9.4
|
|
|
9 |
click==8.1.7
|
10 |
dataclasses-json==0.6.6
|
11 |
diskcache==5.6.3
|
12 |
+
distro==1.9.0
|
13 |
dnspython==2.6.1
|
14 |
email_validator==2.1.1
|
15 |
fastapi==0.111.0
|
|
|
19 |
frozenlist==1.4.1
|
20 |
fsspec==2024.5.0
|
21 |
greenlet==3.0.3
|
22 |
+
groq==0.8.0
|
23 |
gunicorn==22.0.0
|
24 |
h11==0.14.0
|
25 |
httpcore==1.0.5
|
|
|
60 |
requests==2.31.0
|
61 |
rich==13.7.1
|
62 |
rpds-py==0.18.1
|
63 |
+
setuptools==70.0.0
|
64 |
shellingham==1.5.4
|
65 |
sniffio==1.3.1
|
66 |
SQLAlchemy==2.0.30
|
|
|
79 |
watchfiles==0.21.0
|
80 |
websockets==12.0
|
81 |
Werkzeug==3.0.3
|
82 |
+
wheel==0.43.0
|
83 |
yarl==1.9.4
|