robinroy03 committed on
Commit
17aa59f
1 Parent(s): 03d9166

migrated to groq -- suuuuuper fast!

Browse files
Files changed (4) hide show
  1. .gitignore +2 -1
  2. Dockerfile +23 -6
  3. main.py +25 -10
  4. requirements.txt +4 -0
.gitignore CHANGED
@@ -1,5 +1,6 @@
1
  __pycache__
2
  venv
3
  .vscode
 
4
  # script for some housekeeping
5
- f.py
 
1
  __pycache__
2
  venv
3
  .vscode
4
+ .env
5
  # script for some housekeeping
6
+ f.py
Dockerfile CHANGED
@@ -1,9 +1,26 @@
1
- FROM ollama/ollama
2
 
3
- RUN mkdir -p /.ollama && chmod 777 /.ollama
4
 
5
- ENV OLLAMA_MAX_LOADED_MODELS=20 --OLLAMA_NUM_PARALLEL=20
6
- ENV OLLAMA_HOST "0.0.0.0:7860"
7
 
8
- CMD ["serve"]
9
- EXPOSE 7860
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # FROM ollama/ollama
2
 
3
+ # RUN mkdir -p /.ollama && chmod 777 /.ollama
4
 
5
+ # ENV OLLAMA_MAX_LOADED_MODELS=20 --OLLAMA_NUM_PARALLEL=20
6
+ # ENV OLLAMA_HOST "0.0.0.0:7860"
7
 
8
+ # CMD ["serve"]
9
+ # EXPOSE 7860
10
+
11
+ FROM python:3
12
+
13
+ RUN useradd -m -u 1000 user
14
+ USER user
15
+ ENV HOME=/home/user \
16
+ PATH=/home/user/.local/bin:$PATH
17
+
18
+ COPY --chown=user . $HOME/LLM_API
19
+
20
+ WORKDIR $HOME/LLM_API
21
+
22
+ RUN mkdir $HOME/.cache
23
+
24
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
25
+
26
+ CMD ["gunicorn", "-w", "5", "-b", "0.0.0.0:7860","main:app"]
main.py CHANGED
@@ -1,24 +1,39 @@
1
  from flask import Flask
2
  from flask import request
3
- from langchain_community.llms import Ollama
 
 
4
 
5
  app = Flask(__name__)
 
 
 
6
 
7
- llm = Ollama(model="phi3")
8
 
9
- @app.route('/completion', methods=['POST'])
10
  def completion():
11
  """
12
  {
13
- user: <username>,
14
- text: <text>
15
  }
16
  """
17
-
18
  message = request.get_json()
19
- llm_output = llm.invoke(message['text'])
20
 
21
- print(llm_output)
22
- return llm_output
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
- # curl -v -X POST 'http://127.0.0.1:8000/completion' --header 'Content-Type: application/json' --data '{"user": "test-user", "text": "What do you know about 3D graphics"}'
 
1
  from flask import Flask
2
  from flask import request
3
+ from groq import Groq
4
+
5
+ import os
6
 
7
  app = Flask(__name__)
8
+ client = Groq(
9
+ api_key=os.environ.get("GROQ_API_KEY")
10
+ )
11
 
 
12
 
13
+ @app.route("/api/generate", methods=['POST'])
14
  def completion():
15
  """
16
  {
17
+ "model": "llama3-70b-8192",
18
+ "prompt": "why is the sky blue?"
19
  }
20
  """
21
+
22
  message = request.get_json()
 
23
 
24
+ model = message['model']
25
+ prompt = message['prompt']
26
+
27
+ chat_completion = client.chat.completions.create(
28
+ messages=[
29
+ {
30
+ "role": "user",
31
+ "content": prompt,
32
+ }
33
+ ],
34
+ model=model,
35
+ )
36
+
37
+ return chat_completion.choices[0].message.content
38
 
39
+ # curl -v -X POST 'http://127.0.0.1:8000/api/generate' --header 'Content-Type: application/json' --data '{"model": "llama3-70b-8192", "prompt": "why is sky blue?"}'
requirements.txt CHANGED
@@ -9,6 +9,7 @@ charset-normalizer==3.3.2
9
  click==8.1.7
10
  dataclasses-json==0.6.6
11
  diskcache==5.6.3
 
12
  dnspython==2.6.1
13
  email_validator==2.1.1
14
  fastapi==0.111.0
@@ -18,6 +19,7 @@ Flask==3.0.3
18
  frozenlist==1.4.1
19
  fsspec==2024.5.0
20
  greenlet==3.0.3
 
21
  gunicorn==22.0.0
22
  h11==0.14.0
23
  httpcore==1.0.5
@@ -58,6 +60,7 @@ referencing==0.35.1
58
  requests==2.31.0
59
  rich==13.7.1
60
  rpds-py==0.18.1
 
61
  shellingham==1.5.4
62
  sniffio==1.3.1
63
  SQLAlchemy==2.0.30
@@ -76,4 +79,5 @@ uvloop==0.19.0
76
  watchfiles==0.21.0
77
  websockets==12.0
78
  Werkzeug==3.0.3
 
79
  yarl==1.9.4
 
9
  click==8.1.7
10
  dataclasses-json==0.6.6
11
  diskcache==5.6.3
12
+ distro==1.9.0
13
  dnspython==2.6.1
14
  email_validator==2.1.1
15
  fastapi==0.111.0
 
19
  frozenlist==1.4.1
20
  fsspec==2024.5.0
21
  greenlet==3.0.3
22
+ groq==0.8.0
23
  gunicorn==22.0.0
24
  h11==0.14.0
25
  httpcore==1.0.5
 
60
  requests==2.31.0
61
  rich==13.7.1
62
  rpds-py==0.18.1
63
+ setuptools==70.0.0
64
  shellingham==1.5.4
65
  sniffio==1.3.1
66
  SQLAlchemy==2.0.30
 
79
  watchfiles==0.21.0
80
  websockets==12.0
81
  Werkzeug==3.0.3
82
+ wheel==0.43.0
83
  yarl==1.9.4