dorogan committed on
Commit
1e2a35a
·
1 Parent(s): de83ed6

Update: basic methods and endpoints were added

Browse files
Files changed (4) hide show
  1. Dockerfile +11 -0
  2. app.py +27 -0
  3. model.py +24 -0
  4. requirements.txt +8 -0
Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
# Image for the completion API served by app.py.
FROM python:3.10-buster

WORKDIR /app/commandr-api-local

# Install dependencies before copying the sources so this layer stays cached
# until requirements.txt changes.
COPY requirements.txt .
RUN pip3 install --upgrade pip
RUN pip3 install --no-cache-dir -r requirements.txt

COPY . .

# app.py starts uvicorn on port 8081.
EXPOSE 8081

# Start the server when the container runs. A RUN instruction would execute
# app.py during `docker build`, where the server never returns and the build
# cannot complete.
CMD ["python3", "app.py"]
app.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import uvicorn
2
+ from fastapi import FastAPI
3
+ from pydantic import BaseModel
4
+ from model import get_answer_from_llm
5
+
6
+
7
class Prompt(BaseModel):
    """Request payload carrying the text to send to the model."""

    # Defaults to an empty string when the field is omitted.
    prompt: str = ""
9
+
10
+
11
# FastAPI application object; routes are registered on it with decorators.
app = FastAPI(title="CommandRLLMAPI")
14
+
15
+
16
@app.post("/completion/")
async def get_answer(question: Prompt):
    """Return the model's completion for the prompt in the request body.

    Args:
        question: Parsed request body; its ``prompt`` field is forwarded to
            ``get_answer_from_llm``.

    Returns:
        Whatever ``get_answer_from_llm`` produces for the prompt.
    """
    # The parameter is annotated with the model class itself: ``Prompt.prompt``
    # is a field access, not a type, and cannot describe the request body.
    # ``get_answer_from_llm`` is a coroutine function, so it has to be awaited;
    # calling it without ``await`` yields a coroutine object instead of text.
    answer = await get_answer_from_llm(question.prompt)
    return answer
20
+
21
+
22
if __name__ == "__main__":
    # Run the API with uvicorn, listening on every interface at port 8081.
    uvicorn.run(app, host="0.0.0.0", port=8081)
model.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoTokenizer, AutoModelForCausalLM
2
+ import torch
3
+
4
# CUDA when available, otherwise CPU. Kept as a module-level name in case
# other code refers to it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model_id = "CohereForAI/c4ai-command-r-v01-4bit"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# This checkpoint is a bitsandbytes 4-bit quantized model. The loader places
# such models on the accelerator itself, and transformers releases around the
# pinned minimum version raise ValueError when ``.to(...)`` is called on them,
# so the model is used exactly as loaded.
model = AutoModelForCausalLM.from_pretrained(model_id)

# Prompt layout produced by the chat template, for reference:
# <BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, how are you?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
10
+
11
+
12
async def get_answer_from_llm(question: str | None = None):
    """Generate a reply to ``question`` with the module-level model.

    The question is wrapped as a single user turn, rendered through the
    tokenizer's chat template, and passed to ``model.generate`` with sampling
    enabled (temperature 0.3, at most 100 new tokens).

    Args:
        question: The user message. ``None`` is treated as an empty message
            rather than being formatted into the literal text ``"None"``.

    Returns:
        The first generated sequence decoded by ``tokenizer.decode``. No
        special-token filtering or prompt stripping is requested here.
    """
    content = "" if question is None else f"{question}"

    # Format the message with the command-r chat template.
    messages = [{"role": "user", "content": content}]
    input_ids = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    )
    # The token tensor must live on the same device as the model weights,
    # otherwise generation fails when the model is on a GPU.
    input_ids = input_ids.to(model.device)

    gen_tokens = model.generate(
        input_ids,
        max_new_tokens=100,
        do_sample=True,
        temperature=0.3,
    )

    # ``decode`` is a regular synchronous call returning ``str``; awaiting its
    # result raises TypeError, so it is called directly.
    gen_text = tokenizer.decode(gen_tokens[0])
    return gen_text
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ torch
2
+ transformers>=4.39.1
3
+ bitsandbytes
4
+ accelerate
5
+ tokenizers
6
+ pydantic
7
+ fastapi
8
+ uvicorn