Initial
- Dockerfile +7 -0
- __pycache__/inference.cpython-310.pyc +0 -0
- __pycache__/main.cpython-310.pyc +0 -0
- __pycache__/test.cpython-310.pyc +0 -0
- inference.py +33 -0
- main.py +53 -0
- test.py +53 -0
Dockerfile
ADDED
@@ -0,0 +1,7 @@
+FROM python:3.9
+
+WORKDIR /code
+
+COPY . .
+
+CMD ["uvicorn", "inference:app", "--host", "0.0.0.0", "--port", "7860"]
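As committed, the image copies the source tree but never installs its Python dependencies (fastapi, uvicorn, huggingface_hub), so the container would fail at startup. A minimal sketch of a fuller Dockerfile, assuming a requirements.txt listing those packages were added at the repo root (no such file exists in this commit):

# Sketch only: assumes requirements.txt lists fastapi, uvicorn, and
# huggingface_hub; that file is not part of this commit.
FROM python:3.9

WORKDIR /code

# Install dependencies first so Docker caches this layer across code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

CMD ["uvicorn", "inference:app", "--host", "0.0.0.0", "--port", "7860"]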
__pycache__/inference.cpython-310.pyc
ADDED
Binary file (1.16 kB)
__pycache__/main.cpython-310.pyc
ADDED
Binary file (1.46 kB)
__pycache__/test.cpython-310.pyc
ADDED
Binary file (1.58 kB)
inference.py
ADDED
@@ -0,0 +1,33 @@
+from huggingface_hub import InferenceClient
+from fastapi import FastAPI
+from pydantic import BaseModel
+
+client = InferenceClient(model="meta-llama/Meta-Llama-3-8B-Instruct")
+
+class Message(BaseModel):
+    role: str
+    content: str
+
+history = [
+    {
+        "role": "system",
+        "content": "You are a helpful assistant called Awerbot. You will help visitors of a website with their inquiries. Awersoft is a software development company; to contact Awersoft, visitors should use contact@awersoft.com.",
+    }]
+
+
+app = FastAPI()
+
+@app.post('/chat')
+async def chat(messages: list[Message]):
+
+    for message in messages:
+        history.append({'role': 'user', 'content': message.content})
+
+    output = client.chat_completion(
+        model="meta-llama/Meta-Llama-3-8B-Instruct",
+        messages=history,
+        max_tokens=100,
+        stop=["assistant"]
+    )
+
+    return output
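A quick way to exercise this endpoint once the container is up. The sketch below assumes the service is reachable on localhost:7860 (the port in the Dockerfile's CMD) and uses the requests library, which is not part of this commit; the question text is illustrative:

import requests

# Each element must match the Message model defined in inference.py.
# The payload content here is purely illustrative.
payload = [{"role": "user", "content": "How can I get in touch with Awersoft?"}]

# POST to the /chat route; the body is the raw chat_completion output.
response = requests.post("http://localhost:7860/chat", json=payload)
print(response.json())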
main.py
ADDED
@@ -0,0 +1,53 @@
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+from fastapi import FastAPI
+from pydantic import BaseModel
+torch.random.manual_seed(0)
+
+class Message(BaseModel):
+    role: str
+    content: str
+
+model = AutoModelForCausalLM.from_pretrained(
+    "microsoft/Phi-3-mini-4k-instruct",
+    device_map="cpu",
+    torch_dtype="auto",
+    trust_remote_code=True,
+)
+tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
+
+history = [
+    {"role": "system", "content": "You are a helpful digital assistant. Please provide safe, ethical and accurate information to the user."},
+]
+
+pipe = pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
+)
+
+generation_args = {
+    "max_new_tokens": 500,
+    "return_full_text": False,
+    "temperature": 0.0,
+    "do_sample": False,
+}
+
+
+def chat(messages: list[Message]) -> str:
+
+    for message in messages:
+        history.append({'role': 'user', 'content': message.content})
+
+    generated_text = pipe(history, **generation_args)
+
+    print('Generated Text', generated_text)
+    history.append({'role': 'assistant', 'content': generated_text[0]['generated_text']})
+    return generated_text[0]['generated_text']
+
+app = FastAPI()
+
+@app.post('/chat')
+async def root(messages: list[Message]):
+    return chat(messages)
+
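One caveat in chat(): history is a module-level list shared by every request, so concurrent clients append into the same conversation and it grows without bound (inference.py has the same pattern). A minimal stateless sketch, reusing pipe, generation_args, and Message from above; the names chat_stateless and SYSTEM_PROMPT are hypothetical and not part of the commit:

# Hypothetical refactor: rebuilds the conversation per call instead of
# mutating the shared module-level history.
SYSTEM_PROMPT = {"role": "system",
                 "content": "You are a helpful digital assistant."}

def chat_stateless(messages: list[Message]) -> str:
    turns = [SYSTEM_PROMPT] + [
        {"role": m.role, "content": m.content} for m in messages
    ]
    generated = pipe(turns, **generation_args)
    # With return_full_text=False the pipeline returns only the new text.
    return generated[0]["generated_text"]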
test.py
ADDED
@@ -0,0 +1,53 @@
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+from fastapi import FastAPI
+from pydantic import BaseModel
+torch.random.manual_seed(0)
+
+class Message(BaseModel):
+    role: str
+    content: str
+
+model = AutoModelForCausalLM.from_pretrained(
+    "microsoft/Phi-3-mini-4k-instruct",
+    device_map="cpu",
+    torch_dtype="auto",
+    trust_remote_code=True,
+)
+tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
+
+history = [
+    {"role": "system", "content": "You are a helpful digital assistant. Please provide safe, ethical and accurate information to the user."},
+]
+
+pipe = pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
+)
+
+generation_args = {
+    "max_new_tokens": 500,
+    "return_full_text": False,
+    "temperature": 0.0,
+    "do_sample": False,
+}
+
+
+def chat(messages: list[Message]) -> str:
+
+    for message in messages:
+        history.append({'role': 'user', 'content': message.content})
+
+    generated_text = pipe(history, **generation_args)
+
+    print('Generated Text', generated_text)
+    history.append({'role': 'assistant', 'content': generated_text[0]['generated_text']})
+    return generated_text[0]['generated_text']
+
+app = FastAPI()
+
+@app.post('/chat')
+async def root(messages: list[Message]):
+    return chat(messages)
+
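Despite its name, test.py defines the same app as main.py rather than any tests. A hedged sketch of an actual test using FastAPI's TestClient (needs httpx installed; run it from the repo root so the local test.py shadows the stdlib test package):

from fastapi.testclient import TestClient

# Importing test.py executes its module-level model loading, which is slow on CPU.
from test import app

client = TestClient(app)

def test_chat_returns_text():
    # The endpoint returns the generated string, JSON-encoded by FastAPI.
    resp = client.post("/chat", json=[{"role": "user", "content": "Hello!"}])
    assert resp.status_code == 200
    assert isinstance(resp.json(), str)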