Initial
Browse files
- Dockerfile +5 -1
- inference.py +0 -33
- main.py +18 -38
- main_.py +53 -0
- requirements.txt +1 -0
Dockerfile
CHANGED
@@ -2,6 +2,10 @@ FROM python:3.9
|
|
2 |
|
3 |
WORKDIR /code
|
4 |
|
|
|
|
|
|
|
|
|
5 |
COPY . .
|
6 |
|
7 |
-
CMD ["uvicorn", "
|
|
|
2 |
|
3 |
WORKDIR /code
|
4 |
|
5 |
+
COPY ./requirements.txt /code/requirements.txt
|
6 |
+
|
7 |
+
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
|
8 |
+
|
9 |
COPY . .
|
10 |
|
11 |
+
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
|
inference.py
DELETED
@@ -1,33 +0,0 @@
|
|
1 |
-
from huggingface_hub import InferenceClient
|
2 |
-
from fastapi import FastAPI
|
3 |
-
from pydantic import BaseModel
|
4 |
-
|
5 |
-
client = InferenceClient(model="meta-llama/Meta-Llama-3-8B-Instruct")
|
6 |
-
|
7 |
-
class Message(BaseModel):
|
8 |
-
role: str
|
9 |
-
content: str
|
10 |
-
|
11 |
-
history = [
|
12 |
-
{
|
13 |
-
"role": "assistant",
|
14 |
-
"content": "You are a helpful assistant called Awerbot. You will help visitors of a website with their inquiries, Awersoft is a Software Development company, to contact to Awersoft the visitors should use contact@awersoft.com",
|
15 |
-
}]
|
16 |
-
|
17 |
-
|
18 |
-
app = FastAPI()
|
19 |
-
|
20 |
-
@app.post('/chat')
|
21 |
-
async def chat(messages: list[Message]):
|
22 |
-
|
23 |
-
for message in messages:
|
24 |
-
history.append({'role':'user', 'content':message.content})
|
25 |
-
|
26 |
-
output = client.chat_completion(
|
27 |
-
model="meta-llama/Meta-Llama-3-8B-Instruct",
|
28 |
-
messages=history,
|
29 |
-
max_tokens=100,
|
30 |
-
stop="assistant"
|
31 |
-
)
|
32 |
-
|
33 |
-
return output
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main.py
CHANGED
@@ -1,53 +1,33 @@
|
|
1 |
-
import torch
|
2 |
-
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
|
3 |
from fastapi import FastAPI
|
4 |
from pydantic import BaseModel
|
5 |
-
torch.random.manual_seed(0)
|
|
|
6 |
|
7 |
class Message(BaseModel):
|
8 |
role: str
|
9 |
content: str
|
10 |
|
11 |
-
model = AutoModelForCausalLM.from_pretrained(
|
12 |
-
"microsoft/Phi-3-mini-4k-instruct",
|
13 |
-
device_map="cpu",
|
14 |
-
torch_dtype="auto",
|
15 |
-
trust_remote_code=True,
|
16 |
-
)
|
17 |
-
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
|
18 |
-
|
19 |
history = [
|
20 |
-
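{"role": "assistant", "content": "You are a helpful digital assistant. Please provide safe, ethical and accurate information to the user."},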
|
21 |
-
]
|
22 |
-
|
23 |
-
pipe = pipeline(
|
24 |
-
"text-generation",
|
25 |
-
model=model,
|
26 |
-
tokenizer=tokenizer,
|
27 |
-
)
|
28 |
|
29 |
-
generation_args = {
|
30 |
-
"max_new_tokens": 500,
|
31 |
-
"return_full_text": False,
|
32 |
-
"temperature": 0.0,
|
33 |
-
"do_sample": False,
|
34 |
-
}
|
35 |
|
|
|
36 |
|
37 |
-
def chat(messages: list[Message]) -> str:
|
|
|
38 |
|
39 |
for message in messages:
|
40 |
history.append({'role':'user', 'content':message.content})
|
41 |
-
|
42 |
-
generated_text = pipe(history, **generation_args)
|
43 |
-
|
44 |
-
print('Generated Text', generated_text)
|
45 |
-
history.append({'role':'assistant', 'content':generated_text[0]['generated_text']})
|
46 |
-
return generated_text[0]['generated_text']
|
47 |
-
|
48 |
-
app = FastAPI()
|
49 |
-
|
50 |
-
@app.post('/chat')
|
51 |
-
async def root(messages: list[Message]):
|
52 |
-
return chat(messages)
|
53 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from huggingface_hub import InferenceClient
|
|
|
2 |
from fastapi import FastAPI
|
3 |
from pydantic import BaseModel
|
4 |
+
|
5 |
+
client = InferenceClient(model="meta-llama/Meta-Llama-3-8B-Instruct")
|
6 |
|
7 |
class Message(BaseModel):
|
8 |
role: str
|
9 |
content: str
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
history = [
|
12 |
+
{
|
13 |
+
"role": "assistant",
|
14 |
+
"content": "You are a helpful assistant called Awerbot. You will help visitors of a website with their inquiries, Awersoft is a Software Development company, to contact to Awersoft the visitors should use contact@awersoft.com",
|
15 |
+
}]
|
|
|
|
|
|
|
|
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
+
app = FastAPI()
|
19 |
|
20 |
+
@app.post('/chat')
|
21 |
+
async def chat(messages: list[Message]):
|
22 |
|
23 |
for message in messages:
|
24 |
history.append({'role':'user', 'content':message.content})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
|
26 |
+
output = client.chat_completion(
|
27 |
+
model="meta-llama/Meta-Llama-3-8B-Instruct",
|
28 |
+
messages=history,
|
29 |
+
max_tokens=100,
|
30 |
+
stop="assistant"
|
31 |
+
)
|
32 |
+
|
33 |
+
return output
|
main_.py
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
|
3 |
+
from fastapi import FastAPI
|
4 |
+
from pydantic import BaseModel
|
5 |
+
torch.random.manual_seed(0)
|
6 |
+
|
7 |
+
class Message(BaseModel):
|
8 |
+
role: str
|
9 |
+
content: str
|
10 |
+
|
11 |
+
model = AutoModelForCausalLM.from_pretrained(
|
12 |
+
"microsoft/Phi-3-mini-4k-instruct",
|
13 |
+
device_map="cpu",
|
14 |
+
torch_dtype="auto",
|
15 |
+
trust_remote_code=True,
|
16 |
+
)
|
17 |
+
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
|
18 |
+
|
19 |
+
history = [
|
20 |
+
{"role": "assistant", "content": "You are a helpful digital assistant. Please provide safe, ethical and accurate information to the user."},
|
21 |
+
]
|
22 |
+
|
23 |
+
pipe = pipeline(
|
24 |
+
"text-generation",
|
25 |
+
model=model,
|
26 |
+
tokenizer=tokenizer,
|
27 |
+
)
|
28 |
+
|
29 |
+
generation_args = {
|
30 |
+
"max_new_tokens": 500,
|
31 |
+
"return_full_text": False,
|
32 |
+
"temperature": 0.0,
|
33 |
+
"do_sample": False,
|
34 |
+
}
|
35 |
+
|
36 |
+
|
37 |
+
def chat(messages: list[Message]) -> str:
|
38 |
+
|
39 |
+
for message in messages:
|
40 |
+
history.append({'role':'user', 'content':message.content})
|
41 |
+
|
42 |
+
generated_text = pipe(history, **generation_args)
|
43 |
+
|
44 |
+
print('Generated Text', generated_text)
|
45 |
+
history.append({'role':'assistant', 'content':generated_text[0]['generated_text']})
|
46 |
+
return generated_text[0]['generated_text']
|
47 |
+
|
48 |
+
app = FastAPI()
|
49 |
+
|
50 |
+
@app.post('/chat')
|
51 |
+
async def root(messages: list[Message]):
|
52 |
+
return chat(messages)
|
53 |
+
|
requirements.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
huggingface_hub
|