codermate committed
Commit 493aac0
1 Parent(s): af84479
Dockerfile ADDED
@@ -0,0 +1,13 @@
+ FROM python:3.9
+
+ WORKDIR /code
+
+ # Assumed fix: the commit adds no requirements.txt, so the runtime
+ # dependencies for inference.py are installed explicitly here.
+ RUN pip install fastapi uvicorn huggingface_hub
+
+ COPY . .
+
+ # inference.py sits at the repository root, so the module path is
+ # "inference:app" rather than "app.inference:app".
+ CMD ["uvicorn", "inference:app", "--host", "0.0.0.0", "--port", "7860"]
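Assuming Docker is available locally, building and running would look something like `docker build -t awerbot .` followed by `docker run -p 7860:7860 awerbot` (the image tag is hypothetical); port 7860 matches the uvicorn CMD above.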
__pycache__/inference.cpython-310.pyc ADDED
Binary file (1.16 kB)
 
__pycache__/main.cpython-310.pyc ADDED
Binary file (1.46 kB)
 
__pycache__/test.cpython-310.pyc ADDED
Binary file (1.58 kB)
 
inference.py ADDED
@@ -0,0 +1,36 @@
+ from huggingface_hub import InferenceClient
+ from fastapi import FastAPI
+ from pydantic import BaseModel
+
+ client = InferenceClient(model="meta-llama/Meta-Llama-3-8B-Instruct")
+
+ class Message(BaseModel):
+     role: str
+     content: str
+
+ # Instructions for the model belong in a "system" message, not an
+ # "assistant" one.
+ history = [
+     {
+         "role": "system",
+         "content": "You are a helpful assistant called Awerbot. You help visitors of a website with their inquiries. Awersoft is a software development company; to contact Awersoft, visitors should use contact@awersoft.com.",
+     }]
+
+
+ app = FastAPI()
+
+ @app.post('/chat')
+ async def chat(messages: list[Message]):
+     # NOTE: history is module-level state, shared and mutated across all
+     # requests; the assistant's replies are never appended back to it here.
+     for message in messages:
+         history.append({'role': 'user', 'content': message.content})
+
+     output = client.chat_completion(
+         model="meta-llama/Meta-Llama-3-8B-Instruct",
+         messages=history,
+         max_tokens=100,
+         stop=["assistant"],  # stop sequences are passed as a list of strings
+     )
+
+     return output
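For a quick sanity check of the /chat route, a minimal client sketch (assuming the service is reachable on localhost:7860 per the Dockerfile, and that the `requests` package is installed; the payload shape follows the `Message` model above):

```python
import requests

# Hypothetical smoke test for the /chat endpoint defined in inference.py.
# Host and port are assumptions taken from the uvicorn CMD in the Dockerfile.
payload = [{"role": "user", "content": "How do I contact Awersoft?"}]

response = requests.post("http://localhost:7860/chat", json=payload)
response.raise_for_status()
print(response.json())
```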
main.py ADDED
@@ -0,0 +1,57 @@
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+ from fastapi import FastAPI
+ from pydantic import BaseModel
+
+ torch.random.manual_seed(0)
+
+ class Message(BaseModel):
+     role: str
+     content: str
+
+ model = AutoModelForCausalLM.from_pretrained(
+     "microsoft/Phi-3-mini-4k-instruct",
+     device_map="cpu",
+     torch_dtype="auto",
+     trust_remote_code=True,
+ )
+ tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
+
+ # Instructions for the model belong in a "system" message, not an
+ # "assistant" one.
+ history = [
+     {"role": "system", "content": "You are a helpful digital assistant. Please provide safe, ethical and accurate information to the user."},
+ ]
+
+ pipe = pipeline(
+     "text-generation",
+     model=model,
+     tokenizer=tokenizer,
+ )
+
+ generation_args = {
+     "max_new_tokens": 500,
+     "return_full_text": False,
+     "temperature": 0.0,  # ignored while do_sample is False (greedy decoding)
+     "do_sample": False,
+ }
+
+
+ def chat(messages: list[Message]) -> str:
+     # NOTE: history is module-level state, shared and mutated across requests.
+     for message in messages:
+         history.append({'role': 'user', 'content': message.content})
+
+     generated_text = pipe(history, **generation_args)
+
+     print('Generated Text', generated_text)
+     history.append({'role': 'assistant', 'content': generated_text[0]['generated_text']})
+     return generated_text[0]['generated_text']
+
+ app = FastAPI()
+
+ @app.post('/chat')
+ async def root(messages: list[Message]):
+     # chat() blocks the event loop while the model generates; a plain `def`
+     # route would let FastAPI run it in a threadpool instead.
+     return chat(messages)
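Because `history` above is module-level, every request appends to one shared conversation. If per-request isolation is wanted, a request-scoped variant is a small change; a minimal sketch reusing `Message`, `pipe`, `generation_args`, and the system message defined in main.py (function name hypothetical):

```python
# Hypothetical request-scoped variant of chat(): builds a fresh message list
# per call instead of mutating the shared module-level history.
def chat_stateless(messages: list[Message]) -> str:
    convo = history[:1]  # keep only the system message
    convo += [{"role": m.role, "content": m.content} for m in messages]
    out = pipe(convo, **generation_args)
    return out[0]["generated_text"]
```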
test.py ADDED
(contents identical, line for line, to main.py above)