codermate committed on
Commit 4bba7a5
1 Parent(s): 0b2e4eb
Files changed (5)
  1. Dockerfile +5 -1
  2. inference.py +0 -33
  3. main.py +18 -38
  4. main_.py +53 -0
  5. requirements.txt +1 -0
Dockerfile CHANGED
@@ -2,6 +2,10 @@ FROM python:3.9
 
 WORKDIR /code
 
+COPY ./requirements.txt /code/requirements.txt
+
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
 COPY . .
 
-CMD ["uvicorn", "inference:app", "--host", "0.0.0.0", "--port", "7860"]
+CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
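Copying requirements.txt on its own layer before the rest of the source is the standard Docker caching move: rebuilds that only touch application code no longer re-run pip install. Assembled from the hunk above, the Dockerfile after this commit reads:

FROM python:3.9

WORKDIR /code

# Dependency list is copied alone so the pip layer caches across code-only rebuilds
COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

COPY . .

CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]

One thing worth verifying: "app.main:app" tells uvicorn to import main from an app/ package, yet this commit leaves main.py at the repository root, where "main:app" would be the matching module path.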
inference.py DELETED
@@ -1,33 +0,0 @@
-from huggingface_hub import InferenceClient
-from fastapi import FastAPI
-from pydantic import BaseModel
-
-client = InferenceClient(model="meta-llama/Meta-Llama-3-8B-Instruct")
-
-class Message(BaseModel):
-    role: str
-    content: str
-
-history = [
-    {
-        "role": "assistant",
-        "content": "You are a helpful assistant called Awerbot. You will help visitors of a website with their inquiries, Awersoft is a Software Development company, to contact to Awersoft the visitors should use contact@awersoft.com",
-    }]
-
-
-app = FastAPI()
-
-@app.post('/chat')
-async def chat(messages: list[Message]):
-
-    for message in messages:
-        history.append({'role':'user', 'content':message.content})
-
-    output = client.chat_completion(
-        model="meta-llama/Meta-Llama-3-8B-Instruct",
-        messages=history,
-        max_tokens=100,
-        stop="assistant"
-    )
-
-    return output
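Note that inference.py is not really gone: its 33 lines reappear verbatim as the new main.py below, so together with the main.py hunk this deletion amounts to a rename.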
main.py CHANGED
@@ -1,53 +1,33 @@
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+from huggingface_hub import InferenceClient
 from fastapi import FastAPI
 from pydantic import BaseModel
-torch.random.manual_seed(0)
+
+client = InferenceClient(model="meta-llama/Meta-Llama-3-8B-Instruct")
 
 class Message(BaseModel):
     role: str
     content: str
 
-model = AutoModelForCausalLM.from_pretrained(
-    "microsoft/Phi-3-mini-4k-instruct",
-    device_map="cpu",
-    torch_dtype="auto",
-    trust_remote_code=True,
-)
-tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
-
 history = [
-    {"role": "assistant", "content": "You are a helpful digital assistant. Please provide safe, ethical and accurate information to the user."},
-]
-
-pipe = pipeline(
-    "text-generation",
-    model=model,
-    tokenizer=tokenizer,
-)
-
-generation_args = {
-    "max_new_tokens": 500,
-    "return_full_text": False,
-    "temperature": 0.0,
-    "do_sample": False,
-}
-
-
-def chat(messages: list[Message]) -> str:
-
+    {
+        "role": "assistant",
+        "content": "You are a helpful assistant called Awerbot. You will help visitors of a website with their inquiries, Awersoft is a Software Development company, to contact to Awersoft the visitors should use contact@awersoft.com",
+    }]
+
+
+app = FastAPI()
+
+@app.post('/chat')
+async def chat(messages: list[Message]):
+
     for message in messages:
         history.append({'role':'user', 'content':message.content})
-
-    generated_text = pipe(history, **generation_args)
-
-    print('Generated Text', generated_text)
-    history.append({'role':'assistant', 'content':generated_text[0]['generated_text']})
-    return generated_text[0]['generated_text']
-
-app = FastAPI()
-
-@app.post('/chat')
-async def root(messages: list[Message]):
-    return chat(messages)
+
+    output = client.chat_completion(
+        model="meta-llama/Meta-Llama-3-8B-Instruct",
+        messages=history,
+        max_tokens=100,
+        stop="assistant"
+    )
+
+    return output
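With main.py now proxying to the hosted Llama 3 endpoint through InferenceClient, a minimal sketch of exercising the route from outside (assuming the Space listens on port 7860 as in the Dockerfile; requests is a hypothetical client-side dependency, not part of this repo):

import requests

# The route accepts a JSON array of Message objects ({role, content}).
messages = [{"role": "user", "content": "How do I contact Awersoft?"}]

resp = requests.post("http://localhost:7860/chat", json=messages)
resp.raise_for_status()

# The endpoint returns the chat_completion payload unmodified, so the
# reply text sits at choices[0]["message"]["content"].
print(resp.json()["choices"][0]["message"]["content"])

Two quirks carried over from inference.py are worth flagging: history is module-level state, so every visitor's messages accumulate into one shared conversation, and the Awerbot system prompt is filed under the assistant role rather than system.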
main_.py ADDED
@@ -0,0 +1,53 @@
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+from fastapi import FastAPI
+from pydantic import BaseModel
+torch.random.manual_seed(0)
+
+class Message(BaseModel):
+    role: str
+    content: str
+
+model = AutoModelForCausalLM.from_pretrained(
+    "microsoft/Phi-3-mini-4k-instruct",
+    device_map="cpu",
+    torch_dtype="auto",
+    trust_remote_code=True,
+)
+tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
+
+history = [
+    {"role": "assistant", "content": "You are a helpful digital assistant. Please provide safe, ethical and accurate information to the user."},
+]
+
+pipe = pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
+)
+
+generation_args = {
+    "max_new_tokens": 500,
+    "return_full_text": False,
+    "temperature": 0.0,
+    "do_sample": False,
+}
+
+
+def chat(messages: list[Message]) -> str:
+
+    for message in messages:
+        history.append({'role':'user', 'content':message.content})
+
+    generated_text = pipe(history, **generation_args)
+
+    print('Generated Text', generated_text)
+    history.append({'role':'assistant', 'content':generated_text[0]['generated_text']})
+    return generated_text[0]['generated_text']
+
+app = FastAPI()
+
+@app.post('/chat')
+async def root(messages: list[Message]):
+    return chat(messages)
+
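main_.py is the previous main.py (the local Phi-3 pipeline) parked under a new name rather than deleted. If it is ever revived, a smoke test of its chat path might look like the sketch below; note that importing the module downloads and loads the model on CPU, which is slow:

# Hypothetical smoke test for the archived Phi-3 path.
from main_ import Message, chat

reply = chat([Message(role="user", content="What is 2 + 2?")])
print(reply)  # the generated assistant text, up to 500 new tokens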
requirements.txt ADDED
@@ -0,0 +1 @@
+huggingface_hub
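The new main.py also imports fastapi and pydantic, and the Dockerfile CMD launches uvicorn, none of which this one-line requirements.txt installs; unless they arrive in a later commit, the pip step leaves them missing from the image. A fuller file would presumably read:

huggingface_hub
fastapi
uvicorn[standard]
pydantic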