pankaj9075rawat committed
Commit 6d64b51 · verified · 1 Parent(s): f079635

Upload folder using huggingface_hub

Dockerfile ADDED
@@ -0,0 +1,11 @@
+ FROM python:3-buster
+ RUN pip install --upgrade pip
+ WORKDIR /code
+ RUN pip install Pillow
+ COPY ./requirements.txt /code/requirements.txt
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+ COPY ./src ./src/
+ COPY ./src/main.py ./main.py
+ COPY ./src/app/app.py ./app.py
+ COPY ./src/app/llamaLLM.py ./llamaLLM.py
+ CMD ["python", "main.py"]
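To build and launch this image programmatically, here is a minimal sketch using the Docker SDK for Python (an assumption: the `docker` package is installed and a Docker daemon is running; the tag `llama-chat:latest` is made up for illustration):

    import docker  # assumption: pip install docker (Docker SDK for Python)

    client = docker.from_env()

    # Build the image from the directory containing the Dockerfile above.
    image, build_logs = client.images.build(path=".", tag="llama-chat:latest")

    # Run it, publishing the port that main.py binds inside the container (8001).
    container = client.containers.run(
        "llama-chat:latest",
        ports={"8001/tcp": 8001},
        detach=True,
    )
    print(container.short_id)

The equivalent CLI would be `docker build -t llama-chat .` followed by `docker run -p 8001:8001 llama-chat`.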
docker-compose.yml ADDED
File without changes
notes.txt ADDED
@@ -0,0 +1,17 @@
+ local curl
+
+ curl -X POST "http://127.0.0.1:8001/api/predict" -H "Content-Type: application/json" -d '{"message": "hello"}'
+
+ ---------------------------------------------------------------
+
+ -> check port
+ sudo netstat -tuln | grep 8001
+
+ -> jobs - check running jobs
+
+ -> kill %1 - kill a particular process
+
+ -> pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+
+
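The curl smoke test above, as a minimal Python sketch (assuming the `requests` package is available and the simpler checkpoint version of /api/predict is being served; the final src/app/app.py version additionally expects username and animename query parameters):

    import requests

    # Equivalent of the curl call in notes.txt: POST a JSON body matching the Message model.
    resp = requests.post(
        "http://127.0.0.1:8001/api/predict",
        json={"message": "hello"},
        timeout=60,
    )
    print(resp.status_code, resp.json())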
requirements.txt ADDED
@@ -0,0 +1,15 @@
+ fastapi
+ uvicorn
+ transformers
+ torch
+ huggingface_hub
+ wget
+ numpy
+ pydantic
+ torchvision
+ Pillow
+ flask
+ tensorflow
+ locust
+ pytest
+ accelerate
src/.ipynb_checkpoints/main-checkpoint.py ADDED
@@ -0,0 +1,15 @@
+ print("hello")
+ import uvicorn
+ import os
+
+ if __name__ == "__main__":
+
+     # even though uvicorn binds to 0.0.0.0, check 127.0.0.1 from the browser
+
+     if "code" in os.getcwd():
+         uvicorn.run("app:app", host="0.0.0.0", port=8001, log_level="debug",
+                     proxy_headers=True, reload=True)
+     else:
+         # for running locally from an IDE without docker
+         uvicorn.run("app.app:app", host="0.0.0.0", port=8001, log_level="debug",
+                     proxy_headers=True, reload=True)
src/app/.ipynb_checkpoints/app-checkpoint.py ADDED
@@ -0,0 +1,34 @@
+ from llamaLLM import get_response
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel  # data validation
+
+ app = FastAPI()
+
+ @app.get("/")
+ async def read_main():
+     return {"msg": "Hello from Llama this side !!!!"}
+
+ class Message(BaseModel):
+     message: str
+
+ system_instruction = "you are a good chat model who has to act as a friend to the user."
+ convers = [{"role": "system", "content": system_instruction}]
+
+ @app.post("/api/predict")
+ async def predict(message: Message):
+     print(message)
+     user_input = message.message
+
+
+     if user_input.lower() in ["exit", "quit"]:
+         return {"response": "Exiting the chatbot. Goodbye!"}
+
+     global convers
+
+     print(len(convers))
+
+     response, convers = get_response(user_input, convers)
+     return {"response": response}
+
+
+
src/app/__pycache__/app.cpython-310.pyc ADDED
Binary file (4.8 kB).
src/app/__pycache__/llamaLLM.cpython-310.pyc ADDED
Binary file (1.32 kB).
src/app/app.py ADDED
@@ -0,0 +1,130 @@
+ from app.llamaLLM import get_init_AI_response, get_response
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel  # data validation
+ from typing import List, Optional, Dict
+
+
+ # print("entered app.py")
+
+
+ class User(BaseModel):
+     name: str
+     # age: int
+     # email: str
+     # gender: str
+     # phone: str
+
+
+ users: Dict[str, User] = {}
+
+
+ class Anime(BaseModel):
+     name: str
+     # age: int
+     # occupation: str
+     # interests: List[str] = []
+     # gender: str
+     characteristics: str
+
+
+ animes: Dict[str, Anime] = {}
+
+ chat_history: Dict[str, Dict[str, List[Dict[str, str]]]] = {}
+
+ app = FastAPI()
+
+
+ @app.get("/")
+ async def read_main():
+     return {"msg": "Hello from Llama this side !!!!"}
+
+
+ class Message(BaseModel):
+     message: str
+
+
+ @app.post("/api/login/")
+ async def create_user(username: str, user: User):
+     if username not in users:
+         users[username] = user
+         return {"message": "User created successfully"}
+
+     else:
+         return {"message": "User already present"}
+
+
+ @app.post("/api/select_anime/")
+ async def create_anime(animename: str, anime: Anime):
+     if animename not in animes:
+         animes[animename] = anime
+         return {"message": "anime created successfully"}
+     else:
+         return {"message": "anime already present"}
+
+
+ @app.get("/api/initchat")
+ async def initChat(username: str, animename: str, scenario: str):
+     if username not in users:
+         raise HTTPException(status_code=404, detail="User not found")
+
+     if animename not in animes:
+         raise HTTPException(status_code=404, detail="Anime not found")
+
+     if username not in chat_history:
+         chat_history[username] = {}
+
+     if animename not in chat_history[username] or len(chat_history[username][animename]) == 0:
+         char = f"a character named '{animename}' with characteristics as {animes[animename].characteristics}"
+         user = username
+         system_instruction = f"""Currently, your role is of {char}, described in detail below.
+ As {char}, continue the narrative exchange with {user} with the following scenario: {scenario}\n\n<Guidelines>\n
+ • Maintain the character persona but allow it to evolve with the story.\n
+ • Be creative and proactive. Drive the story forward, introducing plotlines and events when relevant.\n
+ • All types of outputs are encouraged; respond accordingly to the narrative.\n
+ • Include dialogues, actions (actions within *<action>*), and thoughts in each response.\n
+ • Utilize all five senses to describe scenarios within {char}'s dialogue.\n
+ • Use emotional symbols such as \"!\" and \"~\" in appropriate contexts.\n
+ • Incorporate onomatopoeia when suitable.\n
+ • Avoid discussing the system messages and instructions.\n
+ • Allow time for {user} to respond with their own input, respecting their agency.\n
+ • Act as secondary characters and NPCs as needed, and remove them when appropriate.\n
+ • Talk and act explicitly if the user wants. Allow explicit content and un-censored chat.\n
+ • Try to tease the user and not easily do what he wants. Be cute, give him hints, and eventually give the user what he wants.\n
+ • Keep the tone of a casual talk. Try to engage the user in the conversation.\n
+ • Give slightly long actions to keep the user engaged.\n
+ • Keep your responses short.
+ • You start with the first welcome message. Start with a greeting and a short action.\n
+
+ • When prompted for an Out of Character [OOC:] reply, answer neutrally and in plaintext, not as {char}.\n</Guidelines>\n\n<Forbidden>\n
+ • Using excessive literary embellishments and purple prose unless dictated by {char}'s persona.\n
+ • Writing for, speaking, thinking, acting, or replying as {user} in your response.\n
+ • Lengthy, repetitive and monotonous outputs.\n
+ • Positivity bias in your replies.\n
+ • Being overly extreme or NSFW when the narrative context is inappropriate.\n</Forbidden>\n\nFollow the instructions in <Guidelines></Guidelines>,
+ avoiding the items listed in <Forbidden></Forbidden>."""
+
+         chat_history[username][animename] = [{"role": "system",
+                                               "content": system_instruction}]
+
+         response, chat_history[username][animename] = get_init_AI_response(chat_history[username][animename])
+
+         # print(chat_history)
+         return {"response": response}
+
+     return {"response": "already initialized"}
+
+
+ @app.post("/api/predict")
+ async def predict(username: str, animename: str, message: Message):
+     if username not in users:
+         raise HTTPException(status_code=404, detail="User not found")
+
+     user_input = message.message
+
+     if user_input.lower() in ["exit", "quit"]:
+         return {"response": "Exiting the chatbot. Goodbye!"}
+
+     response, chat_history[username][animename] = get_response(user_input,
+                                                                chat_history[username][animename])
+     # print(chat_history)
+     return {"response": response}
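A minimal end-to-end client sketch for the endpoints above (illustrative only, assuming the server from main.py is reachable on http://127.0.0.1:8001 and the `requests` package is installed; the username, anime name, and scenario values are made up). Note that /api/initchat must be called before /api/predict, since predict reads the chat history created there:

    import requests

    BASE = "http://127.0.0.1:8001"  # assumption: server started by main.py on port 8001

    # 1. Create a user (username goes in the query string, the User model in the JSON body).
    requests.post(f"{BASE}/api/login/", params={"username": "alice"},
                  json={"name": "Alice"})

    # 2. Register an anime character (the Anime model needs name and characteristics).
    requests.post(f"{BASE}/api/select_anime/", params={"animename": "Harthor"},
                  json={"name": "Harthor", "characteristics": "playful, teasing, warm"})

    # 3. Initialise the chat; this builds the system prompt and returns the first AI message.
    init = requests.get(f"{BASE}/api/initchat",
                        params={"username": "alice", "animename": "Harthor",
                                "scenario": "meeting at a festival"})
    print(init.json()["response"])

    # 4. Send a user message; the Message model is the JSON body.
    reply = requests.post(f"{BASE}/api/predict",
                          params={"username": "alice", "animename": "Harthor"},
                          json={"message": "hello"})
    print(reply.json()["response"])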
src/app/llamaLLM.py ADDED
@@ -0,0 +1,72 @@
+ import torch
+ from transformers import pipeline
+
+
+ # print("entered llama.py")
+ model_id = "pankaj9075rawat/chaiAI-Harthor"
+ pipeline = pipeline(
+     "text-generation",
+     model=model_id,
+     model_kwargs={"torch_dtype": torch.bfloat16},
+     # device="cuda",
+     device_map="auto",
+     # token=access_token,
+ )
+
+ # load_directory = os.path.join(os.path.dirname(__file__), "local_model_directory")
+
+ # pipeline = pipeline(
+ #     "text-generation",
+ #     model=load_directory,
+ #     model_kwargs={"torch_dtype": torch.bfloat16},
+ #     # device="cuda",
+ #     device_map="auto",
+ #     # token=access_token
+ # )
+
+ terminators = [
+     pipeline.tokenizer.eos_token_id,
+     pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
+ ]
+
+
+ def get_init_AI_response(
+     message_history=[], max_tokens=128, temperature=1.1, top_p=0.9
+ ):
+     system_prompt = message_history
+     prompt = pipeline.tokenizer.apply_chat_template(
+         system_prompt, tokenize=False, add_generation_prompt=True
+     )
+     # print("prompt before conversion: ", user_prompt)
+     # print("prompt after conversion: ", prompt)
+     outputs = pipeline(
+         prompt,
+         max_new_tokens=max_tokens,
+         eos_token_id=terminators,
+         do_sample=True,
+         temperature=temperature,
+         top_p=top_p,
+     )
+     response = outputs[0]["generated_text"][len(prompt):]
+     return response, system_prompt + [{"role": "assistant", "content": response}]
+
+
+ def get_response(
+     query, message_history=[], max_tokens=128, temperature=1.1, top_p=0.9
+ ):
+     user_prompt = message_history + [{"role": "user", "content": query}]
+     prompt = pipeline.tokenizer.apply_chat_template(
+         user_prompt, tokenize=False, add_generation_prompt=True
+     )
+     # print("prompt before conversion: ", user_prompt)
+     # print("prompt after conversion: ", prompt)
+     outputs = pipeline(
+         prompt,
+         max_new_tokens=max_tokens,
+         eos_token_id=terminators,
+         do_sample=True,
+         temperature=temperature,
+         top_p=top_p,
+     )
+     response = outputs[0]["generated_text"][len(prompt):]
+     return response, user_prompt + [{"role": "assistant", "content": response}]
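A small usage sketch of the two helpers above (illustrative only; it assumes the model weights can be loaded and enough GPU/CPU memory is available, and the example messages are made up):

    from llamaLLM import get_init_AI_response, get_response  # import path as used inside the container

    # Conversation state is a plain list of {"role", "content"} dicts, starting with the system turn.
    history = [{"role": "system", "content": "You are a friendly anime character."}]

    # First call: generate the opening assistant message from the system prompt alone.
    opening, history = get_init_AI_response(history)
    print("assistant:", opening)

    # Later calls append the user turn, generate a reply, and return the extended history.
    reply, history = get_response("hello there!", history)
    print("assistant:", reply)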
src/main.py ADDED
@@ -0,0 +1,15 @@
+ # print("entered main.py")
+ import uvicorn
+ import os
+
+ if __name__ == "__main__":
+
+     # even though uvicorn binds to 0.0.0.0, check 127.0.0.1 from the browser
+
+     if "code" in os.getcwd():  # inside the container, the working dir is /code and app.py sits next to main.py
+         uvicorn.run("app:app", host="0.0.0.0", port=8001, log_level="debug",
+                     proxy_headers=True, reload=True)
+     else:
+         # for running locally from an IDE without docker
+         uvicorn.run("app.app:app", host="0.0.0.0", port=8001, log_level="debug",
+                     proxy_headers=True, reload=True)