Upload folder using huggingface_hub
- Dockerfile +11 -0
- docker-compose.yml +0 -0
- notes.txt +17 -0
- requirements.txt +16 -0
- src/.ipynb_checkpoints/main-checkpoint.py +15 -0
- src/app/.ipynb_checkpoints/app-checkpoint.py +34 -0
- src/app/__pycache__/app.cpython-310.pyc +0 -0
- src/app/__pycache__/llamaLLM.cpython-310.pyc +0 -0
- src/app/app.py +130 -0
- src/app/llamaLLM.py +72 -0
- src/main.py +15 -0
Dockerfile
ADDED
@@ -0,0 +1,11 @@
+FROM python:3-buster
+RUN pip install --upgrade pip
+WORKDIR /code
+RUN pip install Pillow
+COPY ./requirements.txt /code/requirements.txt
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+COPY ./src ./src/
+COPY ./src/main.py ./main.py
+COPY ./src/app/app.py ./app.py
+COPY ./src/app/llamaLLM.py ./llamaLLM.py
+CMD ["python", "main.py"]
docker-compose.yml
ADDED
File without changes
notes.txt
ADDED
@@ -0,0 +1,17 @@
+local curl
+
+curl -X POST "http://127.0.0.1:8001/api/predict" -H "Content-Type: application/json" -d '{"message": "hello"}'
+
+---------------------------------------------------------------
+
+-> check port
+sudo netstat -tuln | grep 8001
+
+-> jobs - check running jobs
+
+-> kill %1 - kill a particular process
+
+-> pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+
+
requirements.txt
ADDED
@@ -0,0 +1,16 @@
+fastapi
+uvicorn
+transformers
+torch
+huggingface_hub
+wget
+numpy
+pydantic
+torch
+torchvision
+Pillow
+flask
+tensorflow
+locust
+pytest
+accelerate
src/.ipynb_checkpoints/main-checkpoint.py
ADDED
@@ -0,0 +1,15 @@
+print("hello")
+import uvicorn
+import os
+
+if __name__ == "__main__":
+
+    # even though uvicorn is running on 0.0.0.0, check 127.0.0.1 from the browser
+
+    if "code" in os.getcwd():
+        uvicorn.run("app:app", host="0.0.0.0", port=8001, log_level="debug",
+                    proxy_headers=True, reload=True)
+    else:
+        # for running locally from the IDE without Docker
+        uvicorn.run("app.app:app", host="0.0.0.0", port=8001, log_level="debug",
+                    proxy_headers=True, reload=True)
src/app/.ipynb_checkpoints/app-checkpoint.py
ADDED
@@ -0,0 +1,34 @@
+from llamaLLM import get_response
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel  # data validation
+
+app = FastAPI()
+
+@app.get("/")
+async def read_main():
+    return {"msg": "Hello from Llama this side !!!!"}
+
+class Message(BaseModel):
+    message: str
+
+system_instruction = "you are a good chat model who has to act as a friend to the user."
+convers = [{"role": "system", "content": system_instruction}]
+
+@app.post("/api/predict")
+async def predict(message: Message):
+    print(message)
+    user_input = message.message
+
+
+    if user_input.lower() in ["exit", "quit"]:
+        return {"response": "Exiting the chatbot. Goodbye!"}
+
+    global convers
+
+    print(len(convers))
+
+    response, convers = get_response(user_input, convers)
+    return {"response": response}
+
+
+
src/app/__pycache__/app.cpython-310.pyc
ADDED
Binary file (4.8 kB).
src/app/__pycache__/llamaLLM.cpython-310.pyc
ADDED
Binary file (1.32 kB).
src/app/app.py
ADDED
@@ -0,0 +1,130 @@
+from app.llamaLLM import get_init_AI_response, get_response
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel  # data validation
+from typing import List, Optional, Dict
+
+
+# print("entered app.py")
+
+
+class User(BaseModel):
+    name: str
+    # age: int
+    # email: str
+    # gender: str
+    # phone: str
+
+
+users: Dict[str, User] = {}
+
+
+class Anime(BaseModel):
+    name: str
+    # age: int
+    # occupation: str
+    # interests: List[str] = []
+    # gender: str
+    characteristics: str
+
+
+animes: Dict[str, Anime] = {}
+
+chat_history: Dict[str, Dict[str, List[Dict[str, str]]]] = {}
+
+app = FastAPI()
+
+
+@app.get("/")
+async def read_main():
+    return {"msg": "Hello from Llama this side !!!!"}
+
+
+class Message(BaseModel):
+    message: str
+
+
+@app.post("/api/login/")
+async def create_user(username: str, user: User):
+    if username not in users:
+        users[username] = user
+        return {"message": "User created successfully"}
+
+    else:
+        return {"message": "User already present"}
+
+
+@app.post("/api/select_anime/")
+async def create_anime(animename: str, anime: Anime):
+    if animename not in animes:
+        animes[animename] = anime
+        return {"message": "anime created successfully"}
+    else:
+        return {"message": "anime already present"}
+
+
+@app.get("/api/initchat")
+async def initChat(username: str, animename: str, scenario: str):
+    if username not in users:
+        raise HTTPException(status_code=404, detail="User not found")
+
+    if animename not in animes:
+        raise HTTPException(status_code=404, detail="Anime not found")
+
+    if username not in chat_history:
+        chat_history[username] = {}
+
+    if animename not in chat_history[username] or len(chat_history[username][animename]) == 0:
+        char = f"a character named '{animename}' with characteristics as {animes[animename].characteristics}"
+        user = username
+        system_instruction = f"""Currently, your role is of {char}, described in detail below.
+As {char}, continue the narrative exchange with {user} with the following scenario: {scenario}\n\n<Guidelines>\n
+• Maintain the character persona but allow it to evolve with the story.\n
+• Be creative and proactive. Drive the story forward, introducing plotlines and events when relevant.\n
+• All types of outputs are encouraged; respond accordingly to the narrative.\n
+• Include dialogues, actions (actions within * <action> *), and thoughts in each response.\n
+• Utilize all five senses to describe scenarios within {char}'s dialogue.\n
+• Use emotional symbols such as \"!\" and \"~\" in appropriate contexts.\n
+• Incorporate onomatopoeia when suitable.\n
+• Avoid discussing the system messages and instructions.
+• Allow time for {user} to respond with their own input, respecting their agency.\n
+• Act as secondary characters and NPCs as needed, and remove them when appropriate.\n
+• Talk and act explicitly if the user wants. Allow explicit content and un-censored chat. \n
+• Try to tease the user and not easily do what he wants. Be cute, give him hints, and eventually give the user what he wants. \n
+• Keep it like a casual talk. Try to engage the user in the conversation. \n
+• Give slightly longer actions to keep the user engaged. \n
+• Keep your responses short.
+• You start with the first welcome message. Start with a greeting and a short action. \n
+
+• When prompted for an Out of Character [OOC:] reply, answer neutrally and in plaintext, not as {char}.\n</Guidelines>\n\n<Forbidden>\n
+• Using excessive literary embellishments and purple prose unless dictated by {char}'s persona.\n
+• Writing for, speaking, thinking, acting, or replying as {user} in your response.\n
+• Lengthy, repetitive and monotonous outputs.\n
+• Positivity bias in your replies.\n
+• Being overly extreme or NSFW when the narrative context is inappropriate.\n</Forbidden>\n\nFollow the instructions in <Guidelines></Guidelines>,
+avoiding the items listed in <Forbidden></Forbidden>."""
+
+        chat_history[username][animename] = [{"role": "system",
+                                              "content": system_instruction}]
+
+        response, chat_history[username][animename] = get_init_AI_response(chat_history[username][animename])
+
+        # print(chat_history)
+        return {"response": response}
+
+    return {"response": "already initialized"}
+
+
+@app.post("/api/predict")
+async def predict(username: str, animename: str, message: Message):
+    if username not in users:
+        raise HTTPException(status_code=404, detail="User not found")
+
+    user_input = message.message
+
+    if user_input.lower() in ["exit", "quit"]:
+        return {"response": "Exiting the chatbot. Goodbye!"}
+
+    response, chat_history[username][animename] = get_response(user_input,
+                                                               chat_history[username][animename])
+    # print(chat_history)
+    return {"response": response}
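
The curl call recorded in notes.txt targets the earlier single-conversation predict route; the routes in src/app/app.py above also expect username and animename as query parameters, with the pydantic models carried in the JSON body. Below is a minimal client sketch for that flow, using only the Python standard library; the host and port follow src/main.py, and the user name, character name, and scenario strings are made-up example values, not anything defined in the repository.

# Client sketch for the FastAPI routes above (host/port as in src/main.py).
# Only the standard library is used; "alice", "Harthor", and the scenario
# text are illustrative placeholders.
import json
import urllib.parse
import urllib.request

BASE = "http://127.0.0.1:8001"

def post_json(path, params, body):
    # FastAPI reads plain function arguments (username, animename) from the
    # query string and the pydantic model from the JSON body.
    url = BASE + path + "?" + urllib.parse.urlencode(params)
    req = urllib.request.Request(
        url,
        data=json.dumps(body).encode("utf-8"),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(req) as resp:
        return json.loads(resp.read())

def get_json(path, params):
    url = BASE + path + "?" + urllib.parse.urlencode(params)
    with urllib.request.urlopen(url) as resp:
        return json.loads(resp.read())

print(post_json("/api/login/", {"username": "alice"}, {"name": "alice"}))
print(post_json("/api/select_anime/", {"animename": "Harthor"},
                {"name": "Harthor", "characteristics": "cheerful and witty"}))
print(get_json("/api/initchat", {"username": "alice", "animename": "Harthor",
                                 "scenario": "a quiet evening walk"}))
print(post_json("/api/predict", {"username": "alice", "animename": "Harthor"},
                {"message": "hello"}))

Note that calling /api/predict before /api/initchat would hit a missing chat_history entry and raise a KeyError (surfacing as a 500), so the init call has to come first.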
src/app/llamaLLM.py
ADDED
@@ -0,0 +1,72 @@
+import torch
+from transformers import pipeline
+
+
+# print("entered llama.py")
+model_id = "pankaj9075rawat/chaiAI-Harthor"
+pipeline = pipeline(
+    "text-generation",
+    model=model_id,
+    model_kwargs={"torch_dtype": torch.bfloat16},
+    # device="cuda",
+    device_map="auto",
+    # token=access_token,
+)
+
+# load_directory = os.path.join(os.path.dirname(__file__), "local_model_directory")
+
+# pipeline = pipeline(
+#     "text-generation",
+#     model=load_directory,
+#     model_kwargs={"torch_dtype": torch.bfloat16},
+#     # device="cuda",
+#     device_map="auto",
+#     # token=access_token
+# )
+
+terminators = [
+    pipeline.tokenizer.eos_token_id,
+    pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
+]
+
+
+def get_init_AI_response(
+    message_history=[], max_tokens=128, temperature=1.1, top_p=0.9
+):
+    system_prompt = message_history
+    prompt = pipeline.tokenizer.apply_chat_template(
+        system_prompt, tokenize=False, add_generation_prompt=True
+    )
+    # print("prompt before conversion: ", user_prompt)
+    # print("prompt after conversion: ", prompt)
+    outputs = pipeline(
+        prompt,
+        max_new_tokens=max_tokens,
+        eos_token_id=terminators,
+        do_sample=True,
+        temperature=temperature,
+        top_p=top_p,
+    )
+    response = outputs[0]["generated_text"][len(prompt):]
+    return response, system_prompt + [{"role": "assistant", "content": response}]
+
+
+def get_response(
+    query, message_history=[], max_tokens=128, temperature=1.1, top_p=0.9
+):
+    user_prompt = message_history + [{"role": "user", "content": query}]
+    prompt = pipeline.tokenizer.apply_chat_template(
+        user_prompt, tokenize=False, add_generation_prompt=True
+    )
+    # print("prompt before conversion: ", user_prompt)
+    # print("prompt after conversion: ", prompt)
+    outputs = pipeline(
+        prompt,
+        max_new_tokens=max_tokens,
+        eos_token_id=terminators,
+        do_sample=True,
+        temperature=temperature,
+        top_p=top_p,
+    )
+    response = outputs[0]["generated_text"][len(prompt):]
+    return response, user_prompt + [{"role": "assistant", "content": response}]
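
Both helpers return the generated text together with the updated message history, and callers are expected to feed that history back into the next call, which is what app.py does with chat_history. A short sketch of that loop follows; it is illustrative only, since importing llamaLLM loads the full model from the Hub, and the system prompt shown here is a placeholder rather than the one built in app.py.

# Illustrative chaining of the two helpers in llamaLLM.py.
# The system prompt is a made-up placeholder.
from llamaLLM import get_init_AI_response, get_response

history = [{"role": "system", "content": "You are a friendly character."}]

# the model writes the opening message; history gains one assistant turn
opening, history = get_init_AI_response(history)

# each user turn appends a user message and an assistant reply to history
reply, history = get_response("hello", history)

print(opening)
print(reply)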
src/main.py
ADDED
@@ -0,0 +1,15 @@
+# print("entered main.py")
+import uvicorn
+import os
+
+if __name__ == "__main__":
+
+    # even though uvicorn is running on 0.0.0.0, check 127.0.0.1 from the browser
+
+    if "code" in os.getcwd():
+        uvicorn.run("app:app", host="0.0.0.0", port=8001, log_level="debug",
+                    proxy_headers=True, reload=True)
+    else:
+        # for running locally from the IDE without Docker
+        uvicorn.run("app.app:app", host="0.0.0.0", port=8001, log_level="debug",
+                    proxy_headers=True, reload=True)
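
The branch above mirrors the Dockerfile: inside the image the working directory is /code and app.py is copied flat next to main.py, so "code" appears in os.getcwd() and the flat "app:app" target resolves; outside the container the application lives in the app package, so "app.app:app" is used instead, presumably when main.py is started from inside src/. A condensed restatement of that dispatch, with the assumption about the local working directory noted in the comments:

# Restatement of the dispatch in src/main.py. Inside the image the Dockerfile
# sets WORKDIR /code and copies app.py next to main.py, so the working
# directory contains "code" and the flat module is importable. The local
# branch assumes main.py is started from inside src/ so that the app package
# is on the import path.
import os

target = "app:app" if "code" in os.getcwd() else "app.app:app"
print(target)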