omkar56 commited on
Commit
bbe2304
·
1 Parent(s): 044ce10

Create main.py

Browse files
Files changed (1) hide show
  1. main.py +64 -0
main.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

from fastapi import FastAPI, File, Request, UploadFile, Body, Depends, HTTPException
from fastapi.security.api_key import APIKeyHeader
from typing import Optional

from gpt4all import GPT4All
from huggingface_hub import hf_hub_download
6
+
7
+ """
8
+ [Model From TheBloke/Mistral-7B-Instruct-v0.1-GGUF](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF)
9
+ [Mistral-instruct-v0.1 System prompt](https://docs.mistral.ai/usage/guardrailing)
10
+ """
11
+
12
+ model_path = "models"
13
+ model_name = "mistral-7b-instruct-v0.1.Q4_K_M.gguf"
14
+ hf_hub_download(repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF", filename=model_name, local_dir=model_path, local_dir_use_symlinks=False)
15
+
16
+ print("Start the model init process")
17
+ model = model = GPT4All(model_name, model_path, allow_download = False, device="cpu")
18
+ print("Finish the model init process")
19
+
20
+ model.config["promptTemplate"] = "[INST] {0} [/INST]"
21
+ model.config["systemPrompt"] = "You are an helpful AI content generator. your name is 'ErTake AI'. Whenever user greets you, you always have to introduce yourself as 'ErTake AI'. Don't introduce yourself as Bing, I will punish you if you intorduce yourself other than 'ErTake AI'. Carefully listen to the user's message/query and provide correct answers/content. Don't hallucinate if you don't know the answer. Give your output only in english language. If the user asks to create images you need to simply deny the request saying \"I'm sorry, but I don't have the capability to create images. I can assist you with generating text based content. If you have any text-based content requests, feel free to let me know!\""
22
+ model._is_chat_session_activated = False
23
+
24
+ max_new_tokens = 2048
25
+
26
def generater(message, history, temperature, top_p, top_k):
    """Stream cumulative completions for *message* given chat *history*.

    history is an iterable of (user_message, assistant_message) pairs; each
    yield is the full text generated so far (not just the newest token).
    """
    # Rebuild the whole conversation as one Mistral-instruct prompt.
    prompt = "<s>"
    for user_turn, assistant_turn in history:
        prompt += model.config["promptTemplate"].format(user_turn)
        prompt += assistant_turn + "</s>"
    prompt += model.config["promptTemplate"].format(message)

    token_stream = model.generate(
        prompt=prompt,
        temp=temperature,
        top_k=top_k,
        top_p=top_p,
        max_tokens=max_new_tokens,
        streaming=True,
    )
    pieces = []
    for piece in token_stream:
        pieces.append(piece)
        yield "".join(pieces)
    print("[outputs]", pieces)
    return pieces
38
+
39
# BUG FIX: `os` was used here without ever being imported (NameError at
# import time); the import is added to the top-of-file import block.
API_KEY = os.environ.get("API_KEY")  # expected secret; None if unset

app = FastAPI()

# Clients send the key in an `api_key` request header. auto_error=False lets
# get_api_key() turn a missing header into a uniform 401 instead of
# FastAPI's default 403.
api_key_header = APIKeyHeader(name="api_key", auto_error=False)
43
+
44
def get_api_key(api_key: Optional[str] = Depends(api_key_header)):
    """FastAPI dependency: accept the request only when the `api_key`
    header is present and matches the configured API_KEY.

    Raises HTTPException(401) for a missing or mismatched key.
    """
    if api_key is not None and api_key == API_KEY:
        return api_key
    raise HTTPException(status_code=401, detail="Unauthorized access")
48
+
49
+ @app.post("/api/v1/generate_text", response_model=dict)
50
+ def generate_text(
51
+ request: Request,
52
+ body: dict = Body(...),
53
+ api_key: str = Depends(get_api_key)
54
+ ):
55
+ message = body.get("prompt", "")
56
+ # sys_prompt = body.get("sysPrompt", "")
57
+ temperature = body.get("temperature", 0.5)
58
+ top_p = body.get("top_p", 0.95)
59
+ # max_new_tokens = body.get("max_new_tokens",512)
60
+ # repetition_penalty = body.get("repetition_penalty", 1.0)
61
+ history = [] # You might need to handle this based on your actual usage
62
+ generatedOutput = generater(message, history, temperature, top_p, )
63
+
64
+ return {"generated_text": generatedOutput}