OjciecTadeusz committed on
Commit
e5928ae
1 Parent(s): b58b5c3

Update main.py

Files changed (1)
  1. main.py +208 -152
main.py CHANGED
@@ -1,165 +1,221 @@
-
-
-from fastapi import FastAPI, HTTPException, Depends
-from fastapi.security.api_key import APIKeyHeader
+from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
-from huggingface_hub import InferenceClient, HfApi
-from typing import List, Optional
-import os
-from dotenv import load_dotenv
-
-# Load environment variables
-load_dotenv()
+from huggingface_hub import InferenceClient
+import uvicorn
 
-# Initialize FastAPI app
 app = FastAPI()
 
-# Get HuggingFace token from environment variable
-HF_TOKEN = os.getenv("HF_TOKEN")
-if not HF_TOKEN:
-    raise ValueError("HF_TOKEN environment variable is not set")
+client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
 
-# Setup API key authorization
-API_KEY_NAME = "Authorization"
-api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=True)
+class Item(BaseModel):
+    prompt: str
+    history: list
+    system_prompt: str
+    temperature: float = 0.01
+    top_p: float = 1.0
+    details: bool = True
+    return_full_text: bool = False
+    stream: bool = False
+
+def format_prompt(message, history):
+    prompt = "<s>"
+    for user_prompt, bot_response in history:
+        prompt += f"[INST] {user_prompt} [/INST]"
+        prompt += f" {bot_response}</s> "
+    prompt += f"[INST] {message} [/INST]"
+    return prompt
 
-# Initialize HuggingFace client
-try:
-    client = InferenceClient(
-        "mistralai/Mixtral-8x7B-Instruct-v0.1",
-        token=HF_TOKEN
+def generate(item: Item):
+    temperature = float(item.temperature)
+    if temperature < 1e-2:
+        temperature = 1e-2
+    top_p = float(item.top_p)
+
+    generate_kwargs = dict(
+        temperature=temperature,
+        max_new_tokens=1048,
+        top_p=top_p,
+        repetition_penalty=1.0,
+        do_sample=True,
+        seed=42,
     )
-    # Verify token is valid
-    hf_api = HfApi(token=HF_TOKEN)
-    hf_api.whoami()
-except Exception as e:
-    raise ValueError(f"Failed to initialize HuggingFace client: {str(e)}")
 
-class ChatMessage(BaseModel):
-    role: str
-    content: str
+    formatted_prompt = format_prompt(f"{item.system_prompt}, {item.prompt}", item.history)
+    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=item.stream, details=item.details, return_full_text=item.return_full_text)
+    output = ""
 
-class GenerationRequest(BaseModel):
-    prompt: str
-    message: Optional[str] = None
-    system_message: Optional[str] = None
-    history: Optional[List[ChatMessage]] = None
-    temperature: Optional[float] = 0.7
-    top_p: Optional[float] = 0.95
-
-def format_prompt(message: str, history: List[ChatMessage] = None, system_message: str = None) -> str:
-    prompt = ""
-
-    if system_message:
-        prompt += f"<s>[INST] {system_message} [/INST]</s>"
-
-    if history:
-        for msg in history:
-            if msg.role == "user":
-                prompt += f"<s>[INST] {msg.content} [/INST]"
-            else:
-                prompt += f" {msg.content}</s>"
-
-    prompt += f"<s>[INST] {message} [/INST]"
-    return prompt
-
-async def verify_token(api_key_header: str = Depends(api_key_header)):
-    if not api_key_header.startswith("Bearer "):
-        raise HTTPException(
-            status_code=401,
-            detail="Bearer token missing"
-        )
-    token = api_key_header.replace("Bearer ", "")
-    if token != HF_TOKEN:
-        raise HTTPException(
-            status_code=401,
-            detail="Invalid authentication credentials"
-        )
-    return token
+    for response in stream:
+        output += response.token.text
+    return output
 
 @app.post("/generate/")
-async def generate_text(
-    request: GenerationRequest,
-    token: str = Depends(verify_token)
-):
+async def generate_text(item: Item):
     try:
-        message = request.prompt if request.prompt else request.message
-        if not message:
-            return [
-                {
-                    "msg": "MSG!"
-                }
-            ]
-
-        formatted_prompt = format_prompt(
-            message=message,
-            history=request.history,
-            system_message=request.system_message
-        )
-
-        response = client.text_generation(
-            formatted_prompt,
-            temperature=max(request.temperature, 0.01),
-            top_p=request.top_p,
-            max_new_tokens=1048,
-            do_sample=True,
-            return_full_text=False
-        )
-
-        if not response:
-            return [
-                {
-                    "detail": [
-                        {
-                            # "type": "server_error",
-                            "loc": ["server"],
-                            "msg": "No response received from model",
-                            "input": None
-                        }
-                    ]
-                }
-            ]
-
-        # Construct the custom JSON response
-        return [
-            {
-                "msg": response
-                # "msg": [
-                #     {
-                #         # "type": "success",
-                #         # "loc":[
-                #         #     "body",
-                #         #     "prompt"
-                #         # ],
-                #         # "loc": ["body"],
-                #         # "msg": [
-                #         #     response,
-                #         #     formatted_prompt
-                #         # ],
-
-                #     }
-                # ]
-            }
-        ]
-
+        response = generate(item)
+        return {"response": response}
     except Exception as e:
-        return [
-            {
-                "detail": [
-                    {
-                        "type": "server_error",
-                        "loc": ["server"],
-                        "msg": f"Error generating response: {str(e)}",
-                        "input": None
-                    }
-                ]
-            }
-        ]
-
-@app.get("/health")
-async def health_check():
-    return {
-        "status": "healthy",
-        "huggingface_client": "initialized",
-        "auth_required": True
-    }
+        raise HTTPException(status_code=500, detail=str(e))
+
+# from fastapi import FastAPI, HTTPException, Depends
+# from fastapi.security.api_key import APIKeyHeader
+# from pydantic import BaseModel
+# from huggingface_hub import InferenceClient, HfApi
+# from typing import List, Optional
+# import os
+# from dotenv import load_dotenv
+
+# # Load environment variables
+# load_dotenv()
+
+# # Initialize FastAPI app
+# app = FastAPI()
+
+# # Get HuggingFace token from environment variable
+# HF_TOKEN = os.getenv("HF_TOKEN")
+# if not HF_TOKEN:
+#     raise ValueError("HF_TOKEN environment variable is not set")
+
+# # Setup API key authorization
+# API_KEY_NAME = "Authorization"
+# api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=True)
+
+# # Initialize HuggingFace client
+# try:
+#     client = InferenceClient(
+#         "mistralai/Mixtral-8x7B-Instruct-v0.1",
+#         token=HF_TOKEN
+#     )
+#     # Verify token is valid
+#     hf_api = HfApi(token=HF_TOKEN)
+#     hf_api.whoami()
+# except Exception as e:
+#     raise ValueError(f"Failed to initialize HuggingFace client: {str(e)}")
+
+# class ChatMessage(BaseModel):
+#     role: str
+#     content: str
+
+# class GenerationRequest(BaseModel):
+#     prompt: str
+#     message: Optional[str] = None
+#     system_message: Optional[str] = None
+#     history: Optional[List[ChatMessage]] = None
+#     temperature: Optional[float] = 0.7
+#     top_p: Optional[float] = 0.95
+
+# def format_prompt(message: str, history: List[ChatMessage] = None, system_message: str = None) -> str:
+#     prompt = ""
+
+#     if system_message:
+#         prompt += f"<s>[INST] {system_message} [/INST]</s>"
+
+#     if history:
+#         for msg in history:
+#             if msg.role == "user":
+#                 prompt += f"<s>[INST] {msg.content} [/INST]"
+#             else:
+#                 prompt += f" {msg.content}</s>"
+
+#     prompt += f"<s>[INST] {message} [/INST]"
+#     return prompt
+
+# async def verify_token(api_key_header: str = Depends(api_key_header)):
+#     if not api_key_header.startswith("Bearer "):
+#         raise HTTPException(
+#             status_code=401,
+#             detail="Bearer token missing"
+#         )
+#     token = api_key_header.replace("Bearer ", "")
+#     if token != HF_TOKEN:
+#         raise HTTPException(
+#             status_code=401,
+#             detail="Invalid authentication credentials"
+#         )
+#     return token
+
+# @app.post("/generate/")
+# async def generate_text(
+#     request: GenerationRequest,
+#     token: str = Depends(verify_token)
+# ):
+#     try:
+#         message = request.prompt if request.prompt else request.message
+#         if not message:
+#             return [
+#                 {
+#                     "msg": "MSG!"
+#                 }
+#             ]
+
+#         formatted_prompt = format_prompt(
+#             message=message,
+#             history=request.history,
+#             system_message=request.system_message
+#         )
+
+#         response = client.text_generation(
+#             formatted_prompt,
+#             temperature=max(request.temperature, 0.01),
+#             top_p=request.top_p,
+#             max_new_tokens=1048,
+#             do_sample=True,
+#             return_full_text=False
+#         )
+
+#         if not response:
+#             return [
+#                 {
+#                     "detail": [
+#                         {
+#                             # "type": "server_error",
+#                             "loc": ["server"],
+#                             "msg": "No response received from model",
+#                             "input": None
+#                         }
+#                     ]
+#                 }
+#             ]
+
+#         # Construct the custom JSON response
+#         return [
+#             {
+#                 "msg": response
+#                 # "msg": [
+#                 #     {
+#                 #         # "type": "success",
+#                 #         # "loc":[
+#                 #         #     "body",
+#                 #         #     "prompt"
+#                 #         # ],
+#                 #         # "loc": ["body"],
+#                 #         # "msg": [
+#                 #         #     response,
+#                 #         #     formatted_prompt
+#                 #         # ],
+
+#                 #     }
+#                 # ]
+#             }
+#         ]
+
+#     except Exception as e:
+#         return [
+#             {
+#                 "detail": [
+#                     {
+#                         "type": "server_error",
+#                         "loc": ["server"],
+#                         "msg": f"Error generating response: {str(e)}",
+#                         "input": None
+#                     }
+#                 ]
+#             }
+#         ]
+
+# @app.get("/health")
+# async def health_check():
+#     return {
+#         "status": "healthy",
+#         "huggingface_client": "initialized",
+#         "auth_required": True
+#     }
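
Below is a minimal client-side sketch for exercising the new /generate/ endpoint. It is illustrative only: the host, port, and payload values are assumptions (e.g. the app served locally with uvicorn main:app), not part of the commit. Two details of the new code shape the payload: format_prompt unpacks each history entry as a (user_prompt, bot_response) pair, so history must be a list of two-element pairs; and the token loop in generate (response.token.text) only receives token objects when client.text_generation runs with stream=True and details=True, since with stream=False it returns a plain string rather than an iterable of tokens, so those flags are set explicitly here rather than left at the Item defaults.

# client_example.py -- hypothetical usage sketch, not part of the commit
import requests

payload = {
    "prompt": "Summarize FastAPI in one sentence.",
    "system_prompt": "You are a concise assistant.",
    # Each entry must be a [user_message, assistant_reply] pair to match
    # the two-value unpacking in format_prompt().
    "history": [["Hi", "Hello! How can I help?"]],
    "temperature": 0.7,
    "top_p": 0.95,
    # generate() accumulates response.token.text, which text_generation()
    # only yields when streaming with details is enabled.
    "stream": True,
    "details": True,
}

# Assumed local server; a deployed Space would use its own URL.
resp = requests.post("http://127.0.0.1:8000/generate/", json=payload)
resp.raise_for_status()
print(resp.json()["response"])

With this payload, format_prompt produces a Mixtral-instruct-style string along the lines of "<s>[INST] Hi [/INST] Hello! How can I help?</s> [INST] You are a concise assistant., Summarize FastAPI in one sentence. [/INST]", since generate joins the system prompt to the user prompt with a comma before formatting.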