Spaces:

Marroco93
/

PacmanAI-2

Sleeping

App Files Community

Marroco93 committed on Apr 1, 2024

Commit

ce8dee8

1 Parent(s): a0ed03b

mistralai again

Browse files

Files changed (1) hide show

main.py +12 -21

main.py CHANGED Viewed

@@ -5,39 +5,29 @@ from huggingface_hub import InferenceClient
 import uvicorn
 from typing import Generator
 import json  # Asegúrate de que esta línea esté al principio del archivo
-import torch
 app = FastAPI()
-# Initialize the InferenceClient with the Gemma-7b model
-client = InferenceClient("google/gemma-7b")
 class Item(BaseModel):
     prompt: str
     history: list
     system_prompt: str
     temperature: float = 0.8
-    max_new_tokens: int = 8000
     top_p: float = 0.15
     repetition_penalty: float = 1.0
 def format_prompt(message, history):
-    prompt = "<bos>"
-    # Add history to the prompt if there's any
-    if history:
-        for entry in history:
-            role = "user" if entry['role'] == "user" else "model"
-            prompt += f"<start_of_turn>{role}\n{entry['content']}<end_of_turn>"
-    # Add the current message
-    prompt += f"<start_of_turn>user\n{message}<end_of_turn><start_of_turn>model\n"
     return prompt
-# No changes needed in the format_prompt function unless the new model requires different prompt formatting
 def generate_stream(item: Item) -> Generator[bytes, None, None]:
     formatted_prompt = format_prompt(f"{item.system_prompt}, {item.prompt}", item.history)
     generate_kwargs = {
@@ -51,16 +41,17 @@ def generate_stream(item: Item) -> Generator[bytes, None, None]:
     # Stream the response from the InferenceClient
     for response in client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True):
-        # Check if the 'details' flag and response structure are the same for the new model
         chunk = {
             "text": response.token.text,
-            "complete": response.generated_text is not None
         }
         yield json.dumps(chunk).encode("utf-8") + b"\n"
 @app.post("/generate/")
 async def generate_text(item: Item):
     return StreamingResponse(generate_stream(item), media_type="application/x-ndjson")
 if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=8000)

 import uvicorn
 from typing import Generator
 import json  # Asegúrate de que esta línea esté al principio del archivo
 app = FastAPI()
+# Initialize the InferenceClient with your model
+client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.2")
 class Item(BaseModel):
     prompt: str
     history: list
     system_prompt: str
     temperature: float = 0.8
+    max_new_tokens: int = 9000
     top_p: float = 0.15
     repetition_penalty: float = 1.0
 def format_prompt(message, history):
+    prompt = "<s>"
+    for user_prompt, bot_response in history:
+        prompt += f"[INST] {user_prompt} [/INST]"
+        prompt += f" {bot_response}</s> "
+    prompt += f"[INST] {message} [/INST]"
     return prompt
 def generate_stream(item: Item) -> Generator[bytes, None, None]:
     formatted_prompt = format_prompt(f"{item.system_prompt}, {item.prompt}", item.history)
     generate_kwargs = {
     # Stream the response from the InferenceClient
     for response in client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True):
+        # This assumes 'details=True' gives you a structure where you can access the text like this
         chunk = {
             "text": response.token.text,
+            "complete": response.generated_text is not None  # Adjust based on how you detect completion
         }
         yield json.dumps(chunk).encode("utf-8") + b"\n"
 @app.post("/generate/")
 async def generate_text(item: Item):
+    # Stream response back to the client
     return StreamingResponse(generate_stream(item), media_type="application/x-ndjson")
 if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=8000)