Oleg Lavrovsky committed: Completion API

app.py CHANGED

@@ -2,6 +2,7 @@ from contextlib import asynccontextmanager
 from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel, ValidationError
+from typing import List, Optional
 
 from torch import cuda
 from transformers import AutoModelForCausalLM, AutoTokenizer

@@ -40,10 +41,16 @@ class ModelResponse(BaseModel):
     confidence: float
     processing_time: float
 
+class ChatMessage(BaseModel):
+    role: str
+    content: str
+
 class Completion(BaseModel):
-    model: str
-    max_tokens: int =
+    model: str = "apertus"
+    messages: List[ChatMessage]
+    max_tokens: Optional[int] = 512
+    temperature: Optional[float] = 0.1
+    top_p: Optional[float] = 0.9
 
 @asynccontextmanager
 async def lifespan(app: FastAPI):
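
The new Completion model replaces the flat request schema with an OpenAI-style chat payload. A minimal sketch of how the two models validate a request body, restated outside the app (pydantic v2 assumed for model_dump; on v1 use .dict()):

from typing import List, Optional

from pydantic import BaseModel


class ChatMessage(BaseModel):
    role: str
    content: str


class Completion(BaseModel):
    model: str = "apertus"
    messages: List[ChatMessage]
    max_tokens: Optional[int] = 512
    temperature: Optional[float] = 0.1
    top_p: Optional[float] = 0.9


# Only "messages" is required; every other field falls back to the defaults above.
req = Completion(messages=[{"role": "user", "content": "Hello"}])
print(req.model_dump())  # pydantic v2; on v1 use req.dict()
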

@@ -104,13 +111,10 @@ def fit_to_length(text, min_length=3, max_length=100):
     return text
 
 
-def get_model_reponse(
+def get_model_reponse(messages_think):
     """Process the text content."""
 
     # Prepare the model input
-    messages_think = [
-        {"role": "user", "content": query}
-    ]
     text = tokenizer.apply_chat_template(
         messages_think,
         tokenize=False,
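
get_model_reponse now takes the message list directly instead of building it from a raw query string, so both endpoints can share one code path. A sketch of what apply_chat_template produces from such a list; the checkpoint name and the add_generation_prompt flag are assumptions, not from this diff:

from transformers import AutoTokenizer

# Illustrative chat-tuned checkpoint; the app loads its own model/tokenizer.
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-135M-Instruct")

messages_think = [{"role": "user", "content": "Hello"}]

# tokenize=False returns the formatted prompt string instead of token ids;
# add_generation_prompt=True appends the assistant header so generation
# continues as the assistant turn.
text = tokenizer.apply_chat_template(
    messages_think,
    tokenize=False,
    add_generation_prompt=True,
)
print(text)
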

@@ -144,9 +148,7 @@ async def completion(data: Completion):
         raise HTTPException(status_code=503, detail="Model not loaded")
 
     try:
-        result = get_model_reponse(text, model)
+        result = get_model_reponse(data)
 
         return {
             "choices": [
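
With Completion as the request body, the handler reads like an OpenAI-compatible chat completion route. A hedged sketch of a client call; the route path, port, and the response shape beyond "choices" are assumptions inferred from the handler, not confirmed by this diff:

import requests

resp = requests.post(
    "http://localhost:7860/completion",  # path and port assumed from the handler name
    json={
        "model": "apertus",
        "messages": [{"role": "user", "content": "Say hello."}],
        "max_tokens": 64,
    },
    timeout=120,
)
resp.raise_for_status()
print(resp.json()["choices"])
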

@@ -181,7 +183,10 @@ async def predict(q: str):
 
     text = fit_to_length(input_data.text, input_data.min_length, input_data.max_length)
 
+    messages_think = [
+        {"role": "user", "content": text}
+    ]
+    result = get_model_reponse(messages_think)
 
     # Checkpoint
     processing_time = time.time() - start_time
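
The predict path wraps the length-checked query in a single-turn message list and times the round trip. A condensed, runnable sketch of that flow with the model call stubbed out; the stub bodies are placeholders, while the names (including the diff's spelling get_model_reponse) follow the commit:

import time

def fit_to_length(text, min_length=3, max_length=100):
    # Stub: the real helper validates/clamps the query length.
    return text[:max_length]

def get_model_reponse(messages_think):
    # Stub standing in for the tokenizer + model generation call.
    return "echo: " + messages_think[-1]["content"]

start_time = time.time()
text = fit_to_length("What is the capital of Switzerland?")
messages_think = [
    {"role": "user", "content": text}
]
result = get_model_reponse(messages_think)

# Checkpoint: elapsed wall-clock time for the whole call
processing_time = time.time() - start_time
print(result, "(%.3fs)" % processing_time)
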