Spaces:

theachyuttiwari
/

lfqa1

Build error

App Files Files Community

Achyut Tiwari committed on Jun 8, 2022

Commit

8c7ad44

•

1 Parent(s): e0ccdc1

Add files via upload

Browse files

Files changed (4) hide show

lfqa_server/Dockerfile +19 -0
lfqa_server/__init__.py +0 -0
lfqa_server/main.py +130 -0
lfqa_server/requirements.txt +7 -0

lfqa_server/Dockerfile ADDED Viewed

	@@ -0,0 +1,19 @@

+FROM nvidia/cuda:11.2.2-runtime-ubuntu20.04
+# Set up the environment.
+# DEBIAN_FRONTEND is a build-time ARG so package configuration never waits for
+# interactive input during `docker build`, and it does not leak into the runtime env.
+ARG DEBIAN_FRONTEND=noninteractive
+# Update and install in one layer so a cached `apt-get update` layer is never
+# combined with a later install; -y answers every confirmation prompt, and the
+# apt lists are removed afterwards to keep the layer small.
+RUN apt-get update \
+    && apt-get install --no-install-recommends --no-install-suggests -y curl \
+    && apt-get install -y unzip python3 python3-pip \
+    && rm -rf /var/lib/apt/lists/*
+WORKDIR /code
+# Hugging Face caches (model and tokenizer downloads) go under /code/cache.
+ENV HF_HOME=/code/cache
+COPY ./requirements.txt /code/requirements.txt
+# torch is installed from the PyTorch cu113 wheel index before the remaining requirements.
+RUN pip3 install --no-cache-dir torch==1.10.1+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
+RUN pip3 install --no-cache-dir --upgrade -r /code/requirements.txt
+# main.py is placed in /code/app so uvicorn can import it as app.main from WORKDIR.
+COPY ./main.py /code/app/main.py
+CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8080"]

lfqa_server/__init__.py ADDED Viewed

File without changes

lfqa_server/main.py ADDED Viewed

	@@ -0,0 +1,130 @@

+import torch
+from fastapi import FastAPI, Depends, status
+from fastapi.responses import PlainTextResponse
+from pydantic import BaseModel
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+import time
+from typing import Dict, List, Optional
+import jwt
+from decouple import config
+from fastapi import Request, HTTPException
+from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
+# JWT signing secret and algorithm name, read through decouple's config().
+JWT_SECRET = config("secret")
+JWT_ALGORITHM = config("algorithm")
+app = FastAPI()
+# Readiness flag reported by /healthz; the startup hook sets it to True.
+app.ready = False
+# Use the GPU when torch reports CUDA as available, otherwise the CPU.
+device = ("cuda" if torch.cuda.is_available() else "cpu")
+# The tokenizer and seq2seq model are loaded once, when this module is imported.
+tokenizer = AutoTokenizer.from_pretrained('vblagoje/bart_lfqa')
+model = AutoModelForSeq2SeqLM.from_pretrained('vblagoje/bart_lfqa').to(device)
+# Switch the model to evaluation mode; the return value is deliberately discarded.
+_ = model.eval()
+class JWTBearer(HTTPBearer):
+    def __init__(self, auto_error: bool = True):
+        super(JWTBearer, self).__init__(auto_error=auto_error)
+    async def __call__(self, request: Request):
+        credentials: HTTPAuthorizationCredentials = await super(JWTBearer, self).__call__(request)
+        if credentials:
+            if not credentials.scheme == "Bearer":
+                raise HTTPException(status_code=403, detail="Invalid authentication scheme.")
+            if not self.verify_jwt(credentials.credentials):
+                raise HTTPException(status_code=403, detail="Invalid token or expired token.")
+            return credentials.credentials
+        else:
+            raise HTTPException(status_code=403, detail="Invalid authorization code.")
+    def verify_jwt(self, jwtoken: str) -> bool:
+        isTokenValid: bool = False
+        try:
+            payload = decodeJWT(jwtoken)
+        except:
+            payload = None
+        if payload:
+            isTokenValid = True
+        return isTokenValid
+def token_response(token: str):
+    return {
+        "access_token": token
+    }
+def signJWT(user_id: str) -> Dict[str, str]:
+    payload = {
+        "user_id": user_id,
+        "expires": time.time() + 6000
+    }
+    token = jwt.encode(payload, JWT_SECRET, algorithm=JWT_ALGORITHM)
+    return token_response(token)
+def decodeJWT(token: str) -> dict:
+    try:
+        decoded_token = jwt.decode(token, JWT_SECRET, algorithms=[JWT_ALGORITHM])
+        return decoded_token if decoded_token["expires"] >= time.time() else None
+    except:
+        return {}
+class LFQAParameters(BaseModel):
+    min_length: int = 50
+    max_length: int = 250
+    do_sample: bool = False
+    early_stopping: bool = True
+    num_beams: int = 8
+    temperature: float = 1.0
+    top_k: float = None
+    top_p: float = None
+    no_repeat_ngram_size: int = 3
+    num_return_sequences: int = 1
+class InferencePayload(BaseModel):
+    """Request body for the /generate/ endpoint."""
+    # Text handed to the tokenizer by generate().
+    model_input: str
+    # Generation settings; defaults to LFQAParameters() when the field is omitted.
+    parameters: Optional[LFQAParameters] = LFQAParameters()
+@app.on_event("startup")
+def startup():
+    """Mark the application as ready once FastAPI fires its startup event."""
+    app.ready = True
+@app.get("/healthz")
+def healthz():
+    if app.ready:
+        return PlainTextResponse("ok")
+    return PlainTextResponse("service unavailable", status_code=status.HTTP_503_SERVICE_UNAVAILABLE)
+@app.post("/generate/", dependencies=[Depends(JWTBearer())])
+def generate(context: InferencePayload):
+    model_input = tokenizer(context.model_input, truncation=True, padding=True, return_tensors="pt")
+    param = context.parameters
+    generated_answers_encoded = model.generate(input_ids=model_input["input_ids"].to(device),
+                                               attention_mask=model_input["attention_mask"].to(device),
+                                               min_length=param.min_length,
+                                               max_length=param.max_length,
+                                               do_sample=param.do_sample,
+                                               early_stopping=param.early_stopping,
+                                               num_beams=param.num_beams,
+                                               temperature=param.temperature,
+                                               top_k=param.top_k,
+                                               top_p=param.top_p,
+                                               no_repeat_ngram_size=param.no_repeat_ngram_size,
+                                               num_return_sequences=param.num_return_sequences)
+    answers = tokenizer.batch_decode(generated_answers_encoded, skip_special_tokens=True,
+                                     clean_up_tokenization_spaces=True)
+    results = []
+    for answer in answers:
+        results.append({"generated_text": answer})
+    return results

lfqa_server/requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+# Python dependencies installed by the Dockerfile with `pip3 install -r`.
+# torch is not listed here; the Dockerfile installs it in a separate step.
+# NOTE(review): main.py does not import datasets or faiss — confirm they are still needed.
+datasets
+transformers
+fastapi
+faiss-gpu
+uvicorn[standard]
+# PyJWT is pinned to the 1.x series, where jwt.encode() returns bytes.
+PyJWT==1.7.1
+python-decouple==3.3