Spaces:

Bagi4
/

textClassificator

Runtime error

App Files Files Community

Bagi4 committed on Nov 24, 2023

Commit

f791980

•

1 Parent(s): e155850

feat: new model

Browse files

Files changed (1) hide show

main.py +47 -3

main.py CHANGED Viewed

@@ -1,5 +1,8 @@
 import logging
 import uvicorn
 from fastapi import FastAPI
 from pydantic import BaseModel
 from transformers import pipeline
@@ -13,6 +16,17 @@ logging.basicConfig(
     datefmt='%Y-%m-%d %H:%M:%S'
 )
 classifier = pipeline("zero-shot-classification", model="models/classificator", use_fast=False)
 app = FastAPI()
@@ -28,16 +42,46 @@ class ResponseData(BaseModel):
     scores: list[float]
 @app.post("/classify", response_model=ResponseData, tags=["Classificator"])
 async def classify_text(data: RequestData):
-    result = classifier(data.sequence, data.labels, multi_label=data.multiLabel)
     logging.info(result)
     return result
 @app.get("/ping", tags=["TEST"])
-def ping():
     return "pong"

 import logging
 import uvicorn
+from transformers import AutoTokenizer, AutoModel
+import torch
+import torch.nn.functional as F
 from fastapi import FastAPI
 from pydantic import BaseModel
 from transformers import pipeline
     datefmt='%Y-%m-%d %H:%M:%S'
 )
 # Module-level zero-shot-classification pipeline, constructed once when the
 # module is imported. "models/classificator" is presumably a local checkpoint
 # directory — TODO confirm against the deployment layout.
 # NOTE(review): use_fast=False is forwarded to pipeline(); presumably it selects
 # the slow tokenizer implementation — confirm.
 classifier = pipeline("zero-shot-classification", model="models/classificator", use_fast=False)
def mean_pooling(model_output, attention_mask):
    """Average token embeddings along dim 1, weighting each token by its mask value.

    Args:
        model_output: Model result whose first element (``model_output[0]``)
            holds the per-token embeddings. The reduction runs over dim 1,
            so it is treated as (batch, tokens, hidden).
        attention_mask: Mask aligned with the first two dims of the token
            embeddings; a value of 0 removes that token from the average.

    Returns:
        One pooled vector per input row: the mask-weighted sum of its token
        embeddings divided by the sum of its mask values.
    """
    token_embeddings = model_output[0]
    # A trailing singleton axis lets broadcasting apply the mask across the
    # hidden dimension, so no embedding-sized float copy of the mask is built.
    mask = attention_mask.unsqueeze(-1).float()
    summed = torch.sum(token_embeddings * mask, dim=1)
    # Clamp so a fully masked row divides by 1e-9 instead of by zero.
    token_counts = torch.clamp(mask.sum(dim=1), min=1e-9)
    return summed / token_counts
# Checkpoint id shared by the tokenizer and the encoder; naming it once keeps
# the two loads from drifting apart when the model is swapped.
EMBEDDING_MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'
tokenizer = AutoTokenizer.from_pretrained(EMBEDDING_MODEL_NAME)
model = AutoModel.from_pretrained(EMBEDDING_MODEL_NAME)
# FastAPI application object that the route decorators in this module attach to.
app = FastAPI()
     scores: list[float]
def classify(data: RequestData):
    """Run the module-level zero-shot classifier on one request.

    Passes ``data.sequence`` and ``data.labels`` positionally and
    ``data.multiLabel`` as the ``multi_label`` keyword, then returns whatever
    the classifier returns, unchanged.
    """
    text, candidate_labels = data.sequence, data.labels
    return classifier(text, candidate_labels, multi_label=data.multiLabel)
def similarity(data: RequestData):
    """Score the request sequence against each label using sentence embeddings.

    The sequence and all labels are encoded in one batch, mean-pooled and
    L2-normalised; the sequence vector is then multiplied with every label
    vector.

    Returns:
        A list with one float per entry of ``data.labels``, in label order.
    """
    # Row 0 is the query sequence; the remaining rows are the candidate labels.
    texts = [data.sequence, *data.labels]
    batch = tokenizer(texts, padding=True, truncation=True, return_tensors='pt')
    with torch.no_grad():  # forward pass only, no gradient tracking
        outputs = model(**batch)
    pooled = mean_pooling(outputs, batch['attention_mask'])
    # With unit-length rows, the matrix product below is a cosine similarity.
    unit_vectors = F.normalize(pooled, p=2, dim=1)
    query, candidates = unit_vectors[:1], unit_vectors[1:]
    scores = query @ candidates.T
    # ``scores`` has a single row (the query); unwrap it to a flat list.
    return scores.tolist()[0]
 @app.post("/classify", response_model=ResponseData, tags=["Classificator"])
 async def classify_text(data: RequestData):
     # POST /classify: hand the request to classify(), log the raw prediction
     # via the root logger, and return it for serialisation as ResponseData.
     # NOTE(review): classify() is called synchronously inside this coroutine.
     prediction = classify(data)
     logging.info(prediction)
     return prediction
@app.post("/similarity", response_model=ResponseData, tags=["Similarity"])
async def similarity_text(data: RequestData):
    """Score how closely the sequence matches each label.

    Echoes the submitted ``sequence`` and ``labels`` and adds ``scores``:
    one similarity value per label, in the same order as ``labels``.
    """
    # Named distinctly from the /classify handler: reusing ``classify_text``
    # here rebinds that module-level name and hides the earlier function.
    scores = similarity(data)
    logging.info(scores)
    return ResponseData.model_validate({
        "sequence": data.sequence,
        "labels": data.labels,
        "scores": scores,
    })
 @app.get("/ping", tags=["TEST"])
 async def ping():
     # GET /ping: takes no input and always answers with the fixed string "pong".
     return "pong"