service-internal committed on
Commit
b053f3d
·
verified ·
1 Parent(s): a2004de

Upload main.py

Browse files
Files changed (1) hide show
  1. main.py +50 -44
main.py CHANGED
@@ -1,44 +1,50 @@
1
- from fastapi import FastAPI, Request
2
- from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoConfig
3
- from scipy.special import softmax
4
- import numpy as np
5
- import uvicorn
6
-
7
- app = FastAPI()
8
-
9
- # Load model and tokenizer
10
- MODEL = "cardiffnlp/twitter-roberta-base-sentiment-latest"
11
- tokenizer = AutoTokenizer.from_pretrained(MODEL)
12
- config = AutoConfig.from_pretrained(MODEL)
13
- model = AutoModelForSequenceClassification.from_pretrained(MODEL)
14
-
15
- # Preprocessing function
16
- def preprocess(text):
17
- tokens = []
18
- for t in text.split():
19
- if t.startswith("@") and len(t) > 1:
20
- t = "@user"
21
- elif t.startswith("http"):
22
- t = "http"
23
- tokens.append(t)
24
- return " ".join(tokens)
25
-
26
- # Inference route
27
- @app.post("/analyze")
28
- async def analyze(request: Request):
29
- data = await request.json()
30
- text = preprocess(data.get("text", ""))
31
-
32
- encoded_input = tokenizer(text, return_tensors='pt')
33
- output = model(**encoded_input)
34
- scores = output[0][0].detach().numpy()
35
- scores = softmax(scores)
36
-
37
- ranking = np.argsort(scores)[::-1]
38
- result = []
39
- for i in ranking:
40
- label = config.id2label[i]
41
- score = round(float(scores[i]), 4)
42
- result.append({"label": label, "score": score})
43
-
44
- return {"result": result}
 
 
 
 
 
 
 
import os

# HF cache/home directories must point at a writable path (e.g. in a
# read-only container). These environment variables are read by the
# transformers/huggingface_hub libraries AT IMPORT TIME, so they must be
# set BEFORE importing transformers — the original code set them after
# the import, which had no effect.
os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf-cache"
os.environ["HF_HOME"] = "/tmp/hf-home"

from fastapi import FastAPI, Request
from scipy.special import softmax
import numpy as np
from transformers import AutoModelForSequenceClassification, AutoConfig, RobertaTokenizer

app = FastAPI()

# Model and tokenizer setup.
# NOTE(review): the tokenizer is deliberately loaded from the older
# "-sentiment" repo while the model/config come from "-sentiment-latest";
# both share the same RoBERTa vocabulary — confirm this stays true on
# model updates.
MODEL = "cardiffnlp/twitter-roberta-base-sentiment-latest"
TOKENIZER_MODEL = "cardiffnlp/twitter-roberta-base-sentiment"

tokenizer = RobertaTokenizer.from_pretrained(TOKENIZER_MODEL)
config = AutoConfig.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL)
# Preprocessing: replace user mentions and URLs with the placeholder
# tokens the Twitter-RoBERTa models were trained with.
def preprocess(text):
    """Mask @mentions as '@user' and URL-like tokens as 'http'.

    Splits on whitespace and rejoins with single spaces; all other
    tokens pass through unchanged.
    """
    def _mask(token):
        # A lone "@" is left as-is; only "@something" is a mention.
        if token.startswith("@") and len(token) > 1:
            return "@user"
        if token.startswith("http"):
            return "http"
        return token

    return " ".join(_mask(tok) for tok in text.split())
# Endpoint: run the sentiment model over the posted text and return all
# labels ranked by probability, highest first.
@app.post("/analyze")
async def analyze(request: Request):
    """Score {"text": ...} from the request body.

    Returns {"result": [{"label": str, "score": float}, ...]} sorted by
    descending score, each score rounded to 4 decimal places. A missing
    "text" key is treated as the empty string.
    """
    payload = await request.json()
    cleaned = preprocess(payload.get("text", ""))

    inputs = tokenizer(cleaned, return_tensors='pt')
    # output[0] is the logits tensor; take the single batch row.
    logits = model(**inputs)[0][0].detach().numpy()
    probs = softmax(logits)

    ranked = [
        {"label": config.id2label[idx], "score": round(float(probs[idx]), 4)}
        for idx in np.argsort(probs)[::-1]
    ]
    return {"result": ranked}