Upload 3 files
- best_model.pt +3 -0
- inference.py +51 -0
- model_meta.json +43 -0
best_model.pt
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:015516272417da498a4797a4dcb97d006bdba009a2583d6739853968bcce434b
size 860265152
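The three lines above are a Git LFS pointer, not the checkpoint itself: a plain download yields only this stub, while the real ~860 MB weights live in LFS storage. A minimal sketch of fetching the resolved file with huggingface_hub, assuming this commit lives in a Hugging Face repo; the repo_id is a placeholder:

from huggingface_hub import hf_hub_download

# repo_id is a placeholder -- substitute the repository this commit belongs to.
ckpt_path = hf_hub_download(repo_id="your-username/your-repo",
                            filename="best_model.pt")
# ckpt_path now points at the real ~860 MB file, ready for torch.load().

Alternatively, cloning the repo with Git LFS installed and running git lfs pull materializes the file in place.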
inference.py
ADDED

# inference.py -- drop this next to best_model.pt for your webapp
import json
import re
from pathlib import Path

import torch
import torch.nn.functional as F
from PIL import Image
from torchvision import transforms
from transformers import BertTokenizer

# Import your model class here:
# from model import MultimodalSentimentModel

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
META = json.loads(Path("deployment/model_meta.json").read_text())
CONFIG = META["config"]

tokenizer = BertTokenizer.from_pretrained(CONFIG["BERT_MODEL"])

# Same resize/normalize the model saw at training time.
img_transform = transforms.Compose([
    transforms.Resize((CONFIG["IMAGE_SIZE"], CONFIG["IMAGE_SIZE"])),
    transforms.ToTensor(),
    transforms.Normalize(META["img_mean"], META["img_std"]),
])

def load_model():
    model = MultimodalSentimentModel(CONFIG).to(DEVICE)
    ckpt = torch.load("deployment/best_model.pt", map_location=DEVICE)
    model.load_state_dict(ckpt["model_state"])
    model.eval()
    return model

def predict(model, text: str, image_path: str) -> dict:
    # Clean the tweet: strip URLs and @mentions, keep the word part of hashtags.
    text = re.sub(r"http\S+", "", text)
    text = re.sub(r"@\w+", "", text)
    text = re.sub(r"#(\w+)", r"\1", text).strip() or "no text"

    enc = tokenizer(text, max_length=CONFIG["MAX_TEXT_LEN"],
                    padding="max_length", truncation=True, return_tensors="pt")
    input_ids = enc["input_ids"].to(DEVICE)
    attention_mask = enc["attention_mask"].to(DEVICE)

    img = img_transform(Image.open(image_path).convert("RGB")).unsqueeze(0).to(DEVICE)

    with torch.no_grad():
        logits = model(input_ids, attention_mask, img)
        probs = F.softmax(logits, dim=-1).cpu().numpy()[0]

    pred_idx = int(probs.argmax())
    return {
        "label": META["label_names"][pred_idx],
        "confidence": float(probs[pred_idx]),
        "probabilities": {n: float(p) for n, p in zip(META["label_names"], probs)},
    }
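A minimal sketch of how a caller would wire these two functions together; the example text and image path are illustrative, not part of the upload:

# Hypothetical caller -- run from the directory that contains deployment/,
# since inference.py reads deployment/model_meta.json and deployment/best_model.pt.
from inference import load_model, predict

model = load_model()   # load once at startup, reuse for every request

result = predict(
    model,
    text="Loving the view from the office today! #sunset",
    image_path="uploads/sunset.jpg",   # placeholder path
)
print(result)   # {"label": ..., "confidence": ..., "probabilities": {...}}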
model_meta.json
ADDED

{
  "config": {
    "ZIP_PATH": "MVSA_Single.zip",
    "EXTRACT_DIR": "./mvsa_single",
    "OUTPUT_DIR": "./outputs",
    "BERT_MODEL": "bert-base-uncased",
    "MAX_TEXT_LEN": 128,
    "IMAGE_SIZE": 224,
    "HIDDEN_DIM": 512,
    "DROPOUT": 0.3,
    "NUM_CLASSES": 3,
    "BATCH_SIZE": 16,
    "NUM_EPOCHS": 20,
    "LR": 2e-05,
    "WEIGHT_DECAY": 0.0001,
    "PATIENCE": 4,
    "GRAD_CLIP": 1.0,
    "TEST_SIZE": 0.15,
    "VAL_SIZE": 0.15
  },
  "label_map": {
    "negative": 0,
    "neutral": 1,
    "positive": 2
  },
  "label_names": [
    "Negative",
    "Neutral",
    "Positive"
  ],
  "img_mean": [
    0.485,
    0.456,
    0.406
  ],
  "img_std": [
    0.229,
    0.224,
    0.225
  ],
  "test_acc": 0.6963064295485636,
  "test_f1": 0.6968211025395353
}
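inference.py imports a MultimodalSentimentModel class that is not part of this upload. A minimal sketch of what a class matching the config above could look like, with a BERT text encoder, a CNN image encoder, and a fusion head sized by HIDDEN_DIM, DROPOUT, and NUM_CLASSES; the actual architecture lives in the repo's model.py and may differ, and the ResNet-50 image branch here is an assumption:

import torch
import torch.nn as nn
from torchvision import models
from transformers import BertModel

class MultimodalSentimentModel(nn.Module):
    # Hypothetical reconstruction -- the real model.py may differ.
    def __init__(self, config):
        super().__init__()
        # Text branch: 768-dim pooled BERT features.
        self.bert = BertModel.from_pretrained(config["BERT_MODEL"])
        # Image branch (assumed): ResNet-50 minus its classifier, 2048-dim features.
        resnet = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
        self.cnn = nn.Sequential(*list(resnet.children())[:-1])
        # Fusion head driven by the config values saved in model_meta.json.
        self.fusion = nn.Sequential(
            nn.Linear(768 + 2048, config["HIDDEN_DIM"]),
            nn.ReLU(),
            nn.Dropout(config["DROPOUT"]),
            nn.Linear(config["HIDDEN_DIM"], config["NUM_CLASSES"]),
        )

    def forward(self, input_ids, attention_mask, image):
        text_feat = self.bert(input_ids=input_ids,
                              attention_mask=attention_mask).pooler_output
        img_feat = self.cnn(image).flatten(1)
        return self.fusion(torch.cat([text_feat, img_feat], dim=1))

Whatever the real class looks like, it must accept (input_ids, attention_mask, image) in forward and return raw logits, since predict() applies the softmax itself.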