TechyCode committed on
Commit
39cf95a
·
verified ·
1 Parent(s): f0cf5be

Upload 3 files

Browse files
Files changed (3) hide show
  1. best_model.pt +3 -0
  2. inference.py +51 -0
  3. model_meta.json +43 -0
best_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:015516272417da498a4797a4dcb97d006bdba009a2583d6739853968bcce434b
3
+ size 860265152
inference.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # inference.py —— drop this next to best_model.pt for your webapp
3
+ import re, json, torch, torch.nn.functional as F
4
+ from pathlib import Path
5
+ from PIL import Image
6
+ from torchvision import transforms
7
+ from transformers import BertTokenizer
8
+ # Import your model class here:
9
+ # from model import MultimodalSentimentModel
10
+
11
+ DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
12
+ META = json.load(open("deployment/model_meta.json"))
13
+ CONFIG = META["config"]
14
+
15
+ tokenizer = BertTokenizer.from_pretrained(CONFIG["BERT_MODEL"])
16
+
17
+ img_transform = transforms.Compose([
18
+ transforms.Resize((CONFIG["IMAGE_SIZE"], CONFIG["IMAGE_SIZE"])),
19
+ transforms.ToTensor(),
20
+ transforms.Normalize(META["img_mean"], META["img_std"]),
21
+ ])
22
+
23
def load_model():
    """Build the multimodal model and restore its best checkpoint.

    Returns the network on DEVICE in eval mode, ready for inference.
    NOTE(review): requires `MultimodalSentimentModel` to be imported
    (see the commented-out import at the top of this file).
    """
    net = MultimodalSentimentModel(CONFIG)
    net = net.to(DEVICE)
    checkpoint = torch.load("deployment/best_model.pt", map_location=DEVICE)
    net.load_state_dict(checkpoint["model_state"])
    net.eval()
    return net
29
+
30
def predict(model, text: str, image_path: str) -> dict:
    """Classify the sentiment of one (text, image) pair.

    Cleans the text the same way as during training (drop URLs and
    @mentions, keep hashtag words), tokenizes it, preprocesses the image,
    and runs the model. `model` is assumed to take
    (input_ids, attention_mask, image) and return class logits — confirm
    against the model definition.

    Returns a dict with the predicted label, its confidence, and the full
    per-class probability distribution.
    """
    # Text cleanup; fall back to a placeholder if nothing survives.
    cleaned = re.sub(r"http\S+", "", text)
    cleaned = re.sub(r"@\w+", "", cleaned)
    cleaned = re.sub(r"#(\w+)", r"\1", cleaned).strip()
    if not cleaned:
        cleaned = "no text"

    encoded = tokenizer(
        cleaned,
        max_length=CONFIG["MAX_TEXT_LEN"],
        padding="max_length",
        truncation=True,
        return_tensors="pt",
    )
    ids = encoded["input_ids"].to(DEVICE)
    mask = encoded["attention_mask"].to(DEVICE)

    # Image preprocessing; unsqueeze adds the batch dimension.
    pil_img = Image.open(image_path).convert("RGB")
    pixels = img_transform(pil_img).unsqueeze(0).to(DEVICE)

    with torch.no_grad():
        logits = model(ids, mask, pixels)
        probs = F.softmax(logits, dim=-1).cpu().numpy()[0]

    best = probs.argmax()
    result = {
        "label": META["label_names"][best],
        "confidence": float(probs[best]),
    }
    result["probabilities"] = dict(
        zip(META["label_names"], (float(p) for p in probs))
    )
    return result
model_meta.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config": {
3
+ "ZIP_PATH": "MVSA_Single.zip",
4
+ "EXTRACT_DIR": "./mvsa_single",
5
+ "OUTPUT_DIR": "./outputs",
6
+ "BERT_MODEL": "bert-base-uncased",
7
+ "MAX_TEXT_LEN": 128,
8
+ "IMAGE_SIZE": 224,
9
+ "HIDDEN_DIM": 512,
10
+ "DROPOUT": 0.3,
11
+ "NUM_CLASSES": 3,
12
+ "BATCH_SIZE": 16,
13
+ "NUM_EPOCHS": 20,
14
+ "LR": 2e-05,
15
+ "WEIGHT_DECAY": 0.0001,
16
+ "PATIENCE": 4,
17
+ "GRAD_CLIP": 1.0,
18
+ "TEST_SIZE": 0.15,
19
+ "VAL_SIZE": 0.15
20
+ },
21
+ "label_map": {
22
+ "negative": 0,
23
+ "neutral": 1,
24
+ "positive": 2
25
+ },
26
+ "label_names": [
27
+ "Negative",
28
+ "Neutral",
29
+ "Positive"
30
+ ],
31
+ "img_mean": [
32
+ 0.485,
33
+ 0.456,
34
+ 0.406
35
+ ],
36
+ "img_std": [
37
+ 0.229,
38
+ 0.224,
39
+ 0.225
40
+ ],
41
+ "test_acc": 0.6963064295485636,
42
+ "test_f1": 0.6968211025395353
43
+ }