Icarevic committed on
Commit
5bf80cd
·
verified ·
1 Parent(s): b3ab79e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -46
app.py CHANGED
@@ -8,16 +8,12 @@ from transformers import AutoTokenizer, AutoModelForSequenceClassification
8
 
9
  print("Pokrećem aplikaciju...")
10
 
11
- # --- Učitavanje SVM pipelinea ---
12
- print("Učitavam SVM pipeline...")
13
  svm_pipeline = joblib.load("svm_pipeline.pkl")
14
 
15
- # --- Učitavanje riječnika za CNN i GRU ---
16
- print("Učitavam riječnik...")
17
  with open("word2idx.json", "r", encoding="utf-8") as f:
18
  word2idx = json.load(f)
19
 
20
- # --- Definicija CNN modela ---
21
  class CNNModel(nn.Module):
22
  def __init__(self, vocab_size, embed_dim=300, num_classes=3, kernel_sizes=[3,4,5], num_filters=128):
23
  super(CNNModel, self).__init__()
@@ -27,7 +23,6 @@ class CNNModel(nn.Module):
27
  ])
28
  self.dropout = nn.Dropout(0.5)
29
  self.fc = nn.Linear(num_filters * len(kernel_sizes), num_classes)
30
-
31
  def forward(self, x):
32
  x = self.embedding(x).unsqueeze(1)
33
  convs = [F.relu(conv(x)).squeeze(3) for conv in self.convs]
@@ -36,124 +31,119 @@ class CNNModel(nn.Module):
36
  x = self.dropout(x)
37
  return self.fc(x)
38
 
39
- # --- Definicija GRU modela ---
40
  class GRUModel(nn.Module):
41
  def __init__(self, vocab_size, embed_dim=300, hidden_dim=256, num_layers=1, num_classes=3):
42
  super(GRUModel, self).__init__()
43
  self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
44
  self.gru = nn.GRU(embed_dim, hidden_dim, num_layers=num_layers, batch_first=True)
45
  self.fc = nn.Linear(hidden_dim, num_classes)
46
-
47
  def forward(self, x):
48
  x = self.embedding(x)
49
  _, h_n = self.gru(x)
50
  out = self.fc(h_n[-1])
51
  return out
52
 
53
- # --- Učitavanje CNN i GRU modela ---
54
  vocab_size = len(word2idx) + 1
55
  embed_dim = 300
56
  num_classes = 3
57
 
58
- print("Učitavam CNN model...")
59
  cnn_model = CNNModel(vocab_size, embed_dim, num_classes)
60
  cnn_model.load_state_dict(torch.load("cnn_model.pt", map_location=torch.device('cpu')))
61
  cnn_model.eval()
62
 
63
- print("Učitavam GRU model...")
64
  gru_model = GRUModel(vocab_size, embed_dim, hidden_dim=256, num_layers=1, num_classes=num_classes)
65
  gru_model.load_state_dict(torch.load("gru_model.pt", map_location=torch.device('cpu')))
66
  gru_model.eval()
67
 
68
- # --- Učitavanje BERTić modela i tokenizer ---
69
- print("Učitavam BERTić model i tokenizer...")
70
  bert_tokenizer = AutoTokenizer.from_pretrained("my_finetuned_model")
71
  bert_model = AutoModelForSequenceClassification.from_pretrained("my_finetuned_model")
72
  bert_model.eval()
73
 
74
- # --- Rječnik za mapiranje oznaka ---
75
  label_names = {0: 'pozitivno', 1: 'neutralno', 2: 'negativno'}
76
 
77
- # --- Pretvaranje teksta u indekse za CNN i GRU ---
78
  def text_to_indices(text, max_len=100):
79
  tokens = text.lower().split()
80
- print(f"Tokeni: {tokens}")
81
  indices = [word2idx.get(token, 0) for token in tokens]
82
- print(f"Indeksi: {indices}")
83
  if len(indices) < max_len:
84
  indices += [0] * (max_len - len(indices))
85
  else:
86
  indices = indices[:max_len]
87
  tensor = torch.tensor([indices], dtype=torch.long)
88
- print(f"Tensor shape: {tensor.shape}")
89
  return tensor
90
 
91
- # --- Funkcije za predikciju ---
92
-
93
  def predict_svm(text):
94
- print(f"Predikcija SVM za tekst: {text}")
95
  proba = svm_pipeline.predict_proba([text])[0]
96
  pred = svm_pipeline.classes_[proba.argmax()]
97
- print(f"SVM predikcija: {pred}, povjerenje: {proba.max():.2f}")
98
  return f"{label_names[pred]} (p={proba.max():.2f})"
99
 
100
  def predict_cnn(text):
101
- print(f"Predikcija CNN za tekst: {text}")
102
  with torch.no_grad():
103
  inputs = text_to_indices(text)
104
  outputs = cnn_model(inputs)
105
- print(f"CNN output: {outputs}")
106
  probs = F.softmax(outputs, dim=1)
107
  pred = torch.argmax(probs, dim=1).item()
108
  confidence = probs[0][pred].item()
109
- print(f"CNN predikcija: {pred}, povjerenje: {confidence:.2f}")
110
  return f"{label_names[pred]} (p={confidence:.2f})"
111
 
112
  def predict_gru(text):
113
- print(f"Predikcija GRU za tekst: {text}")
114
  with torch.no_grad():
115
  inputs = text_to_indices(text)
116
  outputs = gru_model(inputs)
117
- print(f"GRU output: {outputs}")
118
  probs = F.softmax(outputs, dim=1)
119
  pred = torch.argmax(probs, dim=1).item()
120
  confidence = probs[0][pred].item()
121
- print(f"GRU predikcija: {pred}, povjerenje: {confidence:.2f}")
122
  return f"{label_names[pred]} (p={confidence:.2f})"
123
 
124
  def predict_bert(text):
125
- print(f"Predikcija BERTić za tekst: {text}")
126
  inputs = bert_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
127
  with torch.no_grad():
128
  outputs = bert_model(**inputs)
129
- print(f"BERTić output logits: {outputs.logits}")
130
  probs = F.softmax(outputs.logits, dim=1)
131
  pred = torch.argmax(probs, dim=1).item()
132
  confidence = probs[0][pred].item()
133
- print(f"BERTić predikcija: {pred}, povjerenje: {confidence:.2f}")
134
  return f"{label_names[pred]} (p={confidence:.2f})"
135
 
136
- # --- Gradio sučelje ---
137
  def predict_all(text):
138
  return (
139
  predict_svm(text),
140
  predict_cnn(text),
141
  predict_gru(text),
142
- predict_bert(text)
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  )
144
 
145
- demo = gr.Interface(
146
- fn=predict_all,
147
- inputs=gr.Textbox(lines=3, placeholder="Upiši tekst za klasifikaciju..."),
148
- outputs=[
149
- gr.Textbox(label="SVM (RBF)"),
150
- gr.Textbox(label="CNN"),
151
- gr.Textbox(label="GRU"),
152
- gr.Textbox(label="BERTić")
153
- ],
154
- title="Demo klasifikacije teksta",
155
- description="Predikcije koriste SVM, CNN, GRU i BERTić modele."
156
- )
 
 
 
 
 
 
 
 
157
 
158
  if __name__ == "__main__":
159
- demo.launch(share=True, debug=True)
 
8
 
9
  print("Pokrećem aplikaciju...")
10
 
11
# --- Load the SVM pipeline and the vocabulary ---
# NOTE(review): joblib.load unpickles the file, so svm_pipeline.pkl must come
# from a trusted source.
svm_pipeline = joblib.load("svm_pipeline.pkl")

# word2idx maps a token string to the integer index fed to the CNN/GRU models.
with open("word2idx.json", "r", encoding="utf-8") as vocab_file:
    word2idx = json.load(vocab_file)
16
 
 
17
  class CNNModel(nn.Module):
18
  def __init__(self, vocab_size, embed_dim=300, num_classes=3, kernel_sizes=[3,4,5], num_filters=128):
19
  super(CNNModel, self).__init__()
 
23
  ])
24
  self.dropout = nn.Dropout(0.5)
25
  self.fc = nn.Linear(num_filters * len(kernel_sizes), num_classes)
 
26
  def forward(self, x):
27
  x = self.embedding(x).unsqueeze(1)
28
  convs = [F.relu(conv(x)).squeeze(3) for conv in self.convs]
 
31
  x = self.dropout(x)
32
  return self.fc(x)
33
 
 
34
  class GRUModel(nn.Module):
35
  def __init__(self, vocab_size, embed_dim=300, hidden_dim=256, num_layers=1, num_classes=3):
36
  super(GRUModel, self).__init__()
37
  self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
38
  self.gru = nn.GRU(embed_dim, hidden_dim, num_layers=num_layers, batch_first=True)
39
  self.fc = nn.Linear(hidden_dim, num_classes)
 
40
  def forward(self, x):
41
  x = self.embedding(x)
42
  _, h_n = self.gru(x)
43
  out = self.fc(h_n[-1])
44
  return out
45
 
 
46
  vocab_size = len(word2idx) + 1
47
  embed_dim = 300
48
  num_classes = 3
49
 
 
50
  cnn_model = CNNModel(vocab_size, embed_dim, num_classes)
51
  cnn_model.load_state_dict(torch.load("cnn_model.pt", map_location=torch.device('cpu')))
52
  cnn_model.eval()
53
 
 
54
  gru_model = GRUModel(vocab_size, embed_dim, hidden_dim=256, num_layers=1, num_classes=num_classes)
55
  gru_model.load_state_dict(torch.load("gru_model.pt", map_location=torch.device('cpu')))
56
  gru_model.eval()
57
 
 
 
58
  bert_tokenizer = AutoTokenizer.from_pretrained("my_finetuned_model")
59
  bert_model = AutoModelForSequenceClassification.from_pretrained("my_finetuned_model")
60
  bert_model.eval()
61
 
 
62
  label_names = {0: 'pozitivno', 1: 'neutralno', 2: 'negativno'}
63
 
 
64
  def text_to_indices(text, max_len=100):
65
  tokens = text.lower().split()
 
66
  indices = [word2idx.get(token, 0) for token in tokens]
 
67
  if len(indices) < max_len:
68
  indices += [0] * (max_len - len(indices))
69
  else:
70
  indices = indices[:max_len]
71
  tensor = torch.tensor([indices], dtype=torch.long)
 
72
  return tensor
73
 
 
 
74
  def predict_svm(text):
 
75
  proba = svm_pipeline.predict_proba([text])[0]
76
  pred = svm_pipeline.classes_[proba.argmax()]
 
77
  return f"{label_names[pred]} (p={proba.max():.2f})"
78
 
79
  def predict_cnn(text):
 
80
  with torch.no_grad():
81
  inputs = text_to_indices(text)
82
  outputs = cnn_model(inputs)
 
83
  probs = F.softmax(outputs, dim=1)
84
  pred = torch.argmax(probs, dim=1).item()
85
  confidence = probs[0][pred].item()
 
86
  return f"{label_names[pred]} (p={confidence:.2f})"
87
 
88
  def predict_gru(text):
 
89
  with torch.no_grad():
90
  inputs = text_to_indices(text)
91
  outputs = gru_model(inputs)
 
92
  probs = F.softmax(outputs, dim=1)
93
  pred = torch.argmax(probs, dim=1).item()
94
  confidence = probs[0][pred].item()
 
95
  return f"{label_names[pred]} (p={confidence:.2f})"
96
 
97
  def predict_bert(text):
 
98
  inputs = bert_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
99
  with torch.no_grad():
100
  outputs = bert_model(**inputs)
 
101
  probs = F.softmax(outputs.logits, dim=1)
102
  pred = torch.argmax(probs, dim=1).item()
103
  confidence = probs[0][pred].item()
 
104
  return f"{label_names[pred]} (p={confidence:.2f})"
105
 
 
106
  def predict_all(text):
107
  return (
108
  predict_svm(text),
109
  predict_cnn(text),
110
  predict_gru(text),
111
+ predict_bert(text),
112
+ )
113
+
114
def clear_all():
    """Return five empty strings: one for the input box and one per model output."""
    return ("",) * 5
116
+
117
with gr.Blocks() as demo:
    # Large, centred page title with a one-line description below it.
    gr.Markdown(
        """
        <h1 style="text-align: center; font-size: 48px; margin-bottom: 5px;">Analiza sentimenta</h1>
        <p style="text-align: center; font-size: 16px; margin-top: 0;">Predikcije koriste SVM, CNN, GRU i BERTić modele.</p>
        """,
        elem_id="naslov"
    )

    input_text = gr.Textbox(lines=3, label="Unesite rečenicu za analizu:")

    with gr.Row():
        submit_btn = gr.Button("Submit", variant="primary")
        clear_btn = gr.Button("Clear", variant="secondary")

    # One column per model family.
    with gr.Row():
        with gr.Column():
            gr.Markdown("### Machine Learning")
            svm_output = gr.Textbox(label="SVM (RBF)")
        with gr.Column():
            gr.Markdown("### Deep Learning")
            cnn_output = gr.Textbox(label="CNN")
            gru_output = gr.Textbox(label="GRU")
        with gr.Column():
            gr.Markdown("### Transformers")
            bert_output = gr.Textbox(label="BERTić")

    # Order must match the tuple returned by predict_all.
    prediction_outputs = [svm_output, cnn_output, gru_output, bert_output]

    submit_btn.click(fn=predict_all, inputs=input_text, outputs=prediction_outputs)
    # clear_all returns five empty strings: the input box plus the four outputs.
    clear_btn.click(fn=clear_all, inputs=None, outputs=[input_text, *prediction_outputs])

if __name__ == "__main__":
    demo.launch(share=True)