File size: 2,683 Bytes
5285b7f
 
 
 
42c019f
5285b7f
 
 
fe9a4a8
5285b7f
42c019f
86f28e8
 
42c019f
86f28e8
a7e030a
 
42c019f
 
 
5285b7f
568e03b
86f28e8
 
 
 
42c019f
86f28e8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d29b90a
42c019f
84d265e
42c019f
 
 
 
 
 
 
 
 
 
7725e91
42c019f
 
5285b7f
7725e91
42c019f
5285b7f
 
42c019f
568e03b
42c019f
9656b2d
7725e91
42c019f
5285b7f
 
 
42c019f
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import datasets
import numpy as np
import torch
import transformers
from config import epochs, batch_size, learning_rate, id2label
from model import tokenizer, multitask_model
from mtm import MultitaskTrainer, NLPDataCollator, DataLoaderWithTaskname
import pandas as pd
from datasets import Dataset, DatasetDict
from data_predict import convert_to_stsb_features,convert_to_features
import gradio as gr
from huggingface_hub import hf_hub_download,snapshot_download

# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Version 1 -  Croatian Document + Slovenian Document.
# Fetch the fine-tuned checkpoint from the Hugging Face Hub; the call returns
# the path of the downloaded file.
model_link = hf_hub_download(
    repo_id="FFZG-cleopatra/Croatian-Document-News-Sentiment-Classifier",
    filename="pytorch_model.bin",
)

# NOTE(security): torch.load unpickles the file, so the checkpoint must come
# from a trusted repository. Consider `weights_only=True` if the installed
# torch version supports it -- TODO confirm before enabling.
multitask_model.load_state_dict(torch.load(model_link, map_location=device))
multitask_model.to(device)
# This script only serves predictions: switch off train-time behaviour such as
# dropout so the same input always yields the same output.
multitask_model.eval()

def predict_sentiment(sentence: str = "Volim ti"):
    """Classify the sentiment of a single piece of text.

    The text is wrapped in a one-row ``"test"`` split, turned into model
    features by ``convert_to_features`` and scored with the ``"document"``
    head of ``multitask_model``.

    Args:
        sentence: Text to classify.

    Returns:
        The ``id2label`` entry for the highest-scoring class of the first
        example (a single input produces a single example).
    """
    # Datasets are handed to convert_to_features keyed by task name; only the
    # "test" split is populated because nothing is trained here.
    dataset_dict = {
        "document": DatasetDict(
            {"test": Dataset.from_dict({"content": [sentence]})}
        ),
    }

    # Debug output: echo the raw example for each task.
    for task_name, dataset in dataset_dict.items():
        print(task_name)
        print(dataset["test"][0])
        print()

    convert_func_dict = {
        "document": convert_to_stsb_features,
    }
    features_dict = convert_to_features(dataset_dict, convert_func_dict)

    # The task head is the same for every example, so look it up once.
    task_model = multitask_model.get_model("document")
    # Inference only: make sure dropout and similar train-time layers are off.
    task_model.eval()

    predictions = []
    # No gradients are needed for prediction; skipping the autograd graph
    # saves memory and time.
    with torch.no_grad():
        for example in features_dict["document"]["test"]:
            inputs = {name: tensor.to(device) for name, tensor in example.items()}

            # unsqueeze(..., 0) adds a leading dimension (assumed to be the
            # batch axis the model expects -- TODO confirm). Calling the
            # module instead of .forward() lets registered hooks run.
            classifier_output = task_model(
                torch.unsqueeze(inputs["input_ids"], 0),
                torch.unsqueeze(inputs["attention_mask"], 0),
            )

            print(tokenizer.decode(inputs["input_ids"], skip_special_tokens=True))
            print("logits:", classifier_output.logits)
            prediction = torch.max(classifier_output.logits, dim=1)
            predictions.append(prediction.indices.item())

    print("predictions:", predictions[0], id2label[predictions[0]])
    return id2label[predictions[0]]


# Gradio front end: a single text input whose value is passed to
# predict_sentiment, with the returned value shown as a label.
interface = gr.Interface(
    predict_sentiment,
    "text",
    ["label"],
    description="Get the positive/neutral/negative sentiment for the given input.",
    title="Croatian News Sentiment Analysis 1.0",
)

# Start the app as soon as this module is executed.
interface.launch(inline=False)