# -*- coding: utf-8 -*-
"""final_gradio.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1Laxh069wv-cX4NqcOnNB2AyEDW0gmvbL
"""

from google.colab import drive
drive.mount('/content/drive')

!pip install gradio
!pip install transformers==3.0.2
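# NOTE: transformers is pinned to the old 3.0.2 release, so the tokenizer call
# below uses the pad_to_max_length argument from that API. The gr.inputs /
# gr.outputs namespaces likewise belong to the Gradio version available when
# this notebook was written.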
import pandas as pd
import torch
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader
from transformers import RobertaModel, RobertaTokenizer
import logging
logging.basicConfig(level=logging.ERROR)

from torch import cuda
device = 'cuda' if cuda.is_available() else 'cpu'
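
# RobertaClass: roberta-base encoder with a small classification head. The
# hidden state of the first token is passed through a linear layer, ReLU and
# dropout, then a final linear layer that produces 5 logits.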

class RobertaClass(torch.nn.Module):
    def __init__(self):
        super(RobertaClass, self).__init__()
        self.l1 = RobertaModel.from_pretrained("roberta-base")
        self.pre_classifier = torch.nn.Linear(768, 768)
        self.dropout = torch.nn.Dropout(0.3)
        self.classifier = torch.nn.Linear(768, 5)

    def forward(self, input_ids, attention_mask, token_type_ids):
        output_1 = self.l1(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
        hidden_state = output_1[0]
        pooler = hidden_state[:, 0]
        pooler = self.pre_classifier(pooler)
        pooler = torch.nn.ReLU()(pooler)
        pooler = self.dropout(pooler)
        output = self.classifier(pooler)
        return output
model = RobertaClass()
model.to(device)
# Load the fine-tuned weights from Google Drive and switch to inference mode.
model.load_state_dict(torch.load("/content/drive/MyDrive/avicenna_model.pt", map_location=device))
model.eval()


class SyllogismData(Dataset):
    """Wraps a dataframe of premises and labels and tokenizes each example for RoBERTa."""

    def __init__(self, dataframe, tokenizer, max_len):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.text = dataframe.Premises
        self.targets = self.data.label
        self.max_len = max_len

    def __len__(self):
        return len(self.text)

    def __getitem__(self, index):
        text = str(self.text[index])
        text = " ".join(text.split())

        inputs = self.tokenizer.encode_plus(
            text,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            pad_to_max_length=True,
            return_token_type_ids=True
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs["token_type_ids"]


        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'targets': torch.tensor(self.targets[index], dtype=torch.float)
        }

MAX_LEN = 256
TRAIN_BATCH_SIZE = 8     # training-time setting, kept for reference; unused at inference
VALID_BATCH_SIZE = 4
LEARNING_RATE = 1e-05    # training-time setting, kept for reference; unused at inference
tokenizer = RobertaTokenizer.from_pretrained('roberta-base', truncation=True, do_lower_case=True)

def avicenna(premises):
    """Run the fine-tuned model on a single premises string and return the predicted class index."""
    # Wrap the single example in a dataframe; the label column is a dummy value
    # required by SyllogismData but never used at inference time.
    test_df = pd.DataFrame([[premises, 10]], columns=['Premises', 'label'])
    testing_set = SyllogismData(test_df, tokenizer, MAX_LEN)
    test_params = {'batch_size': VALID_BATCH_SIZE,
                   'shuffle': False,
                   'num_workers': 0
                   }
    testing_loader = DataLoader(testing_set, **test_params)
    model.eval()
    big_idx = None
    with torch.no_grad():
        for _, data in tqdm(enumerate(testing_loader, 0)):
            ids = data['ids'].to(device, dtype=torch.long)
            mask = data['mask'].to(device, dtype=torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype=torch.long)
            outputs = model(ids, mask, token_type_ids).squeeze()
            # With a single example the squeezed output is a vector of logits;
            # the prediction is the index of the largest one.
            big_val, big_idx = torch.max(outputs.data, dim=0)
    return big_idx.item()

import gradio as gr
iface = gr.Interface(
   fn = avicenna,
   title="Syllogistic NLI",
   description="Select a pair of sentences and see whether the Avicenna-trained model can gauge the relation correctly",
   inputs = gr.inputs.Dropdown(["All humans are mortal. Socrates is a human.","Avicenna wrote the famous book the Canon of Medicine. The Canon of Medicine has influenced modern medicine","Police found signs of active bleeding before death around the corpse. A large volume of blood released from a body may indicate that the individual has died of exsanguination.","During the first trimester of pregnancy, the body undergoes hormonal fluctuations. Hormonal changes regularly are followed by extreme tiredness.","Influenza spreads the virus to the lungs. Garlic is useful to cure all infections.","With a single currency, there will no longer be a cost involved in changing currencies. It was no longer cost-effective for the government to convert metals into coins.","Pain and tension around your head and neck are known as tension headaches.	Tension headaches are dull pain, tightness, or pressure around your head and neck.","Heavy rain can cause flooding.	Many different health conditions can cause heavy breathing.","Eating foods that are in the Mediterranean diet helps with healthy weight loss and metabolism.	Ana knows how to eat healthy and lose weight with the Mediterranean diet."], label="Select an example"),

   outputs = gr.outputs.Textbox(label="Syllogistic relation")

   )
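# debug=True keeps the cell running and surfaces errors in the notebook output;
# enable_queue=True queues incoming requests so slower inference calls are not
# dropped (flag names from the Gradio release this notebook targets).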
iface.launch(debug=True, enable_queue=True)
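
# Note: avicenna() returns the raw argmax index from the 5-way classifier, so the
# Textbox displays a bare number. Mapping it back to a named syllogistic relation
# would require the label encoding used during training, which is not shown here.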