File size: 2,231 Bytes
66d5fde
3d5ffd7
66d5fde
 
f96add9
51f3659
9e2c455
66d5fde
 
 
 
df8637c
66d5fde
51f3659
685440f
51f3659
 
bf3f8d0
 
 
 
66d5fde
 
de013f3
66d5fde
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fb7ebff
 
66d5fde
 
 
de013f3
8c321da
685440f
 
7b98d00
5d1990a
fb7ebff
3711e26
66d5fde
7b98d00
66d5fde
 
 
 
 
 
3d5ffd7
66d5fde
 
 
 
 
 
79eead8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72

import gradio as gr
import os
import transformers
from transformers import AutoModel, AutoTokenizer
import numpy as np
import torch

# Not referenced by the code visible in this file; kept so the module-level
# name stays available.
model_names = ['python']

# Not referenced by the code visible in this file; kept for compatibility.
models = {}

# return_dict=False makes the forward pass return a plain tuple, which
# predict() indexes positionally.
# NOTE(review): AutoModel loads the architecture without a task-specific head,
# while predict() applies a sigmoid to outputs[0] and maps the result onto the
# labels in target_cols. Confirm this checkpoint/loader really yields one
# score per label (a sequence-classification loader may be what is intended).
model = AutoModel.from_pretrained('ZarahShibli/tmp_trainer', return_dict=False)

# Use the GPU when one is available.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("device", device)

# predict() moves its input tensors to `device`; the weights have to live on
# the same device or the forward pass fails with a device mismatch.
model.to(device)

# Every input is truncated/padded to this many tokens by predict().
MAX_LEN = 200

# NOTE(review): return_tensors is presumably ignored at load time; predict()
# passes it explicitly on each tokenizer call. Confirm before removing.
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased', return_tensors='pt')

# Label names; predict() uses the position of each score as an index into
# this list, so the order presumably has to match the model's output order.
target_cols = ['DevelopmentNotes', 'Expand', 'Parameters', 'Summary', 'Usage']


def predict(comment_sentence):
    """Return the categories predicted for a single code comment.

    The comment is tokenized, truncated/padded to ``MAX_LEN`` tokens and run
    through the module-level ``model``. A sigmoid is applied to the first
    element of the model output, and every label in ``target_cols`` whose
    score reaches the 0.25 threshold is returned.

    Args:
        comment_sentence: The comment text to classify.

    Returns:
        A list of names taken from ``target_cols`` (possibly empty), in
        ``target_cols`` order.

    Raises:
        ValueError: If the model does not produce exactly one score per entry
            of ``target_cols`` for the single input sentence.
    """
    threshold = 0.25

    # Inference only: disable dropout and other training-time behaviour.
    model.eval()

    encoded = tokenizer(
        comment_sentence,
        truncation=True,
        add_special_tokens=True,
        max_length=MAX_LEN,
        padding='max_length',
        return_tensors='pt',
    )

    # Send the inputs to wherever the weights actually live, so the forward
    # pass works whether or not the model was moved to a GPU.
    model_device = next(model.parameters()).device
    ids = encoded['input_ids'].to(model_device, dtype=torch.long)
    # The input is padded to MAX_LEN; the mask keeps the padding positions
    # out of attention so they do not influence the prediction.
    mask = encoded['attention_mask'].to(model_device, dtype=torch.long)

    with torch.no_grad():
        outputs = model(ids, attention_mask=mask)

    # outputs[0] is treated as the per-label logits. The tensor must be on
    # the CPU before it can be converted to a NumPy array.
    scores = torch.sigmoid(outputs[0]).cpu().numpy()

    # Exactly one sentence goes in, so exactly one row of label scores must
    # come out. Anything else means the loaded model is not producing
    # per-label logits, and indexing target_cols with it would be wrong.
    expected_shape = (1, len(target_cols))
    if scores.shape != expected_shape:
        raise ValueError(
            f"model output has shape {scores.shape}, expected "
            f"{expected_shape}: one score per entry in target_cols"
        )

    # Positions of the scores at or above the threshold, mapped to names.
    selected = np.flatnonzero(scores[0] >= threshold)
    return [target_cols[idx] for idx in selected]
    
# Gradio UI: one free-text input box; predict()'s return value is shown as text.
iface = gr.Interface(
    fn=predict,
    inputs="text",
    outputs="text",
)
iface.launch()