File size: 1,825 Bytes
8e4e842
dc42939
6ac1f1c
e42ea93
 
24a6037
dc42939
 
 
 
 
 
 
 
 
 
8e4e842
6ac1f1c
 
 
 
 
 
 
b60d96b
afa760b
6ac1f1c
 
 
 
 
 
6e534aa
6ac1f1c
24a6037
 
 
 
 
 
 
 
b60d96b
e4ecba7
7fe46a2
24a6037
b60d96b
6ac1f1c
 
 
 
 
 
 
 
b60d96b
 
6ac1f1c
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import streamlit as st
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import torch.nn.functional as F
import torch
import numpy as np

# Load the "sample" configuration of the HUPD dataset from the Hugging Face Hub.
# The date keyword arguments are forwarded to the dataset's loader; presumably
# they bound the filing dates of the train and validation splits.
# NOTE(review): `data_files` points at a `/blob/` page URL rather than a
# `/resolve/` download URL -- confirm the loader actually uses this value.
dataset_dict = load_dataset(
    "HUPD/hupd",
    name="sample",
    data_files="https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
    icpr_label=None,
    train_filing_start_date="2016-01-01",
    train_filing_end_date="2016-01-21",
    val_filing_start_date="2016-01-22",
    val_filing_end_date="2016-01-31",
)

st.title("Patentability Score")

# Fixed-seed shuffle so the same 20 training examples are offered on every
# script rerun.
dataset = dataset_dict["train"].shuffle(seed=42).select(range(20))

# Read each column exactly once instead of re-reading whole columns for every
# row index, then build the lookup tables keyed by patent number.
# NOTE(review): rows that share a patent_number overwrite each other here --
# confirm patent_number is populated and unique for every sampled row.
patent_numbers = dataset["patent_number"]
abstracts = dict(zip(patent_numbers, dataset["abstract"]))
claims = dict(zip(patent_numbers, dataset["claims"]))


def get_score(abstract):
    """Return the predicted probability that a patent abstract is accepted.

    The abstract is tokenized (truncated and padded to the tokenizer's maximum
    length), passed through the ``arianasutanto/finetuned-distilbert``
    sequence-classification checkpoint without gradient tracking, and the
    logits are converted to probabilities with a softmax over the label axis.

    Args:
        abstract: The abstract text to score.

    Returns:
        The softmax probability of label 1 (patent accepted) as a NumPy
        scalar.
    """
    checkpoint = "arianasutanto/finetuned-distilbert"

    # NOTE: the model and tokenizer are loaded on every call.
    model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    # Padding is a tokenization-time option and is already requested in the
    # tokenizer call below, so it is not passed to from_pretrained.
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)

    inputs = tokenizer(abstract, padding='max_length', truncation=True, return_tensors='pt')

    with torch.no_grad():
        logits = model(**inputs).logits

    # Probabilities for labels 0 and 1; row 0 is the single input abstract.
    probabilities = F.softmax(logits, dim=1).numpy()

    return probabilities[0][1]




# Let the user pick one of the sampled patents by its number.
patent_num = st.selectbox("Choose a patent number", options=list(abstracts))

# Render the text areas unconditionally. st.button only returns True on the
# single rerun triggered by the click, so widgets created inside the `if`
# would disappear (taking any edits with them) as soon as the user touched
# them, and edited text could never be scored.
abstract = st.text_area(label="Abstract", value=abstracts[patent_num])
claim = st.text_area(label="Claims", value=claims[patent_num])

# Score whatever is currently in the abstract box when Submit is pressed.
if st.button("Submit"):
    patentability = get_score(abstract)
    st.write(f"The patentability score is: {patentability}")