File size: 2,304 Bytes
de2af77
 
3899bc2
06f56e7
29b2491
 
50112ef
29b2491
50112ef
 
 
 
 
 
 
 
 
 
 
fbb715a
90cdb3b
 
6b98038
36b05e8
0e3faaa
d2b5211
 
 
38c5010
d2b5211
3899bc2
d2b5211
3899bc2
29b2491
 
 
 
ac50fe3
9549aab
ac50fe3
9549aab
 
 
a9b1a7f
5d4ca87
9549aab
60777d6
 
 
 
 
29b2491
63f5e17
d2b5211
175905a
926cfaf
8069c5d
06cbf78
cb95638
 
20c4a0a
175905a
 
47d0c0c
cb95638
028c6c5
16cc011
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import streamlit as st
from datasets import load_dataset
from transformers import pipeline
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
from datasets import load_dataset

# Options passed to `load_dataset` as keyword arguments: the 'sample'
# configuration with a January 2016 training window and a late-January 2017
# validation window.
# NOTE(review): `data_files` points at a `/blob/` page URL; confirm the HUPD
# loader really needs it in this form.
_HUPD_LOAD_OPTIONS = {
    "name": "sample",
    "data_files": "https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
    "icpr_label": None,
    "train_filing_start_date": "2016-01-01",
    "train_filing_end_date": "2016-01-31",
    "val_filing_start_date": "2017-01-22",
    "val_filing_end_date": "2017-01-31",
}

# NOTE(review): this runs at module level, so it is repeated every time the
# script is executed; consider caching it if the deployed Streamlit version
# offers a suitable cache decorator.
dataset_dict = load_dataset("HUPD/hupd", **_HUPD_LOAD_OPTIONS)

# Only these columns of the training split are used by the app.
_APP_COLUMNS = ["patent_number", "decision", "abstract", "claims", "filing_date"]

df = pd.DataFrame(
    pd.DataFrame.from_dict(dataset_dict["train"]),
    columns=_APP_COLUMNS,
)

# Distinct patent numbers, used as the options of the selection widget.
PAN = df["patent_number"].drop_duplicates()

st.title('Harvard USPTO Patentability Score')

# The selection and the submit button live in one form; the score is computed
# and written inside the form only after the button is pressed.
with st.form("patent-form"):
    make_choice = st.selectbox('Select the Patent Application Number:', PAN)
    submitted = st.form_submit_button(label='submit')

    if submitted:
        # NOTE(review): the model and tokenizer are re-created on every
        # submission; consider caching them if the deployed Streamlit version
        # provides a resource cache.
        model_name = "distilbert-base-uncased-finetuned-sst-2-english"
        model = AutoModelForSequenceClassification.from_pretrained(model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)

        # NOTE(review): the classifier is fed the recorded `decision` text of
        # the selected rows, not the abstract -- confirm this is intended.
        decision = df['decision'].loc[df['patent_number'] == make_choice]

        if decision.empty:
            # Without this guard the repr of an empty Series would be scored.
            st.warning("No decision is available for the selected patent.")
        else:
            # index=False keeps the DataFrame row labels out of the model input.
            model_input = decision.to_string(index=False)
            results = classifier(model_input, truncation=True)

            for result in results:
                # The pipeline reports the confidence of the winning label.
                # Convert it to the probability of the POSITIVE class so that
                # a confident NEGATIVE prediction yields a low score rather
                # than a high one.
                score = result['score']
                if str(result['label']).upper() != 'POSITIVE':
                    score = 1.0 - score
                st.write("The Patentability Score is:", score)

# Kept so that any pandas object still rendered through its string form is not
# truncated; the sections below no longer depend on it.
pd.options.display.max_colwidth = 100000


def _selected_text(column: str):
    """Return the text of `column` for the selected patent.

    Looks up the rows of `df` whose `patent_number` equals `make_choice` and
    returns the first non-missing value as a string, or None when no such
    value exists.

    NOTE(review): if several rows share one patent number only the first is
    shown -- confirm `patent_number` is the right key for this lookup.
    """
    matches = df[column].loc[df["patent_number"] == make_choice].dropna()
    if matches.empty:
        return None
    return str(matches.iloc[0])


def _show_text(text):
    """Render `text` in an info box, or a warning when there is nothing to show."""
    if text is None:
        st.warning("No text is available for the selected patent.")
    else:
        # Pass the plain string: handing st.info a Series would display its
        # pandas repr (row label and dtype footer) instead of the text.
        st.info(text)


abstract = _selected_text("abstract")
st.subheader(':red[Patent Application]')
st.subheader(':red[Abstract:]')
_show_text(abstract)


claims = _selected_text("claims")
st.subheader(':red[Claim:]')
_show_text(claims)