File size: 2,544 Bytes
de2af77 3899bc2 06f56e7 29b2491 50112ef 29b2491 50112ef fbb715a 90cdb3b 6b98038 36b05e8 0e3faaa d2b5211 38c5010 d2b5211 3899bc2 d2b5211 3899bc2 29b2491 39ae039 a9b1a7f 5d4ca87 29b2491 60777d6 29b2491 63f5e17 d2b5211 175905a 926cfaf 8069c5d 06cbf78 cb95638 20c4a0a 175905a 47d0c0c cb95638 028c6c5 2d662a2 d2b5211 16cc011 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
import streamlit as st
from datasets import load_dataset
from transformers import pipeline
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
from datasets import load_dataset
# Load the "sample" configuration of the HUPD dataset. The filing-date
# arguments bound the train and validation splits that the loader builds.
dataset_dict = load_dataset(
    'HUPD/hupd',
    name='sample',
    data_files="https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
    icpr_label=None,
    train_filing_start_date='2016-01-01',
    train_filing_end_date='2016-01-31',
    val_filing_start_date='2017-01-22',
    val_filing_end_date='2017-01-31',
)

# Only the training split is used by the app; keep just the columns it reads.
train_frame = pd.DataFrame.from_dict(dataset_dict["train"])
wanted_columns = ['patent_number', 'decision', 'abstract', 'claims', 'filing_date']
df = train_frame.reindex(columns=wanted_columns)

# Distinct identifiers offered in the selection widget.
PAN = df['patent_number'].drop_duplicates()
def _matching_text(frame, column, patent_number):
    """Return the text held in *column* for the rows matching *patent_number*.

    Missing values are skipped and several matching rows are joined with a
    blank line, so the result is always a plain string (empty when nothing
    matches) without any pandas index, name, or dtype decoration.
    """
    matches = frame.loc[frame['patent_number'] == patent_number, column]
    return "\n\n".join(matches.dropna().astype(str))


st.title('Harvard USPTO Patentability Score')

# Group the selector and its button in a form so the choice is only
# applied when the user presses "submit".
with st.form("patent-form"):
    make_choice = st.selectbox('Select the Patent Application Number:', PAN)
    submitted = st.form_submit_button(label='submit')

# Look the texts up once; both the classifier and the display use them.
abstract = _matching_text(df, 'abstract', make_choice)
claims = _matching_text(df, 'claims', make_choice)

if submitted:
    if abstract:
        # NOTE(review): this is a generic SST-2 sentiment checkpoint, and the
        # pipeline's `score` is the confidence of whichever label it
        # predicted -- confirm this is the intended "patentability" signal.
        # NOTE(review): the model is re-loaded on every submit; consider
        # caching it if the installed Streamlit version supports that.
        model_name = "distilbert-base-uncased-finetuned-sst-2-english"
        model = AutoModelForSequenceClassification.from_pretrained(model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)

        # Pass the raw abstract text (not a rendered Series, which would
        # prepend the row index); truncation keeps long abstracts within the
        # model's maximum input length.
        results = classifier(abstract, truncation=True)
        for result in results:
            st.write("The Patentability Score is:", result['score'])
    else:
        st.warning("No abstract is available for the selected application.")

# Kept from the original script so that any pandas object rendered elsewhere
# is not cut off at the default column width.
pd.options.display.max_colwidth = 100000

st.subheader(':red[Patent Application]')
st.subheader(':red[Abstract:]')
st.info(abstract)

st.subheader(':red[Claim:]')
st.info(claims)
|